spy_parser.py 2.73 KB
Newer Older
Edvard Rejthar's avatar
Edvard Rejthar committed
1
import logging
2
import os.path
Edvard Rejthar's avatar
Edvard Rejthar committed
3 4
from html import escape
from os import listdir
5
from os.path import isfile, join
6

Edvard Rejthar's avatar
Edvard Rejthar committed
7 8 9
from ..domains import url2domain, url2path

logger = logging.getLogger("mdmaug")
10

11 12 13 14

class SpyParser:
    """Enrich the crawl results (the ``crawl`` object) with the spy logs of dangerous commands."""

    def __init__(self, crawl):
        """Attach every ``.spy`` file found in ``crawl.cache_dir`` to the URL it belongs to.

        The first line of each spy file holds the URL of the inspected resource.
        The file path is appended to
        ``crawl[url2domain(url)].urls[url2path(url)].spyfiles``.
        Files whose first line is empty are skipped with a debug message.

        :param crawl: crawl result object; ``cache_dir`` and ``profile`` are read
            from it and it is indexed by domain.
        """
        for name in listdir(crawl.cache_dir):
            # Build the path with join() so it is the very same path that is
            # checked by isfile(), whether or not cache_dir ends with a separator.
            path = join(crawl.cache_dir, name)
            if os.path.splitext(name)[1] != ".spy" or not isfile(path):
                continue

            with open(path, 'r') as f:
                # The first line contains the URL of the file (the trailing newline is not wanted).
                url = f.readline().rstrip()

            if not url:
                # The header is missing; the browser writing the file was
                # probably killed. Skip this file.
                logger.debug(f"({crawl.profile}) no contents fetched/found")
                continue

            crawl[url2domain(url)].urls[url2path(url)].spyfiles.append(path)

    @staticmethod
    def get_short(spyfile):
        """Return a shortened HTML summary of the commands logged in one ``.spy`` file.

        A single spy file may contain several logged JS calls. The first line of
        the file is metadata and is ignored; the remaining text is split into
        events on three consecutive newlines. Each event is expected to look
        like ``"<command>: <parameters>"``.

        Shortening happens in two ways:

        * parameters longer than 100 characters are cut to their first 100,
        * processing stops once the 10th non-empty event is reached, so at most
          nine events are rendered (malformed events count towards that limit).

        :param spyfile: path of the ``.spy`` file to read.
        :return: tuple ``(html, shortened)``: ``html`` is the rendered events
            joined by ``"\\n,<br />"`` and ``shortened`` is True if anything
            was cut or left out.
        """
        shorten = False
        with open(spyfile, "r") as f:
            f.readline()  # the first line is metadata
            events = f.read().split("\n\n\n")

        result = []
        event_count = 0
        for event in events:
            if not event:
                continue  # the last event is an empty string
            event_count += 1
            if event_count >= 10:
                shorten = True
                break
            try:
                command, parameters = event.split(": ", 1)
            except ValueError:
                # No ": " separator, the event cannot be parsed; leave it out.
                logger.debug("Spy - valuer error. Nepouziju spy.")
                logger.debug(event)
                logger.debug(event.split(": ", 1))
                continue

            if len(parameters) > 100:  # this command is longer than 100 characters, cut it
                shorten = True
                parameters = parameters[:100]
            # Both parts come from the spy file, so both are escaped before
            # being placed into the HTML markup.
            result.append("<b>{}</b> {}".format(escape(command), escape(parameters)))

        return "\n,<br />".join(result), shorten