api.py 6.35 KB
Newer Older
1 2
import json
import logging
3
import subprocess
4
from glob import escape
5

Edvard Rejthar's avatar
Edvard Rejthar committed
6
from flask import request
7 8
from peewee import IntegrityError

Edvard Rejthar's avatar
Edvard Rejthar committed
9
from mdmaug.lib.domains import domain2dir
10
from mdmaug.lib.model.crawl import Crawl
11 12
from .scan_controller import ScanController
from ..config import Config
Edvard Rejthar's avatar
Edvard Rejthar committed
13
from ..model.dbp import Encounter, Whitelist, Status
14
from ..parser.traffic_log_parser import TrafficLogParser
15
from ...templates.crawl_view import CrawlView
16

Edvard Rejthar's avatar
Edvard Rejthar committed
17
logger = logging.getLogger("mdmaug")  # shared package logger; handlers are configured elsewhere in the app
18

19

20
class Api:
21

Edvard Rejthar's avatar
Edvard Rejthar committed
22
    def run(self, params):
23
        """ Accept command
Edvard Rejthar's avatar
Edvard Rejthar committed
24
        :type params: dict from URL request. /api/analyze=cache/http://example.com → {"api": True, "analyze": cache, "page": "http://example.com"}
25
        """
26

27 28
        crawl = None

Edvard Rejthar's avatar
Edvard Rejthar committed
29
        if "analyze" in params:
30
            map_ = {"fresh": None, "cached": 1, "weekcache": 7, "oldcache": True, True: None}
Edvard Rejthar's avatar
Edvard Rejthar committed
31 32
            if params["analyze"] in map_:
                days = map_[params["analyze"]]
33
            else:
Edvard Rejthar's avatar
Edvard Rejthar committed
34 35 36 37 38 39 40 41
                days = int(params["analyze"])
            crawl = ScanController().launch(params["page"], days, params.get("autoprune") in ["y", "1", True],
                                            params.get("creation_spree") in ["y", "1", True])
        elif "aggregate" in params:
            crawl = self.aggregate(params)
        elif "scan" in params:
            if "date" not in params:
                params["date"] = ""
Edvard Rejthar's avatar
Edvard Rejthar committed
42
            crawl = ScanController().get_scan(domain2dir(escape(params["scan"])), scan=escape(params["date"]))
Edvard Rejthar's avatar
Edvard Rejthar committed
43
        elif "prune" in params:
44
            return ScanController.prune()
Edvard Rejthar's avatar
Edvard Rejthar committed
45 46 47 48 49
        elif "nicify" in params:
            return TrafficLogParser.getStylesheet() + TrafficLogParser.nicify_file(params["page"])
        elif "vote" in params:  # /api/vote/block/example.org/10.0.0.1
            return Encounter.vote(params["vote"], params["page"])
        elif "reset" in params:
50
            self.reset()
51
            return "reset"
52
        else:
Edvard Rejthar's avatar
Edvard Rejthar committed
53
            return "Unknown API method."
54 55

        if crawl:
56 57
            if type(crawl) is str:
                return crawl  # containing an error message
Edvard Rejthar's avatar
Edvard Rejthar committed
58
            elif params["api"] == "json":
59 60 61
                return CrawlView.output_json(crawl)
            else:
                return CrawlView.output_html(crawl)
Edvard Rejthar's avatar
Edvard Rejthar committed
62 63
        else:
            return "Unspecified error :("
64

65
    @staticmethod
    def aggregate(params):
        """Merge stored scans into a single Crawl.

        :param params: dict from URL request; uses "from"/"to" (integer timestamp
            bounds, applied only for the "unordered" ordering), "order"
            ("origins" — default — or "unordered") and "paging" (default page size
            when the request carries no explicit limit).
        :return: the merged Crawl, or an error string when the ordering is
            unknown or nothing matched.
        """
        date_from = int(params["from"])
        date_to = int(params["to"])
        crawl = Crawl()
        scan_count = set()
        domain_count = set()

        v = params.get("order", "origins")
        if v == "origins":
            # XX ignores aggregation dates, @see Encounter.by_origin_count
            limit = request.args.get('limit', default=params["paging"], type=int)
            offset = request.args.get('offset', default=0, type=int)

            domains, hosts, total = Encounter.by_origin_count(limit, offset, params)
            for domain, host in zip(domains, hosts):
                if host in crawl:  # we already have a domain that connected to the host
                    continue
                domain = domain2dir(domain)
                # Take the first stored scan of this domain that saw the host.
                for scan in ScanController.get_domain_scans(domain):
                    c = ScanController.get_scan(domain, scan)
                    if host in c:
                        crawl += c
                        scan_count.add("/".join([domain, scan]))
                        domain_count.add(domain)
                        break

            # filter out all the hosts that were in scans but we don't want to see them now (paging)
            for domain in list(crawl.keys()):
                if domain not in hosts:
                    del crawl[domain]

            # re-sort the host by count (we might have lost sorting if a scan reported two from the hosts -> random order)
            c2_ordered = Crawl()
            for host in hosts:
                if host in crawl:
                    c2_ordered[host] = crawl[host]
            crawl = c2_ordered
            crawl.paging = limit, offset, total

        elif v == "unordered":
            # scans everything
            for domain, scan in ScanController.get_all_scans():
                if date_from < int(scan) < date_to:
                    scan_count.add("/".join([domain, scan]))
                    domain_count.add(domain)
                    crawl += Crawl.load_from_scan(domain, scan)

        else:
            return "Unknown ordering. (NOT YET IMPLEMENTED for IPS and date seen)"

        crawl.title = f"Merged {len(scan_count)} scans from {len(domain_count)} domains"
        if not crawl:
            crawl = "No results with these conditions."  # grammar fix of the user-facing message
        return crawl

121
    @staticmethod
    def reset():
        """Clear the scan queue and terminate stuck helper processes."""
        logger.debug("resetting running browsers")
        # Overwrite the queue file with an empty JSON object so no stale jobs survive.
        with open(Config.config_file, 'w') as queue_file:
            json.dump({}, queue_file)
        # Kill frozen browsers first, then leftover virtual framebuffers
        # (once, many Xvfb instances got open).
        for process_name in (Config.browser, "Xvfb"):
            subprocess.call(["pkill", process_name])
128

129
    # Adds a second-level domain to the whitelist.
    def whitelist(self):
        """Whitelist the current origin domain.

        NOTE(review): not yet implemented — the method logs and returns early.
        The code after ``return`` is an intentionally unreachable draft of the
        intended peewee-based implementation; it presumably relies on
        ``self.origin_domain`` being set elsewhere — confirm before enabling.
        """
        logger.debug("whitelisting")
        # Db.cur = Db.connection.cursor()
        # self._logger.debug(Db.cur.execute("""REPLACE INTO whitelist set domain = %s""", (self.origin_domain, )))
        # Db.connection.commit()
        # Db.cur.close()
        return  # not yet implemented
        # Unreachable draft kept as reference for the future implementation:
        try:
            Whitelist.insert(domain=self.origin_domain).execute()
        except IntegrityError:
            pass  # already inserted
141

142 143
    @staticmethod
    def get_undecided():
        """Placeholder: will list the domains left undecided since the last export.

        Currently only logs a not-implemented notice and returns None.
        """
        logger.debug("XXX not implemented yet - table of undecideds domain since last export")
146

147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
# elif "inspect" in request:
#     # XXX → migrate to dbp
#     output = []
#     for row in Config.db.execute_sql("SELECT url from encounter where host = %s", request["inspect"]):
#         output.append(row[0])
#     return "<br>".join(output)
# elif "decide" in request:  # XX deprecated?
#     return self.get_undecided()
# elif "whitelist" in request:  # XXX not implemented yet
#     """url = path.split("/", 3)
#     if len(url) > 3:
#         self._setWebsite(url[2])  # osetrit self.website, ze je URL, a nikoli shell
#         logger.debug("XXX nejsem si jist, zda url je spravne na url[2]")  # XXX
#         logger.debug(url)  # XXX
#         quit()  # XXX
#         logger.debug(self.website)
#         logger.debug(self.origin_domain)
#     return self.whitelist()"""
#     return "Implement first if needed."