dbp.py 9.73 KB
Newer Older
1 2 3
import datetime
import logging
import time
Edvard Rejthar's avatar
Edvard Rejthar committed
4
from collections import defaultdict
5

Edvard Rejthar's avatar
Edvard Rejthar committed
6
from peewee import Model, DateTimeField, IntegerField, CharField, JOIN, BigAutoField, fn
7

8
from ..config import Config
Edvard Rejthar's avatar
Edvard Rejthar committed
9
from ..domains import url2domain
10

11
# Do not print all queries to stderr.
Edvard Rejthar's avatar
Edvard Rejthar committed
12 13
logging.getLogger('peewee').setLevel(logging.WARNING)
logger = logging.getLogger("mdmaug")
14

15

16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
class DbModel(Model):
    # @staticmethod
    # def assure_connection():
    #     logger.debug("Assure connection.")
    #     try:  # zkousim spustit nejaky prikaz
    #         # logger.debug("1")
    #         Whitelist.select().count()
    #         # logger.debug("2")
    #     except:  # OperationalError:
    #         # logger.debug("3")
    #         Config.connect()
    #         # logger.debug("4")
    #         try:  # zkousim spustit nejaky prikaz
    #             # logger.debug("5")
    #             Whitelist.select().count()
    #             # logger.debug("6")
    #         except:
    #             logger.debug("7 - failed")
    #             raise
    #     logger.debug("Connection to DB assured.")
36 37

    """A base model that will use our MySQL database"""
38 39

    @staticmethod
40
    def connect():
Edvard Rejthar's avatar
Edvard Rejthar committed
41
        logger.debug("connecting db....")
42 43
        # DbModel.Meta.myDb = Config.db
        # Config.db.connect() # XX kupodivu toto neni potreba
Edvard Rejthar's avatar
Edvard Rejthar committed
44 45 46 47 48
        # logger.debug(Whitelist.select().count())
        # logger.debug("Connecting to DB.")
        # logger.debug("Start test.")
        # logger.debug(Whitelist.matches("www.mozilla.org"))
        # logger.debug("End test.")
49

50
    @staticmethod
51
    def disconnect():
Edvard Rejthar's avatar
Edvard Rejthar committed
52
        logger.debug("... db ends.")
53
        Config.db.close()
54 55

    class Meta:
56 57
        Config.connect()  # XX Performance: check if peewee is connecting right now. If yes :( , check deferring
        database = Config.db
58

59

Edvard Rejthar's avatar
Edvard Rejthar committed
60
# logger.debug("DVA")
61
# DbModel.connect()
62
# logger.debug(Config.db)
63 64

class Status(DbModel):
65
    id = BigAutoField()
66 67
    name = CharField(5)

Edvard Rejthar's avatar
Edvard Rejthar committed
68 69
    enum = {"block": 3, "log": 2, "allow": 1, "n/a": 0}

70
    @staticmethod
71
    def word2int(vote):
Edvard Rejthar's avatar
Edvard Rejthar committed
72 73 74 75 76 77 78 79 80 81 82 83
        if vote in Status.enum:
            return Status.enum[vote]
        else:
            return 0  # misto n/a se prenasi "na" (proto genericke else)
        # if vote == "block":
        #     return 3
        # elif vote == "log":
        #     return 2
        # elif vote == "allow":
        #     return 1
        # else:  # misto n/a se prenasi "na" (proto genericke else)
        #     return 0
84

85
    @staticmethod
86 87 88 89 90 91 92 93 94 95
    def int2word(status):
        if status == 0:
            return "n/a"
        if status == 1:
            return "allow"
        if status == 2:
            return "log"
        if status == 3:
            return "block"

96

97
class Export(DbModel):
98
    id = IntegerField(primary_key=True)
99 100
    timestamp = DateTimeField(datetime.datetime.now())

101 102 103
    @staticmethod
    def export_view(days: int = 7) -> str:
        """ CSV export. Lists all IPs that we log (status=2) or block (status=3). """
104
        delimiter = ","
105
        output = []
106
        with Config.lock:
107
            output.append(delimiter.join(["date", "ip", "port", "url", "block", "evil host", "other details"]))
108
            # Db.cur = Db.connection.cursor()
109
            # Db.cur.execute("""SELECT ip, port, url, `evil host`, `other details`,status from encounter where status > 1 and timestamp > (select case when MAX(timestamp IS NULL)=0 THEN max_scans(timestamp) ELSE 0 END from export)""")
110 111
            # rows = Db.cur.fetchall()
            # Db.cur.close()
112 113 114 115 116 117 118

            q = "SELECT ip, port, origin, `host`, status from encounter" \
                " where status > 1" \
                " and `ip` IS NOT NULL" \
                " and timestamp > NOW() - INTERVAL %s DAY" \
                " GROUP BY concat(`ip`,`port`) " \
                "ORDER BY `timestamp` DESC"  # group by concat may be a performance issue
Edvard Rejthar's avatar
Edvard Rejthar committed
119
            logger.debug(q)
120 121 122
            rq = Config.db.execute_sql(q, (days,))
        for r in rq:
            ip, port, origin, host, status = ['' if s is None else str(s) for s in r]
123
            date = time.strftime("%y%m%d")
124 125 126 127 128
            if status == "3" and host.endswith(Config.EXPORT_NOTBLOCK_TLD):
                status = "2"
            status = "LOG" if (status == "2") else "b"
            output.append(delimiter.join([date, ip, port, origin, status, host, '']))
        return "\n".join(output)
129

130

131
class Encounter(DbModel):
132
    id = BigAutoField()
133 134
    timestamp = DateTimeField(datetime.datetime.now())
    status = IntegerField()
135 136
    ip = CharField(45, default=None)
    port = IntegerField(default=0)
137 138
    origin = CharField(255, default="")
    host = CharField(255, default="")
139

140 141
    @staticmethod
    def get_vote(host=None, ip=None):
142 143 144
        status = None
        try:
            if host:
145 146
                status = Encounter.select().join(Status, JOIN.LEFT_OUTER, on=(Status.id == Encounter.status)).where(
                    Encounter.host == host).limit(1).get().status
147
            if ip:
148 149
                status = Encounter.select().join(Status, JOIN.LEFT_OUTER, on=(Status.id == Encounter.status)).where(
                    Encounter.ip == ip).limit(1).get().status
150 151 152 153
        except:
            status = 0
        return Status.int2word(status)

154
    @staticmethod
155
    def vote(vote, host):
156 157 158 159 160 161 162 163 164 165
        """
        User voted for remote domain maliciousness.
        1. Get list of IP of the host
        2. Update all the rows having the same IP (IP can have be associated with multiple hosts)
        3. Update all the rows with the host (because some host can have no more an IP)

        :param vote: Status.word
        :param host: domain to be voted
        :return: str Status message
        """
166 167 168
        status = Status.word2int(vote)
        try:
            try:
169
                rows = Encounter.select(Encounter.ip).where(Encounter.host == host)
170
            except:
171
                logger.error(f"domain {host} should have been inserted in the database, but it hasn't been")
172
                raise
173
            ip_list = list(set([o.ip for o in rows if o.ip is not None]))
Edvard Rejthar's avatar
Edvard Rejthar committed
174
            count = 0            
175 176 177 178 179 180
            if ip_list:
                count += Encounter.update(status=str(status)).where(Encounter.ip << ip_list).execute()
            # ovlivnit i host s IP = NULL
            count += Encounter.update(status=str(status)).where(Encounter.host == host).execute()
            logger.debug(f"vote: {vote} host: {host}  count: {count}")
            return f"{count} updated"
181
        except:
182 183
            return "no update, didn't find ip"

Edvard Rejthar's avatar
Edvard Rejthar committed
184 185
    @classmethod
    def get_related(cls, host, ignore_host=None):
186
        """ returns all domain 'origin' that call this remote host """
Edvard Rejthar's avatar
Edvard Rejthar committed
187 188
        return cls.relateds[host]

189
        l = []
Edvard Rejthar's avatar
Edvard Rejthar committed
190 191 192
        q = cls.select(cls.origin).where(cls.host == host).group_by(cls.origin)
        if ignore_host:  # ignore itself
            q = q.where(cls.origin != ignore_host)
193 194 195
        for el in q:
            l.append(el.origin)
        return l
196

Edvard Rejthar's avatar
Edvard Rejthar committed
197 198 199 200 201 202 203 204 205 206 207 208
    relateds = defaultdict(list)

    @classmethod
    def prepare_relateds(cls, domains, ignore_host=None):
        d = {k for k in domains if k not in cls.relateds}  # some might have been preloaded in by_origin_count
        if not d:
            return
        q = "SELECT host, group_concat(distinct origin) FROM `encounter` where origin in %s group by host ORDER BY count(*) DESC"
        for host, origins in Config.db.execute_sql(q, d, ):
            cls.relateds[host] = [i for i in origins.split(",") if i != ignore_host]

    @classmethod
Edvard Rejthar's avatar
Edvard Rejthar committed
209
    def by_origin_count(cls, limit=None, offset=None, params={}):
Edvard Rejthar's avatar
Edvard Rejthar committed
210 211 212 213 214 215 216
        """ Returns set of origins ordered by the count of potentially evil host connecting to them.
            XX DB should have scan_timestamp :( which differs from creation+vote updated timestamp; then we could filter
                by date from/to aggregation

            Returns domain that seen the tracked host and the host.
        """

Edvard Rejthar's avatar
Edvard Rejthar committed
217 218 219 220 221
        query = Encounter.select(Encounter.origin,
                                 Encounter.host,
                                 fn.Group_Concat(fn.Distinct(Encounter.origin))) \
            .group_by(Encounter.host) \
            .order_by(fn.Count("*").desc())
Edvard Rejthar's avatar
Edvard Rejthar committed
222

Edvard Rejthar's avatar
Edvard Rejthar committed
223 224 225
        vote_filter = params.get("filter", None)
        if vote_filter and vote_filter != "-":
            query = query.where(Encounter.status == int(vote_filter))
Edvard Rejthar's avatar
Edvard Rejthar committed
226

Edvard Rejthar's avatar
Edvard Rejthar committed
227 228 229 230 231 232 233 234 235
        host_regex = params.get("host_regex", None)
        if host_regex:
            query = query.where(Encounter.host.regexp(host_regex))

        host_regex_not = params.get("host_regex_not", None)
        if host_regex_not:
            query = query.where(~Encounter.host.regexp(host_regex_not))

        # cz|com|org|net
Edvard Rejthar's avatar
Edvard Rejthar committed
236

Edvard Rejthar's avatar
Edvard Rejthar committed
237
        print(query.sql())
Edvard Rejthar's avatar
Edvard Rejthar committed
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252
        total = query.count()

        if limit:
            query = query.limit(int(limit))
            if offset:
                query = query.offset(int(offset))

        domains = []
        hosts = []
        for origin, host, origins in query.tuples().iterator():
            cls.relateds[host] = origins.split(",")
            domains.append(origin)
            hosts.append(host)
        return domains, hosts, total

253

254
class Whitelist(DbModel):
255
    id = BigAutoField()
256 257 258
    timestamp = DateTimeField(datetime.datetime.now())
    domain = CharField(255)

259
    @staticmethod
260
    def matches(url):
261
        """ Test if URL is in the 2nd domain whitelist """
262
        try:
Edvard Rejthar's avatar
Edvard Rejthar committed
263
            url = url2domain(url)
264
            s = url.split(".")
Edvard Rejthar's avatar
Edvard Rejthar committed
265 266
            # logger.debug(urlparse(url))
            # logger.debug(s)
267
        except AttributeError:
Edvard Rejthar's avatar
Edvard Rejthar committed
268
            logger.error("ATTRIBUTE ERROR! Whitelist.matches({})".format(url))
269 270
            return False

271
        if (len(s)) > 1:
272 273
            domain2nd = s[-2] + "." + s[-1]  # example + . + cz
        else:  # domain is rather IP, there is no do
274 275 276
            domain2nd = s
        with Config.lock:
            return Whitelist.select().where(Whitelist.domain == domain2nd).count() > 0