Commit f7bd1155 authored by Edvard Rejthar's avatar Edvard Rejthar

package format, installation steps

parent e24ff7f7
\ No newline at end of file
......@@ -9,7 +9,7 @@ PROFILE_COUNT=21
apt install software-properties-common
add-apt-repository "deb $(lsb_release -sc) main universe restricted multiverse"
apt update
apt install firefox python3 mariadb-server
apt install firefox python3 mariadb-server xvfb
pip3 install xvfbwrapper pymysql peewee jinja2 pyyaml bs4 pygments pillow requests
# current dir
......@@ -19,7 +19,7 @@ cd $DIR
# mariadb setup
systemctl start mariadb.service
mysql -u root < mdmaug-installation.sql # populate db
mysql -uroot -e "CREATE USER 'mdmaug'@'localhost' IDENTIFIED BY 'fidFDSs676'; GRANT ALL PRIVILEGES ON mdmaug. * TO 'mdmaug'@'%';" # new user
mysql -uroot -e "CREATE USER 'mdmaug'@'localhost' IDENTIFIED BY 'fidFDSs676'; GRANT ALL PRIVILEGES ON mdmaug. * TO 'mdmaug'@'localhost';" # new user
# adding user the server will be run under
useradd -m -d $DESTINATION mdmaug
......@@ -41,8 +41,11 @@ do
# adopt all files to the new user
chown mdmaug:mdmaug -R $DESTINATION
# make the new user able to use the display (needed on Ubuntu 17.10 at least)
xhost +local:mdmaug
......@@ -4,10 +4,13 @@ Scans a website for a sign of a parasite hosts or commands.
## Installation
1. ```git clone /tmp/mdmaug```
2. edit mdmaug/lib/
3. you should generate certificate `openssl req -new -x509 -keyout cert-mdmaug.pem -out cert-mdmaug.pem -days 365 -nodes` to `mdmaug/cert-mdmaug.pem`
4. ```/tmp/mdmaug/INSTALL```
1. Download ```git clone /tmp/mdmaug```
2. Edit mdmaug/lib/
3. Generate a certificate with `openssl req -new -x509 -keyout cert-mdmaug.pem -out cert-mdmaug.pem -days 365 -nodes` and place it at `mdmaug/cert-mdmaug.pem`
4. Perform installation: ```/tmp/mdmaug/INSTALL```
5. Everything should be located in `/opt/mdmaug`.
6. Launch under newly created `mdmaug` user: `su - mdmaug -c 'python3 -m mdmaug'`
7. Connect in the browser at: https://localhost:8000
### Notes
......@@ -15,6 +18,7 @@ Scans a website for a sign of a parasite hosts or commands.
* Certificate error: Make sure that the browser doesn't block the MDM-Augmented server if used from MDM.
* If you want a number of profiles other than 21, change it in INSTALL (`PROFILE_COUNT`), in the config (`profileCount`) and in profiles.ini
* You may put ```03 1,7,13,19 * * * ~/mdmaug-launch``` in ```crontab -e``` of user mdmaug.
* Requires Python 3.6 or newer.
## What is done to Firefox profiles?
#!/usr/bin/env python3
import logging
import os
import ssl
import threading
import logging
logging.basicConfig(level=logging.DEBUG, format="%(message)s")
from http.server import HTTPServer
from xvfbwrapper import Xvfb
from lib.config import Config
from lib.controller.server import Server
from lib.controller.api import Api
# import ipdb; ipdb.set_trace()
#logging.basicConfig(level=logging.WARNING, format="%(levelname)s: %(message)s",filename="logger.log")
from .lib.config import Config
from .lib.controller.server import Server
from .lib.controller.api import Api
# assure the logging dir
if not os.path.exists(Config.LOG_DIR):
# setup multithreading server
# server setup
httpd = HTTPServer(('', Config.APP_PORT), Server)
address = ''
httpd = HTTPServer((address, Config.APP_PORT), Server)
httpd.socket = ssl.wrap_socket(httpd.socket,
certfile= Config.DIR + 'python.pem', # together private + cert,
# together private + cert,
certfile=Config.DIR + 'cert-mdmaug.pem',
vdisplay = Xvfb()
display = Xvfb()
print('Listening at{}'.format(Config.APP_PORT))
print(f'Listening at https://{address}:{Config.APP_PORT}')
for _ in range(Config.profileCount):
except (KeyboardInterrupt, SystemExit):
How to debug mysql:
conn = pymysql.connect(host='localhost', user='root', passwd='lopuch', db='mdmaug', charset='utf8')
cur = conn.cursor()
\ No newline at end of file
import threading
import os
import logging
from glob import glob
import os
import threading
from peewee import MySQLDatabase
class Config:
profileCount = 21 # pocet profilu vytvorenych ve firefoxu. Tyto je treba vytvorit rucne. Nazev profilu je cislo - 0,1...
browser = 'firefox' # iceweasel, firefox. Ktery prohlizec se spousti.
configFile = '/opt/mdmaug/.cache/mdmaug-scans/_tmp/queue.cache' # RAM disk byl maly: '/tmp/mdm/queue.cache'
profileCount = 21 # number of Firefox profiles. Its name is just a number – 0,1...
browser = 'firefox' # iceweasel, firefox. What browser gets launched.
configFile = '/opt/mdmaug/.cache/mdmaug-scans/_tmp/queue.cache' # RAM disk was too small: '/tmp/mdm/queue.cache'
APP_PORT = 8000
LOG_DIR = "/opt/mdmaug/.cache/mdmaug-scans/_tmp/" # X /tmp/mdm/
APP_DOMAIN = '' + str(APP_PORT) #
LOG_DIR = "/opt/mdmaug/.cache/mdmaug-scans/_tmp/"
CACHE_DIR = "/opt/mdmaug/.cache/mdmaug-scans/"
DIR = os.path.dirname(os.path.realpath(__file__)) + "/../"
myDB = ""
myDB: None
lock = threading.RLock() # doufam, ze kdyz je lock tady, ze je funknci. Closure...? XX nejak otestovat
MAX_WHOIS_DOMAIN_THREADS = 10 # spusti maximalne 10 threadu doraz, jednou mi to totiz preteklo (kazda domena spusti jeste tolik threadu, kolik ma IP, ale tech byva jen par)
MAX_BROWSER_RUN_TIME = 25 # maximalni cas, ktery muze browser bezet
MAX_BROWSER_EXPIRATION = 15 # pocet vterin, ktere muzeme max cekat, nez se browser zavre (trva, nez zapise soubory)
MAX_BROWSER_RUN_TIME = 25 # maximum time for a browser to run
MAX_BROWSER_EXPIRATION = 15 # seconds that we wait before killing the browser (waiting for the files to be written)
def connect():
# XX resim problem peewee.OperationalError: (2006, "MySQL server has gone away (BrokenPipeError(32, 'Broken pipe'))") po 7 hodinach timeoutu
# XX kupodivu pripojeni nemuze byt v dbp DBModel.connect. Prestoze type je pak spravne (MySQLDatabase), nic udelat nejde a pokusy o select konci NoneType.
logging.debug("Connecting to DB.")
Config.myDB = MySQLDatabase("mdmaug", host='localhost', port=3306, user="mdmaug", passwd="fidFDSs676") # XX dal jsem pryc: , threadlocals=False
Config.myDB.register_fields({'primary_key': 'BIGINT AUTOINCREMENT'})
Config.myDB = MySQLDatabase("mdmaug", host='localhost', port=3306, user="mdmaug",
passwd="fidFDSs676") # XX dal jsem pryc: , threadlocals=False
import json
import subprocess
import logging
from lib.config import Config
from lib.controller.scan_controller import ScanController
from lib.model.dbp import Status, Export, Turris, Whitelist
from lib.analysis.parser.traffic_log_parser import TrafficLogParser
import subprocess
from peewee import IntegrityError
from ...templates.crawl_view import CrawlView
from .scan_controller import ScanController
from ..config import Config
from ..model.dbp import Turris, Whitelist
from ..parser.traffic_log_parser import TrafficLogParser
class Api:
website = "" #
websiteDomain = "" #
def __init__(self, path):
self.path = path
def run(self, request):
""" Accept command
:type path: dict from URL request. /api/analyze=cache/ → {"api": True, "analyze": cache, "page": ""}
def run(self, cmd):
""" Accept command """
if cmd == "analyze":
return ScanController().launch(self.path)
if cmd == "analyze=cached":
return ScanController().launch(self.path, cached = 1)
if cmd == "analyze=weekcache":
return ScanController().launch(self.path, cached = 7)
if cmd == "analyze=oldcache":
return ScanController().launch(self.path, cached = True)
elif cmd == "export=view": # XX deprecated?
return Export.exportView()
elif cmd == "export=confirm": # XX deprecated?
return Export.exportConfirm()
elif cmd == "decide": # XX deprecated?
return self.getUndecided()
elif cmd == "nicify":
url = self.path.split("/", 3)
return TrafficLogParser.getStylesheet() + TrafficLogParser.nicifyFile(url[3])
elif cmd == "vote": # /api/vote/block/
if "analyze" in request:
crawl = ScanController().launch(request["page"], {"cached": 1, "weekcache":7, "oldcache": True, True: None}[request["analyze"]])
if request["api"] == "json":
return CrawlView.output_json(crawl)
return CrawlView.output_html(crawl)
elif "decide" in request: # XX deprecated?
return self.get_undecided()
elif "nicify" in request:
return TrafficLogParser.getStylesheet() + TrafficLogParser.nicifyFile(request["page"])
elif "vote" in request: # /api/vote/block/
logging.debug("vote cmd")
url = self.path.split("/", 4)
return[3], url[4])
elif cmd == "whitelist": # XXX not implemented yet
url = self.path.split("/", 3)
return["vote"], request["page"])
elif "whitelist" in request: # XXX not implemented yet
"""url = path.split("/", 3)
if len(url) > 3:
self._setWebsite(url[2]) # osetrit, ze je URL, a nikoli shell
logging.debug("XXX nejsem si jist, zda url je spravne na url[2]") # XXX
......@@ -46,32 +42,32 @@ class Api:
quit() # XXX
return self.whitelist()
elif cmd == "reset":
return self.whitelist()"""
return "Implement first if needed."
elif "reset" in request:
return "reset"
def reset():
logging.debug("resetting running browsers")
with open(Config.configFile, 'w') as f: # clear the queue
json.dump({}, f)["pkill", Config.browser]) # kill frozen browsers
#prida 2ld domenu mezi whitelistovane
# prida 2ld domenu mezi whitelistovane
def whitelist(self):
#Db.cur = Db.connection.cursor()
#self._logging.debug(Db.cur.execute("""REPLACE INTO whitelist set domain = %s""", (self.websiteDomain, )))
except IntegrityError:pass # jiz je vlozeno
# Db.cur = Db.connection.cursor()
# self._logging.debug(Db.cur.execute("""REPLACE INTO whitelist set domain = %s""", (self.websiteDomain, )))
# Db.connection.commit()
# Db.cur.close()
except IntegrityError:
pass # jiz je vlozeno
def getUndecided(self):
def get_undecided():
logging.debug("XXX jeste jsem neudelal - ma vylezt tabulka vsech nerozhodlych domen od posledniho exportu")
This diff is collapsed.
from http.server import SimpleHTTPRequestHandler
from jinja2 import Environment
from jinja2 import FileSystemLoader
from lib.config import Config
from lib.controller.api import Api
from lib.model.dbp import DbModel
from lib.model.dbp import Export
import logging
import mimetypes
import os
from http.server import SimpleHTTPRequestHandler
from jinja2 import Environment
from jinja2 import FileSystemLoader
from ..config import Config
from ..controller.api import Api
from ..model.dbp import DbModel
from ..model.dbp import Export
env = Environment()
env.loader = FileSystemLoader(Config.DIR + "templates/")
class Server(SimpleHTTPRequestHandler):
def favicon(self):
with open('favicon.ico', 'rb') as f:
self.output(, "image/x-icon")
def render_template(self, filename, ** kwargs):
def render_template(self, filename, **kwargs):
def output(self, contents, contentType="text/html"):
def output(self, contents, content_type="text/html"):
self.send_header("Content-type", contentType)
self.send_header("Content-type", content_type)
......@@ -34,26 +37,61 @@ class Server(SimpleHTTPRequestHandler):
def static_file(self, url):
is_binary_string = lambda bytes: bool(bytes.translate(None, bytearray([7, 8, 9, 10, 12, 13, 27]) + bytearray(range(0x20, 0x100))))
type = 'rb' if is_binary_string(open('/usr/bin/python', 'rb').read(1024)) else 'r'
with open(url, type) as f:
self.output(, contentType=mimetypes.guess_type(url))
# 'rb' if is a binary string, else 'r'
type_ = 'rb' if bool(open('/usr/bin/python', 'rb').read(1024).translate(None,
bytearray([7, 8, 9, 10, 12, 13, 27]) + bytearray(
range(0x20, 0x100)))) else 'r'
with open(url, type_) as f:
self.output(, content_type=mimetypes.guess_type(url))
def do_GET(self):
path = self.path.split("/")
Routing table:
/ → homepage
/existing-file → return the static file from /static
/( → if set, the output will be HTML5-postMessaged to other tab at the destination (with https protocol)
/api(=json)/ → output might be either in JSON, or else in HTML
/export/(days) → CSV of last X days
_, path = self.path.split("/", 1)
path, *_ = path.split("?", 1)
logging.debug("Request: {}".format(path[1]))
if path[1] == "":
if path == "":
return self.homepage()
elif os.path.isfile(Config.DIR + "static/" + path[1]): #faviconka, nebo jiny existujici soubor
return self.static_file(Config.DIR + "static/" + path[1])
elif os.path.isfile(Config.DIR + "static/" + path): # favicon or any other existing file
return self.static_file(Config.DIR + "static/" + path)
if path[1] == "api": # /api/analyze/web
cmd = path[2]
api = Api(self.path)
# parse the request url into a friendly dictionary
request = {"page": ""}
page = False
for l in self.path.split("/")[1:]:
if not page:
c, *d = l.split("=", 1)
if c in ["http:", "https:"]:
page = True
request[c] = d[0] if len(d) else True
request["page"] += l + "/"
if request["page"]: # strip last slash
request["page"] = request["page"][:-1]
logging.debug("Request: {}".format(request))
if "api" in request: # /api/analyze/web
output = Api().run(request)
if "destination" in request:
# send everything up, we are in an iframe
self.render_template("_message.html",, cmd=cmd, url=self.path, destination="")
elif path[1] == "export": # /export/{days} - csv za poslednich 7 dni
self.render_template("_message.html", contents=output, cmd=request, url=self.path,
elif "export" in request: # /export/{days} - csv of last 7 days
url = self.path.split("/", 2)
\ No newline at end of file
import re
import socket
import logging
import re
from urllib.parse import parse_qs
from urllib.parse import urlparse
import socket
import urllib.request
class Domains:
""" webove nastroje """
def get_ips_for_host(host):
ips = socket.getaddrinfo(host, 80, 0, 0, socket.IPPROTO_TCP) # XXX co kdyz nepratelsky web reaguje jen na 80, 81, 8080
......@@ -23,8 +21,7 @@ class Domains:
except TypeError:
logging.debug("Domains/url2domain type error")
raise #return ""
raise # return ""
def url2path(url):
""" -> /url """
......@@ -33,8 +30,10 @@ class Domains:
return url
def assureUrl(url): # zajistit, ze se jedna o url a ne treba o shell
# XX co ostatni protokoly? smb, sftp? Hrozi tam nejake nebezpeci?
return'(((((http|https|ftp)://)?[\w\-_]+(?:(?:\.[\w\-_]+)+)))([\w\-\.,@?^=%&amp;:/~\+#]*[\w\-\@?^=%&amp;/~\+#])?)', url).group(0)
# XX what about other protocols? smb, sftp? Is there a danger?
def domain2dir(url): # friendly nazev adresare z domeny v url
url = url.lower()
......@@ -47,7 +46,7 @@ class Domains:
return '' + ip
def ip2pdnsDomains(ip):
return None #24 doesnt work
# XX mohl bych sortovat dle 2nd domeny. Pripadne oriznout 3rd domenu, nechat jen 2nd. Tam ale musim osetrit problemove dvojite tld -,
pdns = urllib.request.urlopen(Domains.getPdnsLink(ip)).read().decode("utf-8")
......@@ -56,20 +55,23 @@ class Domains:
except Exception as e:
logging.debug("chyba pri kontaktu s PDNS: " + str(e))
return None
return None # #24 doesnt work
def ip2countryAndCity(ip):
return None, None #23 service down
hostipApi = urllib.request.urlopen('' + ip + '&position=true').read().decode("utf-8").split("\n")
#['Country: CZECH REPUBLIC (CZ)', 'City: Prague', '', 'Latitude: 50.0833', 'Longitude: 14.4333', 'IP:', '']
hostipApi = urllib.request.urlopen('' + ip + '&position=true').read().decode(
# ['Country: CZECH REPUBLIC (CZ)', 'City: Prague', '', 'Latitude: 50.0833', 'Longitude: 14.4333', 'IP:', '']
return hostipApi[0].split(":")[1], hostipApi[1].split(":")[1]
except UnicodeDecodeError: # as e
logging.debug("neumim dekodovat")
except Exception as e:
logging.debug(" down: " + str(e))
return None, None
return None, None # #23 service down
# Contacts the Safe Browsing service and tries to work out from its inconsistent data whether a red warning page is shown when a user visits the URL.
......@@ -77,13 +79,13 @@ class Domains:
# It is also possible that the service has changed its wording. Good luck!
# @param output 'bool' returns True/False/None; 'attr' returns "1"/"0"/"" for use as an attribute
def isSuspicious(domain, output='bool'):
#contents = urllib.request.urlopen('' + domain).read().decode("utf-8")
#with open("debugsf.tmp","a") as f:
def is_suspicious(domain, output='bool'):
# contents = urllib.request.urlopen('' + domain).read().decode("utf-8")
# with open("debugsf.tmp","a") as f:
# f.write(contents + "\n\n")
#if "Site is listed as suspicious" in contents:
#elif "This site is not currently listed as suspicious." in contents:
import requests, re, json
# if "Site is listed as suspicious" in contents:
# elif "This site is not currently listed as suspicious." in contents:
import requests
r = requests.get("" + domain, timeout=5)
if '"listed"' in r.text:
return True if output == 'bool' else "1"
from yaml import load, dump
from ..config import Config
from ..parser.spy_parser import SpyParser
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
from collections import defaultdict
from lib.config import Config
class Crawl(defaultdict):
""" Objekt Crawl udružuje výsledky vyhledávání """
......@@ -15,47 +19,45 @@ class Crawl(defaultdict):
r += "\n* " + key + " " + str(self[key])
return r
def saveToFile(self,filename):
def save_to_file(self, filename):
with open(filename, "w") as f:
f.write(dump(self.__getstate__(), Dumper=Dumper))
def loadFromFile(filename):
def load_from_file(filename):
with open(filename, 'r') as f:
return Crawl(state = load(, Loader=Loader))
return Crawl(state=load(, Loader=Loader))
def __init__(self, host = None, state = None, logDir = None, cacheDir = None):
def __init__(self, host=None, state=None, log_dir=None, cache_dir=None):
""" State muze obsahovat vystup __getstate__() (serializace YAMLem) """
self.default_factory = _Domain
self.screenfile = None # HTML output XXX
if host:
# if host: = host
if logDir:
self.logDir = logDir
if cacheDir:
self.cacheDir = cacheDir
# if log_dir:
self.logDir = log_dir
# if cache_dir:
self.cacheDir = cache_dir
if state:
def __getstate__(self):
# Build a plain-dict snapshot of this object for serialization.
# Start from a shallow copy of the instance attributes so the live __dict__ is not modified.
state = self.__dict__.copy()
# The mapping entries are stored under "keys" as [key, nested_state] pairs,
# where nested_state is whatever the stored value's own __getstate__() returns.
state["keys"] = [[x for x in (key, self[key].__getstate__())] for key in self.keys()]
return state
def __setstate__(self,state):
def __setstate__(self, state):
for tup in state["keys"]:
key, val = tup
del state["keys"]
self.__dict__ = state
class _Domain(defaultdict):
""" Navstivena domena behem crawlu """
def __str__(self):
r = "{} adres a {} url".format(len(self.addresses), len(self.urls))
for key in self.urls.keys():
......@@ -71,8 +73,8 @@ class _Domain(defaultdict):
return r
def __init__(self): = None
#self.urls = set()
# = None
# self.urls = set()
self.urls = defaultdict(_Url)
self.addresses = defaultdict(_Address)
self.pdns = set()
......@@ -82,8 +84,6 @@ class _Domain(defaultdict):
state["addresses"] = [[x for x in (key, self.addresses[key].__dict__)] for key in self.addresses]
state["urls"] = [[x for x in (key, self.urls[key].__dict__)] for key in self.urls]
return state
#return {'urls': self.urls, 'vote', 'addresses':
# [[x for x in (key, self.addresses[key].__dict__)] for key in self.addresses]}
def __setstate__(self, state):
for tup in state["addresses"]:
......@@ -103,31 +103,34 @@ class _Domain(defaultdict):
class _Url(set):
""" Unikatni navstivena url """
def __str__(self):
return str(self.__dict__)
#return "spyfile {} {}, vote {}".format(self.spyfile,,
def __init__(self):
self.spyfile = [] # cesta k souboru se podezrelym kodem poustenym strankou
self.sourcefile = [] # cesta k souboru se zdrojovym kodem. Muze jich byt vice, a jsou oba pod domenou
# paths to files with a suspicious code, run by the inspected page
self.spyfiles = []
# paths to files with source codes.
# Both '' and '' are under domain.
self.sourcefiles = []
def add_sourcefile(self, sourcefile):
def addSourcefile(self, sourcefile):
#if self.sourcefile != None:
# raise RuntimeError('sourcefile', 'uz bylo definovano ' + self.sourcefile) # na tohle nejsme pripraveni - asi funkce v main.js pro jmeno souboru je spatna, protoze je jina od Domains.funkce
def add_spyfile(self, spyfile):
def addSpyfile(self, spyfile):
#if self.spyfile != None:
# raise RuntimeError('spyfile', 'uz bylo definovano') # na tohle nejsme pripraveni - asi funkce v main.js pro jmeno souboru je spatna, protoze je jina od Domains.funkce
def list_spyfiles(self):
    """Iterate over the spy files recorded in ``self.spyfiles``.

    For every path a 4-tuple is yielded: the path itself, the two values
    returned by ``SpyParser.get_short`` for that path, and the nicify link
    built as ``Config.APP_DOMAIN + "/api/nicify/" + path``.
    """
    for spy_path in self.spyfiles:
        excerpt, shortened = SpyParser.get_short(spy_path)
        nicify_link = Config.APP_DOMAIN + "/api/nicify/" + spy_path
        yield spy_path, excerpt, shortened, nicify_link
def list_sourcefiles(self):
    """Yield the nicify link for every path stored in ``self.sourcefiles``.

    Each link is ``Config.APP_DOMAIN + "/api/nicify/" + path``.
    """
    yield from (
        Config.APP_DOMAIN + "/api/nicify/" + source_path
        for source_path in self.sourcefiles
    )
class _Address(set):
""" Adresa navstivene domeny """
#def __getstate__(self):
# return {,,}
#def __setstate__(self):
# pass
def __str__(self):
return "K adrese je připojena geolokace {} {}".format(, # , vote {} ,
......@@ -135,14 +138,13 @@ class _Address(set):
def __init__(self): = None = None = None
Example (and yaml-serialization check):
c = Crawl()
c[""].urls["/aurl"].spyfile = "/soubor-spyfil"
c[""].urls["/aurl"].sourcefiles.add("/1.source") XX this should probably be append(), shouldn't it?
c[""].addresses[""].country = "preague"
......@@ -161,3 +163,31 @@ e.__setstate__(load(output, Loader=Loader))
logging.debug(str(c) == str(e))
XX should be implemented in another manner; jsonpicklable with getter methods
Crawl object reference guide:
"screenfile": crawl.screenfile,
"safebrowsing-suspicious": Domains.is_suspicious(, 'attr'),
"domains": {}
"addresses": {},
IP, "country":, "city":
"urls": {},
"spyfiles": {},
"sourcefiles": []
"pdns": list(crawl_domain.pdns),
"vote": Turris.getVote(host=domain),
"safebrowsing-suspicious": lambda
XXX possible structure?:
"domains" : {"addresses": [(IP, country, city)],
"urls": [URL, [spyfile, ...], [sourcefile, ...]]
"pdns": []
"vote": lambda
"safebrowsing-suspicious": lambda
import datetime
from lib.config import Config
from import Domains
import logging
from peewee import *
from peewee import RawQuery
import time
from urllib.parse import urlparse
#myDB = MySQLDatabase("mdmaug", host='', port=3306, user="root", passwd="lopuch", threadlocals=False)
#myDB.register_fields({'primary_key': 'BIGINT AUTOINCREMENT'})
from peewee import Model, DateTimeField, IntegerField, CharField, JOIN, BigAutoField
from peewee import RawQuery
from ..config import Config
from import Domains
if(1): # Do not print all queries to stderr.
logger = logging.getLogger('peewee')