Commit 3db7b09e authored by Edvard Rejthar

better deployment

parent c3e84053
@@ -23,6 +23,7 @@ Scans a website for a sign of a parasite hosts or commands.
## Tips
* You may use /static/demopage.html as a testing page.
+* You may launch MDMaug with the environment variable `PORT` to change the port the application is bound to.
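For instance, a minimal sketch of such a launch (this is an illustration only; it assumes MDMaug is started as a Python module and picks an arbitrary port, so adjust both to your deployment):

```bash
# assumption: the package exposes a module entry point; 8000 is an example port
PORT=8000 python3 -m mdmaug
```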
### Troubleshooting
...
#!/usr/bin/env python3
+import atexit
import datetime
import logging
import os
@@ -13,6 +14,15 @@ from xvfbwrapper import Xvfb
from .lib.config import Config
from .lib.controller.api import Api
+fileHandler = logging.FileHandler("mdmmmmmmm.log")
+fileHandler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+fileHandler.setLevel(logging.WARNING)
+consoleHandler = logging.StreamHandler()
+consoleHandler.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
+consoleHandler.setLevel(logging.WARNING)
+handlers = [fileHandler, consoleHandler]
+logging.basicConfig(level=logging.INFO, handlers=handlers)
# assure the logging dir
if not os.path.exists(Config.LOG_DIR):
os.makedirs(Config.LOG_DIR)
@@ -47,13 +57,18 @@ if result is not 0:
#Please run `sudo bin/nc-dumb.sh` to launch a server."
exit()
+# we may want to specify another port for testing purposes, i.e. to not compete with the port defined in the nginx configuration
+if os.environ.get("PORT"):
+Config.APP_PORT = int(os.environ.get("PORT"))
# run display and flask server
run_display = False if os.environ.get("FIREFOX_DEBUG", 0) == "1" else True
if run_display:
display = Xvfb()
display.start()
-try:
+if 1:
+#try:
# print(f'Listening at https://{address}:{Config.APP_PORT}')
from .lib.controller.server import app as server
@@ -66,6 +81,11 @@ try:
ssl_context=(Config.DIR + 'cert-mdmaug.pem', Config.DIR + 'key-mdmaug.pem'), threaded=True)
# for _ in range(Config.profile_count):
# threading.Thread(target=httpd.serve_forever).start()
-except (KeyboardInterrupt, SystemExit):
-if run_display:
-display.stop()
+# except (KeyboardInterrupt, SystemExit):
+# if run_display:
+# display.stop()
+# clean up - shut down the display.
+# (This can't be caught as a simple exception since uWSGI runs multiple workers.
+# Note that the display gets stopped for every worker (superfluously).)
+atexit.register(lambda: display.stop() if run_display else None)
@@ -27,5 +27,5 @@ while True:
else:
method = "w"
files_encountered.add(file)
-with open(file, method +"b") as f:  # without writing as bytes, some pages end up with UnicodeEncodeError: 'ascii' codec can't encode character '\xe9' in position 197: ordinal not in range(128)
+with open(file, method + "b") as f:  # without writing as bytes, some pages end up with UnicodeEncodeError: 'ascii' codec can't encode character '\xe9' in position 197: ordinal not in range(128)
f.write(message["text"].encode("utf-8"))
@@ -6,8 +6,8 @@ from glob import escape
from flask import request
from peewee import IntegrityError
-from mdmaug.lib.domains import domain2dir
-from mdmaug.lib.model.crawl import Crawl
+from ..domains import domain2dir
+from ..model.crawl import Crawl
from .scan_controller import ScanController
from ..config import Config
from ..model.dbp import Encounter, Whitelist, Status
...
@@ -75,7 +75,6 @@ class ScanController:
:param url: scanned url
:type cached: True = Any cached version, int = cached version X days old. If None or not found, site will be reanalysed
"""
-print("tttttttttt", url)
url = assure_url(url)
if not url:
return f'Invalid URL {escape(url)} {url}'
@@ -84,12 +83,15 @@
if cached:
domain = domain2dir(url)
-scans = self.get_domain_scans(domain)
+# scans = self.get_domain_scans(domain)
-if scans:
-# get the most recent snapdir and check if it's not too old
prep = Config.CACHE_DIR + domain + "/"
-scan = max(scans, key=lambda s: os.path.getmtime(prep + s))
+scans = sorted(self.get_domain_scans(domain), key=lambda s: os.path.getmtime(prep + s))
-if not autoprune or not self.clean_scan(domain, scan):
+while scans:
+# get the most recent snapdir and check if it's not too old
+scan = scans.pop()
+if autoprune and self.clean_scan(domain, scan):
+continue
+# if not autoprune or not self.clean_scan(domain, scan):
if cached is True or os.path.getmtime(prep + scan) > time.time() - (3600 * 24 * cached):
if creation_spree:
return f"Scan for {domain} already exists."
@@ -98,7 +100,7 @@
crawl = Crawl.load_from_scan(domain, scan)
return crawl
except ValueError:
-pass
+break
logger.debug(f"({-1}) Convenient cached analysis not found for url {url}")
# perform fresh analysis
@@ -134,24 +136,23 @@ class ScanController:
# ,nsSocketTransport:5,nsStreamPump:5,nsHostResolver:5
logger.debug("({}) FF -P {} -no-remote {}".format(self.profile, self.profile, self.url))
# http://localhost/redirect/ gets stripped by the extension
-command = f"export NSPR_LOG_MODULES=timestamp,nsHttp:5 ; export NSPR_LOG_FILE={logfile} ;" \
-f" export CACHE_DIR={cache_dir}; export PROFILE={self.profile};" \
-f"{Config.browser} -P {self.profile} -no-remote 'http://localhost/redirect/{self.url}'"
+command = f"NSPR_LOG_MODULES=timestamp,nsHttp:5 NSPR_LOG_FILE={logfile} CACHE_DIR={cache_dir} PROFILE={self.profile}" \
+f" {Config.browser} -P {self.profile} -no-remote 'http://localhost/redirect/{self.url}'"
# terminate Config.browser if he's not able to
-# (everything has to be in single command because there is no heritance of $! amongst subprocesses)
+# (everything has to be in single command because there is no inheritance of $! amongst subprocesses)
command += f" & echo $!;ii=0; while [ -n \"`ps -p $! | grep {Config.browser}`\" ];" \
-f"do echo \"({self.profile}) running\" ;ii=$((ii+1)); if [ $ii -gt {Config.MAX_BROWSER_RUN_TIME} ];" \
+f" do echo \"({self.profile}) running\" ;ii=$((ii+1)); if [ $ii -gt {Config.MAX_BROWSER_RUN_TIME} ];" \
f" then echo '({self.profile}) kill';kill $!; break;fi; sleep 1; done"  # > /dev/null
logger.debug(command)
subprocess.call([command], shell=True)
logger.debug(f"({self.profile}) stopped!")
-# shromazdit informace z analyz
+# gather analysis information
crawl = Crawl(host=self.url, log_dir=log_dir, cache_dir=cache_dir, profile=self.profile)
expiration = 0
-while not os.path.isfile(logfile):  # i po zavreni FF nekdy trva, nez se soubor zapise
+while not os.path.isfile(logfile):  # it may take some time to write the file even after FF closes
expiration += 1
logger.debug(f"({self.profile}) waiting to close...")
if expiration > Config.MAX_BROWSER_EXPIRATION:
@@ -284,13 +285,14 @@ class ScanController:
@classmethod
def clean_scan(cls, domain, scan: str):
-""" If there is only 1 file in the directory, deletes files of a scan.
+""" If analysis.json is missing or there is only 1 file in the directory, deletes files of a scan.
If that was the only scan, domain directory is deleted as well. (DB stays intact.)
Because if a scan fails, there is only analysis.json or nothing in the dir.
+Return True if the scan was deleted, False if nothing was deleted.
"""
scan_path = os.path.join(Config.CACHE_DIR, domain, scan)
-if len(os.listdir(scan_path)) <= 1:
+if not os.path.isfile(os.path.join(scan_path, Config.CRAWL_FILE)) or len(os.listdir(scan_path)) <= 1:
shutil.rmtree(scan_path)
domain_path = os.path.join(Config.CACHE_DIR, domain)
if len(os.listdir(domain_path)) == 0:
...
@@ -8,6 +8,7 @@
# except ImportError:
# from yaml import Loader, Dumper
import logging
+from os.path import join
from collections import defaultdict
import jsonpickle
@@ -48,7 +49,7 @@ class Crawl(defaultdict):
@staticmethod
def load_from_scan(domain, scan):
-filename = Config.CACHE_DIR + domain + "/" + scan + "/" + Config.CRAWL_FILE
+filename = join(Config.CACHE_DIR, domain, scan, Config.CRAWL_FILE)
try:
with open(filename, 'r') as f:
# return Crawl(state=load(f.read(), Loader=Loader))
...
@@ -14,7 +14,7 @@ class ScreenshotParser:
screenfile = crawl.cache_dir+'screenshot.base64'
if os.path.isfile(screenfile):
-with open(screenfile,"r+") as f:
+with open(screenfile, "r+") as f:
data = (b64decode(f.read()))
im = Image.open(io.BytesIO(data))
im.thumbnail(Config.THUMBNAIL_SIZE)  # 0.3 s
...