Commit fb6ebd63 authored by Edvard Rejthar's avatar Edvard Rejthar

systemd +nginx deployment design

* we do not pretend anymore we fetched URLs that were not live at the analysis time
* screenshot file metadata info optimization
* old files cleanup
parent c3e84053
{"name": "firefox_mdmaug_writer", "description": "Firefox disk writer", "path": "/opt/mdmaug/mdmaug/bin/firefox_mdmaug_writer.py", "type": "stdio", "allowed_extensions": [ "mdmaug@csirt.cz" ] }
{"name": "firefox_mdmaug_writer", "description": "Firefox disk writer", "path": "/opt/mdmaug/firefox_mdmaug_writer.py", "type": "stdio", "allowed_extensions": [ "mdmaug@csirt.cz" ] }
......@@ -32,6 +32,8 @@ echo " ***** Copying files into" $DESTINATION
mkdir $DESTINATION
cp -r mdmaug $DESTINATION
cp -r .mozilla $DESTINATION
cp misc/production.ini $DESTINATION
cp misc/firefox_mdmaug_writer.py $DESTINATION
cp *.md $DESTINATION
cd $DESTINATION
......
......@@ -4,25 +4,25 @@ Scans a website for a sign of a parasite hosts or commands.
## Installation
### First-run
1. Download ```git clone git@gitlab.labs.nic.cz:csirt/mdmaug.git /tmp/mdmaug```
2. Edit mdmaug/lib/config.py
3. You should generate a certificate to `mdmaug/cert-mdmaug.pem`, at least a self-signed one (not recommended): `openssl req -x509 -newkey rsa:4096 -nodes -out cert-mdmaug.pem -keyout key-mdmaug.pem`
4. Perform installation: ```/tmp/mdmaug/INSTALL```
5. Everything should be located in `/opt/mdmaug`.
6. For testing purposes, launch it under newly created `mdmaug` user: `su - mdmaug -c 'python3 -m mdmaug'`
7. Connect in the browser at: https://127.0.0.1:5000
8. Try analysing `https://127.0.0.1:5000/static/demopage.html` on local server
9. For deployment, configure nginx properly to be used with flask
7. Connect in the browser at: https://127.0.0.1:8000
8. Try analysing `https://127.0.0.1:8000/static/demopage.html` on local server
9. For deployment, configure nginx (`sudo apt install nginx`) properly to be used with flask:
* If you are using systemd you may want to copy `misc/mdmaug.service` to `/etc/systemd/system/` so that MDMaug runs after restart (or with `sudo service mdmaug start`)
* `misc/mdmaug.nginx` can be integrated to nginx `/etc/nginx/sites-available/` (& symlinked to `/etc/nginx/sites-enabled/`)
### Notes
* If you want a different number of profiles than 21, change `./INSTALL` + `mdmaug/lib/config.py` + `.mozilla/firefox/profiles.ini`
* You may put ```03 1,7,13,19 * * * ~/mdmaug-launch``` in ```crontab -e``` of user mdmaug.
* We are using Python3.6+, Firefox 62.0
## Tips
* You may use /static/demopage.html as a testing page.
* You may launch MDMaug with the environment variable `PORT` to change the port the application is bound to
### Troubleshooting
......@@ -31,9 +31,8 @@ Scans a website for a sign of a parasite hosts or commands.
#### Debugging session
I'm launching it like this:
`su - mdmaug -c 'export FLASK_APP=mdmaug.__main__:app && export PYTHONPATH=/opt/mdmaug/mdmaug && ./local/bin/flask run'`
* I'm launching it like this: `su - mdmaug -c 'LC_ALL=C.UTF-8 FLASK_ENV=development FLASK_APP=mdmaug.__main__:app flask run -h 217.31.202.41 -p 8001'`
* or `su - mdmaug -c 'PORT=8001 python3 -m mdmaug'`
#### Wanna see what Firefox is really doing?
* Shell into mdmaug. (`ssh -X ...`, `su - mdmaug`)
......@@ -41,4 +40,4 @@ I'm launching it like this:
* If no Firefox window appears try
* `xhost +local:mdmaug` if you're on the same machine
* `root@mdmaugmachine$xauth list` on remote root and `mdmaug$xauth add ...` display cookie
* When Firefox window appear, run MDMaug with `export FIREFOX_DEBUG=1`. Now, instead of virtual display your monitor should be used.
\ No newline at end of file
* When the Firefox window appears, run MDMaug with `FIREFOX_DEBUG=1`. Now, instead of the virtual display, your monitor should be used.
#!/usr/bin/env python3
import atexit
import datetime
import logging
import os
......@@ -13,6 +14,15 @@ from xvfbwrapper import Xvfb
from .lib.config import Config
from .lib.controller.api import Api
fileHandler = logging.FileHandler("mdmmmmmmm.log")
fileHandler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
fileHandler.setLevel(logging.WARNING)
consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logging.Formatter('%(levelname)s - %(message)s'))
consoleHandler.setLevel(logging.WARNING)
handlers = [fileHandler, consoleHandler]
logging.basicConfig(level=logging.INFO, handlers=handlers)
# assure the logging dir
if not os.path.exists(Config.LOG_DIR):
os.makedirs(Config.LOG_DIR)
......@@ -47,13 +57,18 @@ if result is not 0:
#Please run `sudo bin/nc-dumb.sh` to launch a server."
exit()
# we may want to specify another port for testing purposes I.E. to not compete with port defined in nginx configuration
if os.environ.get("PORT"):
Config.APP_PORT = int(os.environ.get("PORT"))
# run display and flask server
run_display = False if os.environ.get("FIREFOX_DEBUG", 0) == "1" else True
if run_display:
display = Xvfb()
display.start()
try:
if 1:
#try:
# print(f'Listening at https://{address}:{Config.APP_PORT}')
from .lib.controller.server import app as server
......@@ -66,6 +81,11 @@ try:
ssl_context=(Config.DIR + 'cert-mdmaug.pem', Config.DIR + 'key-mdmaug.pem'), threaded=True)
# for _ in range(Config.profile_count):
# threading.Thread(target=httpd.serve_forever).start()
except (KeyboardInterrupt, SystemExit):
if run_display:
display.stop()
# except (KeyboardInterrupt, SystemExit):
# if run_display:
# display.stop()
# clean up - shut down the display.
# (This can't be caught as a simple exception since uWSGI runs multiple workers.
# Note that the display gets stopped for every worker (superfluously).)
atexit.register(lambda: display.stop() if run_display else None)
#!/usr/bin/env python3
Works well but processes requests one by one:
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import threading
class Handler(BaseHTTPRequestHandler):
def do_GET(self):
self.send_response(200)
self.end_headers()
message = threading.currentThread().getName()
self.wfile.write(message)
self.wfile.write('\n')
return
# ThreadingMixIn must precede HTTPServer in the base list so its
# process_request override (one thread per connection) wins in the MRO.
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread."""
if __name__ == '__main__':
    # Serve the thread-name demo on localhost:80 until interrupted.
    httpd = ThreadedHTTPServer(('localhost', 80), Handler)
    print('Starting server, use <Ctrl-C> to stop')
    httpd.serve_forever()
# Stop here; the single-threaded variant below is kept only for reference.
exit()
from http.server import HTTPServer, SimpleHTTPRequestHandler
class Server(SimpleHTTPRequestHandler):
def do_GET(self):
self.send_response(200)
self.end_headers()
self.wfile.write("MDMaug: dumb server used as a landing page for the browser extension. " \
"Because this needs to be loaded before any web page but needs a web page to be loaded.".encode("UTF-8"))
# Single-threaded fallback: serve the landing page on localhost:80.
httpd = HTTPServer(('127.0.0.1', 80), Server)
# TLS wrapping kept for reference only (note: ssl.wrap_socket is deprecated;
# use ssl.SSLContext().wrap_socket if this is ever revived).
# httpd.socket = ssl.wrap_socket(httpd.socket,
# server_side=True,
# certfile= Config.DIR + 'python.pem', # together private + cert, http://stackoverflow.com/questions/19705785/python-3-https-webserver
# ssl_version=ssl.PROTOCOL_TLSv1)
# Blocks forever, handling one request at a time.
httpd.serve_forever()
Couldn't handle many requests at once
##!/usr/bin/env bash
#while true; do { echo -e "HTTP/1.1 200 OK\r\n$(date)\r\n\r\nMDMaug: dumb server used as a landing page for the browser extension. Because this needs to be loaded before any web page but needs a web page to be loaded." | nc -vl 80; } done
\ No newline at end of file
# XX shouldn't I delete this file?
# Launch MDMaug under the sandboxed mdmaug user.
su - mdmaug -c 'cd /home/mdmaug/mdmaug/ ; python3 mdmaug.py'
pkill python3 # On Ctrl+C inside `su`, only the terminal closes, not the Python script. This kills every Python process instead, but the sandboxed (virus-exposed) mdmaug user is meant to run only this script anyway.
\ No newline at end of file
......@@ -24,9 +24,9 @@ class Config:
profile_count = 21 # number of Firefox profiles. Its name is just a number – 0,1...
browser = 'firefox' # iceweasel, firefox. What browser gets launched.
config_file = '/opt/mdmaug/.cache/mdmaug-scans/_tmp/queue.cache' # RAM disk was too small: '/tmp/mdm/queue.cache'
APP_PORT = 5000
APP_IP = "127.0.0.1" # CHANGE HERE IF NOT RUN ON LOCALHOST
APP_HOST = f'http://{APP_IP}:{APP_PORT}' # YOU MAY NEED TO CHANGE **https**
APP_PORT = 8000
APP_IP = "217.31.202.41" # "127.0.0.1" # CHANGE HERE IF NOT RUN ON LOCALHOST
APP_HOST = f'https://{APP_IP}:{APP_PORT}' # YOU MAY NEED TO CHANGE **https**
LOG_DIR = "/opt/mdmaug/.cache/mdmaug-scans/_tmp/"
CACHE_DIR = "/opt/mdmaug/.cache/mdmaug-scans/"
ALLOWED_DESTINATION = {"mdm.nic.cz": "https://mdm.nic.cz",
......@@ -37,10 +37,11 @@ class Config:
lock = threading.RLock()
THUMBNAIL_SIZE = 640, 640
MAX_WHOIS_DOMAIN_THREADS = 10 # spusti maximalne 10 threadu doraz, jednou mi to totiz preteklo (kazda domena spusti jeste tolik threadu, kolik ma IP, ale tech byva jen par)
MAX_BROWSER_RUN_TIME = 25 # maximum time for a browser to run
MAX_BROWSER_RUN_TIME = 45 # maximum time for a browser to run
MAX_BROWSER_EXPIRATION = 15 # seconds that we wait before killing the browser (waiting for the files to be written)
EXPORT_NOTBLOCK_TLD = ".cz" # lowercase; this TLD is ignored in the export
CRAWL_FILE = "analysis.json"
SCREENSHOT_FILE = "screenshot.base64"
@staticmethod
def connect():
......
......@@ -6,8 +6,8 @@ from glob import escape
from flask import request
from peewee import IntegrityError
from mdmaug.lib.domains import domain2dir
from mdmaug.lib.model.crawl import Crawl
from ..domains import domain2dir
from ..model.crawl import Crawl
from .scan_controller import ScanController
from ..config import Config
from ..model.dbp import Encounter, Whitelist, Status
......
......@@ -75,7 +75,6 @@ class ScanController:
:param url: scanned url
:type cached: True = Any cached version, int = cached version X days old. If None or not found, site will be reanalysed
"""
print("tttttttttt", url)
url = assure_url(url)
if not url:
return f'Invalid URL {escape(url)} {url}'
......@@ -84,21 +83,24 @@ class ScanController:
if cached:
domain = domain2dir(url)
scans = self.get_domain_scans(domain)
if scans:
# scans = self.get_domain_scans(domain)
prep = Config.CACHE_DIR + domain + "/"
scans = sorted(self.get_domain_scans(domain), key=lambda s: os.path.getmtime(prep + s))
while scans:
# get the most recent snapdir and check if it's not too old
prep = Config.CACHE_DIR + domain + "/"
scan = max(scans, key=lambda s: os.path.getmtime(prep + s))
if not autoprune or not self.clean_scan(domain, scan):
if cached is True or os.path.getmtime(prep + scan) > time.time() - (3600 * 24 * cached):
if creation_spree:
return f"Scan for {domain} already exists."
try:
logger.debug(f"Returning a previous crawl from: {domain}/{scan}")
crawl = Crawl.load_from_scan(domain, scan)
return crawl
except ValueError:
pass
scan = scans.pop()
if autoprune and self.clean_scan(domain, scan):
continue
# if not autoprune or not self.clean_scan(domain, scan):
if cached is True or os.path.getmtime(prep + scan) > time.time() - (3600 * 24 * cached):
if creation_spree:
return f"Scan for {domain} already exists."
try:
logger.debug(f"Returning a previous crawl from: {domain}/{scan}")
crawl = Crawl.load_from_scan(domain, scan)
return crawl
except ValueError:
break
logger.debug(f"({-1}) Convenient cached analysis not found for url {url}")
# perform fresh analysis
......@@ -134,24 +136,23 @@ class ScanController:
# ,nsSocketTransport:5,nsStreamPump:5,nsHostResolver:5
logger.debug("({}) FF -P {} -no-remote {}".format(self.profile, self.profile, self.url))
# http://localhost/redirect/ gets stripped by the extension
command = f"export NSPR_LOG_MODULES=timestamp,nsHttp:5 ; export NSPR_LOG_FILE={logfile} ;" \
f" export CACHE_DIR={cache_dir}; export PROFILE={self.profile};" \
f"{Config.browser} -P {self.profile} -no-remote 'http://localhost/redirect/{self.url}'"
command = f"NSPR_LOG_MODULES=timestamp,nsHttp:5 NSPR_LOG_FILE={logfile} CACHE_DIR={cache_dir} PROFILE={self.profile}" \
f" {Config.browser} -P {self.profile} -no-remote 'http://localhost/redirect/{self.url}'"
# terminate Config.browser if he's not able to
# (everything has to be in single command because there is no heritance of $! amongst subprocesses)
# (everything has to be in single command because there is no inheritance of $! amongst subprocesses)
command += f" & echo $!;ii=0; while [ -n \"`ps -p $! | grep {Config.browser}`\" ];" \
f"do echo \"({self.profile}) running\" ;ii=$((ii+1)); if [ $ii -gt {Config.MAX_BROWSER_RUN_TIME} ];" \
f" do echo \"({self.profile}) running\" ;ii=$((ii+1)); if [ $ii -gt {Config.MAX_BROWSER_RUN_TIME} ];" \
f" then echo '({self.profile}) kill';kill $!; break;fi; sleep 1; done" # > /dev/null
logger.debug(command)
subprocess.call([command], shell=True)
logger.debug(f"({self.profile}) stopped!")
# shromazdit informace z analyz
# gather analysis information
crawl = Crawl(host=self.url, log_dir=log_dir, cache_dir=cache_dir, profile=self.profile)
expiration = 0
while not os.path.isfile(logfile): # i po zavreni FF nekdy trva, nez se soubor zapise
while not os.path.isfile(logfile): # it may take some time to write the file even after the FF closes
expiration += 1
logger.debug(f"({self.profile}) waiting to close...")
if expiration > Config.MAX_BROWSER_EXPIRATION:
......@@ -284,13 +285,14 @@ class ScanController:
@classmethod
def clean_scan(cls, domain, scan: str):
""" If there is only 1 file in the directory, deletes files of a scan.
""" If analysis.json is missing or there is only 1 file in the directory, deletes files of a scan.
If that was the only scan, domain directory is deleted as well. (DB stays intact.)
Because if a scan fails, there is only analysis.json or nothing in the dir.
Return True if scan was deleted, False if nothing was deleted.
"""
scan_path = os.path.join(Config.CACHE_DIR, domain, scan)
if len(os.listdir(scan_path)) <= 1:
if not os.path.isfile(os.path.join(scan_path, Config.CRAWL_FILE)) or len(os.listdir(scan_path)) <= 1:
shutil.rmtree(scan_path)
domain_path = os.path.join(Config.CACHE_DIR, domain)
if len(os.listdir(domain_path)) == 0:
......
......@@ -8,6 +8,7 @@
# except ImportError:
# from yaml import Loader, Dumper
import logging
from os.path import join
from collections import defaultdict
import jsonpickle
......@@ -17,7 +18,6 @@ from ..parser.spy_parser import SpyParser
logger = logging.getLogger("mdmaug")
# from yaml import load, dump
class Crawl(defaultdict):
""" Analysis results data model"""
......@@ -48,7 +48,7 @@ class Crawl(defaultdict):
@staticmethod
def load_from_scan(domain, scan):
filename = Config.CACHE_DIR + domain + "/" + scan + "/" + Config.CRAWL_FILE
filename = join(Config.CACHE_DIR, domain, scan, Config.CRAWL_FILE)
try:
with open(filename, 'r') as f:
# return Crawl(state=load(f.read(), Loader=Loader))
......
......@@ -12,9 +12,9 @@ class ScreenshotParser:
def __init__(self, crawl):
screenfile = crawl.cache_dir+'screenshot.base64'
screenfile = crawl.cache_dir + Config.SCREENSHOT_FILE
if os.path.isfile(screenfile):
with open(screenfile,"r+") as f:
with open(screenfile, "r+") as f:
data = (b64decode(f.read()))
im = Image.open(io.BytesIO(data))
im.thumbnail(Config.THUMBNAIL_SIZE) # 0.3 s
......@@ -26,4 +26,5 @@ class ScreenshotParser:
f.write(b64encode(data.getvalue()).decode("utf-8"))
f.truncate()
#"<img class='thumbnail' src='data:image/png;base64,{}' />".format(b64encode(data.getvalue()).decode("utf-8"))
crawl.screenfile = screenfile
os.rename(screenfile, crawl.cache_dir + Config.SCREENSHOT_FILE)
crawl.screenfile = 1 # screenfile
......@@ -49,7 +49,9 @@ class TrafficLogParser:
continue
# logger.debug(Domains.url2domain(url), Domains.url2path(url), path)
crawl[url2domain(url)].urls[url2path(url)].sourcefiles.append(path)
o = crawl[url2domain(url)].urls[url2path(url)]
if f.readline() != "": # some content has been fetched
o.sourcefiles.append(path)
@staticmethod
def nicify_file(sourcefile):
......
#!/bin/bash
# This file may be launched by CRON. 03 1,7,13,19 * * * ~/mdmaug/mdmaug-launch
echo "mdmaug-launch start" >> ~/log.log
# Kill any previous instance and its virtual display before relaunching.
pkill -f mdmaug.py
pkill -f Xvfb
# Make the package importable, then run it, piping all output to syslog.
export PYTHONPATH=$PYTHONPATH:/opt/mdmaug/mdmaug/
cd /opt/mdmaug/mdmaug && ./mdmaug.py 2>&1 | /usr/bin/logger -t mdmaugtag
whoami >> ~/log.log
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
......@@ -6,6 +6,7 @@ from ..lib.controller.scan_controller import ScanController
from ..lib.domains import is_suspicious, url2domain, domain2dir
from ..lib.model.dbp import Encounter
from ..lib.model.dbp import Whitelist
from ..lib.config import Config
class CrawlView:
......@@ -13,10 +14,11 @@ class CrawlView:
@staticmethod
def output_json(crawl, expand_screenfile=False):
# print(crawl)
if expand_screenfile and crawl.screenfile:
with open(crawl.screenfile, "r") as f:
crawl.screenfile = f.read()
if crawl.screenfile:
crawl.screenfile = crawl.cache_dir + Config.SCREENSHOT_FILE # expand '1' to screnshot file path
if expand_screenfile:
with open(crawl.screenfile, "r") as f:
crawl.screenfile = f.read()
output = {"screenfile": crawl.screenfile,
"host": crawl.host,
......
......@@ -105,7 +105,6 @@
<script>
var APP_HOST = "{{ APP_HOST }}"; // "http://localhost:5000"
</script>
<script src="static/homepage_script.js"></script>
<script src="static/mdmaug-analysis.js"></script>
......
......@@ -27,5 +27,5 @@ while True:
else:
method = "w"
files_encountered.add(file)
with open(file, method +"b") as f: # without writing as bytes, some pages end up with UnicodeEncodeError: 'ascii' codec can't encode character '\xe9' in position 197: ordinal not in range(128)
with open(file, method + "b") as f: # without writing as bytes, some pages end up with UnicodeEncodeError: 'ascii' codec can't encode character '\xe9' in position 197: ordinal not in range(128)
f.write(message["text"].encode("utf-8"))
##
# You should look at the following URL's in order to grasp a solid understanding
# of Nginx configuration files in order to fully unleash the power of Nginx.
# https://www.nginx.com/resources/wiki/start/
# https://www.nginx.com/resources/wiki/start/topics/tutorials/config_pitfalls/
# https://wiki.debian.org/Nginx/DirectoryStructure
#
# In most cases, administrators will remove this file from sites-enabled/ and
# leave it as reference inside of sites-available where it will continue to be
# updated by the nginx packaging team.
#
# This file will automatically load configuration files provided by other
# applications, such as Drupal or Wordpress. These applications will be made
# available underneath a path with that package name, such as /drupal8.
#
# Please see /usr/share/doc/nginx-doc/examples/ for more detailed examples.
##
# Default server configuration
#
server {
    # Plain-HTTP listeners (distribution defaults, kept as-is).
    listen 80 default_server;
    listen [::]:80 default_server;

    # SSL configuration
    #
    # HTTPS listeners used by MDMaug (matches Config.APP_PORT = 8000).
    listen 8000 ssl default_server;
    listen [::]:8000 ssl default_server;
    ssl_certificate /opt/mdmaug/mdmaug/cert-mdmaug.pem;
    ssl_certificate_key /opt/mdmaug/mdmaug/key-mdmaug.pem;
    #
    # Note: You should disable gzip for SSL traffic.
    # See: https://bugs.debian.org/773332
    #
    # Read up on ssl_ciphers to ensure a secure configuration.
    # See: https://bugs.debian.org/765782
    #
    # Self signed certs generated by the ssl-cert package
    # Don't use them in a production server!
    #
    # include snippets/snakeoil.conf;

    root /var/www/html;

    # Add index.php to the list if you are using PHP
    index index.html index.htm index.nginx-debian.html;

    server_name _;

    merge_slashes off; # we need to allow double slash in: /api=json/analyze/http://example.com

    # Serve static files directly; everything else goes to the app.
    location / {
        try_files $uri @mdmaug;
    }

    # Hand requests to the uWSGI app over its Unix socket.
    location @mdmaug {
        include uwsgi_params;
        uwsgi_pass unix:/tmp/mdmaug.sock;
    }
}
[Unit]
Description=uWSGI server instance configured to serve MDMaug

[Service]
# Extend PATH with the app directory so uWSGI can find helper scripts there.
Environment="PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/mdmaug/mdmaug"
WorkingDirectory=/opt/mdmaug
ExecStart=/usr/local/bin/uwsgi -s /tmp/mdmaug.sock --manage-script-name --ini /opt/mdmaug/production.ini
# Run as the sandboxed user; the www-data group presumably lets nginx reach
# the socket (chmod-socket = 660 in production.ini) — confirm permissions.
User=mdmaug
Group=www-data
# Auto-restart on crash, with a back-off delay.
Restart=on-failure
RestartSec=10s

[Install]
WantedBy=multi-user.target
[uwsgi]
# WSGI entry point: the Flask `app` object in mdmaug/__main__.py.
module = mdmaug.__main__
callable = app
master = true
# Worker count matches Config.profile_count (21 Firefox profiles).
processes = 21
# Unix socket shared with nginx (see uwsgi_pass in the nginx site config).
socket = /tmp/mdmaug.sock
chmod-socket = 660
# Remove the socket file on exit.
vacuum = true
die-on-term = true
# NOTE(review): uWSGI silently ignores unknown option names; this was likely
# intended as an environment variable (`env = UWSGI_SCHEME=https`) — confirm.
UWSGI_SCHEME = https
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment