Commit 4c9fd222 authored by Edvard Rejthar's avatar Edvard Rejthar

#29 autoprune, prune, creation spree, frontend threads ...

Signed-off-by: Edvard Rejthar's avatarEdvard Rejthar <edvard.rejthar@nic.cz>
parent 611f231d
# Mozilla User Preferences
// nepamatovat si historii (Preferences / Privacy / Firefox will use custom settings for history / Clear history when closes / Setting / All)
// Template for Firefox preferences.
/* Do not edit this file.
*
* If you make changes to this file while the application is running,
......@@ -39,7 +41,7 @@ user_pref("browser.safebrowsing.enabled", false);
user_pref("browser.safebrowsing.malware.enabled", false);
user_pref("browser.search.countryCode", "CZ");
user_pref("browser.search.region", "CZ");
user_pref("browser.selfsupport.url", "");
user_pref("browser.selfsupport.url", ""); // tato moznost normalne v about:config neni, ale omezuje to nejake zbytecnou telemetrii, viz Mozilla Heartbeat
user_pref("browser.sessionstore.max_resumed_crashes", -1);
user_pref("browser.sessionstore.resume_from_crash", false);
user_pref("browser.slowStartup.averageTime", 30464);
......@@ -63,7 +65,7 @@ user_pref("distribution.canonical.bookmarksProcessed", true);
user_pref("e10s.rollout.cohort", "test");
user_pref("e10s.rollout.cohortSample", "0.639855");
user_pref("experiments.activeExperiment", false);
user_pref("extensions.autoDisableScopes", 0);
user_pref("extensions.autoDisableScopes", 0); // moznost instalovat ze vsech umisteni
user_pref("extensions.blocklist.pingCountVersion", 0);
user_pref("extensions.bootstrappedAddons", "{\"firefox@getpocket.com\":{\"version\":\"1.0.5\",\"type\":\"extension\",\"descriptor\":\"/usr/lib/firefox/browser/features/firefox@getpocket.com.xpi\",\"multiprocessCompatible\":true,\"runInSafeMode\":true,\"dependencies\":[],\"hasEmbeddedWebExtension\":false},\"e10srollout@mozilla.org\":{\"version\":\"1.7\",\"type\":\"extension\",\"descriptor\":\"/usr/lib/firefox/browser/features/e10srollout@mozilla.org.xpi\",\"multiprocessCompatible\":true,\"runInSafeMode\":true,\"dependencies\":[],\"hasEmbeddedWebExtension\":false},\"webcompat@mozilla.org\":{\"version\":\"1.0\",\"type\":\"extension\",\"descriptor\":\"/usr/lib/firefox/browser/features/webcompat@mozilla.org.xpi\",\"multiprocessCompatible\":true,\"runInSafeMode\":true,\"dependencies\":[],\"hasEmbeddedWebExtension\":false},\"aushelper@mozilla.org\":{\"version\":\"1.0\",\"type\":\"extension\",\"descriptor\":\"/usr/lib/firefox/browser/features/aushelper@mozilla.org.xpi\",\"multiprocessCompatible\":true,\"runInSafeMode\":true,\"dependencies\":[],\"hasEmbeddedWebExtension\":false},\"langpack-cs@firefox.mozilla.org\":{\"version\":\"51.0.1\",\"type\":\"locale\",\"descriptor\":\"/usr/lib/firefox/browser/extensions/langpack-cs@firefox.mozilla.org.xpi\",\"runInSafeMode\":false,\"dependencies\":[],\"hasEmbeddedWebExtension\":false},\"langpack-en-GB@firefox.mozilla.org\":{\"version\":\"51.0.1\",\"type\":\"locale\",\"descriptor\":\"/usr/lib/firefox/browser/extensions/langpack-en-GB@firefox.mozilla.org.xpi\",\"runInSafeMode\":false,\"dependencies\":[],\"hasEmbeddedWebExtension\":false},\"langpack-en-ZA@firefox.mozilla.org\":{\"version\":\"51.0.1\",\"type\":\"locale\",\"descriptor\":\"/usr/lib/firefox/browser/extensions/langpack-en-ZA@firefox.mozilla.org.xpi\",\"runInSafeMode\":false,\"dependencies\":[],\"hasEmbeddedWebExtension\":false}}");
user_pref("extensions.databaseSchema", 19);
......@@ -78,7 +80,7 @@ user_pref("extensions.lastPlatformVersion", "51.0.1");
user_pref("extensions.pendingOperations", false);
user_pref("extensions.systemAddonSet", "{\"schema\":1,\"addons\":{}}");
user_pref("extensions.xpiState", "{\"app-system-defaults\":{\"firefox@getpocket.com\":{\"d\":\"/usr/lib/firefox/browser/features/firefox@getpocket.com.xpi\",\"e\":true,\"v\":\"1.0.5\",\"st\":1485985013000},\"e10srollout@mozilla.org\":{\"d\":\"/usr/lib/firefox/browser/features/e10srollout@mozilla.org.xpi\",\"e\":true,\"v\":\"1.7\",\"st\":1485985012000},\"webcompat@mozilla.org\":{\"d\":\"/usr/lib/firefox/browser/features/webcompat@mozilla.org.xpi\",\"e\":true,\"v\":\"1.0\",\"st\":1485985013000},\"aushelper@mozilla.org\":{\"d\":\"/usr/lib/firefox/browser/features/aushelper@mozilla.org.xpi\",\"e\":true,\"v\":\"1.0\",\"st\":1485985012000}},\"app-system-user\":{\"cinetickets@jetpack\":{\"d\":\"/home/edvard/.mozilla/extensions/{ec8030f7-c20a-464f-9b0e-13a3a9e97384}/cinetickets@jetpack\",\"e\":false,\"v\":\"0.1\",\"st\":1468510464000,\"mt\":1457095010000},\"mdmaug@jetpack\":{\"d\":\"/home/edvard/.mozilla/extensions/{ec8030f7-c20a-464f-9b0e-13a3a9e97384}/mdmaug@jetpack\",\"e\":false,\"v\":\"0.1\",\"st\":1457095010000,\"mt\":1457095010000}},\"app-global\":{\"{972ce4c6-7e08-4474-a285-3208198ce6fd}\":{\"d\":\"/usr/lib/firefox/browser/extensions/{972ce4c6-7e08-4474-a285-3208198ce6fd}.xpi\",\"e\":true,\"v\":\"51.0.1\",\"st\":1485985012000},\"langpack-cs@firefox.mozilla.org\":{\"d\":\"/usr/lib/firefox/browser/extensions/langpack-cs@firefox.mozilla.org.xpi\",\"e\":true,\"v\":\"51.0.1\",\"st\":1485988894000},\"langpack-en-GB@firefox.mozilla.org\":{\"d\":\"/usr/lib/firefox/browser/extensions/langpack-en-GB@firefox.mozilla.org.xpi\",\"e\":true,\"v\":\"51.0.1\",\"st\":1485988909000},\"langpack-en-ZA@firefox.mozilla.org\":{\"d\":\"/usr/lib/firefox/browser/extensions/langpack-en-ZA@firefox.mozilla.org.xpi\",\"e\":true,\"v\":\"51.0.1\",\"st\":1485988909000}},\"app-system-share\":{\"ubufox@ubuntu.com\":{\"d\":\"/usr/share/mozilla/extensions/{ec8030f7-c20a-464f-9b0e-13a3a9e97384}/ubufox@ubuntu.com.xpi\",\"e\":true,\"v\":\"3.2\",\"st\":1442597402000}}}");
user_pref("security.insecure_field_warning.contextual.enabled", false); // wrong https certificate
user_pref("security.insecure_field_warning.contextual.enabled", false); // ignore wrong https certificate
user_pref("media.gmp-gmpopenh264.abi", "x86_64-gcc3");
user_pref("media.gmp-gmpopenh264.lastUpdate", 1486578560);
user_pref("media.gmp-gmpopenh264.version", "1.6");
......@@ -86,7 +88,7 @@ user_pref("media.gmp-manager.buildID", "20170201180315");
user_pref("media.gmp-manager.lastCheck", 1486578560);
user_pref("media.gmp.storage.version.observed", 1);
user_pref("network.cookie.prefsMigrated", true);
user_pref("network.http.accept-encoding", "");
user_pref("network.http.accept-encoding", ""); // ukladame streamy, ale neumim je rozzipovat
user_pref("network.http.accept-encoding.secure", "");
user_pref("network.predictor.cleaned-up", true);
user_pref("pdfjs.migrationVersion", 2);
......@@ -106,7 +108,7 @@ user_pref("privacy.sanitize.sanitizeInProgress", "[\"cache\",\"cookies\",\"offli
user_pref("privacy.sanitize.sanitizeOnShutdown", true);
user_pref("signon.importedFromSqlite", true);
user_pref("toolkit.startup.last_success", 1486578497);
user_pref("toolkit.startup.max_resumed_crashes", -1);
user_pref("toolkit.startup.max_resumed_crashes", -1); // protoze i kdyz prohlizec nekdy killnu, nesmi me pri spusteni otravovat gui popupem
user_pref("toolkit.telemetry.cachedClientID", "8b5a3bbc-dcb3-44f6-bd86-f0fa1fd2fa15");
user_pref("toolkit.telemetry.previousBuildID", "20170201180315");
user_pref("toolkit.telemetry.reportingpolicy.firstRun", false);
......
......@@ -6,31 +6,37 @@ DESTINATION=/opt/mdmaug
PROFILE_COUNT=21
# sometimes, there is no repository with pip on the machine, trying to add some sources
echo " ***** Installing dependencies..."
apt install software-properties-common
add-apt-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main universe restricted multiverse"
apt update
apt install firefox python3 mariadb-server xvfb
pip3 install xvfbwrapper pymysql peewee flask wtforms jsonpickle bs4 pygments pillow requests humanize filelock
apt install firefox python3 mariadb-server xvfb python3-flask
pip3 install xvfbwrapper pymysql peewee flask wtforms jsonpickle bs4 pygments pillow requests humanize filelock ipdb
# current dir
# current_threads dir
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd $DIR
# mariadb setup
echo " ***** Setting up database..."
systemctl start mariadb.service
mysql -u root < mdmaug-installation.sql # populate db
mysql -uroot -e "CREATE USER \"mdmaug\"@\"localhost\" IDENTIFIED BY \"fidFDSs676\"; GRANT ALL PRIVILEGES ON mdmaug. * TO \"mdmaug\"@\"localhost\";" # new user
# adding user the server will be run under
echo " ***** Creating mdmaug user..."
useradd -m -d $DESTINATION mdmaug
# copy all important files
echo " ***** Copying files into" $DESTINATION
mkdir $DESTINATION
cp -r mdmaug $DESTINATION
cp -r .mozilla $DESTINATION
cp *.md $DESTINATION
cd $DESTINATION
# copy firefox profiles (about:config is stored at pref.js file)
echo " ***** Creating firefox profiles..."
for(( i=1; i<=$((PROFILE_COUNT-1)); i++ ))
do
DEST=$DESTINATION/.mozilla/firefox/$i/
......@@ -42,7 +48,9 @@ do
done
# adopt all files to the new user
echo " ***** Setting up mdmaug user..."
chown mdmaug:mdmaug -R $DESTINATION
chmod g+w -R $DESTINATION
# make the new user able to use the display (needed on Ubuntu 17.10 at least; after every restart :( not sure what to do)
xhost +local:mdmaug
......
......@@ -9,8 +9,10 @@ Scans a website for a sign of a parasite hosts or commands.
3. You should generate a certificate to `mdmaug/cert-mdmaug.pem`, at least a self-signed one (non recommended): `openssl req -x509 -newkey rsa:4096 -nodes -out cert-mdmaug.pem -keyout key-mdmaug.pem`
4. Perform installation: ```/tmp/mdmaug/INSTALL```
5. Everything should be located in `/opt/mdmaug`.
6. Launch under newly created `mdmaug` user: `su - mdmaug -c 'python3 -m mdmaug'`
7. Connect in the browser at: https://localhost:8000
6. For testing purposes, launch it under newly created `mdmaug` user: `su - mdmaug -c 'python3 -m mdmaug'`
7. Connect in the browser at: https://localhost:5000
8. Try analysing `/static/demopage.html` on local server
9. For deployment, configure nginx properly to be used with flask
### Notes
......@@ -18,22 +20,24 @@ Scans a website for a sign of a parasite hosts or commands.
* Certificate error: Make sure that the browser doesn't block the MDM-Augmented server if used from MDM.
* If you want other count of profiles than 21, change INSTALL + config.py + profiles.ini
* You may put ```03 1,7,13,19 * * * ~/mdmaug-launch``` in ```crontab -e``` of user mdmaug.
* We are using Python3.6+
* We are using Python3.6+, Firefox 62.0
## Tips
* You may use /static/demopage.html as a testing page.
### Troubleshooting
* Analysis stopped working after restart? Maybe you need to launch `xhost +local:mdmaug` command from a common user shell after every system restart :( I'm not sure.
## What is done to Firefox profiles?
We want neither blocking nor safebrowsing warnings. If you created the profiles manually, you'd use ```firefox -P```, the profile names being: 0,1...
For about:config changes, see pref.js. IE:
* toolkit.startup.max_resumed_crashes = -1 (protoze i kdyz prohlizec nekdy killnu, nesmi me pri spusteni otravovat gui popupem)
* network.http.accept-encoding = "" # ukladame streamy, ale neumim je rozzipovat
* extensions.autoDisableScopes = "0" # moznost instalovat ze vsech umisteni
* browser.selfsupport.url = "" # tato moznost normalne v about:config neni, ale omezuje to nejake zbytecnou telemetrii, viz Mozilla Heartbeat
* # nepamatovat si historii (Preferences / Privacy / Firefox will use custom settings for history / Clear history when closes / Setting / All)
* # nejsem si jist, nakolik to funguje, zrejme dost
* ...
#### Debugging session
I'm launching it like this:
`su - mdmaug -c 'export FLASK_APP=mdmaug.__main__:app && export PYTHONPATH=/opt/mdmaug/mdmaug && ./local/bin/flask run'`
#### Wanna see what Firefox is really doing?
* Shell into mdmaug. (`ssh -X ...`, `su - mdmaug`)
* Try launching Firefox manually by `firefox -P`, the profile names being: 0,1...
* If no Firefox window appears try
* `xhost +local:mdmaug` if you're on the same machine
* `root@mdmaugmachine$xauth list` on remote root and `mdmaug$xauth add ...` display cookie
* When Firefox window appear, run MDMaug with `export FIREFOX_DEBUG=1`. Now, instead of virtual display your monitor should be used.
\ No newline at end of file
......@@ -33,7 +33,7 @@ browser.runtime.onMessage.addListener(function (message) {
/**
* traffic listener
*/
// let trafficBlacklist = ["http://www.google.com/adsense/", "https://fbstatic-a.akamaihd.net/rsrc.php", "http://clients1.google.com/ocsp", "https://safebrowsing-cache.google.com/safebrowsing/", "https://safebrowsing.google.com/safebrowsing/", "https://tiles.services.mozilla.com/", "http://localhost/redirect/"];
let trafficBlacklist = ["http://www.google.com/adsense/", "https://fbstatic-a.akamaihd.net/rsrc.php", "http://clients1.google.com/ocsp", "https://safebrowsing-cache.google.com/safebrowsing/", "https://safebrowsing.google.com/safebrowsing/", "https://tiles.services.mozilla.com/", "http://localhost/redirect/"];
browser.webRequest.onBeforeRequest.addListener(
function (details) {
// log resources that we want
......
......@@ -35,7 +35,7 @@ CREATE TABLE `encounter` (
`host` varchar(255) COLLATE utf8_bin NOT NULL,
`ip` varchar(45) COLLATE utf8_bin DEFAULT NULL,
`port` int(11) NOT NULL,
`url` varchar(255) COLLATE utf8_bin NOT NULL,
`origin` varchar(255) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`id`),
KEY `status` (`status`),
CONSTRAINT `encounter_ibfk_1` FOREIGN KEY (`status`) REFERENCES `status` (`id`) ON UPDATE CASCADE
......
......@@ -2,8 +2,10 @@
import datetime
import logging
import os
#logging.basicConfig(level=logging.DEBUG, format="%(message)s") # init before flask so that logging is shown in the Terminal
# logging.basicConfig(level=logging.DEBUG, format="%(message)s") # init before flask so that logging is shown in the Terminal
import socket
import threading
from http.server import HTTPServer, SimpleHTTPRequestHandler
from flask import Flask
from xvfbwrapper import Xvfb
......@@ -19,37 +21,37 @@ logger = logging.getLogger("mdmaug")
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler())
# server setup
# kill any running browsers from last time
Api.reset()
# address = '0.0.0.0'
# init flask and jinja2 template filters
app = Flask(__name__, static_url_path="/static")
app.secret_key = b'as8!r"afERaa5'
# app.config["preferences"] = {
# "safebrowsing": True,
# "pdns": True,
# "geoip": False
# }
@app.template_filter('format_datetime')
def format_datetime(time, format='%y%m%d%H%M%S', target='%d.%m.%Y %H:%M'):
    """Jinja filter: re-render a timestamp string from *format* into *target*.

    NOTE(review): the parameters shadow the builtins ``time``/``format``;
    names are kept as-is because keyword callers may rely on them.
    """
    parsed = datetime.datetime.strptime(time, format)
    return parsed.strftime(target)
#app.jinja_env.filters['datetime'] = format_datetime
"""
httpd = HTTPServer((address, Config.APP_PORT), Server)
httpd.socket = ssl.wrap_socket(httpd.socket,
server_side=True,
# together private + cert, http://stackoverflow.com/questions/19705785/python-3-https-webserver
certfile=Config.DIR + 'cert-mdmaug.pem',
ssl_version=ssl.PROTOCOL_TLSv1)
"""
display = Xvfb()
display.start()
# check dumb server is running
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('localhost', 80)) # unfortunately, I wasn't able to listen on localhost:80 here, flask's interfering
if result is not 0:
print("The browser extension needs the address http://localhost/redirect exists."
" It seems you don't have any server running on localhost:80. \n\n"
"We've done tests with nc and simple python scripts serving as servers but none of them could handle bigger workload."
"\n\n Please run nginx.")
#Please run `sudo bin/nc-dumb.sh` to launch a server."
exit()
# run display and flask server
run_display = False if os.environ.get("FIREFOX_DEBUG", 0) == "1" else True
if run_display:
display = Xvfb()
display.start()
try:
# print(f'Listening at https://{address}:{Config.APP_PORT}')
......@@ -57,8 +59,13 @@ try:
app.register_blueprint(server)
if __name__ == "__main__":
app.run(host=Config.APP_IP, port=Config.APP_PORT, ssl_context=(Config.DIR + 'cert-mdmaug.pem', Config.DIR + 'key-mdmaug.pem'), threaded=True)
# Xrun a dump localhost:80 server so that browser extension works
# Xalright, any service running on localhost:80 will do
app.run(host=Config.APP_IP, port=Config.APP_PORT,
ssl_context=(Config.DIR + 'cert-mdmaug.pem', Config.DIR + 'key-mdmaug.pem'), threaded=True)
# for _ in range(Config.profile_count):
# threading.Thread(target=httpd.serve_forever).start()
except (KeyboardInterrupt, SystemExit):
display.stop()
if run_display:
display.stop()
#!/usr/bin/env python3
Works well but processes requests one by one:
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
import threading
class Handler(BaseHTTPRequestHandler):
    """Scratch HTTP handler that answers every GET with its thread name."""

    def do_GET(self):
        self.send_response(200)
        self.end_headers()
        # current_thread()/.name replace the deprecated currentThread()/getName()
        message = threading.current_thread().name
        # wfile is a binary stream in Python 3: encode before writing
        # (the original passed str, which raises TypeError)
        self.wfile.write(message.encode("utf-8"))
        self.wfile.write(b'\n')
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """Handle requests in a separate thread.

    ThreadingMixIn spawns one thread per incoming request; no extra code
    is needed here.
    """
if __name__ == '__main__':
server = ThreadedHTTPServer(('localhost', 80), Handler)
print('Starting server, use <Ctrl-C> to stop')
server.serve_forever()
exit()
from http.server import HTTPServer, SimpleHTTPRequestHandler
class Server(SimpleHTTPRequestHandler):
    """Dumb landing-page handler: every GET gets the same 200 + banner text."""

    def do_GET(self):
        self.send_response(200)
        self.end_headers()
        banner = ("MDMaug: dumb server used as a landing page for the browser extension. "
                  "Because this needs to be loaded before any web page but needs a web page to be loaded.")
        self.wfile.write(banner.encode("UTF-8"))
httpd = HTTPServer(('127.0.0.1', 80), Server)
# httpd.socket = ssl.wrap_socket(httpd.socket,
# server_side=True,
# certfile= Config.DIR + 'python.pem', # together private + cert, http://stackoverflow.com/questions/19705785/python-3-https-webserver
# ssl_version=ssl.PROTOCOL_TLSv1)
httpd.serve_forever()
Couldn't handle many requests at once
##!/usr/bin/env bash
#while true; do { echo -e "HTTP/1.1 200 OK\r\n$(date)\r\n\r\nMDMaug: dumb server used as a landing page for the browser extension. Because this needs to be loaded before any web page but needs a web page to be loaded." | nc -vl 80; } done
\ No newline at end of file
......@@ -11,6 +11,8 @@ class Pref:
safebrowsing = False # True
pdns = False # True
geoip = False
autoprune = True
creation_spree = False
@classmethod
def val2html(cls, param):
......
import json
import logging
import os
import subprocess
from builtins import FileNotFoundError
from glob import escape
from os import walk
from peewee import IntegrityError
from mdmaug.lib.model.crawl import Crawl
from .scan_controller import ScanController
from ..config import Config
from ..domains import domain2dir
from ..model.dbp import Encounter, Whitelist
from ..parser.traffic_log_parser import TrafficLogParser
from ...templates.crawl_view import CrawlView
......@@ -34,37 +31,20 @@ class Api:
days = map_[request["analyze"]]
else:
days = int(request["analyze"])
crawl = ScanController().launch(request["page"], days)
elif "inspect" in request:
# XXX → migrate to dbp
output = []
for row in Config.db.execute_sql("SELECT url from encounter where host = %s", request["inspect"]):
output.append(row[0])
return "<br>".join(output)
crawl = ScanController().launch(request["page"], days, request.get("autoprune") in ["y", "1", True],
request.get("creation_spree") in ["y", "1", True])
elif "aggregate" in request:
crawl = self.aggregate(request)
elif "decide" in request: # XX deprecated?
return self.get_undecided()
elif "scan" in request:
if "date" not in request:
request["date"] = ""
crawl = ScanController().get_scan(escape(request["scan"]), scan=escape(request["date"]))
elif "prune" in request:
return ScanController.prune()
elif "nicify" in request:
return TrafficLogParser.getStylesheet() + TrafficLogParser.nicify_file(request["page"])
elif "vote" in request: # /api/vote/block/example.org/10.0.0.1
logger.debug("vote cmd")
return Encounter.vote(request["vote"], request["page"])
elif "scan" in request:
if "date" not in request:
request["date"] = ""
crawl = ScanController().get_scan(escape(request["scan"]), date=escape(request["date"]))
elif "whitelist" in request: # XXX not implemented yet
"""url = path.split("/", 3)
if len(url) > 3:
self._setWebsite(url[2]) # osetrit self.website, ze je URL, a nikoli shell
logger.debug("XXX nejsem si jist, zda url je spravne na url[2]") # XXX
logger.debug(url) # XXX
quit() # XXX
logger.debug(self.website)
logger.debug(self.origin_domain)
return self.whitelist()"""
return "Implement first if needed."
elif "reset" in request:
self.reset()
return "reset"
......@@ -76,8 +56,6 @@ class Api:
return crawl # containing an error message
elif request["api"] == "json":
return CrawlView.output_json(crawl)
#elif request["api"] == "mdmaug":
# return CrawlView.output_mdmaug(crawl)
else:
return CrawlView.output_html(crawl)
......@@ -86,30 +64,21 @@ class Api:
date_from = int(request["from"])
date_to = int(request["to"])
crawl = Crawl()
cwd = os.getcwd()
os.chdir(Config.CACHE_DIR)
scan_count = set()
domain_count = set()
for domain, scans, _ in walklevel(".", 1):
try:
for scan in scans:
if date_from < int(scan) < date_to:
print("importing", domain)
try:
scan_count.add(scan)
domain_count.add(domain)
crawl += Crawl.load_from_file("/".join([domain, scan, Config.CRAWL_FILE]))
except FileNotFoundError:
logger.warning("Wrong analysis stored at %s/%s", domain, scan)
pass
# print(roots, dirs)
except ValueError:
pass
for domain, scan in ScanController.get_all_scans():
if date_from < int(scan) < date_to:
try:
scan_count.add("/".join([domain, scan]))
domain_count.add(domain)
crawl += Crawl.load_from_scan(domain, scan)
except FileNotFoundError:
logger.warning("Wrong analysis stored at %s/%s", domain, scan)
pass
crawl.title = f"Merged {len(scan_count)} scans from {len(domain_count)} domains"
if not crawl:
crawl = "No scan between these dates."
os.chdir(cwd)
return crawl
@staticmethod
......@@ -126,7 +95,7 @@ class Api:
# self._logger.debug(Db.cur.execute("""REPLACE INTO whitelist set domain = %s""", (self.origin_domain, )))
# Db.connection.commit()
# Db.cur.close()
return # not yet implemented
return # not yet implemented
try:
Whitelist.insert(domain=self.origin_domain).execute()
except IntegrityError:
......@@ -137,13 +106,22 @@ class Api:
logger.debug("XXX not implemented yet - table of undecideds domain since last export")
pass
def walklevel(some_dir, level=1):
    """Like os.walk, but descend at most *level* directory levels below *some_dir*."""
    some_dir = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(some_dir)
    base_depth = some_dir.count(os.path.sep)
    for root, dirs, files in os.walk(some_dir):
        yield root, dirs, files
        # prune the walk once we've reached the depth limit
        if root.count(os.path.sep) >= base_depth + level:
            dirs[:] = []
\ No newline at end of file
# elif "inspect" in request:
# # XXX → migrate to dbp
# output = []
# for row in Config.db.execute_sql("SELECT url from encounter where host = %s", request["inspect"]):
# output.append(row[0])
# return "<br>".join(output)
# elif "decide" in request: # XX deprecated?
# return self.get_undecided()
# elif "whitelist" in request: # XXX not implemented yet
# """url = path.split("/", 3)
# if len(url) > 3:
# self._setWebsite(url[2]) # osetrit self.website, ze je URL, a nikoli shell
# logger.debug("XXX nejsem si jist, zda url je spravne na url[2]") # XXX
# logger.debug(url) # XXX
# quit() # XXX
# logger.debug(self.website)
# logger.debug(self.origin_domain)
# return self.whitelist()"""
# return "Implement first if needed."
......@@ -2,15 +2,17 @@ import datetime
import json
import logging
import os
import shutil
import subprocess
import time
import traceback
from glob import glob
from itertools import combinations_with_replacement
from json import JSONDecodeError
from random import randint
from flask import escape
from filelock import FileLock
from flask import escape
from ..config import Config
from ..domains import domain2dir, assure_url, url2domain
......@@ -35,61 +37,66 @@ class ScanController:
self.lock = None
@staticmethod
def get_scan(domain, date=""):
def get_scan(domain, scan: str = ""):
"""
:param domain: hostname
:param date: time of scan, if not specified, we return the last
:param scan: time of scan, if not specified, we return the last
:return: Crawl object
"""
scans = ScanController().get_domain_snapdirs(domain, full_dirs=True)
scans = ScanController().get_domain_scans(domain)
if not scans:
return False
else:
if date:
if date in scans:
scan = date
else:
if scan:
if scan not in scans:
return "Scan wasn't performed at this time."
else:
scan = scans[0]
return Crawl.load_from_file(scan+"/"+Config.CRAWL_FILE)
return Crawl.load_from_scan(domain, scan)
@staticmethod
def get_domain_snapdirs(domain, full_dirs=True):
def get_domain_scans(domain):
d = Config.CACHE_DIR + domain + "/"
if os.path.isdir(d):
return [str(d + subdir) if full_dirs else str(subdir) for subdir in os.listdir(d) # all possible snapshot directories
if os.path.isdir(str(d + subdir)) and os.path.isfile(d + subdir + "/" + Config.CRAWL_FILE)]
# Xfor i in os.scandir(Config.CACHE_DIR + ):
# all possible snapshot directories
return [scan for scan in os.listdir(d) if os.path.isdir(d + scan)]
# X and os.path.isfile(d + scan + "/" + Config.CRAWL_FILE)
def launch(self, url, cached=None):
def launch(self, url, cached=None, autoprune=False, creation_spree=False):
"""
:param creation_spree: if true and using cache, we'll skip an existing analysis. (Good when batch-analysing a large list.)
:type autoprune: bool, if true and using cache, we'd delete an old analysis that is not complete rather than returning it.
:param url: scanned url
:type cached: True = Any cached version, int = cached version X days old. If None or not found, site will be reanalysed
"""
if cached:
snapdirs = self.get_domain_snapdirs(domain2dir(url))
if snapdirs:
# get the most recent snapdir and check if it's not too old
cache_dir = max(snapdirs, key=os.path.getmtime) + "/"
if cached is True or os.path.getmtime(cache_dir) > time.time() - (3600 * 24 * cached):
try:
logger.debug(f"Returning a previous crawl from: {cache_dir + Config.CRAWL_FILE}")
crawl = Crawl.load_from_file(cache_dir + Config.CRAWL_FILE)
return crawl
except ValueError:
pass
logger.debug("({-1}) Convenient cached analysis not found")
u = assure_url(url)
if not u:
url = assure_url(url)
if not url:
return f'Invalid URL {escape(url)}'
else:
self.url = u
self.url = url
if cached:
domain = domain2dir(url)
scans = self.get_domain_scans(domain)
if scans:
# get the most recent snapdir and check if it's not too old
prep = Config.CACHE_DIR + domain + "/"
scan = max(scans, key=lambda s: os.path.getmtime(prep + s))
if not autoprune or not self.clean_scan(domain, scan):
if cached is True or os.path.getmtime(prep + scan) > time.time() - (3600 * 24 * cached):
if creation_spree:
return f"Scan for {domain} already exists."
try:
logger.debug(f"Returning a previous crawl from: {domain}/{scan}")
crawl = Crawl.load_from_scan(domain, scan)
return crawl
except ValueError:
pass
logger.debug(f"({-1}) Convenient cached analysis not found for url {url}")
# perform fresh analysis
self.lock = FileLock(Config.config_file + ".lock")
if self.queue(url): # /api/analyze/web - queue current analysis
if self.queue(url): # /api/analyze/web - queue current_threads analysis
print(f"({self.profile}) start crawl")
# noinspection PyBroadException
try:
......@@ -100,7 +107,7 @@ class ScanController:
# XX Pokud je potiz, ze JS zabiji FF, mozno experimentovat s ulimit -Sv 500000;
return f"PROFILE EXCEPTION ({self.profile}) {e} See logs, i.e. mdmaug/nohup.out. "
crawl.save_to_file(crawl.cache_dir + Config.CRAWL_FILE) # save search results
crawl.save_to_file() # save search results
return crawl
else:
......@@ -239,3 +246,58 @@ class ScanController:
time.sleep(randint(5, 10))
else:
return False
@classmethod
def prune(cls):
    """Delete every failed (incomplete) scan from the cache.

    Walks all cached domain/scan directories and removes those that
    clean_scan() judges incomplete.

    :return: human-readable summary of how many scans/domains were pruned
    """
    scan_count = 0
    domain_count = set()
    for domain, scan in cls.get_all_scans():
        if cls.clean_scan(domain, scan):
            scan_count += 1
            domain_count.add(domain)
    return f"Pruned {scan_count} scans at {len(domain_count)} domains."
@staticmethod
def get_all_scans():
    """Generate (domain, scan) pairs for every cached scan.

    Notice: temporarily changes CWD to Config.CACHE_DIR. The original
    restored CWD only when the generator was fully exhausted; the
    try/finally now restores it even when the generator is closed early
    or an exception propagates.

    :rtype: (str, str)
    """
    cwd = os.getcwd()
    os.chdir(Config.CACHE_DIR)
    try:
        for domain, scans, _ in walklevel(".", 1):
            if domain in [".", "./_tmp"]:  # ignore root dir and browser _tmp dir
                continue
            for scan in scans:
                yield domain, scan
    finally:
        os.chdir(cwd)
@classmethod
def clean_scan(cls, domain, scan: str):
    """Remove the files of a failed scan (a directory holding at most one file).

    If the removed scan was the domain's last one, the now-empty domain
    directory is deleted as well. The database stays intact.
    Rationale: when a scan fails, only analysis.json (or nothing) is left
    in its directory.
    """
    scan_dir = os.path.join(Config.CACHE_DIR, domain, scan)
    if len(os.listdir(scan_dir)) > 1:
        return False  # more than one file present: looks complete, keep it
    shutil.rmtree(scan_dir)
    domain_dir = os.path.join(Config.CACHE_DIR, domain)
    if not os.listdir(domain_dir):
        shutil.rmtree(domain_dir)  # that was the domain's last scan
    return True
def walklevel(some_dir, level=1):
    """Yield os.walk tuples for *some_dir*, descending at most *level* levels."""
    some_dir = some_dir.rstrip(os.path.sep)
    assert os.path.isdir(some_dir)  # NOTE(review): assert is stripped under -O
    num_sep = some_dir.count(os.path.sep)
    for root, dirs, files in os.walk(some_dir):
        yield root, dirs, files
        num_sep_this = root.count(os.path.sep)
        if num_sep + level <= num_sep_this:
            del dirs[:]  # prune: stop os.walk from descending further
import datetime
import json
import logging
import re
from html import escape
from flask import Blueprint, send_from_directory, render_template, request, make_response
from wtforms import Form
from wtforms import Form, Label
from wtforms.fields import BooleanField
from ..config import Config, Pref
from ..controller.api import Api
from ..model.dbp import Export
......@@ -31,8 +29,6 @@ def update_preferences():
# current_app.config["preferences"][k] = v
@app.route('/test')
def index():
resp = make_response("fds")
......@@ -41,23 +37,31 @@ def index():
return resp
def _generate_boolean(name, label=None, title=""):
    """Build a BooleanField for preference *name*, defaulted from Pref.

    *label* falls back to the title-cased preference name; *title* becomes
    the rendered label's tooltip.
    """
    label_text = label if label else name.title()
    field_label = Label(name, label_text)
    return BooleanField(field_label(title=title), default=getattr(Pref, name),
                        render_kw={"value": Pref.val2html(name)})
@app.route('/')
def homepage():
update_preferences()
class OptionsForm(Form):
# pref = defaultdict(bool, current_app.config["preferences"])
safebrowsing = BooleanField('Google Safebrowsing', default=Pref.safebrowsing,
render_kw={"value": Pref.val2html("safebrowsing")})
pdns = BooleanField('PDNS', default=Pref.pdns, render_kw={"value": Pref.val2html("pdns")})
geoip = BooleanField('geoip', default=Pref.geoip, render_kw={"value": Pref.val2html("geoip")})
safebrowsing = _generate_boolean("safebrowsing", 'Google Safebrowsing',
"Call Safebrowsing service for every object. (Slow)")
pdns = _generate_boolean("pdns", "PDNS")
geoip = _generate_boolean("geoip", "Geoip")
autoprune = _generate_boolean("autoprune", "Autoprune", "Do not return cached stubs for the scans the analysis failed")
creation_spree = _generate_boolean("creation_spree", "Creation spree",
"If the analysis exist, do not download. (Quicker for batch analyzing.)")