Commit ea4eaee1 authored by Edvard Rejthar

-||-

parent 3f70ca62
# mdmaug@jetpack.xpi has to be copied to ~/.mozilla/firefox/[1-20]/extensions/mdmaug@jetpack.xpi. The same goes for prefs.js.
# The copyfile function used here replaces the old xpi file.
import os
from shutil import copyfile
#xpiSourceFilepath = "/home/edvard/edvard/skola/diplomka/mdmaug-addon/mdmaug.xpi" # source path to the file, including the file name
#xpiFilename = "mdmaug@jetpack.xpi" # what the file name should be
#profilesDir = "/mnt/mdmaug/home/mdmaug/.mozilla/firefox/" # path to the profile directory root in .mozilla
sourceFilepath = "/mnt/mdmaug/home/mdmaug/.mozilla/firefox/0/prefs.js" # source path to the file, including the file name
filenameNew = "prefs.js" # what the file name should be
profilesDir = "/mnt/mdmaug/home/mdmaug/.mozilla/firefox/" # path to the profile directory root in .mozilla
#quit("Edit before running")
for profileNum in range(1,21):
dir = profilesDir + str(profileNum) + "/"
if not os.path.exists(dir): os.makedirs(dir)
#dir += "extensions/"
#if not os.path.exists(dir): os.makedirs(dir)
print("From {} to {}".format(sourceFilepath, dir + filenameNew))
copyfile(sourceFilepath, dir + filenameNew)
su - mdmaug -c 'cd /home/mdmaug/mdmaug/ ; python3 mdmaug.py'
killall python3 # with Ctrl+C inside the su command only the terminal closes, not the Python script. This kills every Python process instead, but the sandboxed (virus-exposed) mdmaug user is supposed to run only this script anyway.
__author__ = "edvard"
__date__ = "$Apr 27, 2015 5:40:11 AM$"
from yaml import load, dump
try:
from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
from yaml import Loader, Dumper
from collections import defaultdict
from lib.config import Config
class Crawl(defaultdict):
""" Objekt Crawl udružuje výsledky vyhledávání """
def __str__(self):
r = "Výsledky vyhledávání - navštíveno {} domén".format(len(self))
for key in self.keys():
r += "\n* " + key + " " + str(self[key])
return r
def saveToFile(self,filename):
with open(filename, "w") as f:
f.write(dump(self.__getstate__(), Dumper=Dumper))
def loadFromFile(filename):
with open(filename, 'r') as f:
return Crawl(state = load(f.read(), Loader=Loader))
def __init__(self, state = None, logDir = None, cacheDir = None):
""" State muze obsahovat vystup __getstate__() (serializace YAMLem) """
self.default_factory = _Domain
self.screenfile = None # HTML output XXX
if logDir:
self.logDir = logDir
if cacheDir:
self.cacheDir = cacheDir
if state:
self.__setstate__(state)
pass
def __getstate__(self):
state = self.__dict__.copy()
state["keys"] = [[x for x in (key, self[key].__getstate__())] for key in self.keys()]
return state
def __setstate__(self,state):
#print("fdsfsfds",self.__dict__)
for tup in state["keys"]:
key, val = tup
self[key].__setstate__(val)
del state["keys"]
self.__dict__ = state
class _Domain(defaultdict):
""" Navstivena domena behem crawlu """
def __str__(self):
r = "{} adres a {} url".format(len(self.addresses), len(self.urls))
for key in self.urls.keys():
r += "\n " + key + " " + str(self.urls[key])
for key in self.addresses.keys():
r += "\n " + key + " " + str(self.addresses[key])
if self.pdns:
r += "Informace z PDNS:\n"
for key in self.pdns:
r += key + " "
else:
r += "\n Žádné informace z PDNS."
return r
def __init__(self):
#self.vote = None
#self.urls = set()
self.urls = defaultdict(_Url)
self.addresses = defaultdict(_Address)
self.pdns = set()
def __getstate__(self):
state = self.__dict__.copy()
state["addresses"] = [[x for x in (key, self.addresses[key].__dict__)] for key in self.addresses]
state["urls"] = [[x for x in (key, self.urls[key].__dict__)] for key in self.urls]
return state
#return {'urls': self.urls, 'vote':self.vote, 'addresses':
# [[x for x in (key, self.addresses[key].__dict__)] for key in self.addresses]}
def __setstate__(self, state):
for tup in state["addresses"]:
key, val = tup
self.addresses[key].__dict__ = val
del state["addresses"]
for tup in state["urls"]:
key, val = tup
self.urls[key].__dict__ = val
del state["urls"]
for key in state:
self.__dict__[key] = state[key]
return
class _Url(set):
""" Unikatni navstivena url """
def __str__(self):
return str(self.__dict__)
#return "spyfile {} {}, vote {}".format(self.spyfile, self.city, self.vote)
def __init__(self):
self.spyfile = None # path to the file with the suspicious code run by the page
self.sourcefile = None # path to the file with the source code
def setSourcefile(self, sourcefile):
if self.sourcefile != None:
raise RuntimeError('sourcefile', 'has already been defined') # we are not prepared for this - the file-name function in main.js is probably wrong, because it differs from the Domains function
self.sourcefile = sourcefile
def setSpyfile(self, spyfile):
if self.spyfile != None:
raise RuntimeError('spyfile', 'has already been defined') # we are not prepared for this - the file-name function in main.js is probably wrong, because it differs from the Domains function
self.spyfile = spyfile
class _Address(set):
""" Adresa navstivene domeny """
#def __getstate__(self):
# return {self.country,self.city, self.vote}
#def __setstate__(self):
# pass
def __str__(self):
return "K adrese je připojena geolokace {} {}".format(self.country, self.city) # , vote {} , self.vote
def __init__(self):
self.country = None
self.city = None
#self.vote = None
"""
Example (and yaml-serialization check):
c = Crawl()
c["seznam.cz"].urls["/aurl"].spyfile = "/soubor-spyfil"
c["seznam.cz"].urls["/aurl"].sourcefiles.add("/1.source")
c["seznam.cz"].urls["/aurl"].sourcefiles.add("/2.source")
c["seznam.cz"].addresses["8.8.8.8"]
c["seznam.cz"].addresses["9.5.2.1"]
c["seznam.cz"].addresses["8.8.8.8"].country = "preague"
#c["centrum.cz"].addresses["8.8.8.8"].vote = "yes"
#c.__getstate__()
e = Crawl()
e.__setstate__( c.__getstate__() )
#e = dill.loads(dill.dumps(c))
print(str(c) == str(e))
print(c)
output = dump(c.__getstate__(), Dumper=Dumper)
e = Crawl()
e.__setstate__(load(output, Loader=Loader))
print(e)
print(str(c) == str(e))
"""
from pygments.lexer import include
from pygments.lexers import JavascriptLexer
from pygments.token import *
class MdmaugJsLexer(JavascriptLexer):
name = 'MdmaugJs'
aliases = ['mdmaug']
tokens = JavascriptLexer.tokens
tokens["root"].insert(0,(r'(eval|document.write|window.open|open|window.location|location|document.location|document.cookie)\b', Generic.Error))
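# A minimal usage sketch (assumption: pygments is installed; the snippet text is made up).
# It highlights a short JS fragment; the calls listed in the rule above come out tokenized as
# Generic.Error, which the ".gr" rule in TrafficLogParser.getStylesheet() is meant to render
# in white on red.
from pygments import highlight
from pygments.formatters import HtmlFormatter

if __name__ == "__main__":
    snippet = "eval(document.cookie); var a = 1;"
    print(highlight(snippet, MdmaugJsLexer(), HtmlFormatter()))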
DEBUG:root:This message should go to the log file
INFO:root:So should this
WARNING:root:And this, too
WARNING:root:warning1
DEBUG:root:debug1
import datetime
import logging
import threading
from lib.config import Config
from lib.dbp import Export
from lib.dbp import Status
from lib.dbp import Turris
from lib.dbp import Whitelist
from lib.domains import Domains
class MetadataParser:
""" Obohatit vysledky vyhledavani (objekt crawl) o whois informace a informace z db"""
###
## Prida do objektu crawl aktualni informace z db, vote jednotlivych domen.
#def addFreshData(self, crawl):
# for domain in crawl.keys():
# crawl[domain].vote = "0"
def __init__(self, crawl, websiteDomain):
self.websiteDomain = websiteDomain
#each domain spawns its own thread - it takes a while to load its geoIP
domains = list(crawl.keys())
domainThreadNumber = 0
while len(domains): # start at most Config.MAX_WHOIS_DOMAIN_THREADS threads at once; it overflowed once (each domain additionally starts as many threads as it has IPs, but those are usually just a few)
threads = []
count = 0
while len(domains):#for domain in domains.pop:
count += 1
if count > Config.MAX_WHOIS_DOMAIN_THREADS:
break
domain = domains.pop()
domainThreadNumber += 1
t = threading.Thread(target=self.addDomain, args=(crawl[domain], domain, domainThreadNumber))
threads.append(t)
t.start()
#consolidate the information about the domains
for thread in threads:
thread.join()
#summarize the information about a domain
def addDomain(self, crawlDomain, domainEncountered,domainThreadNumber):
domainNaked = Domains.url2domain(domainEncountered) #domainEncountered[domainEncountered.find("//") + 2:] # http://seznam.cz -> seznam.cz; //ajax.googleapis.com -> ajax.googleapis.com
logging.debug("domena: " + domainNaked)
if domainNaked in [self.websiteDomain, "127.0.0.1", "localhost"]: #domena samu sebe ignoruje. A kdybych nekdy zablokoval localhost, popravili by me.
logging.debug("skip itself")
else:
if Whitelist.matches(domainEncountered): #is the domain in the whitelist of 2nd-level domains?
logging.debug("skip whitelisted")
else:
#the domain is not whitelisted
# load its IPs
#vote = None
threads = []
#queueIp = queue.Queue()
#print("DOMAINNAKED {}".format(domainNaked))
threadNumber = 0
for ip_frame in Domains.get_ips_for_host(domainNaked): # (10, 1, 6, '', ('2001:888:2000:d::a2', 80, 0, 0))
threadNumber += 1
ip = ip_frame[4][0]
t = threading.Thread(target=self.addAddress, args=(crawlDomain.addresses[ip], ip, domainEncountered, crawlDomain.pdns, threadNumber,domainThreadNumber)) #r += self.addAddress(ip_frame)
threads.append(t)
t.start()
#consolidate the information about the IPs of the domain
for thread in threads:
thread.join()
if len(threads) == 0: #the domain has no IP at all, it is suspended
#we still want to record the evil host - at least with a null ip
#vote =
self.addAddress(None, None, domainEncountered,crawlDomain.pdns,0.0)
#if vote == None:vote = ""
#crawlDomain.vote = vote
##
# Country and city of the IP - loaded in threads, each one takes about a second
# crawlDomainIp is an Address object
# ip is the key that pulls our Address object out of the Domain object.
#
def addAddress(self, crawlDomainIp, ip, remoteHost, pdns, threadNumber,domainThreadNumber):
#vote = "n/a"
#import pdb;pdb.set_trace()
#logging.debug("!!("+str(threadNumber)+","+str(domainThreadNumber)+") REMOTE " + " host: " + str(remoteHost))
with Config.lock:
#logging.debug("!("+str(threadNumber)+","+str(domainThreadNumber)+") REMOTE " + " host: " + str(remoteHost))
#update the domain's encounter timestamp so that it appears in the next export #X Db.cur.
#Db.cur = Db.connection.cursor()
if ((Turris.update(timestamp=datetime.datetime.now()).where(Turris.remoteHost == remoteHost).execute() == 0 # the domain has no name in the db yet (maybe it has an IP there) X the domain has no ip, so it is in the database only once, we look it up by the domain name
and Turris.select().where(Turris.remoteHost == remoteHost).count() == 0) # in three hours I could not find out why, but the update returns ZERO. Yet as soon as I invoke pdb, it correctly returns, say, eight. The rascal. So I put this select here, it works better. The strange thing is that on the next line the update by IP returns the correct number. I have not tried swapping them.
or Turris.update(timestamp=datetime.datetime.now()).where(Turris.ip == ip).execute() == 0): # the domain has no IP in the db (maybe there is a record with the same name and a different IP) X the domain has an ip, each one is in the db separately, we look it up by the ip
#if remoteHost == "www.corradorossi.it":
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "SELECT " + str(Turris.select().count()))
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "SELECT " + str())
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + str(Turris.update(timestamp=datetime.datetime.now()).where(Turris.remoteHost == remoteHost).execute()))
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + str(Turris.update(timestamp=datetime.datetime.now()).where(Turris.ip == ip).execute()))
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "done")
#import pdb;pdb.set_trace()
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "PRIDAVAM")
#adresu jsme jeste nepotkali, neni v tabulce turris
#pridat domenu do turris
#logging.debug("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "INSERT " + remoteHost)
Turris.insert(ip=ip, port=80, url=self.websiteDomain, remoteHost=remoteHost).execute() # XXX port muze byt jiny nez 80
#Db.cur.execute("""INSERT INTO turris (ip, port, url, `evil host`) VALUES (%s,%s,%s,%s)""", (ip, 80, self.websiteDomain, remoteHost))
#Db.connection.commit()
#Db.cur.close()
else:
#print("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "NEPRIDAVAM")
pass
#logging.debug("("+str(threadNumber)+","+str(domainThreadNumber)+") " + "ADDRESS " + " ip: " + str(ip)) #+ str(vote)
if ip != None:
#crawlDomainIp.vote = vote
#contact an external geoIP service
crawlDomainIp.country, crawlDomainIp.city = Domains.ip2countryAndCity(ip)
# contact PDNS
pdns.update(Domains.ip2pdnsDomains(ip))
return None
else: #no ip is available, the domain has probably lapsed, the vote belongs to the domain name
#return vote
return None
##try:
##if ip == None: #the domain has no ip, so it is in the database only once, we look it up by the domain name
#Db.cur.execute("""SELECT status from turris JOIN status ON status.id = turris.status WHERE `evil host` = %s LIMIT 1""", (remoteHost,))
##status = Turris.select().join(Status, on=(Status.id == Turris.status)).where(Turris.remoteHost == remoteHost).limit(1).get().status
##else: #the domain has an ip, each one is in the db separately, we look it up by the ip
#Db.cur.execute("""SELECT status from turris JOIN status ON status.id = turris.status WHERE `ip` = %s LIMIT 1""", (ip,))
##status = Turris.select().join(Status, on=(Status.id == Turris.status)).where(Turris.ip == ip).limit(1).get().status
#res = Db.cur.fetchone()
##vote = Status.int2word(status)
##except Exception: # a generic exception is bad, but I am not going to lose my mind over those imports X TurrisDoesNotExist
## pass
##logging.debug("vote " + str(vote))
#pass
#else:
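# A condensed sketch of the thread-batching pattern used in MetadataParser.__init__ above:
# take at most max_threads items from the work list, start one thread per item and join the
# whole batch before starting the next one (run_in_batches and its names are illustrative,
# not part of the project).
import threading

def run_in_batches(items, worker, max_threads=10):
    items = list(items)
    while items:
        batch, items = items[:max_threads], items[max_threads:]
        threads = [threading.Thread(target=worker, args=(item,)) for item in batch]
        for t in threads:
            t.start()
        for t in threads:
            t.join()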
import logging
import re
import os
import datetime
import time
from lib.config import Config
from lib.domains import Domains
class NsprLogParser:
""" Obohatit vysledky vyhledavani (objekt crawl) o url z NSPR log filu FF """
def __init__(self, logfile, crawl):
#analyze the log file
with open(logfile, 'r') as f:
urls = re.findall('(((http|ftp|https):\/\/)([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)', f.read())
logging.debug("log size: ")
logging.debug(os.path.getsize(logfile))
#XXXsubprocess.call(["rm",Config.LOG_DIR + "log"+str(ScanController.profile )+".txt"]) delete the logfile
for i in urls:
domain = i[1] + i[3]
path = i[4]
#crawl[domain] # XX if it is certain that whenever a domain has -1 there is also some other path, this line can be left out
if path == ":-1":
continue # some internal FF value, present on every domain
crawl[Domains.url2domain(domain)].urls[path]
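# A quick check of the regex above on a made-up log line (the sample text is illustrative, not
# real NSPR output): each match tuple carries the scheme incl. "://" at [1], the host at [3]
# and the path at [4], which is how domain and path are assembled in __init__.
if __name__ == "__main__":
    sample = "some log text http://seznam.cz/aurl?x=1 end"
    pattern = r'(((http|ftp|https):\/\/)([\w\-_]+(?:(?:\.[\w\-_]+)+))([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)'
    for m in re.findall(pattern, sample):
        print(m[1] + m[3], m[4])  # -> http://seznam.cz /aurl?x=1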
from base64 import b64decode, b64encode
import os.path
from PIL import Image
import io
import os
from lib.config import Config
#path = '/tmp/mdm/tmpsym64q.mozrunner-log/'
#path = '/tmp/mdm/tmpFn4RU5.mozrunner-log/'
size = 640, 640
class ScreenshotParser:
""" Obohatit vysledky vyhledavani (objekt crawl) o screenshot
Zmenší screenshot generovaný FF na přijatelnou velikost a přeuloží ho do crawl.screenfile souboru
"""
def __init__(self, crawl):
screenfile = crawl.cacheDir+'screenshot_base64.txt'
if os.path.isfile(screenfile):
with open(screenfile,"r+") as f:
data = (b64decode(f.read()))
im = Image.open(io.BytesIO(data))
im.thumbnail(Config.THUMBNAIL_SIZE) # 0.3 s
data = io.BytesIO()
im.save(data, format="PNG") #outfile= im.save("/tmp/mdm/out.png", "PNG")
#the image has been shrunk, now re-save it
f.seek(0)
f.write(b64encode(data.getvalue()).decode("utf-8"))
f.truncate()
#"<img class='thumbnail' src='data:image/png;base64,{}' />".format(b64encode(data.getvalue()).decode("utf-8"))
crawl.screenfile = screenfile
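# A standalone sketch of the same shrink-and-re-encode step without the Crawl object, reusing
# the imports at the top of this file (the helper name and the 640x640 bound standing in for
# Config.THUMBNAIL_SIZE are illustrative assumptions):
def shrink_base64_png(encoded, bound=(640, 640)):
    im = Image.open(io.BytesIO(b64decode(encoded)))
    im.thumbnail(bound)  # in-place downscale that keeps the aspect ratio
    out = io.BytesIO()
    im.save(out, format="PNG")
    return b64encode(out.getvalue()).decode("utf-8")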
import os.path
from os import listdir
from os.path import isfile, join
from lib.domains import Domains
import cgi
class SpyParser:
""" Obohatit vysledky vyhledavani (objekt crawl) o spiona nebezpecnych prikazu"""
def __init__(self,crawl):
# spying functions
#spy = ""
#spyfile = crawl.logDir + "spy.spy"
for file in [f for f in listdir(crawl.cacheDir) if isfile(join(crawl.cacheDir, f)) and os.path.splitext(f)[1] == ".spy"]:
path = crawl.cacheDir + file
with open(path, 'r') as f:
try:
url = f.readline().rstrip() #the first line contains the url of the file (and a trailing newline we do not care about)
if (url == ""):
raise ValueError
except ValueError: # the header is missing from the file, the browser that was writing it was probably killed, skip
print("no contents fetched/found")
continue
crawl[Domains.url2domain(url)].urls[Domains.url2path(url)].setSpyfile(path)
#spy = "<h3>Nebezpečné funkce</h3>"
#with open(spyfile, 'r') as f:
# spy += f.read()
def getShort(spyfile):
""" V jednom .spy souboru muze byt vice js funkci. Vraci od kazde aspon 100 znaků.
Vraci String + True/False, pokud jsme text zkrátili,
"""
shorten = False
with open(spyfile,"r") as f:
events = ("".join(f.readlines()[1:])).split("\n\n\n") # prvni radka jsou metadata, FF deli prikazy tremi mezerami.
result = []
eventCount = 0
for event in events:
if not event: continue # the last event is ""
eventCount += 1
if eventCount >= 10:
shorten = True
break
try:
command, parameters = event.split(": ",1)
except ValueError:
print("Spy - valuer error. Nepouziju spy.")
print(event)
print(event.split(": ",1))
#print(len(event.split(": ",1)))
#ff =event.split(": ",1)
continue
if len(parameters) > 100: # this command is longer than 100 chars, shorten it
shorten = True
parameters = parameters[:100]
result.append("<b>{}</b> {}".format(command, cgi.escape(parameters)) )
#return cgi.escape(("\n").join(result)), shorten
return ("\n,<br />").join(result), shorten
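# A small self-check of the .spy layout that getShort expects (the file content below is made up
# for illustration): the first line is the url/metadata, then the commands follow separated by a
# triple newline, each written as "name: parameters".
if __name__ == "__main__":
    import tempfile
    raw = "http://example.com/aurl\n" + "eval: alert(1)\n\n\n" + "document.write: <b>hi</b>\n\n\n"
    with tempfile.NamedTemporaryFile("w", suffix=".spy", delete=False) as tmp:
        tmp.write(raw)
    text, shortened = SpyParser.getShort(tmp.name)
    print(shortened)  # False - no command exceeded 100 characters
    print(text)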
# Processes the Firefox log. Firefox leaves .tmp files with html and js, plus a screenshot, in tmp.
from bs4 import BeautifulSoup
from contextlib import redirect_stdout
import io
import os
from os import listdir
from os.path import isfile, join
from pygments import highlight
from pygments.formatters import HtmlFormatter
from pygments.lexers import HtmlLexer
#from pygments.lexers import JavascriptLexer
#from pygments.styles import get_style_by_name
#import jsbeautifier
import sys
import cgi
from lib.analysis.mdmaug_js_lexer import MdmaugJsLexer
from lib.domains import Domains
from lib.dbp import Whitelist #Status, Export, Turris,
class TrafficLogParser:
""" Obohatit vysledky vyhledavani (objekt crawl) o odkazy na hezke vypisy zdrojovych kodu """
def __init__(self, crawl):
""" Projde soubory se zdrojovymi kody a pripoji je ke crawlu """
# log files
for file in [f for f in listdir(crawl.cacheDir) if isfile(join(crawl.cacheDir, f)) and os.path.splitext(f)[1] == ".tmp"]:
#if file in ('screenshot_base64.txt', 'screenshot_debug.html'): continue
#print(file)
path = crawl.cacheDir + file
with open(path, 'r') as f:
print(path)
mime = ""
try:
url = f.readline().rstrip().split(" ", 1) #prvni radek obsahuje url a mime-type dat
if(len(url) == 1 and url[0] == ""):
raise ValueError
if(len(url) == 2):
url, mime = url
except ValueError: # the header is missing from the file, the browser that was writing it was probably killed, skip
print("no contents fetched")
continue
# if the domain is whitelisted, skip it
if (Whitelist.matches(url)):
continue
crawl[Domains.url2domain(url)].urls[Domains.url2path(url)].setSourcefile(path)
def nicifyFile(sourcefile):
""" Vraci zhezceny vypis .tmp filu se zdrojovymi kodu """
nicefile = sourcefile + ".htm"
if os.path.isfile(nicefile):
with open(nicefile, "r") as f:
return f.read()
else:
with open(sourcefile, 'r') as f:
mime = ""
url = f.readline().rstrip().split(" ", 1) #the first line contains the url and the mime type of the data
if(len(url) == 2):
url, mime = url
contents = f.read() #the rest of the file contains the source data
buf = io.StringIO()
with redirect_stdout(buf): # print -> variable
print("<h3>{}</h3>".format(url))
if "html" in mime:
TrafficLogParser.HtmlParse(contents)
elif "javascript" in mime:
TrafficLogParser.JsParse(contents)
else: # print an unknown file as plain text
print(cgi.escape(contents))
data = buf.getvalue()
with open(nicefile, "w") as f2: # zapsat hezke formatovani do souboru
f2.write(data) # cachovat
buf.close()
return data
#html parser
def HtmlParse(contents):
soup = BeautifulSoup(contents)
def pygment(contents):
try:
print(highlight(str(contents), HtmlLexer(), HtmlFormatter()))
except TypeError:
#with open("/tmp/mdm/pomoc","w") as f: f.write(contents)
#logging.warning("{}".format(contents))
print("QUIT")
quit("i should have never come here")#mozna rozbity js rozbil i tady pana
for tag in soup.find_all():
# print potentially dangerous tags
if bool(len([True for a in tag.attrs if (a.startswith('on'))])): #has dynamic js content in an attribute
pygment(tag)
if tag.name in ["meta", "link", "frame", "iframe", "object", "embed"]: # XX link-rel only, meta-redirect only? Jsou jeste jine atributy nebezpecne
pygment(tag) #iframe, object, embed -> vypsat cele tagy
if tag.name == "img":
pygment("<img src={} />".format(tag.get("src"))) # zajima nas pouze atribut src, ale pro jasnost, ze jde o tag, tam vratime zobacky
for tag in soup.find_all("script"): #na konci vsechny skripty -> js parser
inner, tag.contents = tag.contents, []
pygment(tag) #print <script> without its contents
if len(inner): #if the script had contents, parse them with the js parser
for subtag in inner: #there should be just one subtag, namely the script content. But I do not rule out the existence of invalid subtags, and if any existed I would want to know about them. That is why I do not simply use inner[0]
TrafficLogParser.JsParse(subtag)
#HtmlParse("gfdg<meta />fd <img src='LINK' onclick='test' /> konec <hr>")
#JS Parser
def JsParse(contents):
# eval,document.write, window.open, open, window.location, location, document.location , document.cookie
#return
#print(contents)
#contents = """<script>var a = 1;</script>"""
#print("<xmp>",jsbeautifier.beautify(str(contents)),"</xmp>")
#return
print (highlight(str(contents), MdmaugJsLexer(), HtmlFormatter()))
#def getOutput(self):return self.output
def getStylesheet():
style = ".gr {color:white!important;background:red;font-size:150%;}" # zvyrazneni podezrelych veci
return "<style>" + HtmlFormatter().get_style_defs('.highlight') + style + "</style>"
#from timeit import default_timer as timer
#start = timer()
#TrafficLogParser()
#end = timer()
#print(end- start)
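# A minimal sketch of how the pieces above fit together (the .tmp path below is hypothetical):
# prepend the pygments stylesheet from getStylesheet() to the prettified dump of one capture file.
if __name__ == "__main__":
    page = TrafficLogParser.getStylesheet() + TrafficLogParser.nicifyFile("/tmp/mdm/example-capture.tmp")
    with open("/tmp/mdm/example-capture.html", "w") as f:
        f.write(page)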
# Management of the scan data
from pprint import pprint
import json
import datetime
import time
import os
from glob import glob
from random import randint
import subprocess
import traceback
import logging
logging.basicConfig(level=logging.WARNING, format="%(message)s")
from lib.config import Config
from lib.domains import Domains
from lib.analysis.crawl import Crawl
from templates.crawl_view import CrawlView
from lib.analysis.parser.traffic_log_parser import TrafficLogParser
from lib.analysis.parser.nspr_log_parser import NsprLogParser
from lib.analysis.parser.metadata_parser import MetadataParser
from lib.analysis.parser.screenshot_parser import ScreenshotParser
from lib.analysis.parser.spy_parser import SpyParser
import pdb
class ScanController:
FF_INFO_FILE = "cache.dir"
CRAWL_FILE = "crawlSave.yaml"
profile = "-1" #bookovany profile firefoxu
queueFF = {}
#cacheDir = None
#logDir = None
##
# @param cached If we want to display the cached version of the analysis, pass True. If an int is passed, it is the maximum age (in days). When no cached analysis is found, the site is analyzed again.
def launch(self, url, cached = None):
url = url.split("/", 3)
if len(url) <= 3:
return "Wrong url"
else:
url = url[3]
if cached:
# """ Pokud je k dispozici analyza, vratit ji """
dir = Config.CACHE_DIR + Domains.domain2dir(url) + "/"
if os.path.isdir(dir):
snapdirs = [str(dir + subdir) for subdir in os.listdir(dir) # directories of all possible snapshots
if os.path.isdir(str(dir + subdir)) and os.path.isfile(dir+subdir + "/"+ScanController.CRAWL_FILE)]
if snapdirs:
cacheDir = max(snapdirs, key = os.path.getmtime)+ "/" # the newest analysis dir
if type(cached) != int or os.path.getmtime(cacheDir) > time.time()-3600*24*cached: # maximum age of the analysis
try:
print("returning")
return CrawlView.outputHtml(Crawl.loadFromFile(cacheDir + ScanController.CRAWL_FILE)) #"crawlSave.tmp"
except ValueError:
pass
print("({-1}) Cachovana analyza nenalezena")
# provest novou analyzu
if self.queue(): # /rest/analyze/web - zaradi web do fronty
print ("({}) start crawl".format(self.profile))
self.url = Domains.assureUrl(url)
try:
crawl = self.analyze()
except Exception as e:
print("PROFILE EXCEPTION {}".format(self.profile))
print(traceback.format_exc())
# XX If the problem is that JS kills FF, it is possible to experiment with ulimit -Sv 500000;
return ("PROFILE EXCEPTION ({}) {} See logs, i.e. mdmaug/nohup.out. ".format(self.profile, e))
crawl.saveToFile(crawl.cacheDir + ScanController.CRAWL_FILE) # save the search results
return CrawlView.outputHtml(crawl)
else: # the analysis failed to book an FF profile
logging.debug("no free slots")
result = "failed - no free slots. <a href='" + Config.APP_DOMAIN + "/reset'>Reset</a>" # volny profil jsme nenasli