Commit f7bd1155 authored by Edvard Rejthar's avatar Edvard Rejthar

package format, installation steps

parent e24ff7f7
.idea .idea
__pycache__
*.pem
\ No newline at end of file
...@@ -9,7 +9,7 @@ PROFILE_COUNT=21 ...@@ -9,7 +9,7 @@ PROFILE_COUNT=21
apt install software-properties-common apt install software-properties-common
add-apt-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main universe restricted multiverse" add-apt-repository "deb http://archive.ubuntu.com/ubuntu $(lsb_release -sc) main universe restricted multiverse"
apt update apt update
apt install firefox python3 mariadb-server apt install firefox python3 mariadb-server xvfb
pip3 install xvfbwrapper pymysql peewee jinja2 pyyaml bs4 pygments pillow requests pip3 install xvfbwrapper pymysql peewee jinja2 pyyaml bs4 pygments pillow requests
# current dir # current dir
...@@ -19,7 +19,7 @@ cd $DIR ...@@ -19,7 +19,7 @@ cd $DIR
# mariadb setup # mariadb setup
systemctl start mariadb.service systemctl start mariadb.service
mysql -u root < mdmaug-installation.sql # populate db mysql -u root < mdmaug-installation.sql # populate db
mysql -uroot -e "CREATE USER 'mdmaug'@'localhost' IDENTIFIED BY 'fidFDSs676'; GRANT ALL PRIVILEGES ON mdmaug. * TO 'mdmaug'@'%';" # new user mysql -uroot -e "CREATE USER 'mdmaug'@'localhost' IDENTIFIED BY 'fidFDSs676'; GRANT ALL PRIVILEGES ON mdmaug. * TO 'mdmaug'@'localhost';" # new user
# adding user the server will be run under # adding user the server will be run under
useradd -m -d $DESTINATION mdmaug useradd -m -d $DESTINATION mdmaug
...@@ -41,8 +41,11 @@ do ...@@ -41,8 +41,11 @@ do
fi fi
done done
# adopt all files to the new user
chown mdmaug:mdmaug -R $DESTINATION chown mdmaug:mdmaug -R $DESTINATION
# make the new user able to use the display (needed on Ubuntu 17.10 at least)
xhost +local:mdmaug
...@@ -4,10 +4,13 @@ Scans a website for a sign of a parasite hosts or commands. ...@@ -4,10 +4,13 @@ Scans a website for a sign of a parasite hosts or commands.
## Installation ## Installation
1. ```git clone git@gitlab.labs.nic.cz:csirt/mdmaug.git /tmp/mdmaug``` 1. Download ```git clone git@gitlab.labs.nic.cz:csirt/mdmaug.git /tmp/mdmaug```
2. edit mdmaug/lib/config.py 2. Edit mdmaug/lib/config.py
3. you should generate certificate `openssl req -new -x509 -keyout cert-mdmaug.pem -out cert-mdmaug.pem -days 365 -nodes` to `mdmaug/cert-mdmaug.pem` 3. You should generate certificate `openssl req -new -x509 -keyout cert-mdmaug.pem -out cert-mdmaug.pem -days 365 -nodes` to `mdmaug/cert-mdmaug.pem`
4. ```/tmp/mdmaug/INSTALL``` 4. Perform installation: ```/tmp/mdmaug/INSTALL```
5. Everything should be located in `/opt/mdmaug`.
6. Launch under newly created `mdmaug` user: `su - mdmaug -c 'python3 -m mdmaug'`
7. Connect in the browser at: https://localhost:8000
### Notes ### Notes
...@@ -15,6 +18,7 @@ Scans a website for a sign of a parasite hosts or commands. ...@@ -15,6 +18,7 @@ Scans a website for a sign of a parasite hosts or commands.
* Certificate error: Make sure that the browser doesn't block the MDM-Augmented server if used from MDM. * Certificate error: Make sure that the browser doesn't block the MDM-Augmented server if used from MDM.
* If you want a different number of profiles than 21, change it in INSTALL, config.py and profiles.ini * If you want a different number of profiles than 21, change it in INSTALL, config.py and profiles.ini
* You may put ```03 1,7,13,19 * * * ~/mdmaug-launch``` in ```crontab -e``` of user mdmaug. * You may put ```03 1,7,13,19 * * * ~/mdmaug-launch``` in ```crontab -e``` of user mdmaug.
* We are using Python3.6+
## What is done to Firefox profiles? ## What is done to Firefox profiles?
......
/__pycache__/
/lib/__pycache__/
/lib/analysis/__pycache__/
/lib/analysis/parser/__pycache__/
/nbproject/
/templates/__pycache__/
cert-mdmaug.pem
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
import os import os
import ssl import ssl
import threading import threading
import logging
logging.basicConfig(level=logging.DEBUG, format="%(message)s") logging.basicConfig(level=logging.DEBUG, format="%(message)s")
from http.server import HTTPServer from http.server import HTTPServer
from xvfbwrapper import Xvfb from xvfbwrapper import Xvfb
from lib.config import Config # import ipdb; ipdb.set_trace()
from lib.controller.server import Server
from lib.controller.api import Api
#logging.basicConfig(level=logging.WARNING, format="%(levelname)s: %(message)s",filename="logger.log") from .lib.config import Config
from .lib.controller.server import Server
from .lib.controller.api import Api
# Make sure the logging directory exists (race-free: no separate existence check).
os.makedirs(Config.LOG_DIR, exist_ok=True)

# server setup
Api.reset()  # empties the queue file and kills browsers left over from a previous run
address = '0.0.0.0'
httpd = HTTPServer((address, Config.APP_PORT), Server)

# The PEM file holds the private key together with the certificate,
# see http://stackoverflow.com/questions/19705785/python-3-https-webserver
# An SSLContext is used instead of the deprecated ssl.wrap_socket()/PROTOCOL_TLSv1 pair:
# PROTOCOL_TLS_SERVER negotiates the highest TLS version both sides support.
tls_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
tls_context.load_cert_chain(certfile=Config.DIR + 'cert-mdmaug.pem')
httpd.socket = tls_context.wrap_socket(httpd.socket, server_side=True)

# virtual X display the browsers are rendered into
display = Xvfb()
display.start()
try:
    print(f'Listening at https://{address}:{Config.APP_PORT}')
    # one serving thread per Firefox profile
    for _ in range(Config.profileCount):
        threading.Thread(target=httpd.serve_forever).start()
except (KeyboardInterrupt, SystemExit):
    display.stop()
''' '''
XX TO BE DELETED:
How to debug mysql: How to debug mysql:
conn = pymysql.connect(host='localhost', user='root', passwd='lopuch', db='mdmaug', charset='utf8') conn = pymysql.connect(host='localhost', user='root', passwd='lopuch', db='mdmaug', charset='utf8')
cur = conn.cursor() cur = conn.cursor()
...@@ -55,4 +57,4 @@ quit() ...@@ -55,4 +57,4 @@ quit()
#from urllib.parse import parse_qs #from urllib.parse import parse_qs
#from urllib.parse import urlparse #from urllib.parse import urlparse
#quit() #quit()
''' '''
\ No newline at end of file
/mnt/mdmaug/home/mdmaug/.mozilla/extensions/{ec8030f7-c20a-464f-9b0e-13a3a9e97384}/mdmaug@jetpack/resources/mdmaug/
\ No newline at end of file
import threading
import os
import logging import logging
from glob import glob import os
import threading
from peewee import MySQLDatabase from peewee import MySQLDatabase
class Config:
    """Application-wide settings and the shared database handle.

    Everything is a class attribute; the class is used as a namespace and is
    not meant to be instantiated.
    """
    profileCount = 21  # number of Firefox profiles. Its name is just a number – 0,1...
    browser = 'firefox'  # iceweasel, firefox. What browser gets launched.
    configFile = '/opt/mdmaug/.cache/mdmaug-scans/_tmp/queue.cache'  # RAM disk was too small: '/tmp/mdm/queue.cache'
    APP_PORT = 8000
    APP_DOMAIN = 'https://217.31.202.41:' + str(APP_PORT)  # csirt.csirt.office.nic.cz
    LOG_DIR = "/opt/mdmaug/.cache/mdmaug-scans/_tmp/"
    CACHE_DIR = "/opt/mdmaug/.cache/mdmaug-scans/"
    DIR = os.path.dirname(os.path.realpath(__file__)) + "/../"
    # Database handle; stays None until connect() has been called.
    # (A real assignment, so the attribute exists even before the first connection.)
    myDB = None
    lock = threading.RLock()  # hopefully the lock works when it is defined here. Closure...? XX test it somehow
    THUMBNAIL_SIZE = 640, 640
    # launch at most 10 threads at once, it overflowed once (every domain additionally
    # launches as many threads as it has IPs, but there tend to be only a few of those)
    MAX_WHOIS_DOMAIN_THREADS = 10
    MAX_BROWSER_RUN_TIME = 25  # maximum time for a browser to run
    MAX_BROWSER_EXPIRATION = 15  # seconds that we wait before killing the browser (waiting for the files to be written)

    @staticmethod
    def connect():
        """(Re)create the MySQL database handle and store it in ``Config.myDB``."""
        # XX dealing with peewee.OperationalError: (2006, "MySQL server has gone away
        #    (BrokenPipeError(32, 'Broken pipe'))") after a 7 hour timeout
        # XX surprisingly, the connection cannot live in dbp DBModel.connect. Although the type is
        #    correct then (MySQLDatabase), nothing can be done with it and selects end up with NoneType.
        logging.debug("Connecting to DB.")
        # NOTE(review): the credentials are hard-coded; they have to match the user created
        # by the INSTALL script. Consider loading them from a non-versioned file.
        Config.myDB = MySQLDatabase("mdmaug", host='localhost', port=3306, user="mdmaug",
                                    passwd="fidFDSs676")  # XX I have removed: , threadlocals=False
Config.connect() Config.connect()
import json import json
import subprocess
import logging import logging
from lib.config import Config import subprocess
from lib.controller.scan_controller import ScanController
from lib.model.dbp import Status, Export, Turris, Whitelist from peewee import IntegrityError
from lib.analysis.parser.traffic_log_parser import TrafficLogParser
from ...templates.crawl_view import CrawlView
from .scan_controller import ScanController
from ..config import Config
from ..model.dbp import Turris, Whitelist
from ..parser.traffic_log_parser import TrafficLogParser
class Api:
    """Executes one API command described by a parsed request dictionary."""
    website = ""  # http://site.cz
    websiteDomain = ""  # site.cz

    # Cache mode from "/api/analyze=<mode>/" mapped to the value handed to ScanController.launch.
    # The key True stands for a bare "/api/analyze/" without any "=<mode>".
    _CACHE_MODES = {"cached": 1, "weekcache": 7, "oldcache": True, True: None}

    def run(self, request):
        """ Accept command

        :type request: dict built from the URL request, e.g.
            /api/analyze=cache/http://example.com → {"api": True, "analyze": cache, "page": "http://example.com"}
        :return: output of the command; None if the request contains no known command
        """
        if "analyze" in request:
            mode = request["analyze"]
            if mode not in self._CACHE_MODES:
                # an unsupported mode is reported instead of surfacing as an unhandled KeyError
                return "Unknown analyze mode."
            crawl = ScanController().launch(request["page"], self._CACHE_MODES[mode])
            if request["api"] == "json":
                return CrawlView.output_json(crawl)
            return CrawlView.output_html(crawl)
        elif "decide" in request:  # XX deprecated?
            return self.get_undecided()
        elif "nicify" in request:
            return TrafficLogParser.getStylesheet() + TrafficLogParser.nicifyFile(request["page"])
        elif "vote" in request:  # /api/vote/block/example.org/10.0.0.1
            logging.debug("vote cmd")
            return Turris.vote(request["vote"], request["page"])
        elif "whitelist" in request:
            # XXX not implemented yet: the website has to be validated first (it must be an URL,
            # not a shell command) and only then passed to self.whitelist()
            return "Implement first if needed."
        elif "reset" in request:
            self.reset()
            return "reset"

    @staticmethod
    def reset():
        """Clear the queue file and kill the browsers that are still running."""
        logging.debug("resetting running browsers")
        with open(Config.configFile, 'w') as f:  # clear the queue
            json.dump({}, f)
        subprocess.call(["pkill", Config.browser])  # kill frozen browsers

    def whitelist(self):
        """Add the 2nd-level domain (self.websiteDomain) to the whitelisted ones."""
        logging.debug("whitelistuju")
        # Db.cur = Db.connection.cursor()
        # self._logging.debug(Db.cur.execute("""REPLACE INTO whitelist set domain = %s""", (self.websiteDomain, )))
        # Db.connection.commit()
        # Db.cur.close()
        try:
            Whitelist.insert(domain=self.websiteDomain).execute()
        except IntegrityError:
            pass  # already inserted

    @staticmethod
    def get_undecided():
        """Not implemented yet: should produce a table of all domains undecided since the last export."""
        logging.debug("XXX jeste jsem neudelal - ma vylezt tabulka vsech nerozhodlych domen od posledniho exportu")
\ No newline at end of file
This diff is collapsed.
from http.server import SimpleHTTPRequestHandler
from jinja2 import Environment
from jinja2 import FileSystemLoader
from lib.config import Config
from lib.controller.api import Api
from lib.model.dbp import DbModel
from lib.model.dbp import Export
import logging import logging
import mimetypes import mimetypes
import os import os
from http.server import SimpleHTTPRequestHandler
from jinja2 import Environment
from jinja2 import FileSystemLoader
from ..config import Config
from ..controller.api import Api
from ..model.dbp import DbModel
from ..model.dbp import Export
env = Environment() env = Environment()
env.loader = FileSystemLoader(Config.DIR + "templates/") env.loader = FileSystemLoader(Config.DIR + "templates/")
class Server(SimpleHTTPRequestHandler): class Server(SimpleHTTPRequestHandler):
def favicon(self): def favicon(self):
with open('favicon.ico', 'rb') as f: with open('favicon.ico', 'rb') as f:
self.output(f.read(), "image/x-icon") self.output(f.read(), "image/x-icon")
def render_template(self, filename, ** kwargs): def render_template(self, filename, **kwargs):
self.output(env.get_template(filename).render(kwargs)) self.output(env.get_template(filename).render(kwargs))
def output(self, contents, contentType="text/html"): def output(self, contents, content_type="text/html"):
self.send_response(200) self.send_response(200)
self.send_header("Content-type", contentType) self.send_header("Content-type", content_type)
self.end_headers() self.end_headers()
try: try:
self.wfile.write(contents) self.wfile.write(contents)
...@@ -34,26 +37,61 @@ class Server(SimpleHTTPRequestHandler): ...@@ -34,26 +37,61 @@ class Server(SimpleHTTPRequestHandler):
self.render_template("homepage.html") self.render_template("homepage.html")
def static_file(self, url):
    """Send the file stored at the filesystem path *url* to the client.

    :param url: path of an existing file
    """
    # guess_type() returns a (type, encoding) tuple, only the type belongs to the header;
    # files of an unknown type are sent as generic binary data
    content_type = mimetypes.guess_type(url)[0] or "application/octet-stream"
    # Always read raw bytes: that suits both text and binary files and needs no content sniffing.
    with open(url, 'rb') as f:
        self.output(f.read(), content_type=content_type)
def do_GET(self):
    """
    Routing table:
        / → homepage
        /existing-file → return the static file from /static
        /(destination=example.com/)api... → if set, the output will be HTML5-postMessaged to other tab at the destination (with https protocol)
        /api(=json)/ → output might be either in JSON, or else in HTML
        /api/analyze(=...)/URI
        /api/vote/...
        /api/reset
        /export/(days) → CSV of last X days
    """
    _, path = self.path.split("/", 1)
    path, *_ = path.split("?", 1)
    if path == "":
        return self.homepage()

    # favicon or any other existing file
    # The path comes from the client: normalise it and make sure it still points inside
    # the static directory, so that "../" segments cannot reach other files.
    static_root = os.path.abspath(Config.DIR + "static")
    static_path = os.path.abspath(static_root + os.sep + path)
    if static_path.startswith(static_root + os.sep) and os.path.isfile(static_path):
        return self.static_file(static_path)

    DbModel.assureConnection()

    # parse the request url into a friendly dictionary
    request = {"page": ""}
    page = False  # becomes True once the scanned URL ("http:" / "https:" segment) has started
    for part in self.path.split("/")[1:]:
        if not page:
            key, *value = part.split("=", 1)
            if key in ("http:", "https:"):
                page = True
            else:
                # "key=value" segment, or a bare "key" flag
                request[key] = value[0] if value else True
                continue
        request["page"] += part + "/"
    if request["page"]:  # strip last slash
        request["page"] = request["page"][:-1]
    logging.debug("Request: {}".format(request))

    if "api" in request:  # /api/analyze/web
        output = Api().run(request)
        if "destination" in request:
            # send everything up, we are in an iframe
            self.render_template("_message.html", contents=output, cmd=request, url=self.path,
                                 destination=f"https://{request['destination']}/")
        else:
            self.output(output)
    elif "export" in request:  # /export/{days} - csv of last 7 days
        url = self.path.split("/", 2)
        self.output(Export.export_view(days=url[2]))
\ No newline at end of file
import re
import socket import socket
import logging
import re import re
from urllib.parse import parse_qs import socket
from urllib.parse import urlparse
import urllib.request import urllib.request
class Domains: class Domains:
""" webove nastroje """ """ webove nastroje """
def get_ips_for_host(host): def get_ips_for_host(host):
try: try:
ips = socket.getaddrinfo(host, 80, 0, 0, socket.IPPROTO_TCP) # XXX co kdyz nepratelsky web reaguje jen na 80, 81, 8080 ips = socket.getaddrinfo(host, 80, 0, 0, socket.IPPROTO_TCP) # XXX co kdyz nepratelsky web reaguje jen na 80, 81, 8080
except socket.gaierror: except socket.gaierror:
ips = [] ips = []
return ips return ips
...@@ -23,53 +21,57 @@ class Domains: ...@@ -23,53 +21,57 @@ class Domains:
except TypeError: except TypeError:
logging.debug("Domains/url2domain type error") logging.debug("Domains/url2domain type error")
logging.debug(url) logging.debug(url)
raise #return "" raise # return ""
def url2path(url):
    """ http://seznam.cz/url -> /url

    The scheme is matched case-insensitively, so "HTTP://seznam.cz/url" gives "/url" as well.
    """
    url = re.sub(r'^(http://|https://|ftp://)', '', url, flags=re.IGNORECASE)  # cut off the protocol
    url = re.sub(r'^([^/])*', '', url)  # only the path is wanted, not the domain
    return url
def assureUrl(url):
    """Make sure we are dealing with an URL and not e.g. with a shell command.

    :return: the first URL-like substring found in *url*
    :raises AttributeError: if *url* contains nothing that looks like an URL
    """
    # XX what about other protocols? smb, sftp? Is there a danger?
    # Raw string, so that the regex escapes are not treated as (invalid) string escapes.
    # NOTE(review): ";" is accepted in the path part – it looks like a leftover of an
    # HTML-escaped "&amp;" in the pattern; kept to preserve the accepted character set.
    return re.search(
        r'(((((http|https|ftp)://)?[\w\-_]+(?:(?:\.[\w\-_]+)+)))([\w\-.,@?^=%&;:/~+#]*[\w\-@?^=%&;/~+#])?)',
        url).group(0)
def domain2dir(url):
    """Return a friendly directory name made of the domain in *url*.

    E.g. "HTTP://Example.COM/path" -> "example.com"
    """
    url = url.lower()
    url = re.sub(r'^(http://|https://|ftp://)', '', url)  # cut off the protocol
    url = re.sub(r'(/.*)', '', url)  # the path is not wanted, only the domain
    url = re.sub(r'[^a-z0-9.]', '', url)  # keep friendly characters only
    return url
def getPdnsLink(ip): def getPdnsLink(ip):
return 'http://pdns.cert.at/p/dns?qry=' + ip return 'http://pdns.cert.at/p/dns?qry=' + ip
def ip2pdnsDomains(ip): def ip2pdnsDomains(ip):
return None #24 doesnt work """
try: try:
# XX mohl bych sortovat dle 2nd domeny. Pripadne oriznout 3rd domenu, nechat jen 2nd. Tam ale musim osetrit problemove dvojite tld - co.uk, gov.ua... # XX mohl bych sortovat dle 2nd domeny. Pripadne oriznout 3rd domenu, nechat jen 2nd. Tam ale musim osetrit problemove dvojite tld - co.uk, gov.ua...
pdns = urllib.request.urlopen(Domains.getPdnsLink(ip)).read().decode("utf-8") pdns = urllib.request.urlopen(Domains.getPdnsLink(ip)).read().decode("utf-8")
items = re.findall("<div class='x[BA]'>(.*)</div>", pdns) items = re.findall("<div class='x[BA]'>(.*)</div>", pdns)
return items return items
except Exception as e: except Exception as e:
logging.debug("chyba pri kontaktu s PDNS: " + str(e)) logging.debug("chyba pri kontaktu s PDNS: " + str(e))
return None return None
"""
return None # #24 doesnt work
def ip2countryAndCity(ip): def ip2countryAndCity(ip):
return None, None #23 service down """
try: try:
hostipApi = urllib.request.urlopen('http://api.hostip.info/get_html.php?ip=' + ip + '&position=true').read().decode("utf-8").split("\n") hostipApi = urllib.request.urlopen('http://api.hostip.info/get_html.php?ip=' + ip + '&position=true').read().decode(
#['Country: CZECH REPUBLIC (CZ)', 'City: Prague', '', 'Latitude: 50.0833', 'Longitude: 14.4333', 'IP: 109.123.209.188', ''] "utf-8").split("\n")
# ['Country: CZECH REPUBLIC (CZ)', 'City: Prague', '', 'Latitude: 50.0833', 'Longitude: 14.4333', 'IP: 109.123.209.188', '']
return hostipApi[0].split(":")[1], hostipApi[1].split(":")[1] return hostipApi[0].split(":")[1], hostipApi[1].split(":")[1]
except UnicodeDecodeError: # as e except UnicodeDecodeError: # as e
logging.debug("neumim dekodovat") logging.debug("neumim dekodovat")
except Exception as e: except Exception as e:
logging.debug("hostip.info down: " + str(e)) logging.debug("hostip.info down: " + str(e))
return None, None return None, None
"""
return None, None # #23 service down
## ##
# Kontaktuje sluzbu safebrowsing a snazi se z jejich nekonzistentnich udaju vycist, zda kdyz na URL clovek pristoupi, objevi se cervena stranka. # Kontaktuje sluzbu safebrowsing a snazi se z jejich nekonzistentnich udaju vycist, zda kdyz na URL clovek pristoupi, objevi se cervena stranka.
...@@ -77,17 +79,17 @@ class Domains: ...@@ -77,17 +79,17 @@ class Domains:
# Taky je mozne, ze sluzba zmenila wording. Mnoho zdaru! # Taky je mozne, ze sluzba zmenila wording. Mnoho zdaru!
# #
# @param format 'bool' Vraci bool True/False/None, nebo 'attr' vraci int "1"/"0"/"" pro atribut # @param format 'bool' Vraci bool True/False/None, nebo 'attr' vraci int "1"/"0"/"" pro atribut
def isSuspicious(domain, output='bool'): def is_suspicious(domain, output='bool'):
#contents = urllib.request.urlopen('http://www.google.com/safebrowsing/diagnostic?site=' + domain).read().decode("utf-8") # contents = urllib.request.urlopen('http://www.google.com/safebrowsing/diagnostic?site=' + domain).read().decode("utf-8")
#with open("debugsf.tmp","a") as f: # with open("debugsf.tmp","a") as f:
# f.write(contents + "\n\n") # f.write(contents + "\n\n")
#if "Site is listed as suspicious" in contents: # if "Site is listed as suspicious" in contents:
#elif "This site is not currently listed as suspicious." in contents: # elif "This site is not currently listed as suspicious." in contents:
import requests, re, json import requests
r = requests.get("http://www.google.com/safebrowsing/diagnostic?output=jsonp&site=" + domain, timeout=5) r = requests.get("http://www.google.com/safebrowsing/diagnostic?output=jsonp&site=" + domain, timeout=5)
if '"listed"' in r.text: if '"listed"' in r.text:
return True if output == 'bool' else "1" return True if output == 'bool' else "1"
if '"unlisted"' in r.text: # vratilo to alespon neco rozumneho if '"unlisted"' in r.text: # vratilo to alespon neco rozumneho
return False if output == 'bool' else "0" return False if output == 'bool' else "0"
else: else:
return None if output == 'bool' else "" return None if output == 'bool' else ""
\ No newline at end of file
from yaml import load, dump from yaml import load, dump
from ..config import Config
from ..parser.spy_parser import SpyParser
try: try:
from yaml import CLoader as Loader, CDumper as Dumper from yaml import CLoader as Loader, CDumper as Dumper
except ImportError: except ImportError:
from yaml import Loader, Dumper from yaml import Loader, Dumper
from collections import defaultdict from collections import defaultdict
from lib.config import Config
class Crawl(defaultdict): class Crawl(defaultdict):
""" Objekt Crawl udružuje výsledky vyhledávání """ """ Objekt Crawl udružuje výsledky vyhledávání """
def __str__(self): def __str__(self):
r = "Výsledky vyhledávání - navštíveno {} domén".format(len(self)) r = "Výsledky vyhledávání - navštíveno {} domén".format(len(self))
for key in self.keys(): for key in self.keys():
r += "\n* " + key + " " + str(self[key]) r += "\n* " + key + " " + str(self[key])
return r return r
def save_to_file(self, filename):
def saveToFile(self,filename):
with open(filename, "w") as f: with open(filename, "w") as f:
f.write(dump(self.__getstate__(), Dumper=Dumper)) f.write(dump(self.__getstate__(), Dumper=Dumper))
def loadFromFile(filename): @staticmethod
def load_from_file(filename):
with open(filename, 'r') as f: with open(filename, 'r') as f:
return Crawl(state = load(f.read(), Loader=Loader)) return Crawl(state=load(f.read(), Loader=Loader))