Commit 1b3eda0f authored by Petr Špaček's avatar Petr Špaček

Merge branch 'progress_monitoring' into 'master'

All-in-one script

See merge request !6
parents 2e6fa1f6 4dedec83
Pipeline #40764 passed with stages
in 3 minutes and 17 seconds
.mypy_cache
*.pickle
__pycache__
ednscomp.input
......
......@@ -172,8 +172,8 @@ ldns-read-zone -s zone > zone.nodnssec
# it is recommended to collect at least 10 full runs to eliminate network noise
# (feel free to terminate the script with SIGTERM)
# result of each run is stored in file ednscompresult-<timestamp>
# Hint: You can run ./testedns.sh in parallel, possibly on multiple machines
PATH=$PATH:<path to genreport tool> ./testedns.sh
# Hint: You can run ./testedns.py in parallel, possibly on multiple machines
PATH=$PATH:<path to genreport tool> ./testedns.py
# (monitor number of ednscompresult- files and terminate as necessary;
# the script will do 10 full scans to eliminate random network failures)
......
#!/usr/bin/python3
import glob
import logging
import sys
import dns.name
import nsname2ipset, zone2pickle
import domain2ipset
import evalzone
import ednscomp2pickle
import genednscomp
import nsname2ipset
import testedns
import zone2pickle
def main():
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
try:
zone_fn = sys.argv[1]
......@@ -18,10 +26,87 @@ def main():
domain_nsset, nsnames, nsname_ipsets = zone2pickle.convert(zone_fn, zone_origin)
zone2pickle.save(domain_nsset, nsnames, nsname_ipsets)
prev_ip_cnt = len(nsname_ipsets) - 1
while prev_ip_cnt < len(nsname_ipsets):
logging.info('resolving NS names to IP addresses')
# repeat until
# a) all names are resolved
# b) last run did not bring new IP addresses
while True:
prev_ip_cnt = len(nsname_ipsets)
nsname2ipset.update_mapping(nsnames, nsname_ipsets)
total = len(nsnames)
remaining = total - len(nsname_ipsets)
new = len(nsname_ipsets) - prev_ip_cnt
if remaining == 0:
logging.info('all NS names resolved to an IP address')
break
elif new == 0:
logging.info('unable to resolve last %d NS names to an IP address, '
'leaving %0.2f %% NS names unresolved',
remaining,
remaining/total * 100)
break
else:
logging.info('resolved new %d NS names (%0.2f %%) to an IP address, '
'retryring resolution for remaining %d NS names (%0.2f %%)',
new,
new / total * 100,
remaining,
remaining/total * 100)
nsname2ipset.save(nsname_ipsets)
# domain2ipset.py
logging.info('looking for a working NS IP addresses for each domain')
netstats = domain2ipset.NetStats()
domain_ipset = {}
while True:
prev_ip_cnt = len(domain_ipset)
domain2ipset.update_mapping(domain_nsset, nsname_ipsets, netstats, domain_ipset)
total = len(domain_nsset)
remaining = total - len(domain_ipset)
new = len(domain_ipset) - prev_ip_cnt
if remaining == 0:
logging.info('all domains have at least one NS IP address which responds')
break
elif new == 0:
logging.info('unable to find NS IP addresses for last %d domains, '
'leaving %0.2f %% domains without working NS IP address',
remaining,
remaining/total * 100)
break
else:
logging.info('found working NS IP address for %d domains (%0.2f %%), '
'retryring resolution for remaining %d domains (%0.2f %%)',
new,
new / total * 100,
remaining,
remaining/total * 100)
domain2ipset.save(domain_nsset, netstats, domain_ipset)
logging.info('generating input data for genreport tool')
with open('ednscomp.input', 'w') as ednscomp_input:
ednscomp_input.writelines(genednscomp.generate(nsname_ipsets, domain_ipset))
logging.info('executing EDNS tests')
testedns.repeat_genreport(10)
ednscompresults = glob.glob('ednscompresult-*')
if not ednscompresults:
logging.critical('error: no ednscompresult-* files from previous step found, exiting')
sys.exit(2)
logging.info('processing genreport output in EDNS strict mode')
nsstats_strict = ednscomp2pickle.collect_server_stats(ednscomp2pickle.eval_edns_strict, ednscompresults)
ednscomp2pickle.save(nsstats_strict, 'strict')
logging.info('processing genreport output in EDNS permissive mode')
nsstats_permissive = ednscomp2pickle.collect_server_stats(ednscomp2pickle.eval_edns_permissive, ednscompresults)
ednscomp2pickle.save(nsstats_permissive, 'permissive')
summary, results_strict, results_permissive = evalzone.evaluate(nsstats_strict, nsstats_permissive, domain_nsset, nsname_ipsets, domain_ipset)
evalzone.save_pickle(results_strict, 'strict')
evalzone.save_pickle(results_permissive, 'permissive')
evalzone.save_summary(summary)
print(summary.text)
if __name__ == "__main__":
testedns.check_env()
main()
......@@ -6,7 +6,7 @@ zone2pickle.py zone in-addr.arpa
nsname2ipset.py
domain2ipset.py
genednscomp.py > ednscomp.input
testedns.sh 2
testedns.py 2
ednscomp2pickle.py ednscompresult-*
evalzone.py
diffresults.py
......
......@@ -119,7 +119,7 @@ def process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipse
#if len(ip_done) % 100 == 0:
# logging.info('generated output for %s IP addresses', len(ip_done))
def main():
def load():
logging.info('loading NS sets')
with open('domain2nsset.pickle', 'rb') as domain2nsset_pickle:
domain2nsset = pickle.load(domain2nsset_pickle)
......@@ -141,27 +141,9 @@ def main():
except FileNotFoundError:
domain2ipset = {}
retry_queue = collections.deque() # type: Deque[Tuple[int, dns.name.Name, str]]
#logging.info('computing number of candidates to query')
#candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
#logging.info('queue contains %s queries to be checked', count_candidates(candidates))
candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
try:
with Pool(processes = 30) as pool:
for attempt, domain, ip, state in pool.imap_unordered(check_availability, candidates):
process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipset)
if len(domain2ipset) % 1000 == 0 and len(domain2ipset.get(domain, [])) == 1:
logging.info('%s domains out of %s have at least one working NS (%0.2f %%)',
len(domain2ipset), len(domain2nsset), len(domain2ipset)/len(domain2nsset)*100)
logging.info('first pass done, processing queued retries')
while retry_queue: # retry_queue might be filled again in process_reply
for attempt, domain, ip, state in pool.imap_unordered(
check_availability,
retry_candidates(retry_queue, netstats)):
process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipset)
except KeyboardInterrupt:
pass
finally:
return domain2nsset, nsname2ipset, netstats, domain2ipset
def save(domain2nsset, netstats, domain2ipset):
logging.info('writting domain2ipset for %d domains', len(domain2ipset))
pickle.dump(domain2ipset, open('domain2ipset.pickle', 'wb'))
logging.info('writting network statistics for %d IPs', len(netstats))
......@@ -169,11 +151,36 @@ def main():
logging.debug('dead domains: %s', domain2nsset.keys() - domain2ipset.keys())
logging.info('%s out of %s domains has at least one working NS (%0.2f %%)',
len(domain2ipset), len(domain2nsset), len(domain2ipset)/len(domain2nsset)*100)
# machine readable output: # of domains which an working NS
print(len(domain2ipset))
# machine readable output: total # of domains with NS set
print(len(domain2nsset))
def update_mapping(domain2nsset, nsname2ipset, netstats, domain2ipset):
    # Probe candidate NS IP addresses in parallel and record responsive ones.
    # netstats and domain2ipset are mutated in place; a failed probe may be
    # re-queued by process_reply for another attempt (see retry_queue below).
    retry_queue = collections.deque()  # type: Deque[Tuple[int, dns.name.Name, str]]
    #logging.info('computing number of candidates to query')
    #candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
    #logging.info('queue contains %s queries to be checked', count_candidates(candidates))
    candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
    with Pool(processes = 30) as pool:
        for attempt, domain, ip, state in pool.imap_unordered(check_availability, candidates):
            process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipset)
            # log progress roughly every 1000 confirmed domains; the second
            # condition fires only when `domain` has exactly one recorded IP,
            # i.e. when this reply is what confirmed the domain
            if len(domain2ipset) % 1000 == 0 and len(domain2ipset.get(domain, [])) == 1:
                logging.info('%s domains out of %s have at least one working NS (%0.2f %%)',
                             len(domain2ipset), len(domain2nsset), len(domain2ipset)/len(domain2nsset)*100)
        logging.info('first pass done, processing queued retries')
        # drain retries in batches; each pass may enqueue further retries
        while retry_queue: # retry_queue might be filled again in process_reply
            for attempt, domain, ip, state in pool.imap_unordered(
                    check_availability,
                    retry_candidates(retry_queue, netstats)):
                process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipset)
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
main()
domain2nsset, nsname2ipset, netstats, domain2ipset = load()
try:
update_mapping(domain2nsset, nsname2ipset, netstats, domain2ipset)
except KeyboardInterrupt:
pass
finally:
save(domain2nsset, netstats, domain2ipset)
# machine readable output: # of domains which an working NS
print(len(domain2ipset))
# machine readable output: total # of domains with NS set
print(len(domain2nsset))
......@@ -65,7 +65,7 @@ def collect_server_stats(eval_edns_func, edns_infns: str) -> Dict[str, Counter[E
Combine results from all files with ednscomp output and summarize stats
"""
server_stats = {} # type: Dict[str, Counter[EDNSResult]]
i = 0
i = 1
for infilename in edns_infns:
logging.info('processed file no. {}, file name "{}"'.format(i, infilename))
with open(infilename) as infile:
......@@ -82,18 +82,24 @@ def collect_server_stats(eval_edns_func, edns_infns: str) -> Dict[str, Counter[E
i += 1
return server_stats
def save(nsstats, criteria: str) -> None:
    """
    Pickle per-server EDNS statistics into ednsstats_<criteria>.pickle.

    param criteria: name of criteria - strict / permissive
    """
    filename = 'ednsstats_{}.pickle'.format(criteria)
    logging.info('saving EDNS results into {}'.format(filename))
    # context manager guarantees the pickle is flushed and the handle closed;
    # the original passed an anonymous open() to pickle.dump and relied on GC
    with open(filename, 'wb') as statsfile:
        pickle.dump(nsstats, statsfile)
def main(infiles):
    """
    Collect EDNS statistics in both evaluation modes and pickle them.

    infiles - names of files with output from ISC genreport
    """
    logging.info('processing input in EDNS strict mode')
    nsstats_strict = collect_server_stats(eval_edns_strict, infiles)
    logging.info('saving results')
    # save() writes ednsstats_strict.pickle; the removed direct pickle.dump of
    # the same file was redundant and leaked an unclosed file handle
    save(nsstats_strict, 'strict')
    logging.info('processing input in EDNS permissive mode')
    nsstats_permissive = collect_server_stats(eval_edns_permissive, infiles)
    logging.info('saving results')
    save(nsstats_permissive, 'permissive')
if __name__ == "__main__":
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s')
......
......@@ -4,9 +4,10 @@
produce EDNS stats summary from pickled statistical data
"""
from enum import IntEnum
import collections
import logging
import pickle
from typing import Counter, Dict, Iterable, Iterator, NamedTuple, Set
from typing import Counter, Dict, List, Iterable, Iterator, NamedTuple, Set, Union
import dns.name
......@@ -30,6 +31,8 @@ NSIPResult = NamedTuple('NSIPResult', [('ip', str),
DomainResult = NamedTuple('DomainResult', [('state', EDNSResult),
('reason', str)])
# One row of the permissive-vs-strict comparison table: counts of domains in a
# given result category under each evaluation mode.
ComparisonRow = NamedTuple('ComparisonRow', [('permissive', int),
                                             ('strict', int)])
def zone_stream(zone2ns_fn: str, threshold: int) -> Iterator[ZoneNSStats]:
"""
......@@ -114,18 +117,7 @@ def eval_edns_mode(
results[domain_res.state][domain] = domain_res.reason
return results
def print_stat_line(mode, modename, total, res_perm, res_strict):
perm_pass = len(res_perm[mode])
strict_pass = len(res_strict[mode])
print('{:13s}| {:>11n} {:>7.2f} %\t| {:>11n} {:>7.2f} %'.format(
modename,
perm_pass, perm_pass/total*100,
strict_pass, strict_pass/total*100))
def main():
"""
compute stats and print them to stdout
"""
def load():
logging.info('loading EDNS statistics for strict mode (2019+)')
with open('ednsstats_strict.pickle', 'rb') as ednsstats_bin:
ednsstats_strict = pickle.load(ednsstats_bin)
......@@ -141,29 +133,75 @@ def main():
logging.info('loading domain-IP mapping')
with open('domain2ipset.pickle', 'rb') as domain2ipset_bin:
domain2ipset = pickle.load(domain2ipset_bin)
return ednsstats_strict, ednsstats_permissive, domain2nsset, nsname2ipset, domain2ipset
class ResultTable():
    """Per-category comparison of permissive vs. strict evaluation results.

    Renders the same counts either as a CSV table (`csv`) or as an aligned
    human-readable table with percentages (`text`).
    """

    header = ['Mode', 'Permissive (<= 2018)', 'Strict (2019+)']
    # fixed display order and labels of the result categories
    names = collections.OrderedDict([
        (EDNSResult.ok, 'Ok'),
        (EDNSResult.compatible, 'Compatible'),
        (EDNSResult.high_latency, 'High latency'),
        (EDNSResult.dead, 'Dead')
    ])

    def __init__(self, total, results_permissive, results_strict):
        # total is the denominator used for the percentage columns
        self.total = total
        self.results = {
            category: ComparisonRow(len(results_permissive[category]),
                                    len(results_strict[category]))
            for category in self.names
        }

    @property
    def csv(self) -> str:
        """Semicolon-separated table: header line, then one line per category."""
        rows = ['{};{};{}'.format(label,
                                  self.results[category].permissive,
                                  self.results[category].strict)
                for category, label in self.names.items()]
        return '\n'.join([';'.join(self.header)] + rows)

    @property
    def text(self) -> str:
        """Aligned plain-text table with absolute counts and percentages."""
        pieces = ['{:13s}| {:.20s} | {:.20s}\n'.format(*self.header),
                  '{:.13s}+-{:.20s}--+--{:.20s}\n'.format(*['-'*30]*3)]
        for category, label in self.names.items():
            row = self.results[category]
            pieces.append('{:13s}| {:>11n} {:>7.2f} % | {:>11n} {:>7.2f} %\n'.format(
                label,
                row.permissive, row.permissive/self.total*100,
                row.strict, row.strict/self.total*100))
        return ''.join(pieces)
def evaluate(ednsstats_strict, ednsstats_permissive, domain2nsset, nsname2ipset, domain2ipset):
    """Evaluate per-domain EDNS compliance in both modes and summarize.

    Returns (summary, results_strict, results_permissive) where summary is a
    ResultTable; pickling of the result dicts is the caller's job (save_pickle),
    so the leftover inline pickle.dump calls and the calls to the removed
    print_stat_line helper were dropped.
    """
    logging.info('starting strict mode evaluation')
    results_strict = eval_edns_mode(ednsstats_strict, domain2nsset, nsname2ipset, domain2ipset)
    logging.debug('%s', {case: len(results_strict[case]) for case in EDNSResult})
    logging.info('starting permissive mode evaluation')
    results_permissive = eval_edns_mode(ednsstats_permissive, domain2nsset, nsname2ipset, domain2ipset)
    logging.debug('%s', {case: len(results_permissive[case]) for case in EDNSResult})
    # percentages in the summary are relative to all domains with an NS set
    total = len(domain2nsset)
    summary = ResultTable(total, results_permissive, results_strict)
    return summary, results_strict, results_permissive
def save_pickle(results, mode: str) -> None:
    """Serialize evaluation results for one EDNS mode into results_<mode>.pickle."""
    logging.info('pickling results for EDNS {} mode'.format(mode))
    filename = 'results_{}.pickle'.format(mode)
    with open(filename, 'wb') as results_bin:
        pickle.dump(results, results_bin)
def save_summary(summary) -> None:
    """Write the summary table as text (summary.txt) and CSV (summary.csv)."""
    outputs = [('summary.txt', summary.text),
               ('summary.csv', summary.csv)]
    for filename, payload in outputs:
        with open(filename, 'w') as summary_file:
            summary_file.write(payload)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
main()
summary, results_strict, results_permissive = evaluate(*load())
save_pickle(results_strict, 'strict')
save_pickle(results_permissive, 'permissive')
save_summary(summary)
print(summary.text)
......@@ -30,7 +30,7 @@ def generate(nsname2ipset, domain2ipset):
if ipaddr in ip_done:
continue
nsname = get_ns_name_from_ip(ipaddr, nsname2ipset)
yield '{} {} {}'.format(domain, nsname, ipaddr)
yield '{} {} {}\n'.format(domain, nsname, ipaddr)
ip_done.add(ipaddr)
def main():
......@@ -41,7 +41,7 @@ def main():
domain2ipset = dataapi.load_domain2ipset()
for genreport_line in generate(nsname2ipset, domain2ipset):
print(genreport_line)
print(genreport_line, end='')
if __name__ == "__main__":
main()
......@@ -60,32 +60,34 @@ def update_mapping(nsnames: Set[dns.name.Name],
dns.resolver.default_resolver.lifetime = 5 # seconds
#dns.resolver.default_resolver.nameservers = ['193.29.206.206']
try:
with Pool(processes=128) as p:
i = 0
logging.info('starting DNS query machinery')
for nsname, ipset in p.imap_unordered(get_ips, yield_ns_name(nsnames, mapping), chunksize=10):
i += 1
if i % 100 == 0:
logging.info('queried %d names; %s out of %s NS names resolved to an IP address (%0.2f %%)',
i, len(mapping), len(nsnames), len(mapping)/len(nsnames)*100)
if ipset:
mapping[nsname] = ipset
with Pool(processes=128) as p:
i = 0
logging.info('starting DNS query machinery')
for nsname, ipset in p.imap_unordered(get_ips, yield_ns_name(nsnames, mapping), chunksize=10):
i += 1
if i % 100 == 0:
logging.info('queried %d names; %s out of %s NS names resolved to an IP address (%0.2f %%)',
i, len(mapping), len(nsnames), len(mapping)/len(nsnames)*100)
if ipset:
mapping[nsname] = ipset
def save(mapping: Dict[dns.name.Name, Set[str]]) -> None:
    """Pickle the NS name -> IP-address-set mapping into nsname2ipset.pickle."""
    logging.info('writting %d results', len(mapping))
    # context manager ensures the pickle is flushed and the handle closed even
    # on error; the original handed an anonymous open() to pickle.dump
    with open('nsname2ipset.pickle', 'wb') as mapping_file:
        pickle.dump(mapping, mapping_file)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
nsnames = load_nsnames()
mapping = load_nsname2ipset()
try:
update_mapping(nsnames, mapping)
except Exception:
logging.exception('exception while resolving names to IP addresses')
raise
finally: # attempt to salvage partial results
logging.info('writting %d results', len(mapping))
pickle.dump(mapping, open('nsname2ipset.pickle', 'wb'))
logging.info('%s out of %s NS names resolved to an IP address (%0.2f %%)',
len(mapping), len(nsnames), len(mapping)/len(nsnames)*100)
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
nsnames = load_nsnames()
mapping = load_nsname2ipset()
update_mapping(nsnames, mapping)
save(mapping)
# machine readable output: # of NS names resolved to an IP address
print(len(mapping))
# machine readable output: total # of NS names
......
#!/usr/bin/python3
import datetime
import logging
import subprocess
import sys
def repeat_genreport(cycles: int):
    """Run the genreport tool `cycles` times over ednscomp.input.

    Each round writes its output to a fresh timestamped ednscompresult-* file;
    the input file is rewound between rounds so every run sees the same data.
    """
    logging.info('Hint: use tail -f to monitor progress of individual genreport runs')
    with open('ednscomp.input', 'rb') as queries:
        for round_no in range(1, cycles + 1):
            outname = 'ednscompresult-{}'.format(datetime.datetime.utcnow().isoformat())
            with open(outname, 'wb') as report:
                logging.info('genreport round {} / {}, output file {}'.format(
                    round_no, cycles, report.name))
                subprocess.run(args=['genreport', '-m', '500', '-p'],
                               check=True, stdin=queries, stdout=report)
            queries.seek(0)
def check_env():
    """Verify that the genreport tool is in PATH and executable.

    Raises SystemExit with a fatal message when genreport cannot be run.
    """
    # check if genreport is in PATH and can be executed
    try:
        # input must be bytes: run() opens the child's stdin in binary mode,
        # so passing the str '' raised TypeError and failed the check even
        # when genreport was perfectly usable
        subprocess.run(check=True, input=b'', args=['genreport'])
    except Exception:
        # narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed and converted into the "missing tool" error
        logging.exception('unable to execute genreport tool, make sure it is in PATH')
        raise SystemExit('fatal error: genreport is required')
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
    # number of genreport rounds comes from the optional first CLI argument
    try:
        cycles = int(sys.argv[1])
    except IndexError:
        # no argument given - fall back to the recommended 10 rounds
        cycles = 10
    except ValueError:
        # argument present but not an integer
        logging.critical('Usage: %s [number_of_genreport_rounds]', sys.argv[0])
        sys.exit(1)
    check_env()
    repeat_genreport(cycles)
\ No newline at end of file
#!/bin/bash
# Run the ISC genreport tool N times (default 10) over ednscomp.input,
# storing each round's output in a timestamped ednscompresult-* file.
set -o nounset
# N = number of rounds; first positional argument, defaults to 10
N=${1:-10}
# test that genreport exists in PATH and is executable
echo "" | genreport
if [ "$?" -ne 0 ]
then
    echo "cannot execute genreport binary, check if it is installed in PATH"
    exit 1
fi
# errexit is enabled only after the probe above, whose non-zero exit
# status is inspected manually
set -o errexit
echo "Hint: use tail -f to monitor progress of individual genreport runs"
for ROUND in $(seq 1 ${N})
do
    # one output file per round, named by the current second-resolution timestamp
    OUTFILENAME="ednscompresult-$(date -Iseconds)"
    echo "genreport round ${ROUND} / ${N}, output file ${OUTFILENAME}"
    # NOTE(review): -m 500 and -p are genreport options; confirm their exact
    # semantics against the genreport usage text
    time genreport -m 500 -p < ednscomp.input > "${OUTFILENAME}"
    ls -sh "${OUTFILENAME}"
done
echo "genreport scan finished ${N} times, proceed with ednscomp2pickle.py"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment