Commits (3)
......@@ -8,9 +8,11 @@ script sends queries to IP addresses from glue.
import collections
import enum
import itertools
import logging
import multiprocessing
import pickle
import random
from typing import Counter, Deque, Dict, Iterable, Set, Tuple
import dns.message
......@@ -154,12 +156,35 @@ def save(domain2nsset, netstats, domain2ipset):
logging.info('%s out of %s domains has at least one working NS (%0.2f %%)',
len(domain2ipset), len(domain2nsset), len(domain2ipset)/len(domain2nsset)*100)
def randomize_iter(iterable: Iterable, window_len: int):
    """Randomize order of iteration over an iterable using fixed window.

    Items are consumed from ``iterable`` in chunks of at most ``window_len``
    items; each chunk is shuffled and then yielded, so an item is only ever
    reordered relative to the other items read in the same chunk.

    Args:
        iterable: source of items; any iterable (it need not be an iterator).
        window_len: maximum number of items buffered and shuffled at once;
            must be positive.

    Yields:
        Every item of ``iterable`` exactly once, in locally shuffled order.
    """
    assert window_len > 0
    # islice() applied directly to a re-iterable container (e.g. a list)
    # would restart from its beginning on every pass and never terminate,
    # so draw all windows from one shared iterator.
    iterator = iter(iterable)
    while True:
        # buffer up to "window_len" items
        window = list(itertools.islice(iterator, window_len))
        if not window:
            return
        # the shuffle is what actually randomizes the order inside the window
        random.shuffle(window)
        yield from window
def update_mapping(domain2nsset, nsname2ipset, netstats, domain2ipset):
retry_queue = collections.deque() # type: Deque[Tuple[int, dns.name.Name, str]]
#logging.info('computing number of candidates to query')
#candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
#logging.info('queue contains %s queries to be checked', count_candidates(candidates))
candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
# It is useful to avoid querying the same IP address 100 times in a row
# because we could send all 100 queries in parallel and block all
# threads waiting for a (potential) timeout.
# With a sufficiently large window we should detect a dead IP address
# before overshooting the NetStats.timeouts_in_row limit, which limits time wasted on timeouts.
# It could also help as a workaround for aggressive response rate limiting.
candidates = randomize_iter(
gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset),
with multiprocessing.Pool(processes = 30) as pool:
for attempt, domain, ip, state in pool.imap_unordered(check_availability, candidates):
process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipset)
......@@ -14,7 +14,7 @@ def repeat_genreport(cycles: int):
logging.info('genreport round {} / {}, output file {}'.format(
cycle, cycles, ednscomp_output.name))
subprocess.run(check=True, stdin=ednscomp_input, stdout=ednscomp_output,
args=['genreport', '-m', '500', '-p'])
args=['genreport', '-m', '500', '-p', '-s'])
def check_env():
......@@ -36,4 +36,4 @@ if __name__ == '__main__':
\ No newline at end of file