Commit 2b2a03b4 authored by Petr Špaček

diffrepro: tool to measure reproducibility of mismatches detected by msgdiff

Some mismatches detected by msgdiff are not readily reproducible, and
sifting through the output of diffsum by hand is time-consuming.

The diffrepro tool walks through all differences in the "diffs" sub-database
and re-queries the specified resolvers. The answers are compared using the very
same algorithm as in msgdiff, and the diff from the new answers is compared
with the respective diff stored in the "diffs" DB.

Information on whether the new answers from the other resolvers match,
and whether the comparison with the target resolver leads to the same diff as
the one stored from previous runs, is stored in the "reprostats" sub-database.

This tool can be re-run multiple times to gather better statistics.
Results from current run will be added to results from all the previous
runs.

Data in the "reprostats" DB can later be read by the "reprosum" tool, which
produces a list of queries leading to reproducible failures.
This ability will later be folded into the "diffsum" tool.
parent 69995b70
import pickle
import subprocess
import sys
import lmdb
import dbhelper
import diffsum
from msgdiff import DataMismatch # needed for unpickling
import msgdiff
import orchestrator
import sendrecv
def open_db(envdir):
    """Open the LMDB environment at envdir and the sub-databases used here.

    The environment settings start from dbhelper.env_open and are overridden
    with path=envdir, readonly=False and create=False.  The "queries" and
    "diffs" sub-databases are opened with create=False, "reprostats" with
    create=True.

    Returns the tuple (lenv, qdb, ddb, reprodb).
    """
    env_kwargs = dbhelper.env_open.copy()
    env_kwargs['path'] = envdir
    env_kwargs['readonly'] = False
    env_kwargs['create'] = False
    lenv = lmdb.Environment(**env_kwargs)

    def open_sub(name, create):
        # every sub-database shares the common dbhelper.db_open options
        return lenv.open_db(key=name, create=create, **dbhelper.db_open)

    # tuple elements are evaluated left to right: queries, diffs, reprostats
    return (lenv,
            open_sub(b'queries', False),
            open_sub(b'diffs', False),
            open_sub(b'reprostats', True))
def load_stats(lenv, reprodb, qid):
    """Return the (count, others_agreed, diff_matched) counters stored for qid.

    The pickled record is read from the reprodb sub-database.  When no record
    is stored for qid, all three counters are zero.  The record must contain
    exactly three values in non-increasing order, otherwise an assertion
    fails.
    """
    with lenv.begin() as txn:
        raw = txn.get(qid, db=reprodb)
    stats = pickle.loads(raw) if raw else (0, 0, 0)

    assert len(stats) == 3
    count, others_agreed, diff_matched = stats
    assert count >= others_agreed >= diff_matched
    return count, others_agreed, diff_matched
def save_stats(lenv, reprodb, qid, stats):
    """Pickle stats and store it under qid in the reprodb sub-database.

    stats must hold exactly three values in non-increasing order, i.e.
    (count, others_agreed, diff_matched); otherwise an assertion fails and
    nothing is written.
    """
    assert len(stats) == 3
    count, others_agreed, diff_matched = stats
    assert count >= others_agreed >= diff_matched

    payload = pickle.dumps(stats)
    with lenv.begin(write=True) as txn:
        txn.put(qid, payload, db=reprodb)
def main():
    """Re-query the resolvers for stored diffs and update "reprostats".

    sys.argv[1] is the LMDB environment directory.  sys.argv[2] is a program
    run through subprocess.check_call() before every re-query (the "restart
    script").  Diffs where the other resolvers did not agree originally are
    skipped.  For each remaining diff the counters
    (retries, upstream_stable, diff_matches) are loaded, updated, printed and
    saved back.
    """
    # FIXME: comparison criteria are hard-coded
    criteria = ['opcode', 'rcode', 'flags', 'question', 'qname', 'qtype',
                'answertypes', 'answerrrsigs']
    selector, sockets = sendrecv.sock_init(orchestrator.resolvers)
    lenv, qdb, ddb, reprodb = open_db(sys.argv[1])

    for qid, qwire, orig_others_agree, orig_diffs in diffsum.read_diffs_lmdb(lenv, qdb, ddb):
        if not orig_others_agree:
            # others do not agree, nothing to verify
            continue

        # restart script
        subprocess.check_call([sys.argv[2]])

        # others agree, verify if answers are stable and the diff is reproducible
        retries, upstream_stable, diff_matches = load_stats(lenv, reprodb, qid)
        wire_blobs = sendrecv.send_recv_parallel(
            qwire, selector, sockets, orchestrator.timeout)
        answers = msgdiff.decode_wire_dict(wire_blobs)
        # FIXME: target resolver name is hard-coded
        new_others_agree, new_diffs = msgdiff.compare(answers, criteria, 'kresd')

        retries += 1
        others_unchanged = orig_others_agree == new_others_agree
        if others_unchanged:
            upstream_stable += 1
        # a diff match only counts when the other resolvers were stable too,
        # which keeps retries >= upstream_stable >= diff_matches
        if others_unchanged and orig_diffs == new_diffs:
            diff_matches += 1

        updated = (retries, upstream_stable, diff_matches)
        print(qid, updated)
        save_stats(lenv, reprodb, qid, updated)


if __name__ == '__main__':
    main()
......@@ -9,7 +9,7 @@ import lmdb
import dbhelper
from msgdiff import DataMismatch # needed for unpickling
def process_diff(field_weights, field_stats, question, diff):
def process_diff(field_weights, field_stats, qwire, diff):
found = False
for field in field_weights:
if field in diff:
......@@ -19,6 +19,9 @@ def process_diff(field_weights, field_stats, question, diff):
if significant_field == 'answer':
return
qmsg = dns.message.from_wire(qwire)
question = (qmsg.question[0].name, qmsg.question[0].rdtype)
field_mismatches = field_stats.setdefault(field, {})
mismatch = diff[significant_field]
mismatch_key = (mismatch.exp_val, mismatch.got_val)
......@@ -37,7 +40,7 @@ def process_results(field_weights, diff_generator):
field_stats = {}
#print('diffs = {')
for qid, question, others_agree, target_diff in diff_generator:
for qid, qwire, others_agree, target_diff in diff_generator:
#print(qid, others_agree, target_diff)
if not others_agree:
global_stats['others_disagree'] += 1
......@@ -50,7 +53,7 @@ def process_results(field_weights, diff_generator):
#print(target_diff, ',')
global_stats['target_disagrees'] += 1
process_diff(field_weights, field_stats, question, target_diff)
process_diff(field_weights, field_stats, qwire, target_diff)
#print('}')
return global_stats, field_stats
......@@ -168,13 +171,8 @@ def read_diffs_lmdb(levn, qdb, ddb):
with txn.cursor(ddb) as diffcur:
for qid, diffblob in diffcur:
others_agree, diff = pickle.loads(diffblob)
if others_agree:
qwire = txn.get(qid, db=qdb)
qmsg = dns.message.from_wire(qwire)
question = (qmsg.question[0].name, qmsg.question[0].rdtype)
else:
question = None
yield (qid, question, others_agree, diff)
qwire = txn.get(qid, db=qdb)
yield (qid, qwire, others_agree, diff)
def main():
lenv, qdb, adb, ddb = open_db(sys.argv[1])
......
......@@ -151,23 +151,27 @@ def match(expected, got, match_fields):
yield (code, ex)
def read_answers_lmdb(lenv, db, qid):
def decode_wire_dict(wire_dict):
    """Convert a dict of wire-format blobs into a dict of DNS messages.

    Each value is parsed with dns.message.from_wire() and stored under the
    same key in a new dict.  Entries whose value fails to parse are left out
    of the result.  wire_dict must be a dict, otherwise an assertion fails.
    """
    assert isinstance(wire_dict, dict)
    decoded = {}
    for name, wire in wire_dict.items():
        try:
            message = dns.message.from_wire(wire)
        except Exception:
            # best effort: a blob that cannot be decoded is silently dropped
            continue
        decoded[name] = message
    return decoded
def read_answers_lmdb(lenv, db, qid):
with lenv.begin(db) as txn:
blob = txn.get(qid)
assert blob
blob_dict = pickle.loads(blob)
assert isinstance(blob_dict, dict)
for k, v in blob_dict.items():
# decode bytes to dns.message objects
#if isinstance(v, bytes):
# convert from wire format to DNS message object
try:
answers[k] = dns.message.from_wire(v)
except Exception as ex:
#answers[k] = ex # decoding failed, record it!
continue
return answers
assert blob
wire_dict = pickle.loads(blob)
return decode_wire_dict(wire_dict)
def diff_pair(answers, criteria, name1, name2):
......@@ -189,10 +193,7 @@ def transitive_equality(answers, criteria, resolvers):
res_others))
def compare(target, qid, criteria):
global lenv
global answers_db
answers = read_answers_lmdb(lenv, answers_db, qid)
def compare(answers, criteria, target):
others = list(answers.keys())
try:
others.remove(target)
......@@ -250,7 +251,8 @@ def compare_lmdb_wrapper(qid):
global i
#global prof
#return compare(target, workdir, criteria)
others_agree, target_diffs = compare(target, qid, criteria)
answers = read_answers_lmdb(lenv, answers_db, qid)
others_agree, target_diffs = compare(answers, criteria, target)
if others_agree and not target_diffs:
return # all agreed, nothing to write
blob = pickle.dumps((others_agree, target_diffs))
......
import pickle
import subprocess
import sys
import dns.message
import lmdb
import dbhelper
import diffsum
from msgdiff import DataMismatch # needed for unpickling
import msgdiff
import orchestrator
import sendrecv
def open_db(envdir):
    """Open the LMDB environment at envdir and the sub-databases used here.

    The environment settings start from dbhelper.env_open and are overridden
    with path=envdir, readonly=False and create=False.  The "queries" and
    "diffs" sub-databases are opened with create=False, "reprostats" with
    create=True.

    Returns the tuple (lenv, qdb, ddb, reprodb).
    """
    env_kwargs = dbhelper.env_open.copy()
    env_kwargs['path'] = envdir
    env_kwargs['readonly'] = False
    env_kwargs['create'] = False
    lenv = lmdb.Environment(**env_kwargs)

    def open_sub(name, create):
        # every sub-database shares the common dbhelper.db_open options
        return lenv.open_db(key=name, create=create, **dbhelper.db_open)

    # tuple elements are evaluated left to right: queries, diffs, reprostats
    return (lenv,
            open_sub(b'queries', False),
            open_sub(b'diffs', False),
            open_sub(b'reprostats', True))
def load_stats(lenv, reprodb, qid):
    """Return the (count, others_agreed, diff_matched) counters stored for qid.

    The pickled record is read from the reprodb sub-database.  When no record
    is stored for qid, all three counters are zero.  The record must contain
    exactly three values in non-increasing order, otherwise an assertion
    fails.
    """
    with lenv.begin() as txn:
        raw = txn.get(qid, db=reprodb)
    stats = pickle.loads(raw) if raw else (0, 0, 0)

    assert len(stats) == 3
    count, others_agreed, diff_matched = stats
    assert count >= others_agreed >= diff_matched
    return count, others_agreed, diff_matched
def read_repro_lmdb(levn, qdb, reprodb):
    """Yield (qid, qwire, (count, others_agreed, diff_matched)) per record.

    Iterates over every entry of the reprodb sub-database inside a single
    transaction, unpickles its three counters and looks up the value stored
    under the same qid in qdb.
    """
    with levn.begin() as txn, txn.cursor(reprodb) as cursor:
        for qid, blob in cursor:
            # unpacking into three names rejects records of any other length
            count, others_agreed, diff_matched = pickle.loads(blob)
            qwire = txn.get(qid, db=qdb)
            yield qid, qwire, (count, others_agreed, diff_matched)
def main():
    """Print the question of every query whose failure reproduced each time.

    sys.argv[1] is the LMDB environment directory.  A record from the
    "reprostats" sub-database is reported only when all three of its counters
    (count, others_agreed, diff_matched) are equal.  Queries whose stored
    wire data cannot be parsed are skipped.
    """
    lenv, qdb, ddb, reprodb = open_db(sys.argv[1])
    repro_stream = read_repro_lmdb(lenv, qdb, reprodb)
    for qid, qwire, (count, others_agreed, diff_matched) in repro_stream:
        if not count == others_agreed == diff_matched:
            continue  # at least one retry did not reproduce the stored diff

        try:
            qmsg = dns.message.from_wire(qwire)
        except Exception:
            # Skip undecodable queries.  A bare "except:" would also swallow
            # KeyboardInterrupt and SystemExit, making the run uninterruptible.
            continue
        print(qmsg.question[0])


if __name__ == '__main__':
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment