#!/usr/bin/env python3
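"""Summarize respdiff results: read per-query diffs from LMDB, aggregate
mismatch statistics per field, and print a plain-text report.

Example invocation (paths are illustrative only):
    ./diffsum.py -c respdiff.cfg /path/to/lmdb-envdir
"""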

import argparse
import collections
import logging
import pickle
import sys

import dns.message
import dns.rdatatype

import cfg
from dbhelper import LMDB
from msgdiff import DataMismatch  # NOQA: needed for unpickling


def process_diff(field_weights, field_stats, qwire, diff):
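    """Count a single diff: bucket the query under its most significant
    mismatched field, i.e. the first field in field_weights present in the
    diff. Mismatches in the 'answer' field are not summarized here."""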
    significant_field = None
    for field in field_weights:
        if field in diff:
            significant_field = field
            break
    assert significant_field is not None, 'mismatched field must be listed in field_weights'
    if significant_field == 'answer':
        return

    qmsg = dns.message.from_wire(qwire)
    question = (qmsg.question[0].name, qmsg.question[0].rdtype)

    field_mismatches = field_stats.setdefault(significant_field, {})
    mismatch = diff[significant_field]
    mismatch_key = (mismatch.exp_val, mismatch.got_val)
    mismatch_counter = field_mismatches.setdefault(mismatch_key, collections.Counter())
    mismatch_counter[question] += 1


# FIXME: this code is ugly, refactor it
def process_results(field_weights, diff_generator):
    """
    field_stats { field: value_stats { (exp, got): Counter(queries) } }
    """
    global_stats = {
        'others_disagree': 0,
        'target_disagrees': 0,
    }
    field_stats = {}

    for _, qwire, others_agree, target_diff in diff_generator:
        if not others_agree:
            global_stats['others_disagree'] += 1
            continue

        if not target_diff:  # everybody agreed, nothing to count
            continue

        global_stats['target_disagrees'] += 1
        process_diff(field_weights, field_stats, qwire, target_diff)

    return global_stats, field_stats


def combine_stats(counters):
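    """Sum the per-query counters into per-field totals and into
    per-(expected, got) totals within each field."""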
    field_mismatch_sums = {}
    for field in counters:
        field_mismatch_sums[field] = collections.Counter(
            {mismatch: sum(counter.values())
             for mismatch, counter in counters[field].items()})

    field_sums = collections.Counter(
        {field: sum(counter.values())
         for field, counter in field_mismatch_sums.items()})
    return field_sums, field_mismatch_sums


def mismatch2str(mismatch):
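    """Return the (expected, got) pair as printable strings; list-valued
    sides are joined with spaces."""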
    if not isinstance(mismatch[0], str):
        return (' '.join(mismatch[0]), ' '.join(mismatch[1]))
    return mismatch


def maxlen(iterable):
    return max(len(str(it)) for it in iterable)


def print_results(gstats, field_weights, counters, n=10):
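    """Print the global summary, per-field mismatch counts, per-value
    breakdowns, and the n most common queries affected by each mismatch
    (n == 0 means no limit)."""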
    # global stats
    field_sums, field_mismatch_sums = combine_stats(counters)

    maxcntlen = maxlen(gstats.values())
    others_agree = gstats['answers'] - gstats['others_disagree']
    target_disagrees = gstats['target_disagrees']

    global_report = '\n'.join([
        '== Global statistics',
        'duration           {duration:{ml}} s',
        'queries            {queries:{ml}}',
        'answers            {answers:{ml}}    {answers_pct:6.2f} % of queries',
        ('others agree       {oth_agr:{ml}}    {oth_agr_pct:6.2f} % of answers'
         ' (ignoring {oth_agr_ignore_pct:.2f} % of answers)'),
        ('target disagrees   {tgt_disagr:{ml}}    {tgt_disagr_pct:6.2f} % of '
         'matching answers from others')
    ])

    print(global_report.format(
        ml=maxcntlen,
        duration=gstats['duration'],
        queries=gstats['queries'],
        answers=gstats['answers'],
        answers_pct=100.0 * gstats['answers'] / gstats['queries'],
        oth_agr=others_agree,
        oth_agr_pct=100.0 * others_agree / gstats['answers'],
        oth_agr_ignore_pct=100.0 * gstats['others_disagree'] / gstats['answers'],
        tgt_disagr=gstats['target_disagrees'],
        tgt_disagr_pct=100.0 * gstats['target_disagrees'] / others_agree))

    if not field_sums.keys():
        return
    print('')
    # print('== Field statistics: field - count - % of mismatches')
    maxnamelen = maxlen(field_sums.keys())
    maxcntlen = maxlen(field_sums.values())
    print('== {:{}}    {:{}}    {}'.format(
        'Field', maxnamelen - (len('count') - maxcntlen),
        'count', maxcntlen,
        '% of mismatches'))

    for field, count in field_sums.most_common():
        print('{:{}}    {:{}}     {:3.0f} %'.format(
            field, maxnamelen + 3,
            count, maxcntlen + 3, 100.0 * count / target_disagrees))

    for field in field_weights:
        if field not in field_mismatch_sums:
            continue
        print('')
        print('== Field "%s" mismatch statistics' % field)
        maxvallen = max((max(len(str(mismatch2str(mism)[0])), len(str(mismatch2str(mism)[1])))
                         for mism in field_mismatch_sums[field].keys()))
        maxcntlen = maxlen(field_mismatch_sums[field].values())
        print('{:{}}  !=  {:{}}    {:{}}    {}'.format(
            'Expected', maxvallen,
            'Got', max(maxvallen - (len('count') - maxcntlen), 1),
            'count', maxcntlen,
            '% of mismatches'
        ))
        for mismatch, count in field_mismatch_sums[field].most_common():
            mismatch = mismatch2str(mismatch)
            print('{:{}}  !=  {:{}}    {:{}}    {:3.0f} %'.format(
                str(mismatch[0]), maxvallen,
                str(mismatch[1]), maxvallen,
                count, maxcntlen,
                100.0 * count / target_disagrees))

    for field in field_weights:
        if field not in counters:
            continue
        for mismatch, count in field_mismatch_sums[field].most_common():
            display_limit = count if n == 0 else n
            limit_msg = ''
            if display_limit < count:
                limit_msg = ' (displaying {} out of {} results)'.format(display_limit, count)
            print('')
            print('== Field "%s" mismatch %s query details%s' % (field, mismatch, limit_msg))
            counter = counters[field][mismatch]
            print_field_queries(counter, display_limit)


def print_field_queries(counter, n):
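    """Print the n most frequently affected (qname, qtype) pairs."""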
    for query, count in counter.most_common(n):
        qname, qtype = query
        qtype = dns.rdatatype.to_text(qtype)
        print("%s %s\t\t%s mismatches" % (qname, qtype, count))


def read_diffs_lmdb(lmdb):
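    """Yield (query id, query wire data, others_agree flag, diff) for every
    record in the diffs database; each diff blob is a pickled
    (others_agree, diff) tuple."""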
    qdb = lmdb.get_db(LMDB.QUERIES)
    ddb = lmdb.get_db(LMDB.DIFFS)
    with lmdb.env.begin() as txn:
        with txn.cursor(ddb) as diffcur:
            for qid, diffblob in diffcur:
                others_agree, diff = pickle.loads(diffblob)
                qwire = txn.get(qid, db=qdb)
                yield qid, qwire, others_agree, diff


def main():
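    """Parse arguments, read data from the LMDB environment, compute the
    statistics, and print the report."""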
    logging.basicConfig(format='%(levelname)s %(message)s', level=logging.DEBUG)
    parser = argparse.ArgumentParser(
        description='read pre-computed diffs from LMDB and print summary '
                    'statistics of the mismatches')
    parser.add_argument('-c', '--config', default='respdiff.cfg', dest='cfgpath',
                        help='config file (default: respdiff.cfg)')
    parser.add_argument('-l', '--limit', type=int, default=10,
                        help='number of displayed mismatches in fields (default: 10; '
                             'use 0 to display all)')
    parser.add_argument('envdir', type=str,
                        help='LMDB environment to read queries and answers from')
    args = parser.parse_args()
    config = cfg.read_cfg(args.cfgpath)
    field_weights = config['report']['field_weights']

    with LMDB(args.envdir, readonly=True) as lmdb:
        qdb = lmdb.open_db(LMDB.QUERIES)
        adb = lmdb.open_db(LMDB.ANSWERS)
        lmdb.open_db(LMDB.DIFFS)
        sdb = lmdb.open_db(LMDB.STATS)
        diff_stream = read_diffs_lmdb(lmdb)
        global_stats, field_stats = process_results(field_weights, diff_stream)
        with lmdb.env.begin() as txn:
            global_stats['queries'] = txn.stat(qdb)['entries']
            global_stats['answers'] = txn.stat(adb)['entries']
        if global_stats['answers'] == 0:
            logging.error('No answers in DB!')
            sys.exit(1)
        with lmdb.env.begin(sdb) as txn:
            stats = pickle.loads(txn.get(b'global_stats'))
    global_stats['duration'] = stats['end_time'] - stats['start_time']
    print_results(global_stats, field_weights, field_stats, n=args.limit)


if __name__ == '__main__':
    main()