diffsum.py 3.98 KB
Newer Older
1 2 3 4
#!/usr/bin/env python3

import argparse
import logging
5
import sys
6
from typing import (  # noqa
7
    Any, Callable, Iterable, Iterator, ItemsView, List, Set, Sequence, Tuple,
8
    Union)
9

10
from respdiff import cli
11
from respdiff.database import LMDB
12 13 14
from respdiff.dataformat import DiffReport, Summary
from respdiff.query import (
    convert_queries, get_printable_queries_format, get_query_iterator)
15

16

17 18 19
# NOTE(review): DEFAULT_LIMIT is not referenced in the visible part of this
# file; presumably the default for the --limit option — confirm before removing.
DEFAULT_LIMIT = 10
# Two columns: a label padded to 21 characters and a value right-aligned
# in 8 characters.
GLOBAL_STATS_FORMAT = '{:21s}   {:>8}'
# Four fields: a label padded to 21 characters, an integer count 8 wide,
# a percentage with two decimals followed by ' %', and a trailing note.
GLOBAL_STATS_PCT_FORMAT = '{:21s}   {:8d}   {:5.2f} % {:s}'
20

21 22

def print_global_stats(report: DiffReport) -> None:
    """Print the '== Global statistics' section of a report to stdout.

    The section lists the duration (only when the report carries one), the
    number of queries, and the number of answers together with their share
    of the queries. It is terminated by an empty line.

    Raises:
        RuntimeError: if ``report.total_answers`` or ``report.total_queries``
            is ``None``.
    """
    answers = report.total_answers
    queries = report.total_queries
    if answers is None or queries is None:
        raise RuntimeError("Report doesn't contain sufficient data to print statistics!")

    print('== Global statistics')

    duration = report.duration
    if duration is not None:
        duration_text = '{:d} s'.format(duration)
        print(GLOBAL_STATS_FORMAT.format('duration', duration_text))

    print(GLOBAL_STATS_FORMAT.format('queries', queries))

    # Share of queries that received an answer, in percent.
    answers_pct = answers * 100.0 / queries
    print(GLOBAL_STATS_PCT_FORMAT.format('answers', answers, answers_pct, 'of queries'))
    print('')


def _percentage(part: int, whole: int) -> float:
    """Return ``part`` as a percentage of ``whole``, or 0.0 when ``whole`` is 0."""
    if whole == 0:
        return 0.0
    return part * 100.0 / whole


def print_differences_stats(summary: Summary, total_answers: int) -> None:
    """Print the '== Differences statistics' section to stdout.

    Args:
        summary: source of the ``upstream_unstable``, ``not_reproducible`` and
            ``usable_answers`` counters; ``len(summary)`` is reported as the
            number of answers where the target disagrees.
        total_answers: denominator for the "ignored" percentages.

    The 'not 100% reproducible' line is printed only when that counter is
    non-zero. A zero denominator yields 0.00 % instead of raising
    ``ZeroDivisionError`` (e.g. when no answers are usable).
    """
    print('== Differences statistics')
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'upstream unstable', summary.upstream_unstable,
        _percentage(summary.upstream_unstable, total_answers),
        'of answers (ignoring)'))
    if summary.not_reproducible:
        print(GLOBAL_STATS_PCT_FORMAT.format(
            'not 100% reproducible', summary.not_reproducible,
            _percentage(summary.not_reproducible, total_answers),
            'of answers (ignoring)'))
    # usable_answers can be zero when every answer was ignored above.
    disagreements = len(summary)
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'target disagrees', disagreements,
        _percentage(disagreements, summary.usable_answers),
        'of not ignored answers'))
    print('')


51
def main():
    """Command-line entry point: summarize a diff report and print it.

    Loads the report from the JSON datafile, builds its summary using the
    configured field weights, prints global and per-field statistics, prints
    the queries behind each mismatch (read from LMDB), and finally writes
    the report back to the datafile.

    Exits with status 1 when the report has no answers or lacks diff data.
    """
    cli.setup_logging()
    parser = argparse.ArgumentParser(
        description='create a summary report from gathered data stored in LMDB '
                    'and JSON datafile')
    cli.add_arg_envdir(parser)
    cli.add_arg_config(parser)
    cli.add_arg_datafile(parser)
    cli.add_arg_limit(parser)
    parser.add_argument('--without-diffrepro', action='store_true',
                        help='omit reproducibility data from summary')

    args = parser.parse_args()
    datafile = cli.get_datafile(args)
    # The report is parsed once; the checks below only read from it, so
    # there is no need to load the datafile again before summarizing.
    report = DiffReport.from_json(datafile)
    field_weights = args.cfg['report']['field_weights']

    if not report.total_answers:
        logging.error('No answers in DB!')
        sys.exit(1)
    if report.target_disagreements is None:
        logging.error('JSON report is missing diff data! Did you forget to run msgdiff?')
        sys.exit(1)

    report.summary = Summary.from_report(
        report, field_weights,
        without_diffrepro=args.without_diffrepro)

    cli.print_global_stats(report)
    cli.print_differences_stats(report)

    if report.summary:  # when there are any differences to report
        field_counters = report.summary.get_field_counters()
        cli.print_fields_overview(field_counters, len(report.summary))
        # Fields are visited in the order given by the configured weights.
        for field in field_weights:
            if field in report.summary.field_labels:
                cli.print_field_mismatch_stats(
                    field, field_counters[field], len(report.summary))

        # query details
        with LMDB(args.envdir, readonly=True) as lmdb:
            lmdb.open_db(LMDB.QUERIES)

            for field in field_weights:
                if field in report.summary.field_labels:
                    for mismatch, qids in report.summary.get_field_mismatches(field):
                        queries = convert_queries(get_query_iterator(lmdb, qids))
                        cli.print_mismatch_queries(
                            field,
                            mismatch,
                            get_printable_queries_format(queries),
                            args.limit)

    # Persist the report, now including the computed summary.
    report.export_json(datafile)
106

107

108 109
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()