#!/usr/bin/env python3

import argparse
import logging
import sys

from respdiff import cli
from respdiff.stats import SummaryStatistics


11 12
def _log_threshold(stats, label):
    percentile_rank = stats.get_percentile_rank(stats.threshold)
13 14 15
    logging.info('  %s: %4.2f percentile rank', label, percentile_rank)


def main():
    """Generate a summary statistics file from one or more reports.

    Sets up logging, parses the command line (the report and statistics
    filename arguments are registered by the ``cli`` helpers), reads every
    listed report, builds a ``SummaryStatistics`` object from them, logs the
    percentile rank of each threshold and exports the statistics as JSON to
    ``args.stats_filename``.

    Exits with status 1, after logging the error, when ``SummaryStatistics``
    raises ``ValueError``.
    """
    cli.setup_logging()
    parser = argparse.ArgumentParser(description='generate statistics file from reports')
    cli.add_arg_report_filename(parser)
    cli.add_arg_stats_filename(parser)

    args = parser.parse_args()

    # Reports for which read_report() returns None are left out.
    reports = []
    for filename in args.report:
        report = cli.read_report(filename, skip_empty=True)
        if report is not None:
            reports.append(report)

    try:
        sumstats = SummaryStatistics(reports)
    except ValueError as exc:
        logging.critical(exc)
        sys.exit(1)

    logging.info('Total sample size: %d', sumstats.sample_size)
    logging.info('Upper boundaries:')
    _log_threshold(sumstats.target_disagreements, 'target_disagreements')
    _log_threshold(sumstats.upstream_unstable, 'upstream_unstable')
    _log_threshold(sumstats.not_reproducible, 'not_reproducible')
    # Per-field statistics: only the ``total`` entry of each field is logged.
    for field_name, mismatch_stats in sumstats.fields.items():
        _log_threshold(mismatch_stats.total, field_name)

    sumstats.export_json(args.stats_filename)


# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()