#!/usr/bin/env python3

import argparse
import logging
import sys

from respdiff import cli
from respdiff.stats import SummaryStatistics


def _log_threshold(stats, label):
    """Log the percentile rank of a statistic's own threshold.

    Args:
        stats: Object exposing a ``threshold`` attribute and a
            ``get_percentile_rank(value)`` method (presumably one of the
            statistics objects from ``respdiff.stats`` -- confirm there).
        label: Name printed in front of the value in the log line.
    """
    percentile_rank = stats.get_percentile_rank(stats.threshold)
    logging.info('  %s: %4.2f percentile rank', label, percentile_rank)


def main():
    """Build a statistics file from one or more report files.

    Parses the command line (report filenames and the output statistics
    filename are registered through the ``cli`` helpers), loads the
    summaries from the given reports, logs the percentile rank of each
    threshold and exports the resulting ``SummaryStatistics`` as JSON.

    Exits with status 1 when no summaries could be loaded.
    """
    cli.setup_logging()
    parser = argparse.ArgumentParser(description='generate statistics file from reports')
    cli.add_arg_report_filename(parser)
    cli.add_arg_stats_filename(parser)

    args = parser.parse_args()

    # read_report() may return None (it is called with skip_empty=True);
    # such reports are left out.
    reports = []
    for filename in args.report:
        report = cli.read_report(filename, skip_empty=True)
        if report is not None:
            reports.append(report)
    summaries = cli.load_summaries(reports, skip_empty=True)

    # Do not attempt to build statistics when there are no summaries at all.
    if not summaries:
        logging.critical('No summaries found in reports!')
        sys.exit(1)

    sumstats = SummaryStatistics(summaries)

    logging.info('Total sample size: %d', len(summaries))
    logging.info('Upper boundaries:')
    _log_threshold(sumstats.target_disagreements, 'target_disagreements')
    _log_threshold(sumstats.upstream_unstable, 'upstream_unstable')
    _log_threshold(sumstats.not_reproducible, 'not_reproducible')
    for field_name, mismatch_stats in sumstats.fields.items():
        _log_threshold(mismatch_stats.total, field_name)

    sumstats.export_json(args.stats_filename)


# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()