Commit 4c122db5 authored by Tomas Krizek's avatar Tomas Krizek

diffsum: add support to omit unstable/failing queries

parent d64f8e63
...@@ -14,40 +14,6 @@ from respdiff.query import ( ...@@ -14,40 +14,6 @@ from respdiff.query import (
convert_queries, get_printable_queries_format, get_query_iterator) convert_queries, get_printable_queries_format, get_query_iterator)
# NOTE(review): presumably the default number of items shown per listing;
# its use is not visible in this part of the file -- confirm against callers.
DEFAULT_LIMIT = 10
# Statistics row: label left-aligned in 21 columns, value right-aligned in 8.
GLOBAL_STATS_FORMAT = '{:21s} {:>8}'
# Statistics row with a percentage: 21-column label, 8-column integer count,
# percentage with two decimals followed by ' %', then a free-form description.
GLOBAL_STATS_PCT_FORMAT = '{:21s} {:8d} {:5.2f} % {:s}'
def print_global_stats(report: DiffReport) -> None:
    """Print the global query/answer statistics of ``report`` to stdout.

    The duration line is printed only when ``report.duration`` is set.
    The answers line shows ``report.total_answers`` as a count and as a
    percentage of ``report.total_queries``.

    Raises:
        RuntimeError: if ``report.total_answers`` or
            ``report.total_queries`` is None.
    """
    if report.total_answers is None or report.total_queries is None:
        raise RuntimeError("Report doesn't contain sufficient data to print statistics!")

    print('== Global statistics')
    if report.duration is not None:
        print(GLOBAL_STATS_FORMAT.format('duration', '{:d} s'.format(report.duration)))
    print(GLOBAL_STATS_FORMAT.format('queries', report.total_queries))

    # A report with zero queries passes the None check above; report 0 %
    # instead of crashing with ZeroDivisionError.
    if report.total_queries:
        answers_pct = report.total_answers * 100.0 / report.total_queries
    else:
        answers_pct = 0.0
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'answers', report.total_answers, answers_pct, 'of queries'))
    print('')
def print_differences_stats(summary: Summary, total_answers: int) -> None:
    """Print the differences statistics of ``summary`` to stdout.

    Prints the count and percentage of upstream-unstable answers, then
    (only when non-zero) the answers that were not 100% reproducible, both
    relative to ``total_answers``. Finally prints the number of target
    disagreements (``len(summary)``) relative to ``summary.usable_answers``.

    A zero denominator is reported as 0 % rather than raising
    ZeroDivisionError; ``summary.usable_answers`` is 0 when every answer
    was ignored.
    """
    def percentage(count: int, total: int) -> float:
        # Avoid ZeroDivisionError when there is nothing to compare against.
        return count * 100.0 / total if total else 0.0

    print('== Differences statistics')
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'upstream unstable', summary.upstream_unstable,
        percentage(summary.upstream_unstable, total_answers),
        'of answers (ignoring)'))
    if summary.not_reproducible:
        print(GLOBAL_STATS_PCT_FORMAT.format(
            'not 100% reproducible', summary.not_reproducible,
            percentage(summary.not_reproducible, total_answers),
            'of answers (ignoring)'))
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'target disagrees', len(summary),
        percentage(len(summary), summary.usable_answers),
        'of not ignored answers'))
    print('')
def main(): def main():
cli.setup_logging() cli.setup_logging()
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
...@@ -57,14 +23,23 @@ def main(): ...@@ -57,14 +23,23 @@ def main():
cli.add_arg_config(parser) cli.add_arg_config(parser)
cli.add_arg_datafile(parser) cli.add_arg_datafile(parser)
cli.add_arg_limit(parser) cli.add_arg_limit(parser)
cli.add_arg_stats_filename(parser, default='')
parser.add_argument('--without-diffrepro', action='store_true', parser.add_argument('--without-diffrepro', action='store_true',
help='omit reproducibility data from summary') help='omit reproducibility data from summary')
parser.add_argument('--without-ref-unstable', action='store_true',
help='omit unstable reference queries from summary')
parser.add_argument('--without-ref-failing', action='store_true',
help='omit failing reference queries from summary')
args = parser.parse_args() args = parser.parse_args()
datafile = cli.get_datafile(args) datafile = cli.get_datafile(args)
report = DiffReport.from_json(datafile) report = DiffReport.from_json(datafile)
field_weights = args.cfg['report']['field_weights'] field_weights = args.cfg['report']['field_weights']
if (args.without_ref_unstable or args.without_ref_failing) \
and not args.stats_filename:
logging.critical("Statistics file must be provided as a reference.")
sys.exit(1)
if not report.total_answers: if not report.total_answers:
logging.error('No answers in DB!') logging.error('No answers in DB!')
sys.exit(1) sys.exit(1)
...@@ -72,10 +47,23 @@ def main(): ...@@ -72,10 +47,23 @@ def main():
logging.error('JSON report is missing diff data! Did you forget to run msgdiff?') logging.error('JSON report is missing diff data! Did you forget to run msgdiff?')
sys.exit(1) sys.exit(1)
ignore_qids = set()
if args.without_ref_unstable or args.without_ref_failing:
try:
stats = cli.read_stats(args.stats_filename)
except ValueError as exc:
logging.critical(str(exc))
sys.exit(1)
if args.without_ref_unstable:
ignore_qids.update(stats.queries.unstable)
if args.without_ref_failing:
ignore_qids.update(stats.queries.failing)
report = DiffReport.from_json(datafile) report = DiffReport.from_json(datafile)
report.summary = Summary.from_report( report.summary = Summary.from_report(
report, field_weights, report, field_weights,
without_diffrepro=args.without_diffrepro) without_diffrepro=args.without_diffrepro,
ignore_qids=ignore_qids)
cli.print_global_stats(report) cli.print_global_stats(report)
cli.print_differences_stats(report) cli.print_differences_stats(report)
......
...@@ -30,6 +30,9 @@ The report uses the following terms: ...@@ -30,6 +30,9 @@ The report uses the following terms:
between the answer from ``target`` server and the other servers, and the between the answer from ``target`` server and the other servers, and the
other servers agree on the answer (there is no difference between them). other servers agree on the answer (there is no difference between them).
These are the most interesting cases that are analysed further. These are the most interesting cases that are analysed further.
- *manually ignored* is the number of queries which were omitted from the
report by using ``--without-ref-failing`` or ``--without-ref-unstable`` along
with a reference statistics file.
The summary evaluates how many *target disagreements* there were in particular The summary evaluates how many *target disagreements* there were in particular
*fields* (or ``criteria``), and what did these mismatches look like. It produces *fields* (or ``criteria``), and what did these mismatches look like. It produces
......
...@@ -298,6 +298,7 @@ def print_global_stats(report: DiffReport, reference: DiffReport = None) -> None ...@@ -298,6 +298,7 @@ def print_global_stats(report: DiffReport, reference: DiffReport = None) -> None
def print_differences_stats(report: DiffReport, reference: DiffReport = None) -> None: def print_differences_stats(report: DiffReport, reference: DiffReport = None) -> None:
ref_summary = getattr(reference, 'summary', None) ref_summary = getattr(reference, 'summary', None)
ref_manual_ignore = getattr(ref_summary, 'manual_ignore', None)
ref_upstream_unstable = getattr(ref_summary, 'upstream_unstable', None) ref_upstream_unstable = getattr(ref_summary, 'upstream_unstable', None)
ref_not_reproducible = getattr(ref_summary, 'not_reproducible', None) ref_not_reproducible = getattr(ref_summary, 'not_reproducible', None)
ref_target_disagrees = len(ref_summary) if ref_summary is not None else None ref_target_disagrees = len(ref_summary) if ref_summary is not None else None
...@@ -306,6 +307,10 @@ def print_differences_stats(report: DiffReport, reference: DiffReport = None) -> ...@@ -306,6 +307,10 @@ def print_differences_stats(report: DiffReport, reference: DiffReport = None) ->
raise RuntimeError("Report doesn't containt necassary data!") raise RuntimeError("Report doesn't containt necassary data!")
print('== Differences statistics') print('== Differences statistics')
print(format_stats_line('manually ignored', *get_stats_data(
report.summary.manual_ignore, report.total_answers,
ref_manual_ignore),
additional='of answers (ignoring)'))
print(format_stats_line('upstream unstable', *get_stats_data( print(format_stats_line('upstream unstable', *get_stats_data(
report.summary.upstream_unstable, report.total_answers, report.summary.upstream_unstable, report.total_answers,
ref_upstream_unstable), ref_upstream_unstable),
......
...@@ -261,6 +261,7 @@ class Summary(Disagreements): ...@@ -261,6 +261,7 @@ class Summary(Disagreements):
'upstream_unstable': (None, None), 'upstream_unstable': (None, None),
'usable_answers': (None, None), 'usable_answers': (None, None),
'not_reproducible': (None, None), 'not_reproducible': (None, None),
'manual_ignore': (None, None),
} }
def __init__( def __init__(
...@@ -270,6 +271,7 @@ class Summary(Disagreements): ...@@ -270,6 +271,7 @@ class Summary(Disagreements):
self.usable_answers = 0 self.usable_answers = 0
self.upstream_unstable = 0 self.upstream_unstable = 0
self.not_reproducible = 0 self.not_reproducible = 0
self.manual_ignore = 0
super().__init__(_restore_dict=_restore_dict) super().__init__(_restore_dict=_restore_dict)
def add_mismatch(self, field: FieldLabel, mismatch: DataMismatch, qid: QID) -> None: def add_mismatch(self, field: FieldLabel, mismatch: DataMismatch, qid: QID) -> None:
...@@ -282,18 +284,31 @@ class Summary(Disagreements): ...@@ -282,18 +284,31 @@ class Summary(Disagreements):
report: 'DiffReport', report: 'DiffReport',
field_weights: Sequence[FieldLabel], field_weights: Sequence[FieldLabel],
reproducibility_threshold: float = 1, reproducibility_threshold: float = 1,
without_diffrepro: bool = False without_diffrepro: bool = False,
ignore_qids: Optional[Set[QID]] = None
) -> 'Summary': ) -> 'Summary':
"""Get summary of disagreements above the specified reproduciblity threshold (0, 1].""" """
Get summary of disagreements above the specified reproduciblity
threshold [0, 1].
Optionally, provide a list of known unstable and/or failing QIDs which
will be ignored.
"""
if (report.other_disagreements is None if (report.other_disagreements is None
or report.target_disagreements is None or report.target_disagreements is None
or report.total_answers is None): or report.total_answers is None):
raise RuntimeError("Report has insufficient data to create Summary") raise RuntimeError("Report has insufficient data to create Summary")
if ignore_qids is None:
ignore_qids = set()
summary = Summary() summary = Summary()
summary.upstream_unstable = len(report.other_disagreements) summary.upstream_unstable = len(report.other_disagreements)
for qid, diff in report.target_disagreements.items(): for qid, diff in report.target_disagreements.items():
if qid in ignore_qids:
summary.manual_ignore += 1
continue
if not without_diffrepro and report.reprodata is not None: if not without_diffrepro and report.reprodata is not None:
reprocounter = report.reprodata[qid] reprocounter = report.reprodata[qid]
if reprocounter.retries > 0: if reprocounter.retries > 0:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment