qstats: add query statistics

Related #6
parent 98c5efd5
......@@ -100,10 +100,10 @@ def add_arg_stats(parser: ArgumentParser) -> None:
help='statistics file (default: {})'.format(STATS_FILENAME))
def add_arg_stats_filename(parser: ArgumentParser) -> None:
def add_arg_stats_filename(parser: ArgumentParser, default=STATS_FILENAME) -> None:
parser.add_argument('-s', '--stats', type=str,
default=STATS_FILENAME, dest='stats_filename',
help='statistics file (default: {})'.format(STATS_FILENAME))
default=default, dest='stats_filename',
help='statistics file (default: {})'.format(default))
def add_arg_report(parser: ArgumentParser) -> None:
......@@ -387,6 +387,9 @@ class ReproData(collections.abc.Mapping, JSONDataObject):
yield from self._counters.keys()
QueryData = collections.namedtuple('QueryData', 'total, others_disagree, target_disagrees')
class DiffReport(JSONDataObject): # pylint: disable=too-many-instance-attributes
'start_time': (None, None),
import collections
from enum import Enum
import logging
from typing import Any, Mapping, Optional, Set, Sequence
from .dataformat import DiffReport, JSONDataObject, QueryData
from .typing import QID
UPSTREAM_UNSTABLE_THRESHOLD = 0.1 # consider query unstable when 10 % of results are unstable
ALLOWED_FAIL_THRESHOLD = 0.05 # ignore up to 5 % of FAIL results for a given query (as noise)
class QueryStatus(Enum):
UNKNOWN = 2 # upstream is unstable
def get_query_status(query_data: QueryData) -> QueryStatus:
    """Classify a single query from its aggregated disagreement counts.

    The share of samples in which the other resolvers disagreed is checked
    first: at or above UPSTREAM_UNSTABLE_THRESHOLD the query is reported as
    QueryStatus.UNKNOWN (upstream is unstable) and the target's results are
    not looked at. Otherwise the share of samples in which the target
    disagreed decides between QueryStatus.PASSING (strictly below
    ALLOWED_FAIL_THRESHOLD) and QueryStatus.FAILING.

    Raises:
        ZeroDivisionError: if ``query_data.total`` is 0 (both ratios divide
            by it).
    """
    sample_count = query_data.total

    upstream_ratio = query_data.others_disagree / sample_count
    if upstream_ratio >= UPSTREAM_UNSTABLE_THRESHOLD:
        return QueryStatus.UNKNOWN

    # Computed only after the upstream check, as the target's failure share
    # is irrelevant for unstable queries.
    target_ratio = query_data.target_disagrees / sample_count
    if target_ratio < ALLOWED_FAIL_THRESHOLD:
        return QueryStatus.PASSING
    return QueryStatus.FAILING
class QueryStatistics(JSONDataObject):
'failing': (set, list),
'unstable': (set, list),
def __init__(
failing: Optional[Set[QID]] = None,
unstable: Optional[Set[QID]] = None,
_restore_dict: Optional[Mapping[str, Any]] = None
) -> None:
self.failing = failing if failing is not None else set()
self.unstable = unstable if unstable is not None else set()
if _restore_dict is not None:
def add_query(self, qid: QID, query_data: QueryData) -> None:
status = get_query_status(query_data)
if status == QueryStatus.FAILING:
elif status == QueryStatus.UNKNOWN:
def from_reports(reports: Sequence[DiffReport]) -> 'QueryStatistics':
"""Create query statistics from multiple reports - usually used as a reference"""
others_disagree = collections.Counter() # type: collections.Counter
target_disagrees = collections.Counter() # type: collections.Counter
reprodata_present = False
# collect query results
for report in reports:
if report.reprodata is not None:
reprodata_present = True
assert report.other_disagreements is not None
assert report.target_disagreements is not None
for qid in report.other_disagreements.queries:
others_disagree[qid] += 1
for qid in report.target_disagreements:
target_disagrees[qid] += 1
if reprodata_present:
logging.warning("reprodata ignored when creating query stability statistics")
# evaluate
total = len(reports)
query_statistics = QueryStatistics()
suspect_queries = set(others_disagree.keys())
for qid in suspect_queries:
qid, QueryData(total, others_disagree[qid], target_disagrees[qid]))
return query_statistics
import collections
from enum import Enum
import logging
import math
import statistics
from typing import Any, Dict, List, Mapping, Optional, Sequence # noqa
......@@ -7,8 +8,9 @@ from typing import Any, Dict, List, Mapping, Optional, Sequence # noqa
import numpy
import scipy.stats
from .dataformat import Counter, JSONDataObject, Summary
from .cfg import ALL_FIELDS
from .dataformat import Counter, DiffReport, JSONDataObject, Summary
from .qstats import QueryStatistics
class Stats(JSONDataObject):
......@@ -244,11 +246,14 @@ class SummaryStatistics(JSONDataObject):
'fields': (
lambda x: FieldStatistics(_restore_dict=x),
lambda x: x.save()),
'queries': (
lambda x: QueryStatistics(_restore_dict=x),
lambda x: x.save()),
def __init__(
summaries: Sequence[Summary] = None,
reports: Sequence[DiffReport] = None,
_restore_dict: Mapping[str, Any] = None
) -> None:
......@@ -258,12 +263,28 @@ class SummaryStatistics(JSONDataObject):
self.not_reproducible = None
self.target_disagreements = None
self.fields = None
if summaries is not None:
self.queries = None
if reports is not None:
# use only reports with diffsum
usable_reports = []
for report in reports:
if report.summary is None:
logging.warning('Empty diffsum in %s Omitting...', report.fileorigin)
summaries = [
report.summary for report in reports if report.summary is not None]
assert len(summaries) == len(usable_reports)
if not summaries:
raise ValueError('No summaries found in reports!')
self.sample_size = len(summaries)
self.upstream_unstable = Stats([s.upstream_unstable for s in summaries])
self.usable_answers = Stats([s.usable_answers for s in summaries])
self.not_reproducible = Stats([s.not_reproducible for s in summaries])
self.target_disagreements = Stats([len(s) for s in summaries])
self.fields = FieldStatistics(summaries)
self.queries = QueryStatistics.from_reports(usable_reports)
elif _restore_dict is not None:
......@@ -26,15 +26,14 @@ def main():
report = cli.read_report(filename, skip_empty=True)
if report is not None:
summaries = cli.load_summaries(reports, skip_empty=True)
if not summaries:
logging.critical('No summaries found in reports!')
sumstats = SummaryStatistics(reports)
except ValueError as exc:
sumstats = SummaryStatistics(summaries)
logging.info('Total sample size: %d', len(summaries))
logging.info('Total sample size: %d', sumstats.sample_size)
logging.info('Upper boundaries:')
_log_threshold(sumstats.target_disagreements, 'target_disagreements')
_log_threshold(sumstats.upstream_unstable, 'upstream_unstable')
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment