qstats: add query statistics

Related #6
parent 98c5efd5
@@ -100,10 +100,10 @@ def add_arg_stats(parser: ArgumentParser) -> None:
                         help='statistics file (default: {})'.format(STATS_FILENAME))
 
 
-def add_arg_stats_filename(parser: ArgumentParser) -> None:
+def add_arg_stats_filename(parser: ArgumentParser, default=STATS_FILENAME) -> None:
     parser.add_argument('-s', '--stats', type=str,
-                        default=STATS_FILENAME, dest='stats_filename',
-                        help='statistics file (default: {})'.format(STATS_FILENAME))
+                        default=default, dest='stats_filename',
+                        help='statistics file (default: {})'.format(default))
 
 
 def add_arg_report(parser: ArgumentParser) -> None:
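Usage sketch (not part of this commit): the new `default` parameter lets a caller ask for a different statistics file name while STATS_FILENAME stays the fallback. The parser wiring below is hypothetical; only add_arg_stats_filename() and STATS_FILENAME come from the hunk above, and the helper is assumed importable from the project's CLI module.

    from argparse import ArgumentParser

    parser = ArgumentParser()
    add_arg_stats_filename(parser)                        # default remains STATS_FILENAME
    # add_arg_stats_filename(parser, default='ref.json')  # hypothetical caller-specific default
    args = parser.parse_args(['-s', 'custom-stats.json'])
    print(args.stats_filename)                            # -> custom-stats.json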
@@ -387,6 +387,9 @@ class ReproData(collections.abc.Mapping, JSONDataObject):
         yield from self._counters.keys()
 
 
+QueryData = collections.namedtuple('QueryData', 'total, others_disagree, target_disagrees')
+
+
 class DiffReport(JSONDataObject):  # pylint: disable=too-many-instance-attributes
     _ATTRIBUTES = {
         'start_time': (None, None),
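For orientation (not part of this commit): QueryData just bundles per-query counts accumulated across reports, e.g.

    # Illustrative values only: the query was seen in 100 reports, the other servers
    # disagreed among themselves in 7 of them, and the target disagreed in 3.
    qd = QueryData(total=100, others_disagree=7, target_disagrees=3)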
import collections
from enum import Enum
import logging
from typing import Any, Mapping, Optional, Set, Sequence

from .dataformat import DiffReport, JSONDataObject, QueryData
from .typing import QID


UPSTREAM_UNSTABLE_THRESHOLD = 0.1  # consider query unstable when 10 % of results are unstable
ALLOWED_FAIL_THRESHOLD = 0.05  # ignore up to 5 % of FAIL results for a given query (as noise)


class QueryStatus(Enum):
    PASSING = 1
    UNKNOWN = 2  # upstream is unstable
    FAILING = 3


def get_query_status(query_data: QueryData) -> QueryStatus:
    if query_data.others_disagree / query_data.total >= UPSTREAM_UNSTABLE_THRESHOLD:
        return QueryStatus.UNKNOWN
    if query_data.target_disagrees / query_data.total < ALLOWED_FAIL_THRESHOLD:
        return QueryStatus.PASSING
    return QueryStatus.FAILING


class QueryStatistics(JSONDataObject):
    _ATTRIBUTES = {
        'failing': (set, list),
        'unstable': (set, list),
    }

    def __init__(
            self,
            failing: Optional[Set[QID]] = None,
            unstable: Optional[Set[QID]] = None,
            _restore_dict: Optional[Mapping[str, Any]] = None
    ) -> None:
        super().__init__()
        self.failing = failing if failing is not None else set()
        self.unstable = unstable if unstable is not None else set()
        if _restore_dict is not None:
            self.restore(_restore_dict)

    def add_query(self, qid: QID, query_data: QueryData) -> None:
        status = get_query_status(query_data)
        if status == QueryStatus.FAILING:
            self.failing.add(qid)
        elif status == QueryStatus.UNKNOWN:
            self.unstable.add(qid)

    @staticmethod
    def from_reports(reports: Sequence[DiffReport]) -> 'QueryStatistics':
        """Create query statistics from multiple reports - usually used as a reference"""
        others_disagree = collections.Counter()  # type: collections.Counter
        target_disagrees = collections.Counter()  # type: collections.Counter
        reprodata_present = False

        # collect query results
        for report in reports:
            if report.reprodata is not None:
                reprodata_present = True
            assert report.other_disagreements is not None
            assert report.target_disagreements is not None
            for qid in report.other_disagreements.queries:
                others_disagree[qid] += 1
            for qid in report.target_disagreements:
                target_disagrees[qid] += 1
        if reprodata_present:
            logging.warning("reprodata ignored when creating query stability statistics")

        # evaluate
        total = len(reports)
        query_statistics = QueryStatistics()
        suspect_queries = set(others_disagree.keys())
        suspect_queries.update(target_disagrees.keys())
        for qid in suspect_queries:
            query_statistics.add_query(
                qid, QueryData(total, others_disagree[qid], target_disagrees[qid]))

        return query_statistics
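A minimal classification sketch (not part of this commit), assuming QueryData and get_query_status() are importable from the new module; all counts are made up:

    samples = {
        'stable-pass': QueryData(total=20, others_disagree=0, target_disagrees=0),
        'noisy-pass': QueryData(total=20, others_disagree=1, target_disagrees=0),         # 5 % upstream noise, below 10 %
        'upstream-unstable': QueryData(total=20, others_disagree=2, target_disagrees=5),  # 10 % upstream noise -> UNKNOWN
        'real-failure': QueryData(total=20, others_disagree=0, target_disagrees=3),       # 15 % target failures -> FAILING
    }
    for name, query_data in samples.items():
        print(name, get_query_status(query_data).name)
    # stable-pass PASSING
    # noisy-pass PASSING
    # upstream-unstable UNKNOWN
    # real-failure FAILING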
 import collections
 from enum import Enum
+import logging
 import math
 import statistics
 from typing import Any, Dict, List, Mapping, Optional, Sequence  # noqa
@@ -7,8 +8,9 @@ from typing import Any, Dict, List, Mapping, Optional, Sequence  # noqa
 import numpy
 import scipy.stats
 
-from .dataformat import Counter, JSONDataObject, Summary
 from .cfg import ALL_FIELDS
+from .dataformat import Counter, DiffReport, JSONDataObject, Summary
+from .qstats import QueryStatistics
 
 
 class Stats(JSONDataObject):
@@ -244,11 +246,14 @@ class SummaryStatistics(JSONDataObject):
         'fields': (
             lambda x: FieldStatistics(_restore_dict=x),
             lambda x: x.save()),
+        'queries': (
+            lambda x: QueryStatistics(_restore_dict=x),
+            lambda x: x.save()),
     }
 
     def __init__(
             self,
-            summaries: Sequence[Summary] = None,
+            reports: Sequence[DiffReport] = None,
             _restore_dict: Mapping[str, Any] = None
     ) -> None:
         super().__init__()
@@ -258,12 +263,28 @@ class SummaryStatistics(JSONDataObject):
         self.not_reproducible = None
         self.target_disagreements = None
         self.fields = None
-        if summaries is not None:
+        self.queries = None
+        if reports is not None:
+            # use only reports with diffsum
+            usable_reports = []
+            for report in reports:
+                if report.summary is None:
+                    logging.warning('Empty diffsum in %s Omitting...', report.fileorigin)
+                else:
+                    usable_reports.append(report)
+
+            summaries = [
+                report.summary for report in reports if report.summary is not None]
+            assert len(summaries) == len(usable_reports)
+            if not summaries:
+                raise ValueError('No summaries found in reports!')
+
             self.sample_size = len(summaries)
             self.upstream_unstable = Stats([s.upstream_unstable for s in summaries])
             self.usable_answers = Stats([s.usable_answers for s in summaries])
             self.not_reproducible = Stats([s.not_reproducible for s in summaries])
             self.target_disagreements = Stats([len(s) for s in summaries])
             self.fields = FieldStatistics(summaries)
+            self.queries = QueryStatistics.from_reports(usable_reports)
         elif _restore_dict is not None:
             self.restore(_restore_dict)
@@ -26,15 +26,14 @@ def main():
         report = cli.read_report(filename, skip_empty=True)
         if report is not None:
             reports.append(report)
 
-    summaries = cli.load_summaries(reports, skip_empty=True)
-    if not summaries:
-        logging.critical('No summaries found in reports!')
+    try:
+        sumstats = SummaryStatistics(reports)
+    except ValueError as exc:
+        logging.critical(exc)
         sys.exit(1)
 
-    sumstats = SummaryStatistics(summaries)
-    logging.info('Total sample size: %d', len(summaries))
-
+    logging.info('Total sample size: %d', sumstats.sample_size)
     logging.info('Upper boundaries:')
     _log_threshold(sumstats.target_disagreements, 'target_disagreements')
     _log_threshold(sumstats.upstream_unstable, 'upstream_unstable')
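Putting it together (illustrative only): cli.read_report(), SummaryStatistics and the failing/unstable sets are the project pieces shown above; the report file names are made up.

    import logging
    import sys

    reports = []
    for filename in ['run1.json', 'run2.json', 'run3.json']:  # hypothetical report files
        report = cli.read_report(filename, skip_empty=True)
        if report is not None:
            reports.append(report)

    try:
        sumstats = SummaryStatistics(reports)  # raises ValueError when no report has a diffsum
    except ValueError as exc:
        logging.critical(exc)
        sys.exit(1)

    logging.info('%d failing and %d unstable queries across %d reports',
                 len(sumstats.queries.failing), len(sumstats.queries.unstable),
                 sumstats.sample_size)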