Commit 8506658c authored by Petr Špaček's avatar Petr Špaček

Merge branch 'diffrepro-improvements' into 'master'

diffrepro: query stability statistics

Closes #6

See merge request !31
parents 98c5efd5 cde82989
Pipeline #41582 passed with stage
in 1 minute and 19 seconds
#!/usr/bin/env python3
import argparse
from itertools import zip_longest
import logging
from multiprocessing import pool
import random
import subprocess
import sys
from typing import ( # noqa
Any, AbstractSet, Iterable, Iterator, Mapping, Sequence, Tuple, TypeVar,
Union)
from respdiff import cli, sendrecv
from respdiff.database import (
DNSReply, DNSRepliesFactory, key2qid, LMDB, MetaDatabase,
ResolverID, qid2key, QKey, WireFormat)
from respdiff.dataformat import Diff, DiffReport, FieldLabel, ReproData, QID # noqa
from respdiff.match import compare
from respdiff.query import get_query_iterator
T = TypeVar('T')
def restart_resolver(script_path: str) -> None:
    """Run *script_path* to restart a resolver (and clear its cache).

    Failures are logged as warnings instead of raised: a failed restart
    must not abort the whole reproduction run.
    """
    try:
        subprocess.check_call(script_path)
    except subprocess.CalledProcessError as exc:
        logging.warning('Resolver restart failed (exit code %d): %s',
                        exc.returncode, script_path)
    except OSError as exc:
        # OSError covers PermissionError (as before) and also a missing
        # script file, which previously crashed the run with an uncaught
        # FileNotFoundError; the unused `exc` variable is now used in the log.
        logging.warning('Resolver restart failed (%s): %s', exc, script_path)
def get_restart_scripts(config: Mapping[str, Any]) -> Mapping[ResolverID, str]:
    """Collect the per-resolver restart scripts configured in *config*.

    Resolvers without a ``restart_script`` entry (or missing from the config
    entirely) are skipped with a warning.
    """
    scripts = {}
    for resolver in config['servers']['names']:
        entry = config.get(resolver, {})
        if 'restart_script' in entry:
            scripts[resolver] = entry['restart_script']
        else:
            logging.warning('No restart script available for "%s"!', resolver)
    return scripts
def disagreement_query_stream(
        lmdb,
        report: DiffReport,
        skip_unstable: bool = True,
        skip_non_reproducible: bool = True,
        shuffle: bool = True
) -> Iterator[Tuple[QKey, WireFormat]]:
    """Yield (query key, query wire data) for each disagreement worth re-checking.

    Iterates over the report's target disagreements and, by default, filters
    out queries whose reproduction counters mark them as unstable upstream or
    as not reproduced on every retry.

    Raises:
        RuntimeError: if the report lacks disagreement or reproduction data.
    """
    if report.target_disagreements is None or report.reprodata is None:
        raise RuntimeError("Report doesn't contain necessary data!")
    qids = report.target_disagreements.keys()  # type: Union[Sequence[QID], AbstractSet[QID]]
    if shuffle:
        # create a new, randomized list from disagreements
        qids = random.sample(qids, len(qids))
    queries = get_query_iterator(lmdb, qids)
    for qid, qwire in queries:
        diff = report.target_disagreements[qid]
        reprocounter = report.reprodata[qid]
        # verify if answers are stable
        if skip_unstable and reprocounter.retries != reprocounter.upstream_stable:
            logging.debug('Skipping QID %7d: unstable upstream', diff.qid)
            continue
        if skip_non_reproducible and reprocounter.retries != reprocounter.verified:
            logging.debug('Skipping QID %7d: not 100 %% reproducible', diff.qid)
            continue
        yield qid2key(qid), qwire
def chunker(iterable: Iterable[T], size: int) -> Iterator[Iterable[T]]:
    """Split *iterable* into successive chunks of length *size*.

    The final chunk is padded with ``None`` up to *size*:
    chunker([x, y, z], 2) --> (x, y), (z, None)
    """
    shared_iter = iter(iterable)
    return zip_longest(*(shared_iter,) * size)
def process_answers(
        qkey: QKey,
        answers: Mapping[ResolverID, DNSReply],
        report: DiffReport,
        criteria: Sequence[FieldLabel],
        target: ResolverID
) -> None:
    """Update the report's reproduction counters from one re-queried answer set.

    Compares *answers* with *criteria* against the *target* resolver and, for
    the query identified by *qkey*, records whether the other resolvers still
    agree and whether the originally recorded difference was reproduced exactly.

    Raises:
        RuntimeError: if the report lacks disagreement or reproduction data.
    """
    if report.target_disagreements is None or report.reprodata is None:
        raise RuntimeError("Report doesn't contain necessary data!")
    qid = key2qid(qkey)
    reprocounter = report.reprodata[qid]
    others_agree, mismatches = compare(answers, criteria, target)
    reprocounter.retries += 1  # every processed answer set counts as one attempt
    if others_agree:
        reprocounter.upstream_stable += 1
        assert mismatches is not None
        # verified only when the new diff equals the recorded one
        if Diff(qid, mismatches) == report.target_disagreements[qid]:
            reprocounter.verified += 1
from respdiff import cli, repro, sendrecv
from respdiff.database import DNSRepliesFactory, LMDB
from respdiff.dataformat import DiffReport, ReproData
def main():
......@@ -109,14 +14,14 @@ def main():
cli.add_arg_envdir(parser)
cli.add_arg_config(parser)
cli.add_arg_datafile(parser)
parser.add_argument('-s', '--sequential', action='store_true', default=False,
parser.add_argument('--sequential', action='store_true', default=False,
help='send one query at a time (slower, but more reliable)')
args = parser.parse_args()
sendrecv.module_init(args)
datafile = cli.get_datafile(args)
report = DiffReport.from_json(datafile)
restart_scripts = get_restart_scripts(args.cfg)
restart_scripts = repro.get_restart_scripts(args.cfg)
servers = args.cfg['servers']['names']
dnsreplies_factory = DNSRepliesFactory(servers)
......@@ -130,34 +35,13 @@ def main():
with LMDB(args.envdir, readonly=True) as lmdb:
lmdb.open_db(LMDB.QUERIES)
cli.check_metadb_servers_version(lmdb, servers)
dstream = repro.query_stream_from_disagreements(lmdb, report)
try:
MetaDatabase(lmdb, servers, create=False) # check version and servers
except NotImplementedError as exc:
logging.critical(exc)
sys.exit(1)
dstream = disagreement_query_stream(lmdb, report)
try:
with pool.Pool(processes=nproc) as p:
done = 0
for process_args in chunker(dstream, nproc):
# restart resolvers and clear their cache
for script in restart_scripts.values():
restart_resolver(script)
process_args = [args for args in process_args if args is not None]
for qkey, replies_data, in p.imap_unordered(
sendrecv.worker_perform_single_query,
process_args,
chunksize=1):
replies = dnsreplies_factory.parse(replies_data)
process_answers(qkey, replies, report,
args.cfg['diff']['criteria'],
args.cfg['diff']['target'])
done += len(process_args)
logging.info('Processed {:4d} queries'.format(done))
repro.reproduce_queries(
dstream, report, dnsreplies_factory, args.cfg['diff']['criteria'],
args.cfg['diff']['target'], restart_scripts, nproc)
finally:
# make sure data is saved in case of interrupt
report.export_json(datafile)
......
......@@ -14,40 +14,6 @@ from respdiff.query import (
convert_queries, get_printable_queries_format, get_query_iterator)
DEFAULT_LIMIT = 10
GLOBAL_STATS_FORMAT = '{:21s} {:>8}'
GLOBAL_STATS_PCT_FORMAT = '{:21s} {:8d} {:5.2f} % {:s}'
def print_global_stats(report: DiffReport) -> None:
    """Print overall query/answer counts (and run duration, when available)."""
    if report.total_answers is None or report.total_queries is None:
        raise RuntimeError("Report doesn't contain sufficient data to print statistics!")
    print('== Global statistics')
    duration = report.duration
    if duration is not None:
        print(GLOBAL_STATS_FORMAT.format('duration', '{:d} s'.format(duration)))
    print(GLOBAL_STATS_FORMAT.format('queries', report.total_queries))
    answer_pct = report.total_answers * 100.0 / report.total_queries
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'answers', report.total_answers, answer_pct, 'of queries'))
    print('')
def print_differences_stats(summary: Summary, total_answers: int) -> None:
    """Print a breakdown of ignored and disagreeing answers from *summary*."""
    print('== Differences statistics')
    unstable_pct = summary.upstream_unstable * 100.0 / total_answers
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'upstream unstable', summary.upstream_unstable,
        unstable_pct, 'of answers (ignoring)'))
    if summary.not_reproducible:
        not_repro_pct = summary.not_reproducible * 100.0 / total_answers
        print(GLOBAL_STATS_PCT_FORMAT.format(
            'not 100% reproducible', summary.not_reproducible,
            not_repro_pct, 'of answers (ignoring)'))
    disagreements = len(summary)
    print(GLOBAL_STATS_PCT_FORMAT.format(
        'target disagrees', disagreements,
        disagreements * 100.0 / summary.usable_answers,
        'of not ignored answers'))
    print('')
def main():
cli.setup_logging()
parser = argparse.ArgumentParser(
......@@ -57,14 +23,23 @@ def main():
cli.add_arg_config(parser)
cli.add_arg_datafile(parser)
cli.add_arg_limit(parser)
cli.add_arg_stats_filename(parser, default='')
parser.add_argument('--without-diffrepro', action='store_true',
help='omit reproducibility data from summary')
parser.add_argument('--without-ref-unstable', action='store_true',
help='omit unstable reference queries from summary')
parser.add_argument('--without-ref-failing', action='store_true',
help='omit failing reference queries from summary')
args = parser.parse_args()
datafile = cli.get_datafile(args)
report = DiffReport.from_json(datafile)
field_weights = args.cfg['report']['field_weights']
if (args.without_ref_unstable or args.without_ref_failing) \
and not args.stats_filename:
logging.critical("Statistics file must be provided as a reference.")
sys.exit(1)
if not report.total_answers:
logging.error('No answers in DB!')
sys.exit(1)
......@@ -72,10 +47,23 @@ def main():
logging.error('JSON report is missing diff data! Did you forget to run msgdiff?')
sys.exit(1)
ignore_qids = set()
if args.without_ref_unstable or args.without_ref_failing:
try:
stats = cli.read_stats(args.stats_filename)
except ValueError as exc:
logging.critical(str(exc))
sys.exit(1)
if args.without_ref_unstable:
ignore_qids.update(stats.queries.unstable)
if args.without_ref_failing:
ignore_qids.update(stats.queries.failing)
report = DiffReport.from_json(datafile)
report.summary = Summary.from_report(
report, field_weights,
without_diffrepro=args.without_diffrepro)
without_diffrepro=args.without_diffrepro,
ignore_qids=ignore_qids)
cli.print_global_stats(report)
cli.print_differences_stats(report)
......
......@@ -29,7 +29,10 @@ The report uses the following terms:
- *target disagreements* refers to cases, when there's a difference
between the answer from ``target`` server and the others server, and the
other servers agree on the answer (there is no difference between them).
These are the most interesting cases that are analysed further.
These are the most interesting cases that are analyzed further.
- *manually ignored* is the number of queries that were omitted from the
report by using `--without-ref-failing` or `--without-ref-unstable` along
with a reference statistics file.
The summary evaluates how many *target disagreements* there were in particular
*fields* (or ``criteria``), and what did these mismatches look like. It produces
......@@ -40,7 +43,7 @@ Notes
-----
* If you adjust the ``field_weights``, just re-order the fields. Don't remove
them, otherwise there'll be issues if such field is ever encountered when
them, otherwise there will be issues if such field is ever encountered when
producing the summary.
* In case you update respdiff and ``diffsum.py`` doesn't work, check the
changelog. If a new field was added, adjust your config accordingly.
......
......@@ -7,7 +7,6 @@ from multiprocessing import pool
import os
import pickle
from typing import Any, Dict, Iterator, Mapping, Optional, Sequence, Tuple # noqa
import sys
from respdiff import cli
from respdiff.dataformat import (
......@@ -117,11 +116,7 @@ def main():
# interaction when using multiple transaction / processes, open a separate
# environment. Also, any dbs have to be opened before using MetaDatabase().
report = prepare_report(lmdb_, servers)
try:
MetaDatabase(lmdb_, servers, create=False) # check version and servers
except NotImplementedError as exc:
logging.critical(exc)
sys.exit(1)
cli.check_metadb_servers_version(lmdb_, servers)
with LMDB(args.envdir, fast=True) as lmdb_:
lmdb = lmdb_
......
......@@ -8,6 +8,7 @@ from typing import Callable, Dict, Mapping, Optional, Sequence, Tuple, Union #
from tabulate import tabulate
from .cfg import read_cfg
from .database import MetaDatabase
from .dataformat import DiffReport, FieldLabel, InvalidFileFormat, Summary
from .match import DataMismatch
from .stats import SummaryStatistics
......@@ -100,10 +101,10 @@ def add_arg_stats(parser: ArgumentParser) -> None:
help='statistics file (default: {})'.format(STATS_FILENAME))
def add_arg_stats_filename(parser: ArgumentParser) -> None:
def add_arg_stats_filename(parser: ArgumentParser, default=STATS_FILENAME) -> None:
parser.add_argument('-s', '--stats', type=str,
default=STATS_FILENAME, dest='stats_filename',
help='statistics file (default: {})'.format(STATS_FILENAME))
default=default, dest='stats_filename',
help='statistics file (default: {})'.format(default))
def add_arg_report(parser: ArgumentParser) -> None:
......@@ -128,6 +129,14 @@ def get_datafile(args: Namespace, key: str = 'datafile', check_exists: bool = Tr
return datafile
def check_metadb_servers_version(lmdb, servers: Sequence[str]) -> None:
    """Verify the LMDB environment's metadata matches *servers* and its version is supported.

    Exits the process (status 1) after a critical log message when the check
    fails, so callers don't have to handle the error themselves.
    """
    try:
        MetaDatabase(lmdb, servers, create=False)  # check version and servers
    except NotImplementedError as exc:
        logging.critical(str(exc))
        sys.exit(1)
def format_stats_line(
description: str,
number: int,
......@@ -289,6 +298,7 @@ def print_global_stats(report: DiffReport, reference: DiffReport = None) -> None
def print_differences_stats(report: DiffReport, reference: DiffReport = None) -> None:
ref_summary = getattr(reference, 'summary', None)
ref_manual_ignore = getattr(ref_summary, 'manual_ignore', None)
ref_upstream_unstable = getattr(ref_summary, 'upstream_unstable', None)
ref_not_reproducible = getattr(ref_summary, 'not_reproducible', None)
ref_target_disagrees = len(ref_summary) if ref_summary is not None else None
......@@ -297,6 +307,10 @@ def print_differences_stats(report: DiffReport, reference: DiffReport = None) ->
raise RuntimeError("Report doesn't containt necassary data!")
print('== Differences statistics')
print(format_stats_line('manually ignored', *get_stats_data(
report.summary.manual_ignore, report.total_answers,
ref_manual_ignore),
additional='of answers (ignoring)'))
print(format_stats_line('upstream unstable', *get_stats_data(
report.summary.upstream_unstable, report.total_answers,
ref_upstream_unstable),
......
......@@ -261,6 +261,7 @@ class Summary(Disagreements):
'upstream_unstable': (None, None),
'usable_answers': (None, None),
'not_reproducible': (None, None),
'manual_ignore': (None, None),
}
def __init__(
......@@ -270,6 +271,7 @@ class Summary(Disagreements):
self.usable_answers = 0
self.upstream_unstable = 0
self.not_reproducible = 0
self.manual_ignore = 0
super().__init__(_restore_dict=_restore_dict)
def add_mismatch(self, field: FieldLabel, mismatch: DataMismatch, qid: QID) -> None:
......@@ -282,18 +284,31 @@ class Summary(Disagreements):
report: 'DiffReport',
field_weights: Sequence[FieldLabel],
reproducibility_threshold: float = 1,
without_diffrepro: bool = False
without_diffrepro: bool = False,
ignore_qids: Optional[Set[QID]] = None
) -> 'Summary':
"""Get summary of disagreements above the specified reproduciblity threshold (0, 1]."""
"""
Get summary of disagreements above the specified reproduciblity
threshold [0, 1].
Optionally, provide a list of known unstable and/or failing QIDs which
will be ignored.
"""
if (report.other_disagreements is None
or report.target_disagreements is None
or report.total_answers is None):
raise RuntimeError("Report has insufficient data to create Summary")
if ignore_qids is None:
ignore_qids = set()
summary = Summary()
summary.upstream_unstable = len(report.other_disagreements)
for qid, diff in report.target_disagreements.items():
if qid in ignore_qids:
summary.manual_ignore += 1
continue
if not without_diffrepro and report.reprodata is not None:
reprocounter = report.reprodata[qid]
if reprocounter.retries > 0:
......@@ -324,9 +339,10 @@ class Summary(Disagreements):
class ReproCounter(JSONDataObject):
_ATTRIBUTES = {
'retries': (None, None),
'upstream_stable': (None, None),
'verified': (None, None),
'retries': (None, None), # total amount of attempts to reproduce
'upstream_stable': (None, None), # number of cases, where others disagree
'verified': (None, None), # the query fails, and the diff is same (reproduced)
'different_failure': (None, None) # the query fails, but the diff doesn't match
}
def __init__(
......@@ -334,12 +350,14 @@ class ReproCounter(JSONDataObject):
retries: int = 0,
upstream_stable: int = 0,
verified: int = 0,
different_failure: int = 0,
_restore_dict: Optional[Mapping[str, int]] = None
) -> None:
super().__init__()
self.retries = retries
self.upstream_stable = upstream_stable
self.verified = verified
self.different_failure = different_failure
if _restore_dict is not None:
self.restore(_restore_dict)
......@@ -352,7 +370,8 @@ class ReproCounter(JSONDataObject):
return (
self.retries == other.retries
and self.upstream_stable == other.upstream_stable
and self.verified == other.verified)
and self.verified == other.verified
and self.different_failure == other.different_failure)
class ReproData(collections.abc.Mapping, JSONDataObject):
......@@ -387,6 +406,9 @@ class ReproData(collections.abc.Mapping, JSONDataObject):
yield from self._counters.keys()
QueryData = collections.namedtuple('QueryData', 'total, others_disagree, target_disagrees')
class DiffReport(JSONDataObject): # pylint: disable=too-many-instance-attributes
_ATTRIBUTES = {
'start_time': (None, None),
......
import collections
from enum import Enum
import logging
from typing import Any, Mapping, Optional, Set, Sequence
from .dataformat import DiffReport, JSONDataObject, QueryData
from .typing import QID
UPSTREAM_UNSTABLE_THRESHOLD = 0.1 # consider query unstable when 10 % of results are unstable
ALLOWED_FAIL_THRESHOLD = 0.05 # ignore up to 5 % of FAIL results for a given query (as noise)
class QueryStatus(Enum):
    """Stability classification of a single query aggregated across runs."""
    PASSING = 1
    UNKNOWN = 2  # upstream is unstable
    FAILING = 3
def get_query_status(query_data: QueryData) -> QueryStatus:
    """Classify a query's stability from its aggregated per-run results."""
    unstable_ratio = query_data.others_disagree / query_data.total
    if unstable_ratio >= UPSTREAM_UNSTABLE_THRESHOLD:
        return QueryStatus.UNKNOWN
    failure_ratio = query_data.target_disagrees / query_data.total
    if failure_ratio < ALLOWED_FAIL_THRESHOLD:
        return QueryStatus.PASSING
    return QueryStatus.FAILING
class QueryStatistics(JSONDataObject):
    """Sets of QIDs classified as failing or unstable across multiple runs."""

    _ATTRIBUTES = {
        'failing': (set, list),
        'unstable': (set, list),
    }

    def __init__(
            self,
            failing: Optional[Set[QID]] = None,
            unstable: Optional[Set[QID]] = None,
            _restore_dict: Optional[Mapping[str, Any]] = None
    ) -> None:
        super().__init__()
        self.failing = set() if failing is None else failing
        self.unstable = set() if unstable is None else unstable
        if _restore_dict is not None:
            self.restore(_restore_dict)

    def add_query(self, qid: QID, query_data: QueryData) -> None:
        """Record *qid* as failing/unstable per its status; passing QIDs aren't stored."""
        status = get_query_status(query_data)
        if status is QueryStatus.FAILING:
            self.failing.add(qid)
        elif status is QueryStatus.UNKNOWN:
            self.unstable.add(qid)

    @staticmethod
    def from_reports(reports: Sequence[DiffReport]) -> 'QueryStatistics':
        """Create query statistics from multiple reports - usually used as a reference"""
        disagree_counts = collections.Counter()  # type: collections.Counter
        fail_counts = collections.Counter()  # type: collections.Counter
        saw_reprodata = False
        # collect query results
        for report in reports:
            if report.reprodata is not None:
                saw_reprodata = True
            assert report.other_disagreements is not None
            assert report.target_disagreements is not None
            for qid in report.other_disagreements.queries:
                disagree_counts[qid] += 1
            for qid in report.target_disagreements:
                fail_counts[qid] += 1
        if saw_reprodata:
            logging.warning("reprodata ignored when creating query stability statistics")
        # evaluate every query that was suspect in at least one report
        statistics = QueryStatistics()
        suspects = set(disagree_counts) | set(fail_counts)
        total = len(reports)
        for qid in suspects:
            statistics.add_query(
                qid, QueryData(total, disagree_counts[qid], fail_counts[qid]))
        return statistics
from itertools import zip_longest
import logging
from multiprocessing import pool
import random
import subprocess
from typing import ( # noqa
AbstractSet, Any, Iterator, Iterable, Mapping, Optional, Sequence, Tuple,
TypeVar, Union)
from .database import (
DNSRepliesFactory, DNSReply, key2qid, ResolverID, qid2key, QKey, WireFormat)
from .dataformat import Diff, DiffReport, FieldLabel
from .match import compare
from .query import get_query_iterator
from .sendrecv import worker_perform_single_query
from .typing import QID # noqa
T = TypeVar('T')
def restart_resolver(script_path: str) -> None:
    """Run *script_path* to restart a resolver (and clear its cache).

    Failures are logged as warnings instead of raised: a failed restart
    must not abort the whole reproduction run.
    """
    try:
        subprocess.check_call(script_path)
    except subprocess.CalledProcessError as exc:
        logging.warning('Resolver restart failed (exit code %d): %s',
                        exc.returncode, script_path)
    except OSError as exc:
        # OSError covers PermissionError (as before) and also a missing
        # script file, which previously crashed the run with an uncaught
        # FileNotFoundError; the unused `exc` variable is now used in the log.
        logging.warning('Resolver restart failed (%s): %s', exc, script_path)
def get_restart_scripts(config: Mapping[str, Any]) -> Mapping[ResolverID, str]:
    """Collect the per-resolver restart scripts configured in *config*.

    Resolvers without a ``restart_script`` entry (or missing from the config
    entirely) are skipped with a warning.
    """
    scripts = {}
    for resolver in config['servers']['names']:
        entry = config.get(resolver, {})
        if 'restart_script' in entry:
            scripts[resolver] = entry['restart_script']
        else:
            logging.warning('No restart script available for "%s"!', resolver)
    return scripts
def chunker(iterable: Iterable[T], size: int) -> Iterator[Iterable[T]]:
    """Split *iterable* into successive chunks of length *size*.

    The final chunk is padded with ``None`` up to *size*:
    chunker([x, y, z], 2) --> (x, y), (z, None)
    """
    shared_iter = iter(iterable)
    return zip_longest(*(shared_iter,) * size)
def process_answers(
        qkey: QKey,
        answers: Mapping[ResolverID, DNSReply],
        report: DiffReport,
        criteria: Sequence[FieldLabel],
        target: ResolverID
) -> None:
    """Update the report's reproduction counters from one re-queried answer set.

    Compares *answers* with *criteria* against the *target* resolver and, for
    the query identified by *qkey*, records whether the other resolvers still
    agree, whether the original difference was reproduced exactly, or whether
    a different (non-empty) difference appeared instead.

    Raises:
        RuntimeError: if the report lacks disagreement or reproduction data.
    """
    if report.target_disagreements is None or report.reprodata is None:
        raise RuntimeError("Report doesn't contain necessary data!")
    qid = key2qid(qkey)
    reprocounter = report.reprodata[qid]
    others_agree, mismatches = compare(answers, criteria, target)
    reprocounter.retries += 1  # every processed answer set counts as one attempt
    if others_agree:
        reprocounter.upstream_stable += 1
        assert mismatches is not None
        new_diff = Diff(qid, mismatches)
        if new_diff == report.target_disagreements[qid]:
            # exact same difference as originally recorded
            reprocounter.verified += 1
        elif new_diff:
            # non-empty diff that does NOT match the recorded one
            # (relies on Diff truthiness: an empty diff means no failure)
            reprocounter.different_failure += 1
def query_stream_from_disagreements(
        lmdb,
        report: DiffReport,
        skip_unstable: bool = True,
        skip_non_reproducible: bool = True,
        shuffle: bool = True
) -> Iterator[Tuple[QKey, WireFormat]]:
    """Yield (query key, query wire data) for disagreements worth reproducing.

    Walks the report's target disagreements and, by default, drops queries
    whose reproduction counters mark them as unstable upstream or as not
    reproduced on every retry.

    Raises:
        RuntimeError: if the report lacks disagreement or reproduction data.
    """
    if report.target_disagreements is None or report.reprodata is None:
        raise RuntimeError("Report doesn't contain necessary data!")
    qids = report.target_disagreements.keys()  # type: Union[Sequence[QID], AbstractSet[QID]]
    if shuffle:
        # random.sample produces a fresh, shuffled list of the QIDs
        qids = random.sample(qids, len(qids))
    for qid, qwire in get_query_iterator(lmdb, qids):
        diff = report.target_disagreements[qid]
        counter = report.reprodata[qid]
        is_unstable = counter.retries != counter.upstream_stable
        if skip_unstable and is_unstable:
            logging.debug('Skipping QID %7d: unstable upstream', diff.qid)
            continue
        not_fully_reproducible = counter.retries != counter.verified
        if skip_non_reproducible and not_fully_reproducible:
            logging.debug('Skipping QID %7d: not 100 %% reproducible', diff.qid)
            continue
        yield qid2key(qid), qwire
def reproduce_queries(
        query_stream: Iterator[Tuple[QKey, WireFormat]],
        report: DiffReport,
        dnsreplies_factory: DNSRepliesFactory,
        criteria: Sequence[FieldLabel],
        target: ResolverID,
        restart_scripts: Optional[Mapping[ResolverID, str]] = None,
        nproc: int = 1
) -> None:
    """Re-send queries in parallel and update report.reprodata counters in place.

    Queries are processed in chunks of *nproc*; before each chunk, every
    configured restart script is executed to restart the resolvers and clear
    their caches, so each chunk starts from a cold cache.

    Args:
        query_stream: (query key, query wire data) pairs to re-send.
        report: diff report whose reprodata is updated via process_answers().
        dnsreplies_factory: parses the raw reply data from the workers.
        criteria: diff criteria passed through to process_answers().
        target: the resolver whose answers are being compared.
        restart_scripts: per-resolver restart script paths (may be empty).
        nproc: number of worker processes (and the chunk size).
    """
    if restart_scripts is None:
        restart_scripts = {}
    with pool.Pool(processes=nproc) as p:
        done = 0
        for process_args in chunker(query_stream, nproc):
            # restart resolvers and clear their cache
            for script in restart_scripts.values():
                restart_resolver(script)
            # chunker() pads the last chunk with None -- drop the padding
            process_args = [args for args in process_args if args is not None]
            for qkey, replies_data, in p.imap_unordered(
                    worker_perform_single_query,
                    process_args,
                    chunksize=1):
                replies = dnsreplies_factory.parse(replies_data)
                process_answers(qkey, replies, report, criteria, target)
            done += len(process_args)
            logging.debug('Processed {:4d} queries'.format(done))
import collections
from enum import Enum
import logging
import math
import statistics
from typing import Any, Dict, List, Mapping, Optional, Sequence # noqa
......@@ -7,8 +8,9 @@ from typing import Any, Dict, List, Mapping, Optional, Sequence # noqa
import numpy
import scipy.stats
from .dataformat import Counter, JSONDataObject, Summary
from .cfg import ALL_FIELDS
from .dataformat import Counter, DiffReport, JSONDataObject, Summary
from .qstats import QueryStatistics
class Stats(JSONDataObject):
......@@ -244,11 +246,14 @@ class SummaryStatistics(JSONDataObject):
'fields': (
lambda x: FieldStatistics(_restore_dict=x),
lambda x: x.save()),
'queries': (
lambda x: QueryStatistics(_restore_dict=x),
lambda x: x.save()),
}
def __init__(
self,
summaries: Sequence[Summary] = None,
reports: Sequence[DiffReport] = None,
_restore_dict: Mapping[str, Any] = None
) -> None:
super().__init__()
......@@ -258,12 +263,28 @@ class SummaryStatistics(JSONDataObject):
self.not_reproducible = None
self.target_disagreements = None
self.fields = None
if summaries is not None:
self.queries = None
if reports is not None:
# use only reports with diffsum
usable_reports = []
for report in reports:
if report.summary is None:
logging.warning('Empty diffsum in %s Omitting...', report.fileorigin)
else:
usable_reports.append(report)
summaries = [
report.summary for report in reports if report.summary is not None]
assert len(summaries) == len(usable_reports)
if not summaries:
raise ValueError('No summaries found in reports!')
self.sample_size = len(summaries)
self.upstream_unstable = Stats([s.upstream_unstable for s in summaries])
self.usable_answers = Stats([s.usable_answers for s in summaries])
self.not_reproducible = Stats([s.not_reproducible for s in summaries])
self.target_disagreements = Stats([len(s) for s in summaries])
self.fields = FieldStatistics(summaries)
self.queries = QueryStatistics.from_reports(usable_reports)
elif _restore_dict is not None:
self.