Mirror of https://github.com/ultrajson/ultrajson.git (synced 2024-05-19 13:26:25 +02:00)

Commit 2566b2e094: Merge a580404b46 into 04daf02b94
@@ -0,0 +1,8 @@
if __name__ == "__main__":
    """
    CommandLine:
        python -m json_benchmarks
    """
    from json_benchmarks import core

    core.main()
@@ -0,0 +1,116 @@
"""
The analysis of the measurements
"""
import scriptconfig as scfg
import ubelt as ub


class AnalysisConfig(scfg.Config):
    default = {
        "cache_dir": scfg.Value(
            None,
            help=ub.paragraph(
                """
                Location for benchmark cache.
                Defaults to $XDG_CACHE/ujson/benchmark_results/
                """
            ),
        ),
    }

    def normalize(self):
        dpath = self["cache_dir"]
        if dpath is None:
            dpath = ub.Path.appdir("ujson/benchmark_results")
        dpath = ub.Path(dpath)
        self["cache_dir"] = dpath


def analyze_results(result_fpaths):
    from json_benchmarks import benchmarker
    from json_benchmarks.benchmarker import util_stats

    results = []
    for fpath in ub.ProgIter(result_fpaths, desc="load results"):
        # Each file holds one serialized BenchmarkerResult; load it once and
        # flatten its rows into Result objects for the analysis below.
        result = benchmarker.BenchmarkerResult.load(fpath)
        results.extend(result.to_result_list())

    RECORD_ALL = 0
    metric_key = "time" if RECORD_ALL else "mean_time"

    # results = benchmark.result.to_result_list()

    analysis = benchmarker.result_analysis.ResultAnalysis(
        results,
        metrics=[metric_key],
        params=["impl", "impl_version"],
        metric_objectives={
            "min_time": "min",
            "mean_time": "min",
            "time": "min",
        },
    )
    analysis.analysis()

    table = analysis.table
    stats_table = util_stats.aggregate_stats(
        table, suffix="_time", group_keys=["name", "impl_version"]
    )

    single_size = stats_table[
        (stats_table["size"] == 256) | stats_table["size"].isnull()
    ]
    # single_size_combo = aggregate_stats(single_size, None)
    single_size_combo = util_stats.aggregate_stats(
        single_size, suffix="_time", group_keys=["name", "impl_version"]
    )

    param_group = ["impl", "impl_version"]
    single_size_combo["calls/sec"] = 1 / single_size_combo["mean_time"]
    # _single_size_combo = single_size_combo.copy()
    time_piv = single_size_combo.pivot(["input", "func"], param_group, "mean_time")

    hz_piv = 1 / time_piv
    # hzstr_piv = (1 / time_piv).applymap(lambda x: f"{x:,.02f}")
    print("Table for size=256")
    # print(hzstr_piv.to_markdown())
    print(hz_piv.to_markdown(floatfmt=",.02f"))
    print("")
    print("Above metrics are in calls/sec; larger is better.")

    speedup_piv = hz_piv / hz_piv["json"].values
    print(speedup_piv.to_markdown(floatfmt=",.02g"))

    analysis.abalate(param_group)
    # benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)

    xlabel = "size"
    # Set these to empty lists if they are not used
    group_labels = {
        "fig": ["input"],
        "col": ["func"],
        # "fig": [],
        # "col": ["func" "input"],
        "hue": ["impl", "impl_version"],
        "size": [],
    }
    import kwplot

    kwplot.autosns()
    self = analysis  # NOQA

    data = stats_table
    plots = analysis.plot(
        xlabel,
        metric_key,
        group_labels,
        xscale="log",
        yscale="log",
        data=data,
    )
    plots  # NOQA
    kwplot.show_if_requested()
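Not part of the diff: a minimal usage sketch for analyze_results, assuming this module is importable as json_benchmarks.analysis and that result files were previously written by Benchmarker.dump_in_dpath into the cache directory that AnalysisConfig resolves. The glob pattern matches the file-name format used in benchmarker.py; the plotting step additionally needs kwplot and seaborn installed.

import ubelt as ub
from json_benchmarks.analysis import AnalysisConfig, analyze_results  # assumed module path

config = AnalysisConfig()
config.normalize()  # resolves cache_dir to its default when unset
dpath = config["cache_dir"]
result_fpaths = sorted(dpath.glob("benchmarks_*.json"))  # files dumped by Benchmarker
if result_fpaths:
    analyze_results(result_fpaths)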
@@ -0,0 +1,68 @@
"""
A helper module for executing, serializing, combining, and comparing benchmarks
"""

__mkinit__ = """
# Autogenerate this file
mkinit ~/code/ultrajson/json_benchmarks/benchmarker/__init__.py -w
"""

__version__ = "0.1.0"

from json_benchmarks.benchmarker import (
    benchmarker,
    process_context,
    result_analysis,
    util_json,
    util_stats,
    visualize,
)
from json_benchmarks.benchmarker.benchmarker import (
    Benchmarker,
    BenchmarkerConfig,
    BenchmarkerResult,
)
from json_benchmarks.benchmarker.process_context import ProcessContext
from json_benchmarks.benchmarker.result_analysis import (
    DEFAULT_METRIC_TO_OBJECTIVE,
    Result,
    ResultAnalysis,
    SkillTracker,
)
from json_benchmarks.benchmarker.util_json import (
    ensure_json_serializable,
    find_json_unserializable,
    indexable_allclose,
)
from json_benchmarks.benchmarker.util_stats import (
    aggregate_stats,
    combine_stats,
    combine_stats_arrs,
    stats_dict,
)
from json_benchmarks.benchmarker.visualize import benchmark_analysis

__all__ = [
    "Benchmarker",
    "BenchmarkerConfig",
    "BenchmarkerResult",
    "DEFAULT_METRIC_TO_OBJECTIVE",
    "ProcessContext",
    "Result",
    "ResultAnalysis",
    "SkillTracker",
    "aggregate_stats",
    "benchmark_analysis",
    "benchmarker",
    "combine_stats",
    "combine_stats_arrs",
    "ensure_json_serializable",
    "find_json_unserializable",
    "indexable_allclose",
    "process_context",
    "result_analysis",
    "stats_dict",
    "util_json",
    "util_stats",
    "visualize",
]
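Not part of the diff: because mkinit re-exports these names at the subpackage level, downstream code (such as analysis.py above) can import the main entry points directly, for example:

from json_benchmarks.benchmarker import Benchmarker, ResultAnalysis, aggregate_stats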
@@ -0,0 +1,233 @@
import json
from dataclasses import dataclass

import numpy as np
import timerit
import ubelt as ub

from json_benchmarks.benchmarker.process_context import ProcessContext


@dataclass
class BenchmarkerConfig:
    name: str = None
    num: int = 100
    bestof: int = 10


class BenchmarkerResult:
    """
    Serialization for a single benchmark result
    """

    def __init__(self, context, rows):
        self.context = context
        self.rows = rows

    def __json__(self):
        data = {
            "type": "benchmark_result",
            "context": self.context,
            "rows": self.rows,
        }
        return data

    @classmethod
    def from_json(cls, data):
        assert data["type"] == "benchmark_result"
        self = cls(data["context"], data["rows"])
        return self

    @classmethod
    def load(cls, fpath):
        with open(fpath) as file:
            data = json.load(file)
        self = cls.from_json(data)
        return self

    def to_result_list(self):
        """
        Returns a list of result objects suitable for ResultAnalysis

        Returns:
            List[Result]
        """
        from json_benchmarks.benchmarker import result_analysis

        results = []
        for row in self.rows:
            result = result_analysis.Result(
                name=row["name"],
                metrics=row["metrics"],
                params=row["params"].copy(),
            )
            machine = self.context["machine"]
            assert not ub.dict_isect(result.params, machine)
            result.params.update(machine)
            results.append(result)
        return results


class Benchmarker:
    """
    Helper to organize the execution and serialization of a benchmark

    Example:
        >>> import numpy as np
        >>> impl_lut = {
        >>>     'numpy': np.sum,
        >>>     'builtin': sum,
        >>> }
        >>> def data_lut(params):
        >>>     item = 42 if params['dtype'] == 'int' else 42.0
        >>>     data = [item] * params['size']
        >>>     return data
        >>> basis = {
        >>>     'impl': ['builtin', 'numpy'],
        >>>     'size': [10, 10000],
        >>>     'dtype': ['int', 'float'],
        >>> }
        >>> self = Benchmarker(name='demo', num=10, bestof=3, basis=basis)
        >>> for params in self.iter_params():
        >>>     impl = impl_lut[params['impl']]
        >>>     data = data_lut(params)
        >>>     for timer in self.measure():
        >>>         with timer:
        >>>             impl(data)
        >>> print('self.result = {}'.format(ub.repr2(self.result.__json__(), sort=0, nl=2, precision=8)))
        >>> dpath = ub.Path.appdir('benchmarker/demo').ensuredir()
        >>> self.dump_in_dpath(dpath)
    """

    def __init__(self, basis={}, verbose=1, **kwargs):
        self.basis = basis

        self.config = BenchmarkerConfig(**kwargs)

        self.ti = timerit.Timerit(
            num=self.config.num,
            bestof=self.config.bestof,
            verbose=verbose,
        )
        self.context = ProcessContext(name=self.config.name)
        self.rows = []
        self.RECORD_ALL = 0
        self.result = None

    def dump_in_dpath(self, dpath):
        dpath = ub.Path(dpath)
        timestamp = self.context.obj["stop_timestamp"]
        fname = f"benchmarks_{self.config.name}_{timestamp}.json"
        fpath = dpath / fname

        with open(fpath, "w") as file:
            json.dump(self.result.__json__(), file)
        return fpath

    def iter_params(self):
        self.context.start()
        if isinstance(self.basis, dict):
            grid_iter = ub.named_product(self.basis)
        else:
            grid_iter = ub.flatten([ub.named_product(b) for b in self.basis])

        for params in grid_iter:
            self.params = params
            self.key = ub.repr2(params, compact=1, si=1)
            yield params
        obj = self.context.stop()
        self.result = BenchmarkerResult(obj, self.rows)

    def measure(self):
        yield from self.ti.reset(self.key)

        rows = self.rows
        ti = self.ti
        key = self.key
        params = self.params
        times = ti.robust_times()
        if self.RECORD_ALL:
            for time in times:
                metrics = {
                    "time": time,
                }
                row = {
                    "name": key,
                    "metrics": metrics,
                    "params": params,
                }
                rows.append(row)
        else:
            from json_benchmarks.benchmarker import util_stats

            times = np.array(times)
            metrics = util_stats.stats_dict(times, "_time")
            row = {
                "metrics": metrics,
                "params": params,
                "name": key,
            }
            rows.append(row)


def _test_demo():
    import numpy as np

    from json_benchmarks.benchmarker import BenchmarkerResult, result_analysis
    from json_benchmarks.benchmarker.benchmarker import Benchmarker

    impl_lut = {
        "numpy": np.sum,
        "builtin": sum,
    }

    def data_lut(params):
        item = 42 if params["dtype"] == "int" else 42.0
        data = [item] * params["size"]
        return data

    basis = {
        "impl": ["builtin", "numpy"],
        "size": [10, 10000],
        "dtype": ["int", "float"],
    }

    dpath = ub.Path.appdir("benchmarker/agg_demo").delete().ensuredir()

    def run_one_benchmark():
        self = Benchmarker(name="agg_demo", num=10, bestof=3, basis=basis)
        for params in self.iter_params():
            impl = impl_lut[params["impl"]]
            data = data_lut(params)
            for timer in self.measure():
                with timer:
                    impl(data)
        fpath = self.dump_in_dpath(dpath)
        return fpath

    # Run the benchmark multiple times
    fpaths = []
    for _ in range(5):
        fpath = run_one_benchmark()
        fpaths.append(fpath)

    results = []
    for fpath in fpaths:
        # Load each dumped file once and flatten its rows into Result objects
        result = BenchmarkerResult.load(fpath)
        results.extend(result.to_result_list())

    analysis = result_analysis.ResultAnalysis(
        results,
        metrics=["min", "mean"],
        params=["impl"],
        metric_objectives={
            "min": "min",
            "mean": "min",
        },
    )
    analysis.analysis()
    # single_df = pd.DataFrame(data['rows'])
    # context = data['context']
    # single_df
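Not part of the diff: an illustrative sketch of the payload that Benchmarker.dump_in_dpath / BenchmarkerResult.__json__ write to disk. All values below are made up, and the exact metric keys depend on util_stats.stats_dict; only the top-level "type", "context", and "rows" layout is what from_json checks.

example_payload = {
    "type": "benchmark_result",           # checked by BenchmarkerResult.from_json
    "context": {                           # produced by ProcessContext.stop()
        "type": "process_context",
        "name": "demo",
        "args": ["python", "-m", "json_benchmarks"],
        "config": None,
        "machine": {"host": "example-host", "cpu_brand": "Example CPU"},
        "start_timestamp": "2022-01-01T000000+0000",
        "stop_timestamp": "2022-01-01T000100+0000",
    },
    "rows": [                              # one row per parameter combination
        {
            "name": "dtype=int,impl=builtin,size=10",
            "metrics": {"mean_time": 1e-06, "min_time": 9e-07},
            "params": {"impl": "builtin", "size": 10, "dtype": "int"},
        },
    ],
}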
@@ -0,0 +1,123 @@
import platform
import socket
import sys

import ubelt as ub


class ProcessContext:
    """
    Context manager to track the context under which a result was computed

    Example:
        >>> from json_benchmarks.benchmarker.process_context import *  # NOQA
        >>> self = ProcessContext()
        >>> obj = self.start().stop()
        >>> print('obj = {}'.format(ub.repr2(obj, nl=2)))
    """

    def __init__(self, name=None, args=None, config=None):
        if args is None:
            args = sys.argv

        self.obj = {
            "type": "process_context",
            "name": name,
            "args": args,
            "config": config,
            "machine": None,
            "start_timestamp": None,
            "stop_timestamp": None,
        }

    def _timestamp(self):
        import datetime

        timestamp = (
            datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat()
        )
        timestamp = timestamp.replace(":", "")
        # timestamp = ub.timestamp()
        return timestamp

    def _hostinfo(self):
        return {
            "host": socket.gethostname(),
            "user": ub.Path(ub.userhome()).name,
            # 'cwd': os.getcwd(),
        }

    def _osinfo(self):
        (
            uname_system,
            _,
            uname_release,
            uname_version,
            _,
            uname_processor,
        ) = platform.uname()
        return {
            "os_name": uname_system,
            "os_release": uname_release,
            "os_version": uname_version,
            "arch": uname_processor,
        }

    def _pyinfo(self):
        return {
            "py_impl": platform.python_implementation(),
            "py_version": sys.version.replace("\n", ""),
        }

    def _meminfo(self):
        import psutil

        # TODO: could collect memory info at start and stop and intermediate
        # stages. Here we just want info that is static wrt the machine.
        # For now, just get the total available.
        svmem_info = psutil.virtual_memory()
        return {
            "mem_total": svmem_info.total,
        }

    def _cpuinfo(self):
        import cpuinfo

        _cpu_info = cpuinfo.get_cpu_info()
        cpu_info = {
            "cpu_brand": _cpu_info["brand_raw"],
        }
        return cpu_info

    def _machine(self):
        return ub.dict_union(
            self._hostinfo(),
            self._meminfo(),
            self._cpuinfo(),
            self._osinfo(),
            self._pyinfo(),
        )

    def start(self):
        self.obj.update(
            {
                "machine": self._machine(),
                "start_timestamp": self._timestamp(),
                "stop_timestamp": None,
            }
        )
        return self

    def stop(self):
        self.obj.update(
            {
                "stop_timestamp": self._timestamp(),
            }
        )
        return self.obj

    def __enter__(self):
        return self.start()

    def __exit__(self, a, b, c):
        self.stop()
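Not part of the diff: ProcessContext can be driven imperatively (start/stop, as Benchmarker does above) or as a context manager. A small sketch, noting that psutil and py-cpuinfo must be installed because _meminfo and _cpuinfo import them lazily; the name below is an arbitrary label.

proc = ProcessContext(name="example")
with proc:
    pass  # run the code whose environment should be recorded
# __exit__ calls stop(), so the finished record is available on proc.obj
record = proc.obj
assert record["machine"] is not None
assert record["stop_timestamp"] is not None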
@ -0,0 +1,1089 @@
|
|||
import itertools as it
|
||||
import math
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import scipy
|
||||
import scipy.stats # NOQA
|
||||
import ubelt as ub
|
||||
|
||||
# a list of common objectives
|
||||
DEFAULT_METRIC_TO_OBJECTIVE = {
|
||||
"time": "min",
|
||||
"ap": "max",
|
||||
"acc": "max",
|
||||
"f1": "max",
|
||||
"mcc": "max",
|
||||
#
|
||||
"loss": "min",
|
||||
"brier": "min",
|
||||
}
|
||||
|
||||
|
||||
class Result(ub.NiceRepr):
|
||||
"""
|
||||
Storage of names, parameters, and quality metrics for a single experiment.
|
||||
|
||||
Attributes:
|
||||
name (str | None):
|
||||
Name of the experiment. Optional. This is unused in the analysis.
|
||||
(i.e. names will never be used computationally. Use them for keys)
|
||||
|
||||
params (Dict[str, object]): configuration of the experiment.
|
||||
This is a dictionary mapping a parameter name to its value.
|
||||
|
||||
metrics (Dict[str, float]): quantitative results of the experiment
|
||||
This is a dictionary for each quality metric computed on this
|
||||
result.
|
||||
|
||||
meta (Dict | None): any other metadata about this result.
|
||||
This is unused in the analysis.
|
||||
|
||||
Example:
|
||||
>>> self = Result.demo(rng=32)
|
||||
>>> print('self = {}'.format(self))
|
||||
self = <Result(name=53f57161,f1=0.33,acc=0.75,param1=1,param2=6.67,param3=a)>
|
||||
|
||||
Example:
|
||||
>>> self = Result.demo(mode='alt', rng=32)
|
||||
>>> print('self = {}'.format(self))
|
||||
"""
|
||||
|
||||
def __init__(self, name, params, metrics, meta=None):
|
||||
self.name = name
|
||||
self.params = params
|
||||
self.metrics = metrics
|
||||
self.meta = meta
|
||||
|
||||
def to_dict(self):
|
||||
row = ub.dict_union({"name": self.name}, self.metrics, self.params)
|
||||
return row
|
||||
|
||||
def __nice__(self):
|
||||
row = self.to_dict()
|
||||
text = ub.repr2(row, compact=True, precision=2, sort=0)
|
||||
return text
|
||||
|
||||
@classmethod
|
||||
def demo(cls, mode="null", rng=None):
|
||||
import string
|
||||
|
||||
import kwarray
|
||||
import numpy as np
|
||||
|
||||
rng = kwarray.ensure_rng(rng)
|
||||
|
||||
if mode == "null":
|
||||
# The null hypothesis should generally be true here,
|
||||
# there is no relation between the results and parameters
|
||||
demo_param_space = {
|
||||
"param1": list(range(3)),
|
||||
"param2": np.linspace(0, 10, 10),
|
||||
"param3": list(string.ascii_lowercase[0:3]),
|
||||
}
|
||||
params = {k: rng.choice(b) for k, b in demo_param_space.items()}
|
||||
metrics = {
|
||||
"f1": rng.rand(),
|
||||
"acc": rng.rand(),
|
||||
}
|
||||
elif mode == "alt":
|
||||
# The alternative hypothesis should be true here, there is a
|
||||
# relationship between results two of the params.
|
||||
from scipy.special import expit
|
||||
|
||||
params = {
|
||||
"u": rng.randint(0, 1 + 1),
|
||||
"v": rng.randint(-1, 1 + 1),
|
||||
"x": rng.randint(-2, 3 + 1),
|
||||
"y": rng.randint(-1, 2 + 1),
|
||||
"z": rng.randint(-0, 3 + 1),
|
||||
}
|
||||
noise = rng.randn() * 1  # use the seeded rng for reproducibility
|
||||
r = 3 * params["x"] + params["y"] ** 2 + 0.3 * params["z"] ** 3
|
||||
acc = expit(r / 20 + noise)
|
||||
metrics = {
|
||||
"acc": acc,
|
||||
}
|
||||
else:
|
||||
raise KeyError(mode)
|
||||
name = ub.hash_data(params)[0:8]
|
||||
self = cls(name, params, metrics)
|
||||
return self
|
||||
|
||||
|
||||
class ResultAnalysis(ub.NiceRepr):
|
||||
"""
|
||||
Groups and runs stats on results
|
||||
|
||||
Runs statistical tests on sets of configuration-metrics pairs
|
||||
|
||||
Attributes:
|
||||
results (List[Result]): list of results
|
||||
|
||||
ignore_metrics (Set[str]): metrics to ignore
|
||||
|
||||
ignore_params (Set[str]): parameters to ignore
|
||||
|
||||
metric_objectives (Dict[str, str]):
|
||||
indicate if each metric should be maximized "max" or minimized
|
||||
"min"
|
||||
|
||||
metrics (List[str]):
|
||||
only consider these metrics
|
||||
|
||||
params (List[str]):
|
||||
if given, only consider these params
|
||||
|
||||
abalation_orders (Set[int]):
|
||||
The number of parameters to be held constant in each statistical
|
||||
grouping. Defaults to 1, so it groups together results where 1
|
||||
variable is held constant. Including 2 will include pairwise
|
||||
settings of parameters to be held constant. Using -1 or -2 means
|
||||
all but 1 or 2 parameters will be held constant, respectively.
|
||||
|
||||
default_objective (str):
|
||||
assume max or min for unknown metrics
|
||||
|
||||
Example:
|
||||
>>> self = ResultAnalysis.demo()
|
||||
>>> self.analysis()
|
||||
|
||||
Example:
|
||||
>>> self = ResultAnalysis.demo(num=5000, mode='alt')
|
||||
>>> self.analysis()
|
||||
|
||||
Example:
|
||||
>>> # Given a list of experiments, configs, and results
|
||||
>>> # Create a ResultAnalysis object
|
||||
>>> results = ResultAnalysis([
|
||||
>>> Result('expt0', {'param1': 2, 'param3': 'b'}, {'f1': 0.75}),
|
||||
>>> Result('expt1', {'param1': 0, 'param3': 'c'}, {'f1': 0.92}),
|
||||
>>> Result('expt2', {'param1': 1, 'param3': 'b'}, {'f1': 0.77}),
|
||||
>>> Result('expt3', {'param1': 1, 'param3': 'a'}, {'f1': 0.67}),
|
||||
>>> Result('expt4', {'param1': 0, 'param3': 'c'}, {'f1': 0.98}),
|
||||
>>> Result('expt5', {'param1': 2, 'param3': 'a'}, {'f1': 0.86}),
|
||||
>>> Result('expt6', {'param1': 1, 'param3': 'c'}, {'f1': 0.77}),
|
||||
>>> Result('expt7', {'param1': 1, 'param3': 'c'}, {'f1': 0.41}),
|
||||
>>> Result('expt8', {'param1': 1, 'param3': 'a'}, {'f1': 0.64}),
|
||||
>>> Result('expt9', {'param1': 0, 'param3': 'b'}, {'f1': 0.95}),
|
||||
>>> ])
|
||||
>>> # Calling the analysis method prints something like the following
|
||||
>>> results.analysis()
|
||||
|
||||
PARAMETER 'param1' - f1
|
||||
=======================
|
||||
f1 mean std max min num best
|
||||
param1
|
||||
0 0.950 0.030000 0.98 0.92 3.0 0.98
|
||||
2 0.805 0.077782 0.86 0.75 2.0 0.86
|
||||
1 0.652 0.147377 0.77 0.41 5.0 0.77
|
||||
|
||||
ANOVA hypothesis (roughly): the param 'param1' has no effect on the metric
|
||||
Reject this hypothesis if the p value is less than a threshold
|
||||
Rank-ANOVA: p=0.0397
|
||||
Mean-ANOVA: p=0.0277
|
||||
|
||||
Pairwise T-Tests
|
||||
Is param1=0 about as good as param1=2?
|
||||
ttest_ind: p=0.2058
|
||||
Is param1=1 about as good as param1=2?
|
||||
ttest_ind: p=0.1508
|
||||
|
||||
|
||||
PARAMETER 'param3' - f1
|
||||
=======================
|
||||
f1 mean std max min num best
|
||||
param3
|
||||
c 0.770000 0.255734 0.98 0.41 4.0 0.98
|
||||
b 0.823333 0.110151 0.95 0.75 3.0 0.95
|
||||
a 0.723333 0.119304 0.86 0.64 3.0 0.86
|
||||
|
||||
ANOVA hypothesis (roughly): the param 'param3' has no effect on the metric
|
||||
Reject this hypothesis if the p value is less than a threshold
|
||||
Rank-ANOVA: p=0.5890
|
||||
Mean-ANOVA: p=0.8145
|
||||
|
||||
Pairwise T-Tests
|
||||
Is param3=b about as good as param3=c?
|
||||
ttest_ind: p=0.7266
|
||||
Is param3=a about as good as param3=b?
|
||||
ttest_ind: p=0.3466
|
||||
ttest_rel: p=0.3466
|
||||
Is param3=a about as good as param3=c?
|
||||
ttest_ind: p=0.7626
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
results,
|
||||
metrics=None,
|
||||
params=None,
|
||||
ignore_params=None,
|
||||
ignore_metrics=None,
|
||||
metric_objectives=None,
|
||||
abalation_orders={1},
|
||||
default_objective="max",
|
||||
p_threshold=0.05,
|
||||
):
|
||||
self.results = results
|
||||
if ignore_metrics is None:
|
||||
ignore_metrics = set()
|
||||
if ignore_params is None:
|
||||
ignore_params = set()
|
||||
self.ignore_params = ignore_params
|
||||
self.ignore_metrics = ignore_metrics
|
||||
|
||||
self.abalation_orders = abalation_orders
|
||||
self.default_objective = default_objective
|
||||
|
||||
# encode if we want to maximize or minimize a metric
|
||||
if metric_objectives is None:
|
||||
metric_objectives = {}
|
||||
self.metric_objectives = DEFAULT_METRIC_TO_OBJECTIVE.copy()
|
||||
self.metric_objectives.update(metric_objectives)
|
||||
|
||||
self.params = params
|
||||
self.metrics = metrics
|
||||
self.statistics = None
|
||||
self.p_threshold = p_threshold
|
||||
|
||||
self._description = {}
|
||||
self._description["built"] = False
|
||||
self._description["num_results"] = len(self.results)
|
||||
|
||||
def __nice__(self):
|
||||
return ub.repr2(self._description, si=1, sv=1)
|
||||
|
||||
@classmethod
|
||||
def demo(cls, num=10, mode="null", rng=None):
|
||||
import kwarray
|
||||
|
||||
rng = kwarray.ensure_rng(rng)
|
||||
results = [Result.demo(mode=mode, rng=rng) for _ in range(num)]
|
||||
if mode == "null":
|
||||
self = cls(results, metrics={"f1", "acc"})
|
||||
else:
|
||||
self = cls(results, metrics={"acc"})
|
||||
return self
|
||||
|
||||
def run(self):
|
||||
self.build()
|
||||
self.report()
|
||||
|
||||
def analysis(self):
|
||||
# alias for run
|
||||
return self.run()
|
||||
|
||||
@ub.memoize_property
|
||||
def table(self):
|
||||
rows = [r.to_dict() for r in self.results]
|
||||
table = pd.DataFrame(rows)
|
||||
return table
|
||||
|
||||
def metric_table(self):
|
||||
rows = [r.to_dict() for r in self.results]
|
||||
table = pd.DataFrame(rows)
|
||||
return table
|
||||
|
||||
@ub.memoize_property
|
||||
def varied(self):
|
||||
config_rows = [r.params for r in self.results]
|
||||
sentinel = object()
|
||||
# pd.DataFrame(config_rows).channels
|
||||
varied = dict(ub.varied_values(config_rows, default=sentinel, min_variations=1))
|
||||
# remove nans
|
||||
varied = {
|
||||
k: {v for v in vs if not (isinstance(v, float) and math.isnan(v))}
|
||||
for k, vs in varied.items()
|
||||
}
|
||||
varied = {k: vs for k, vs in varied.items() if len(vs)}
|
||||
return varied
|
||||
|
||||
def abalation_groups(self, param_group, k=2):
|
||||
"""
|
||||
Return groups where the specified parameter(s) are varied, but all
|
||||
other non-ignored parameters are held the same.
|
||||
|
||||
Args:
|
||||
param_group (str | List[str]):
|
||||
One or more parameters that are allowed to vary
|
||||
|
||||
k (int):
|
||||
minimum number of items a group must contain to be returned
|
||||
|
||||
Returns:
|
||||
List[DataFrame]:
|
||||
a list of subsets of the table in which all but the specified
(non-ignored) parameters are held constant.
|
||||
|
||||
Example:
|
||||
>>> self = ResultAnalysis.demo()
|
||||
>>> param = 'param2'
|
||||
>>> self.abalation_groups(param)
|
||||
"""
|
||||
if not ub.iterable(param_group):
|
||||
param_group = [param_group]
|
||||
table = self.table
|
||||
config_rows = [r.params for r in self.results]
|
||||
config_keys = list(map(set, config_rows))
|
||||
# if self.params:
|
||||
# config_keys = list(self.params)
|
||||
if self.ignore_params:
|
||||
config_keys = [c - self.ignore_params for c in config_keys]
|
||||
isect_params = set.intersection(*config_keys)
|
||||
other_params = sorted(isect_params - set(param_group))
|
||||
groups = []
|
||||
for key, group in table.groupby(other_params, dropna=False):
|
||||
if len(group) >= k:
|
||||
groups.append(group)
|
||||
return groups
|
||||
|
||||
def _objective_is_ascending(self, metric_key):
|
||||
"""
|
||||
Args:
|
||||
metric_key (str): the metric in question
|
||||
|
||||
Returns:
|
||||
bool:
|
||||
True if we should minimize the objective (lower is better)
|
||||
False if we should maximize the objective (higher is better)
|
||||
"""
|
||||
objective = self.metric_objectives.get(metric_key, None)
|
||||
if objective is None:
|
||||
warnings.warn(f"assuming {self.default_objective} for {metric_key=}")
|
||||
objective = self.default_objective
|
||||
ascending = objective == "min"
|
||||
return ascending
|
||||
|
||||
def abalate(self, param_group):
|
||||
"""
|
||||
TODO:
|
||||
rectify with test-group
|
||||
|
||||
Example:
|
||||
>>> self = ResultAnalysis.demo(100)
|
||||
>>> param = 'param2'
|
||||
>>> # xdoctest: +REQUIRES(module:openskill)
|
||||
>>> self.abalate(param)
|
||||
|
||||
>>> self = ResultAnalysis.demo()
|
||||
>>> param_group = ['param2', 'param3']
|
||||
>>> # xdoctest: +REQUIRES(module:openskill)
|
||||
>>> self.abalate(param_group)
|
||||
"""
|
||||
if self.table is None:
|
||||
self.table = self.build_table()
|
||||
if not ub.iterable(param_group):
|
||||
param_group = [param_group]
|
||||
|
||||
# For hashable generic dictionary
|
||||
from collections import namedtuple
|
||||
|
||||
gd = namedtuple("config", param_group)
|
||||
|
||||
# from types import SimpleNamespace
|
||||
param_unique_vals_ = (
|
||||
self.table[param_group].drop_duplicates().to_dict("records")
|
||||
)
|
||||
param_unique_vals = [gd(**d) for d in param_unique_vals_]
|
||||
# param_unique_vals = {p: self.table[p].unique().tolist() for p in param_group}
|
||||
score_improvements = ub.ddict(list)
|
||||
scored_obs = []
|
||||
skillboard = SkillTracker(param_unique_vals)
|
||||
groups = self.abalation_groups(param_group, k=2)
|
||||
|
||||
for group in groups:
|
||||
for metric_key in self.metrics:
|
||||
ascending = self._objective_is_ascending(metric_key)
|
||||
|
||||
group = group.sort_values(metric_key, ascending=ascending)
|
||||
subgroups = group.groupby(param_group)
|
||||
if ascending:
|
||||
best_idx = subgroups[metric_key].idxmax()
|
||||
else:
|
||||
best_idx = subgroups[metric_key].idxmin()
|
||||
best_group = group.loc[best_idx]
|
||||
best_group = best_group.sort_values(metric_key, ascending=ascending)
|
||||
|
||||
for x1, x2 in it.product(best_group.index, best_group.index):
|
||||
if x1 != x2:
|
||||
r1 = best_group.loc[x1]
|
||||
r2 = best_group.loc[x2]
|
||||
k1 = gd(**r1[param_group])
|
||||
k2 = gd(**r2[param_group])
|
||||
diff = r1[metric_key] - r2[metric_key]
|
||||
score_improvements[(k1, k2, metric_key)].append(diff)
|
||||
|
||||
# metric_vals = best_group[metric_key].values
|
||||
# diffs = metric_vals[None, :] - metric_vals[:, None]
|
||||
best_group.set_index(param_group)
|
||||
# best_group[param_group]
|
||||
# best_group[metric_key].diff()
|
||||
scored_ranking = best_group[param_group + [metric_key]].reset_index(
|
||||
drop=True
|
||||
)
|
||||
scored_obs.append(scored_ranking)
|
||||
ranking = [
|
||||
gd(**d) for d in scored_ranking[param_group].to_dict("records")
|
||||
]
|
||||
skillboard.observe(ranking)
|
||||
|
||||
print(
|
||||
"skillboard.ratings = {}".format(
|
||||
ub.repr2(skillboard.ratings, nl=1, align=":")
|
||||
)
|
||||
)
|
||||
win_probs = skillboard.predict_win()
|
||||
print(f"win_probs = {ub.repr2(win_probs, nl=1)}")
|
||||
for key, improves in score_improvements.items():
|
||||
k1, k2, metric_key = key
|
||||
improves = np.array(improves)
|
||||
pos_delta = improves[improves > 0]
|
||||
print(
|
||||
f"\nWhen {k1} is better than {k2}, the improvement in {metric_key} is"
|
||||
)
|
||||
print(pd.DataFrame([pd.Series(pos_delta).describe().T]))
|
||||
return scored_obs
|
||||
|
||||
def test_group(self, param_group, metric_key):
|
||||
"""
|
||||
Get stats for a particular metric / constant group
|
||||
|
||||
Args:
|
||||
param_group (List[str]): group of parameters to hold constant.
|
||||
metric_key (str): The metric to test.
|
||||
|
||||
Returns:
|
||||
dict
|
||||
# TODO : document these stats clearly and accurately
|
||||
|
||||
Example:
|
||||
>>> self = ResultAnalysis.demo(num=100)
|
||||
>>> print(self.table)
|
||||
>>> param_group = ['param2', 'param1']
|
||||
>>> metric_key = 'f1'
|
||||
>>> stats_row = self.test_group(param_group, metric_key)
|
||||
>>> print('stats_row = {}'.format(ub.repr2(stats_row, nl=2, sort=0, precision=2)))
|
||||
"""
|
||||
param_group_name = ",".join(param_group)
|
||||
stats_row = {
|
||||
"param_name": param_group_name,
|
||||
"metric": metric_key,
|
||||
}
|
||||
# param_values = varied[param_name]
|
||||
# stats_row['param_values'] = param_values
|
||||
ascending = self._objective_is_ascending(metric_key)
|
||||
|
||||
# Find all items with this particular param value
|
||||
value_to_metric_group = {}
|
||||
value_to_metric_stats = {}
|
||||
value_to_metric = {}
|
||||
|
||||
varied_cols = sorted(self.varied.keys())
|
||||
|
||||
# Not sure if this is the right name, these are the other param keys
|
||||
# that we are not directly investigating, but might have an impact.
|
||||
# We use these to select comparable rows for pairwise t-tests
|
||||
nuisance_cols = sorted(set(self.varied.keys()) - set(param_group))
|
||||
|
||||
for param_value, group in self.table.groupby(param_group):
|
||||
metric_group = group[["name", metric_key] + varied_cols]
|
||||
metric_vals = metric_group[metric_key]
|
||||
metric_vals = metric_vals.dropna()
|
||||
if len(metric_vals) > 0:
|
||||
metric_stats = metric_vals.describe()
|
||||
value_to_metric_stats[param_value] = metric_stats
|
||||
value_to_metric_group[param_value] = metric_group
|
||||
value_to_metric[param_value] = metric_vals.values
|
||||
|
||||
moments = pd.DataFrame(value_to_metric_stats).T
|
||||
moments = moments.sort_values("mean", ascending=ascending)
|
||||
moments.index.name = param_group_name
|
||||
moments.columns.name = metric_key
|
||||
ranking = moments["mean"].index.to_list()
|
||||
param_to_rank = ub.invert_dict(dict(enumerate(ranking)))
|
||||
|
||||
# Determine a set of value pairs to do pairwise comparisons on
|
||||
value_pairs = ub.oset()
|
||||
# value_pairs.update(
|
||||
# map(frozenset, ub.iter_window(moments.index, 2)))
|
||||
value_pairs.update(
|
||||
map(
|
||||
frozenset,
|
||||
ub.iter_window(
|
||||
moments.sort_values("mean", ascending=ascending).index, 2
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
# https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
|
||||
# If the researcher can make the assumptions of an identically
|
||||
# shaped and scaled distribution for all groups, except for any
|
||||
# difference in medians, then the null hypothesis is that the
|
||||
# medians of all groups are equal, and the alternative
|
||||
# hypothesis is that at least one population median of one
|
||||
# group is different from the population median of at least one
|
||||
# other group.
|
||||
try:
|
||||
anova_krus_result = scipy.stats.kruskal(*value_to_metric.values())
|
||||
except ValueError:
|
||||
anova_krus_result = scipy.stats.stats.KruskalResult(np.nan, np.nan)
|
||||
|
||||
# https://en.wikipedia.org/wiki/One-way_analysis_of_variance
|
||||
# The One-Way ANOVA tests the null hypothesis, which states
|
||||
# that samples in all groups are drawn from populations with
|
||||
# the same mean values
|
||||
if len(value_to_metric) > 1:
|
||||
anova_1way_result = scipy.stats.f_oneway(*value_to_metric.values())
|
||||
else:
|
||||
anova_1way_result = scipy.stats.stats.F_onewayResult(np.nan, np.nan)
|
||||
|
||||
stats_row["anova_rank_H"] = anova_krus_result.statistic
|
||||
stats_row["anova_rank_p"] = anova_krus_result.pvalue
|
||||
stats_row["anova_mean_F"] = anova_1way_result.statistic
|
||||
stats_row["anova_mean_p"] = anova_1way_result.pvalue
|
||||
stats_row["moments"] = moments
|
||||
|
||||
pair_stats_list = []
|
||||
for pair in value_pairs:
|
||||
pair_stats = {}
|
||||
param_val1, param_val2 = pair
|
||||
|
||||
metric_vals1 = value_to_metric[param_val1]
|
||||
metric_vals2 = value_to_metric[param_val2]
|
||||
|
||||
rank1 = param_to_rank[param_val1]
|
||||
rank2 = param_to_rank[param_val2]
|
||||
pair_stats["winner"] = param_val1 if rank1 < rank2 else param_val2
|
||||
pair_stats["value1"] = param_val1
|
||||
pair_stats["value2"] = param_val2
|
||||
pair_stats["n1"] = len(metric_vals1)
|
||||
pair_stats["n2"] = len(metric_vals2)
|
||||
|
||||
TEST_ONLY_FOR_DIFFERENCE = True
|
||||
if TEST_ONLY_FOR_DIFFERENCE:
|
||||
if ascending:
|
||||
# We want to minimize the metric
|
||||
alternative = "less" if rank1 < rank2 else "greater"
|
||||
else:
|
||||
# We want to maximize the metric
|
||||
alternative = "greater" if rank1 < rank2 else "less"
|
||||
else:
|
||||
alternative = "two-sided"
|
||||
|
||||
ind_kw = dict(
|
||||
equal_var=False,
|
||||
alternative=alternative,
|
||||
)
|
||||
ttest_ind_result = scipy.stats.ttest_ind(
|
||||
metric_vals1, metric_vals2, **ind_kw
|
||||
)
|
||||
|
||||
if 0:
|
||||
from benchmarker.benchmarker import stats_dict
|
||||
|
||||
stats1 = stats_dict(metric_vals1)
|
||||
stats2 = stats_dict(metric_vals2)
|
||||
scipy.stats.ttest_ind_from_stats(
|
||||
stats1["mean"],
|
||||
stats1["std"],
|
||||
stats1["nobs"],
|
||||
stats2["mean"],
|
||||
stats2["std"],
|
||||
stats2["nobs"],
|
||||
**ind_kw,
|
||||
)
|
||||
# metric_vals1, metric_vals2, equal_var=False)
|
||||
|
||||
scipy.stats.ttest_ind_from_stats
|
||||
|
||||
pair_stats["ttest_ind"] = ttest_ind_result
|
||||
|
||||
# Do relative checks, need to find comparable subgroups
|
||||
metric_group1 = value_to_metric_group[param_val1]
|
||||
metric_group2 = value_to_metric_group[param_val2]
|
||||
nuisance_vals1 = metric_group1[nuisance_cols]
|
||||
nuisance_vals2 = metric_group2[nuisance_cols]
|
||||
nk_to_group1 = dict(list(nuisance_vals1.groupby(nuisance_cols)))
|
||||
nk_to_group2 = dict(list(nuisance_vals2.groupby(nuisance_cols)))
|
||||
common = set(nk_to_group1) & set(nk_to_group2)
|
||||
comparable_indexes1 = []
|
||||
comparable_indexes2 = []
|
||||
if common:
|
||||
for nk in common:
|
||||
group1 = nk_to_group1[nk]
|
||||
group2 = nk_to_group2[nk]
|
||||
# TODO: Not sure if taking the product of everything within
|
||||
# the comparable group is correct or not. I think it is ok.
|
||||
for i, j in it.product(group1.index, group2.index):
|
||||
comparable_indexes1.append(i)
|
||||
comparable_indexes2.append(j)
|
||||
|
||||
comparable_groups1 = metric_group1.loc[comparable_indexes1, metric_key]
|
||||
comparable_groups2 = metric_group2.loc[comparable_indexes2, metric_key]
|
||||
|
||||
# Does this need to have the values aligned?
|
||||
# I think that is the case giving my understanding of paired
|
||||
# t-tests, but the docs need a PR to make that more clear.
|
||||
ttest_rel_result = scipy.stats.ttest_rel(
|
||||
comparable_groups1, comparable_groups2
|
||||
)
|
||||
pair_stats["n_common"] = len(common)
|
||||
pair_stats["ttest_rel"] = ttest_rel_result
|
||||
pair_stats_list.append(pair_stats)
|
||||
|
||||
stats_row["pairwise"] = pair_stats_list
|
||||
return stats_row
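# Not part of the diff: a tiny worked example of the two hypothesis tests used
# in test_group above, on made-up metric samples. A low p-value suggests the
# parameter setting affects the metric; the numbers below are illustrative.
import scipy.stats
metric_under_setting_a = [0.31, 0.30, 0.33, 0.29]
metric_under_setting_b = [0.40, 0.42, 0.39, 0.41]
print(scipy.stats.kruskal(metric_under_setting_a, metric_under_setting_b).pvalue)   # "Rank-ANOVA"
print(scipy.stats.f_oneway(metric_under_setting_a, metric_under_setting_b).pvalue)  # "Mean-ANOVA"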
|
||||
|
||||
def build(self):
|
||||
import itertools as it
|
||||
|
||||
if len(self.results) < 2:
|
||||
raise Exception("need at least 2 results")
|
||||
|
||||
varied = self.varied.copy()
|
||||
if self.ignore_params:
|
||||
for k in self.ignore_params:
|
||||
varied.pop(k, None)
|
||||
if self.params:
|
||||
varied = ub.dict_isect(varied, self.params)
|
||||
|
||||
# Experimental:
|
||||
# Find Auto-abalation groups
|
||||
# TODO: when the group size is -1, instead of showing all of the group
|
||||
# settings, for each group setting do the k=1 analysis within that group
|
||||
varied_param_names = list(varied.keys())
|
||||
num_varied_params = len(varied)
|
||||
held_constant_orders = {
|
||||
num_varied_params + i if i < 0 else i for i in self.abalation_orders
|
||||
}
|
||||
held_constant_orders = [i for i in held_constant_orders if i > 0]
|
||||
held_constant_groups = []
|
||||
for k in held_constant_orders:
|
||||
held_constant_groups.extend(
|
||||
list(map(list, it.combinations(varied_param_names, k)))
|
||||
)
|
||||
|
||||
if self.metrics is None:
|
||||
avail_metrics = set.intersection(
|
||||
*[set(r.metrics.keys()) for r in self.results]
|
||||
)
|
||||
metrics_of_interest = sorted(avail_metrics - set(self.ignore_metrics))
|
||||
else:
|
||||
metrics_of_interest = self.metrics
|
||||
self.metrics_of_interest = metrics_of_interest
|
||||
self._description["metrics_of_interest"] = metrics_of_interest
|
||||
self._description["num_groups"] = len(held_constant_groups)
|
||||
|
||||
# Analyze the impact of each parameter
|
||||
self.statistics = statistics = []
|
||||
for param_group in held_constant_groups:
|
||||
for metric_key in metrics_of_interest:
|
||||
stats_row = self.test_group(param_group, metric_key)
|
||||
statistics.append(stats_row)
|
||||
|
||||
self.stats_table = pd.DataFrame(
|
||||
[
|
||||
ub.dict_diff(d, {"pairwise", "param_values", "moments"})
|
||||
for d in self.statistics
|
||||
]
|
||||
)
|
||||
|
||||
if len(self.stats_table):
|
||||
self.stats_table = self.stats_table.sort_values("anova_rank_p")
|
||||
|
||||
self._description["built"] = True
|
||||
|
||||
def report(self):
|
||||
stat_groups = ub.group_items(self.statistics, key=lambda x: x["param_name"])
|
||||
stat_groups_items = list(stat_groups.items())
|
||||
|
||||
# Modify this order to change the grouping pattern
|
||||
grid = ub.named_product(
|
||||
{
|
||||
"stat_group_item": stat_groups_items,
|
||||
"metrics": self.metrics_of_interest,
|
||||
}
|
||||
)
|
||||
for grid_item in grid:
|
||||
self._report_one(grid_item)
|
||||
|
||||
print(self.stats_table)
|
||||
|
||||
def _report_one(self, grid_item):
|
||||
p_threshold = self.p_threshold
|
||||
metric_key = grid_item["metrics"]
|
||||
stat_groups_item = grid_item["stat_group_item"]
|
||||
|
||||
param_name, stat_group = stat_groups_item
|
||||
stats_row = ub.group_items(stat_group, key=lambda x: x["metric"])[metric_key][0]
|
||||
title = f"PARAMETER: {param_name} - METRIC: {metric_key}"
|
||||
print("\n\n")
|
||||
print(title)
|
||||
print("=" * len(title))
|
||||
print(stats_row["moments"])
|
||||
anova_rank_p = stats_row["anova_rank_p"]
|
||||
anova_mean_p = stats_row["anova_mean_p"]
|
||||
# Roughly speaking
|
||||
print("")
|
||||
print(f"ANOVA: If p is low, the param {param_name!r} might have an effect")
|
||||
print(
|
||||
ub.color_text(
|
||||
f" Rank-ANOVA: p={anova_rank_p:0.8f}",
|
||||
"green" if anova_rank_p < p_threshold else None,
|
||||
)
|
||||
)
|
||||
print(
|
||||
ub.color_text(
|
||||
f" Mean-ANOVA: p={anova_mean_p:0.8f}",
|
||||
"green" if anova_mean_p < p_threshold else None,
|
||||
)
|
||||
)
|
||||
print("")
|
||||
print("Pairwise T-Tests")
|
||||
for pairstat in stats_row["pairwise"]:
|
||||
# Is this backwards?
|
||||
value1 = pairstat["value1"]
|
||||
value2 = pairstat["value2"]
|
||||
winner = pairstat["winner"]
|
||||
if value2 == winner:
|
||||
value1, value2 = value2, value1
|
||||
print(
|
||||
f" If p is low, {param_name}={value1} may outperform {param_name}={value2}."
|
||||
)
|
||||
if "ttest_ind" in pairstat:
|
||||
ttest_ind_result = pairstat["ttest_ind"]
|
||||
print(
|
||||
ub.color_text(
|
||||
f" ttest_ind: p={ttest_ind_result.pvalue:0.8f}",
|
||||
"green" if ttest_ind_result.pvalue < p_threshold else None,
|
||||
)
|
||||
)
|
||||
if "ttest_rel" in pairstat:
|
||||
n_common = pairstat["n_common"]
|
||||
ttest_rel_result = pairstat["ttest_rel"]
|
||||
print(
|
||||
ub.color_text(
|
||||
f" ttest_rel: p={ttest_rel_result.pvalue:0.8f}, n_pairs={n_common}",
|
||||
"green" if ttest_rel_result.pvalue < p_threshold else None,
|
||||
)
|
||||
)
|
||||
|
||||
def conclusions(self):
|
||||
conclusions = []
|
||||
for stat in self.statistics:
|
||||
param_name = stat["param_name"]
|
||||
metric = stat["metric"]
|
||||
for pairstat in stat["pairwise"]:
|
||||
value1 = pairstat["value1"]
|
||||
value2 = pairstat["value2"]
|
||||
winner = pairstat["winner"]
|
||||
if value2 == winner:
|
||||
value1, value2 = value2, value1
|
||||
pvalue = pairstat["ttest_ind"].pvalue
|
||||
txt = f"p={pvalue:0.8f}, If p is low, {param_name}={value1} may outperform {value2} on {metric}."
|
||||
conclusions.append(txt)
|
||||
return conclusions
|
||||
|
||||
def plot(self, xlabel, metric_key, group_labels, data=None, **kwargs):
|
||||
"""
|
||||
Args:
|
||||
group_labels (dict):
|
||||
Tells seaborn what attributes to use to distinguish curves like
|
||||
hue, size, marker. Also can contain "col" for use with
|
||||
FacetGrid, and "fig" to separate different configurations into
|
||||
different figures.
|
||||
|
||||
Returns:
|
||||
List[Dict]:
|
||||
A list for each figure containing info about that figure for any
|
||||
postprocessing.
|
||||
|
||||
Example:
|
||||
>>> self = ResultAnalysis.demo(num=1000, mode='alt')
|
||||
>>> self.analysis()
|
||||
>>> print('self = {}'.format(self))
|
||||
>>> print('self.varied = {}'.format(ub.repr2(self.varied, nl=1)))
|
||||
>>> # xdoctest: +REQUIRES(module:kwplot)
|
||||
>>> import kwplot
|
||||
>>> kwplot.autosns()
|
||||
>>> xlabel = 'x'
|
||||
>>> metric_key = 'acc'
|
||||
>>> group_labels = {
|
||||
>>> 'fig': ['u'],
|
||||
>>> 'col': ['y', 'v'],
|
||||
>>> 'hue': ['z'],
|
||||
>>> 'size': [],
|
||||
>>> }
|
||||
>>> kwargs = {'xscale': 'log', 'yscale': 'log'}
|
||||
>>> self.plot(xlabel, metric_key, group_labels, **kwargs)
|
||||
"""
|
||||
print("Init seaborn and pyplot")
|
||||
import seaborn as sns
|
||||
|
||||
sns.set()
|
||||
from matplotlib import pyplot as plt # NOQA
|
||||
|
||||
print("Starting plot")
|
||||
|
||||
if data is None:
|
||||
data = self.table
|
||||
data = data.sort_values(metric_key)
|
||||
|
||||
print("Compute group labels")
|
||||
for gname, labels in group_labels.items():
|
||||
if len(labels):
|
||||
new_col = []
|
||||
for row in data[labels].to_dict("records"):
|
||||
item = ub.repr2(row, compact=1, si=1)
|
||||
new_col.append(item)
|
||||
gkey = gname + "_key"
|
||||
data[gkey] = new_col
|
||||
|
||||
plot_kws = {
|
||||
"x": xlabel,
|
||||
"y": metric_key,
|
||||
}
|
||||
for gname, labels in group_labels.items():
|
||||
if labels:
|
||||
plot_kws[gname] = gname + "_key"
|
||||
|
||||
# Your variables may change
|
||||
# ax = plt.figure().gca()
|
||||
fig_params = plot_kws.pop("fig", [])
|
||||
|
||||
facet_kws = {
|
||||
"sharex": True,
|
||||
"sharey": True,
|
||||
}
|
||||
# facet_kws['col'] = plot_kws.pop("col", None)
|
||||
# facet_kws['row'] = plot_kws.pop("row", None)
|
||||
# if not facet_kws['row']:
|
||||
# facet_kws['col_wrap'] = 5
|
||||
plot_kws["row"] = plot_kws.get("row", None)
|
||||
# if not plot_kws['row']:
|
||||
# plot_kws['col_wrap'] = 5
|
||||
|
||||
if not fig_params:
|
||||
groups = [("", data)]
|
||||
else:
|
||||
groups = data.groupby(fig_params)
|
||||
|
||||
if "marker" not in plot_kws:
|
||||
plot_kws["marker"] = "o"
|
||||
|
||||
# We will want to overwrite this with our own std estimate
|
||||
plot_kws["ci"] = "sd"
|
||||
# err_style='band',
|
||||
# err_kws=None,
|
||||
|
||||
# Use a consistent palette across plots
|
||||
unique_hues = data["hue_key"].unique()
|
||||
palette = ub.dzip(unique_hues, sns.color_palette(n_colors=len(unique_hues)))
|
||||
plot_kws["palette"] = palette
|
||||
|
||||
# kwplot.close_figures()
|
||||
|
||||
plots = []
|
||||
base_fnum = 1
|
||||
print("Start plots")
|
||||
# hack
|
||||
hack_groups = [(k, v) for k, v in groups if k != "input=Complex object"]
|
||||
|
||||
for fnum, (fig_key, group) in enumerate(hack_groups, start=base_fnum):
|
||||
# TODO: seaborn doesn't give us any option to reuse an existing
|
||||
# figure or even specify what its handle should be. A patch should
|
||||
# be submitted to add that feature, but in the meantime work around
|
||||
# it and use the figures they give us.
|
||||
|
||||
# fig = plt.figure(fnum)
|
||||
# fig.clf()
|
||||
|
||||
facet = sns.relplot(
|
||||
data=group,
|
||||
kind="line",
|
||||
# kind="scatter",
|
||||
facet_kws=facet_kws,
|
||||
**plot_kws,
|
||||
)
|
||||
from json_benchmarks.benchmarker.util_stats import aggregate_stats
|
||||
|
||||
# print(f'facet._col_var={facet._col_var}')
|
||||
if facet._col_var is not None:
|
||||
facet_data_groups = dict(list(facet.data.groupby(facet._col_var)))
|
||||
else:
|
||||
facet_data_groups = None
|
||||
# facet_data_group_iter = iter(facet_data_groups.keys())
|
||||
|
||||
for ax in facet.axes.ravel():
|
||||
col_key = ax.get_title().split("=", 1)[-1].strip()
|
||||
# col_key = next(facet_data_group_iter)
|
||||
if facet_data_groups is not None:
|
||||
col_data = facet_data_groups[col_key]
|
||||
else:
|
||||
col_data = facet.data
|
||||
col_data["mean_time"]
|
||||
col_data["std_time"]
|
||||
xlabel = plot_kws["x"]
|
||||
ylabel = plot_kws["y"]
|
||||
subgroups = col_data.groupby(plot_kws["hue"])
|
||||
for subgroup_key, subgroup in subgroups:
|
||||
# combine stds in multiple groups on the x and manually draw errors
|
||||
suffix = "_" + ylabel.partition("_")[2]
|
||||
if "mean_" in ylabel:
|
||||
std_label = ylabel.replace("mean_", "std_")
|
||||
combo_group = aggregate_stats(
|
||||
subgroup, suffix=suffix, group_keys=[plot_kws["x"]]
|
||||
)
|
||||
_xdata = combo_group[xlabel].values
|
||||
_ydata_mean = combo_group[ylabel].values
|
||||
_ydata_std = combo_group[std_label].values
|
||||
std_label = ylabel.replace("mean_", "std_")
|
||||
|
||||
# Plot bars 3 standard deviations from the mean to
|
||||
# get a 99.7% interval
|
||||
num_std = 3
|
||||
y_data_min = _ydata_mean - num_std * _ydata_std
|
||||
y_data_max = _ydata_mean + num_std * _ydata_std
|
||||
spread_alpha = 0.3
|
||||
color = palette[subgroup_key]
|
||||
ax.fill_between(
|
||||
_xdata,
|
||||
y_data_min,
|
||||
y_data_max,
|
||||
alpha=spread_alpha,
|
||||
color=color,
|
||||
zorder=1,
|
||||
)
|
||||
# zorder=0)
|
||||
|
||||
xscale = kwargs.get("xscale", None)
|
||||
yscale = kwargs.get("yscale", None)
|
||||
for ax in facet.axes.ravel():
|
||||
if xscale is not None:
|
||||
try:
|
||||
ax.set_xscale(xscale)
|
||||
except ValueError:
|
||||
pass
|
||||
if yscale is not None:
|
||||
try:
|
||||
ax.set_yscale(yscale)
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
fig = facet.figure
|
||||
fig.suptitle(fig_key)
|
||||
fig.tight_layout()
|
||||
# facet = sns.FacetGrid(group, **facet_kws)
|
||||
# facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, **plot_kws)
|
||||
# facet.add_legend()
|
||||
|
||||
plot = {
|
||||
"fig": fig,
|
||||
"facet": facet,
|
||||
}
|
||||
plots.append(plot)
|
||||
|
||||
# if fnum >= 1:
|
||||
# break
|
||||
|
||||
# print("Adjust plots")
|
||||
# for plot in plots:
|
||||
# xscale = kwargs.get("xscale", None)
|
||||
# yscale = kwargs.get("yscale", None)
|
||||
# facet = plot["facet"]
|
||||
|
||||
# facet_data_groups = dict(list(facet.data.groupby(facet._col_var)))
|
||||
# facet_data_group_iter = iter(facet_data_groups.keys())
|
||||
|
||||
# for ax in facet.axes.ravel():
|
||||
|
||||
# if xscale is not None:
|
||||
# try:
|
||||
# ax.set_xscale(xscale)
|
||||
# except ValueError:
|
||||
# pass
|
||||
# if yscale is not None:
|
||||
# try:
|
||||
# ax.set_yscale(yscale)
|
||||
# except ValueError:
|
||||
# pass
|
||||
print("Finish")
|
||||
return plots
|
||||
|
||||
|
||||
class SkillTracker:
|
||||
"""
|
||||
Wrapper around openskill
|
||||
|
||||
Args:
|
||||
player_ids (List[T]):
|
||||
a list of ids (usually ints) used to represent each player
|
||||
|
||||
Example:
|
||||
>>> # xdoctest: +REQUIRES(module:openskill)
|
||||
>>> self = SkillTracker([1, 2, 3, 4, 5])
|
||||
>>> self.observe([2, 3]) # Player 2 beat player 3.
|
||||
>>> self.observe([1, 2, 5, 3]) # Player 3 didnt play this round.
|
||||
>>> self.observe([2, 3, 4, 5, 1]) # Everyone played, player 2 won.
|
||||
>>> win_probs = self.predict_win()
|
||||
>>> print('win_probs = {}'.format(ub.repr2(win_probs, nl=1, precision=2)))
|
||||
win_probs = {
|
||||
1: 0.20,
|
||||
2: 0.21,
|
||||
3: 0.19,
|
||||
4: 0.20,
|
||||
5: 0.20,
|
||||
}
|
||||
|
||||
Requirements:
|
||||
openskill
|
||||
"""
|
||||
|
||||
def __init__(self, player_ids):
|
||||
import openskill
|
||||
|
||||
self.player_ids = player_ids
|
||||
self.ratings = {m: openskill.Rating() for m in player_ids}
|
||||
# self.observations = []
|
||||
|
||||
def predict_win(self):
|
||||
"""
|
||||
Estimate the probability that a particular player will win given the
|
||||
current ratings.
|
||||
|
||||
Returns:
|
||||
Dict[T, float]: mapping from player ids to win probabilities
|
||||
"""
|
||||
from openskill import predict_win
|
||||
|
||||
teams = [[p] for p in list(self.ratings.keys())]
|
||||
ratings = [[r] for r in self.ratings.values()]
|
||||
probs = predict_win(ratings)
|
||||
win_probs = {team[0]: prob for team, prob in zip(teams, probs)}
|
||||
return win_probs
|
||||
|
||||
def observe(self, ranking):
|
||||
"""
|
||||
After simulating a round, pass the ranked order of who won
|
||||
(winner is first, loser is last) to this function, and it
|
||||
updates the rankings.
|
||||
|
||||
Args:
|
||||
ranking (List[T]):
|
||||
ranking of all the players that played in this round
|
||||
winners are at the front (0-th place) of the list.
|
||||
"""
|
||||
import openskill
|
||||
|
||||
# self.observations.append(ranking)
|
||||
ratings = self.ratings
|
||||
team_standings = [[r] for r in ub.take(ratings, ranking)]
|
||||
# new_values = openskill.rate(team_standings) # Not inplace
|
||||
# new_ratings = [openskill.Rating(*new[0]) for new in new_values]
|
||||
new_team_ratings = openskill.rate(team_standings)
|
||||
new_ratings = [new[0] for new in new_team_ratings]
|
||||
ratings.update(ub.dzip(ranking, new_ratings))
|
|
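Not part of the diff: a short sketch of the full ResultAnalysis flow on the demo data. It only restates what the class above already does: analysis() is build() followed by report(), and conclusions() summarizes the pairwise t-tests as strings. ResultAnalysis.demo needs kwarray, and the demo results are random, so printed p-values vary.

self = ResultAnalysis.demo(num=100, rng=0)
self.build()    # computes self.statistics, self.stats_table, and self.varied
self.report()   # prints the per-parameter ANOVA and pairwise t-test summaries
for sentence in self.conclusions():
    print(sentence)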
@ -0,0 +1,240 @@
|
|||
import copy
|
||||
import json
|
||||
import pathlib
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
import ubelt as ub
|
||||
|
||||
|
||||
def ensure_json_serializable(dict_, normalize_containers=False, verbose=0):
|
||||
"""
|
||||
Attempt to convert common types (e.g. numpy) into something json complient
|
||||
|
||||
Convert numpy and tuples into lists
|
||||
|
||||
Args:
|
||||
normalize_containers (bool, default=False):
|
||||
if True, normalizes dict containers to be standard python
|
||||
structures.
|
||||
|
||||
Example:
|
||||
>>> data = ub.ddict(lambda: int)
|
||||
>>> data['foo'] = ub.ddict(lambda: int)
|
||||
>>> data['bar'] = np.array([1, 2, 3])
|
||||
>>> data['foo']['a'] = 1
|
||||
>>> data['foo']['b'] = (1, np.array([1, 2, 3]), {3: np.int32(3), 4: np.float16(1.0)})
|
||||
>>> dict_ = data
|
||||
>>> print(ub.repr2(data, nl=-1))
|
||||
>>> assert list(find_json_unserializable(data))
|
||||
>>> result = ensure_json_serializable(data, normalize_containers=True)
|
||||
>>> print(ub.repr2(result, nl=-1))
|
||||
>>> assert not list(find_json_unserializable(result))
|
||||
>>> assert type(result) is dict
|
||||
"""
|
||||
dict_ = copy.deepcopy(dict_)
|
||||
|
||||
def _norm_container(c):
|
||||
if isinstance(c, dict):
|
||||
# Cast to a normal dictionary
|
||||
if isinstance(c, OrderedDict):
|
||||
if type(c) is not OrderedDict:
|
||||
c = OrderedDict(c)
|
||||
else:
|
||||
if type(c) is not dict:
|
||||
c = dict(c)
|
||||
return c
|
||||
|
||||
walker = ub.IndexableWalker(dict_)
|
||||
for prefix, value in walker:
|
||||
if isinstance(value, tuple):
|
||||
new_value = list(value)
|
||||
walker[prefix] = new_value
|
||||
elif isinstance(value, np.ndarray):
|
||||
new_value = value.tolist()
|
||||
walker[prefix] = new_value
|
||||
elif isinstance(value, (np.integer)):
|
||||
new_value = int(value)
|
||||
walker[prefix] = new_value
|
||||
elif isinstance(value, (np.floating)):
|
||||
new_value = float(value)
|
||||
walker[prefix] = new_value
|
||||
elif isinstance(value, (np.complexfloating)):
|
||||
new_value = complex(value)
|
||||
walker[prefix] = new_value
|
||||
elif isinstance(value, pathlib.Path):
|
||||
new_value = str(value)
|
||||
walker[prefix] = new_value
|
||||
elif hasattr(value, "__json__"):
|
||||
new_value = value.__json__()
|
||||
walker[prefix] = new_value
|
||||
elif normalize_containers:
|
||||
if isinstance(value, dict):
|
||||
new_value = _norm_container(value)
|
||||
walker[prefix] = new_value
|
||||
|
||||
if normalize_containers:
|
||||
# normalize the outer layer
|
||||
dict_ = _norm_container(dict_)
|
||||
return dict_
|
||||
|
||||
|
||||
def find_json_unserializable(data, quickcheck=False):
|
||||
"""
|
||||
Recurse through json datastructure and find any component that
|
||||
causes a serialization error. Record the location of these errors
|
||||
in the datastructure as we recurse through the call tree.
|
||||
|
||||
Args:
|
||||
data (object): data that should be json serializable
|
||||
quickcheck (bool): if True, check the entire datastructure assuming
|
||||
its ok before doing the python-based recursive logic.
|
||||
|
||||
Returns:
|
||||
List[Dict]: list of "bad part" dictionaries containing items
|
||||
'value' - the value that caused the serialization error
|
||||
'loc' - which contains a list of key/indexes that can be used
|
||||
to lookup the location of the unserializable value.
|
||||
If the "loc" is a list, then it indicates a rare case where
|
||||
a key in a dictionary is causing the serialization error.
|
||||
|
||||
Example:
|
||||
>>> part = ub.ddict(lambda: int)
|
||||
>>> part['foo'] = ub.ddict(lambda: int)
|
||||
>>> part['bar'] = np.array([1, 2, 3])
|
||||
>>> part['foo']['a'] = 1
|
||||
>>> # Create a dictionary with two unserializable parts
|
||||
>>> data = [1, 2, {'nest1': [2, part]}, {frozenset({'badkey'}): 3, 2: 4}]
|
||||
>>> parts = list(find_json_unserializable(data))
|
||||
>>> print('parts = {}'.format(ub.repr2(parts, nl=1)))
|
||||
>>> # Check expected structure of bad parts
|
||||
>>> assert len(parts) == 2
|
||||
>>> part = parts[1]
|
||||
>>> assert list(part['loc']) == [2, 'nest1', 1, 'bar']
|
||||
>>> # We can use the "loc" to find the bad value
|
||||
>>> for part in parts:
|
||||
>>> # "loc" is a list of directions containing which keys/indexes
|
||||
>>> # to traverse at each descent into the data structure.
|
||||
>>> directions = part['loc']
|
||||
>>> curr = data
|
||||
>>> special_flag = False
|
||||
>>> for key in directions:
|
||||
>>> if isinstance(key, list):
|
||||
>>> # special case for bad keys
|
||||
>>> special_flag = True
|
||||
>>> break
|
||||
>>> else:
|
||||
>>> # normal case for bad values
|
||||
>>> curr = curr[key]
|
||||
>>> if special_flag:
|
||||
>>> assert part['data'] in curr.keys()
|
||||
>>> assert part['data'] is key[1]
|
||||
>>> else:
|
||||
>>> assert part['data'] is curr
|
||||
"""
|
||||
needs_check = True
|
||||
if quickcheck:
|
||||
try:
|
||||
            # There might be a more efficient way to do this check; we
            # duplicate a lot of work by checking for unserializable data
            # this way.
|
||||
json.dumps(data)
|
||||
except Exception:
|
||||
# If there is unserializable data, find out where it is.
|
||||
# is_serializable = False
|
||||
pass
|
||||
else:
|
||||
# is_serializable = True
|
||||
needs_check = False
|
||||
|
||||
if needs_check:
|
||||
# mode = 'new'
|
||||
# if mode == 'new':
|
||||
scalar_types = (int, float, str, type(None))
|
||||
container_types = (tuple, list, dict)
|
||||
serializable_types = scalar_types + container_types
|
||||
walker = ub.IndexableWalker(data)
|
||||
for prefix, value in walker:
|
||||
*root, key = prefix
|
||||
if not isinstance(key, scalar_types):
|
||||
# Special case where a dict key is the error value
|
||||
                # Purposely make loc non-hashable so it's not confused with
                # an address. All we can know in this case is that the key
                # is at this level; there is no concept of where.
|
||||
yield {"loc": root + [[".keys", key]], "data": key}
|
||||
elif not isinstance(value, serializable_types):
|
||||
yield {"loc": prefix, "data": value}
|
||||
|
||||
|
||||
def indexable_allclose(dct1, dct2, return_info=False):
|
||||
"""
|
||||
Walks through two nested data structures and ensures that everything is
|
||||
roughly the same.
|
||||
|
||||
Args:
|
||||
dct1: a nested indexable item
|
||||
dct2: a nested indexable item
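        return_info (bool): if True, also return an info dict with
            "passlist" and "faillist" entries describing the comparison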
|
||||
|
||||
Example:
|
||||
>>> dct1 = {
|
||||
>>> 'foo': [1.222222, 1.333],
|
||||
>>> 'bar': 1,
|
||||
>>> 'baz': [],
|
||||
>>> }
|
||||
>>> dct2 = {
|
||||
>>> 'foo': [1.22222, 1.333],
|
||||
>>> 'bar': 1,
|
||||
>>> 'baz': [],
|
||||
>>> }
|
||||
>>> assert indexable_allclose(dct1, dct2)
|
||||
"""
|
||||
walker1 = ub.IndexableWalker(dct1)
|
||||
walker2 = ub.IndexableWalker(dct2)
|
||||
flat_items1 = [
|
||||
(path, value)
|
||||
for path, value in walker1
|
||||
if not isinstance(value, walker1.indexable_cls) or len(value) == 0
|
||||
]
|
||||
flat_items2 = [
|
||||
(path, value)
|
||||
for path, value in walker2
|
||||
if not isinstance(value, walker1.indexable_cls) or len(value) == 0
|
||||
]
|
||||
|
||||
flat_items1 = sorted(flat_items1)
|
||||
flat_items2 = sorted(flat_items2)
|
||||
|
||||
if len(flat_items1) != len(flat_items2):
|
||||
info = {"faillist": ["length mismatch"]}
|
||||
final_flag = False
|
||||
else:
|
||||
passlist = []
|
||||
faillist = []
|
||||
|
||||
for t1, t2 in zip(flat_items1, flat_items2):
|
||||
p1, v1 = t1
|
||||
p2, v2 = t2
|
||||
assert p1 == p2
|
||||
|
||||
flag = v1 == v2
|
||||
if not flag:
|
||||
if (
|
||||
isinstance(v1, float)
|
||||
and isinstance(v2, float)
|
||||
and np.isclose(v1, v2)
|
||||
):
|
||||
flag = True
|
||||
if flag:
|
||||
passlist.append(p1)
|
||||
else:
|
||||
faillist.append((p1, v1, v2))
|
||||
|
||||
final_flag = len(faillist) == 0
|
||||
info = {
|
||||
"passlist": passlist,
|
||||
"faillist": faillist,
|
||||
}
|
||||
|
||||
if return_info:
|
||||
return final_flag, info
|
||||
else:
|
||||
return final_flag
|
|
@@ -0,0 +1,235 @@
|
|||
import numpy as np
|
||||
import ubelt as ub
|
||||
|
||||
|
||||
def __tabulate_issue():
|
||||
# MWE for tabulate issue
|
||||
# The decimals are not aligned when using "," in the floatfmt
|
||||
import tabulate
|
||||
|
||||
data = [
|
||||
[
|
||||
13213.2,
|
||||
3213254.23,
|
||||
432432.231,
|
||||
],
|
||||
[432432.0, 432.3, 3.2],
|
||||
]
|
||||
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=",.02f"))
|
||||
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=".02f"))
|
||||
|
||||
|
||||
def __groupby_issue():
|
||||
# MWE of an issue with pandas groupby
|
||||
import pandas as pd
|
||||
|
||||
data = pd.DataFrame(
|
||||
[
|
||||
{"p1": "a", "p2": 1, "p3": 0},
|
||||
{"p1": "a", "p2": 1, "p3": 0},
|
||||
{"p1": "a", "p2": 2, "p3": 0},
|
||||
{"p1": "b", "p2": 2, "p3": 0},
|
||||
{"p1": "b", "p2": 1, "p3": 0},
|
||||
{"p1": "b", "p2": 1, "p3": 0},
|
||||
{"p1": "b", "p2": 1, "p3": 0},
|
||||
]
|
||||
)
|
||||
|
||||
by = "p1"
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {"by": by, "key": key, "type(key)": type(key)}
|
||||
print(f"result = {ub.repr2(result, nl=1)}")
|
||||
assert not ub.iterable(
|
||||
key
|
||||
), "`by` is specified as a scalar, so getting `key` as a scalar makes sense"
|
||||
|
||||
by = ["p1"]
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {"by": by, "key": key, "type(key)": type(key)}
|
||||
print(f"result = {ub.repr2(result, nl=1)}")
|
||||
assert not ub.iterable(key), (
|
||||
"`by` is specified as a list of scalars (with one element), but we "
|
||||
"still get `key` as a scalar. This does not make sense"
|
||||
)
|
||||
|
||||
by = ["p1", "p2"]
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {"by": by, "key": key, "type(key)": type(key)}
|
||||
print(f"result = {ub.repr2(result, nl=1)}")
|
||||
assert ub.iterable(key), (
|
||||
"`by` is specified as a list of scalars (with multiple elements), "
|
||||
"and we still get `key` as a tuple of values. This makes sense"
|
||||
)
|
||||
|
||||
|
||||
def aggregate_stats(data, suffix="", group_keys=None):
|
||||
"""
|
||||
    Given columns interpreted as containing stats, aggregate those stats
    within each group. In each aggregated row, any non-group, non-stat column
    with a consistent value across the group is kept as-is; otherwise that
    column is set to None.
|
||||
|
||||
Args:
|
||||
data (DataFrame):
|
||||
a data frame with columns: 'mean', 'std', 'min', 'max', and 'nobs'
|
||||
(possibly with a suffix)
|
||||
|
||||
suffix (str):
|
||||
if the nobs, std, mean, min, and max have a suffix, specify it
|
||||
|
||||
        group_keys (List[str]):
            columns to group by; defaults to all non-stat columns
|
||||
|
||||
Returns:
|
||||
DataFrame:
|
||||
New dataframe where grouped rows have been aggregated into a single
|
||||
row.
|
||||
|
||||
Example:
|
||||
>>> import sys, ubelt
|
||||
>>> sys.path.append(ubelt.expandpath('~/code/ultrajson'))
|
||||
>>> from json_benchmarks.benchmarker.util_stats import * # NOQA
|
||||
>>> import pandas as pd
|
||||
>>> data = pd.DataFrame([
|
||||
>>> #
|
||||
>>> {'mean': 8, 'std': 1, 'min': 0, 'max': 1, 'nobs': 2, 'p1': 'a', 'p2': 1},
|
||||
>>> {'mean': 6, 'std': 2, 'min': 0, 'max': 1, 'nobs': 3, 'p1': 'a', 'p2': 1},
|
||||
>>> {'mean': 7, 'std': 3, 'min': 0, 'max': 2, 'nobs': 5, 'p1': 'a', 'p2': 2},
|
||||
>>> {'mean': 5, 'std': 4, 'min': 0, 'max': 3, 'nobs': 7, 'p1': 'a', 'p2': 1},
|
||||
>>> #
|
||||
>>> {'mean': 3, 'std': 1, 'min': 0, 'max': 20, 'nobs': 6, 'p1': 'b', 'p2': 1},
|
||||
>>> {'mean': 0, 'std': 2, 'min': 0, 'max': 20, 'nobs': 26, 'p1': 'b', 'p2': 2},
|
||||
>>> {'mean': 9, 'std': 3, 'min': 0, 'max': 20, 'nobs': 496, 'p1': 'b', 'p2': 1},
|
||||
>>> #
|
||||
>>> {'mean': 5, 'std': 0, 'min': 0, 'max': 1, 'nobs': 2, 'p1': 'c', 'p2': 2},
|
||||
>>> {'mean': 5, 'std': 0, 'min': 0, 'max': 1, 'nobs': 7, 'p1': 'c', 'p2': 2},
|
||||
>>> #
|
||||
>>> {'mean': 5, 'std': 2, 'min': 0, 'max': 2, 'nobs': 7, 'p1': 'd', 'p2': 2},
|
||||
>>> #
|
||||
>>> {'mean': 5, 'std': 2, 'min': 0, 'max': 2, 'nobs': 7, 'p1': 'e', 'p2': 1},
|
||||
>>> ])
|
||||
>>> print(data)
|
||||
>>> new_data = aggregate_stats(data)
|
||||
>>> print(new_data)
|
||||
>>> new_data1 = aggregate_stats(data, group_keys=['p1'])
|
||||
>>> print(new_data1)
|
||||
>>> new_data2 = aggregate_stats(data, group_keys=['p2'])
|
||||
>>> print(new_data2)
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
# Stats groupings
|
||||
raw_stats_cols = ["nobs", "std", "mean", "max", "min"]
|
||||
stats_cols = [c + suffix for c in raw_stats_cols]
|
||||
mapper = dict(zip(stats_cols, raw_stats_cols))
|
||||
unmapper = dict(zip(raw_stats_cols, stats_cols))
|
||||
non_stats_cols = list(ub.oset(data.columns) - stats_cols)
|
||||
if group_keys is None:
|
||||
group_keys = non_stats_cols
|
||||
non_group_keys = list(ub.oset(non_stats_cols) - group_keys)
|
||||
|
||||
new_rows = []
|
||||
for group_vals, group in list(data.groupby(group_keys)):
|
||||
# hack, is this a pandas bug in 1.4.1? Is it fixed? (Not in 1.4.2)
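        # (see __groupby_issue above for a minimal reproduction of this)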
|
||||
if isinstance(group_keys, list) and len(group_keys) == 1:
|
||||
# For some reason, when we specify group keys as a list of one
|
||||
# element, we get a squeezed value out
|
||||
group_vals = (group_vals,)
|
||||
stat_data = group[stats_cols].rename(mapper, axis=1)
|
||||
new_stats = combine_stats_arrs(stat_data)
|
||||
new_time_stats = ub.map_keys(unmapper, new_stats)
|
||||
new_row = ub.dzip(group_keys, group_vals)
|
||||
if non_group_keys:
|
||||
for k in non_group_keys:
|
||||
unique_vals = group[k].unique()
|
||||
if len(unique_vals) == 1:
|
||||
new_row[k] = unique_vals[0]
|
||||
else:
|
||||
new_row[k] = None
|
||||
new_row.update(new_time_stats)
|
||||
new_rows.append(new_row)
|
||||
new_data = pd.DataFrame(new_rows)
|
||||
return new_data
|
||||
|
||||
|
||||
def stats_dict(data, suffix=""):
|
||||
stats = {
|
||||
"nobs" + suffix: len(data),
|
||||
"mean" + suffix: data.mean(),
|
||||
"std" + suffix: data.std(),
|
||||
"min" + suffix: data.min(),
|
||||
"max" + suffix: data.max(),
|
||||
}
|
||||
return stats
|
||||
|
||||
|
||||
def combine_stats(s1, s2):
|
||||
"""
|
||||
Helper for combining mean and standard deviation of multiple measurements
|
||||
|
||||
Args:
|
||||
        s1 (dict): stats dict containing nobs, mean, std, min, and max
        s2 (dict): stats dict containing nobs, mean, std, min, and max
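
    Returns:
        dict: combined stats over the pooled observations, in the same format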
|
||||
|
||||
Example:
|
||||
>>> basis = {
|
||||
>>> 'nobs1': [1, 10, 100, 10000],
|
||||
>>> 'nobs2': [1, 10, 100, 10000],
|
||||
>>> }
|
||||
>>> for params in ub.named_product(basis):
|
||||
>>> data1 = np.random.rand(params['nobs1'])
|
||||
>>> data2 = np.random.rand(params['nobs2'])
|
||||
>>> data3 = np.hstack([data1, data2])
|
||||
>>> s1 = stats_dict(data1)
|
||||
>>> s2 = stats_dict(data2)
|
||||
>>> s3 = stats_dict(data3)
|
||||
>>> # Check that our combo works
|
||||
>>> combo_s3 = combine_stats(s1, s2)
|
||||
>>> compare = pd.DataFrame({'raw': s3, 'combo': combo_s3})
|
||||
>>> print(compare)
|
||||
>>> assert np.allclose(compare.raw, compare.combo)
|
||||
|
||||
References:
|
||||
.. [SO7753002] https://stackoverflow.com/questions/7753002/adding-combining-standard-deviations
|
||||
.. [SO2971315] https://math.stackexchange.com/questions/2971315/how-do-i-combine-standard-deviations-of-two-groups
|
||||
"""
|
||||
stats = [s1, s2]
|
||||
data = {
|
||||
"nobs": np.array([s["nobs"] for s in stats]),
|
||||
"mean": np.array([s["mean"] for s in stats]),
|
||||
"std": np.array([s["std"] for s in stats]),
|
||||
"min": np.array([s["min"] for s in stats]),
|
||||
"max": np.array([s["max"] for s in stats]),
|
||||
}
|
||||
return combine_stats_arrs(data)
|
||||
|
||||
|
||||
def combine_stats_arrs(data):
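    """
    Combine per-group stats arrays into stats for the pooled observations.

    The combined mean is the size-weighted mean of the group means, and the
    combined variance follows the standard pooling identity (ddof=0):

        var = (sum(n_i * var_i) + sum(n_i * (mean_i - mean) ** 2)) / sum(n_i)
    """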
|
||||
sizes = data["nobs"]
|
||||
means = data["mean"]
|
||||
stds = data["std"]
|
||||
mins = data["min"]
|
||||
maxs = data["max"]
|
||||
varis = stds * stds
|
||||
|
||||
# TODO: ddof
|
||||
# https://github.com/Erotemic/misc/blob/28cf797b9b0f8bd82e3ebee2f6d0a688ecee2838/learn/stats.py#L128
|
||||
|
||||
combo_size = sizes.sum()
|
||||
combo_mean = (sizes * means).sum() / combo_size
|
||||
|
||||
mean_deltas = means - combo_mean
|
||||
|
||||
sv = (sizes * varis).sum()
|
||||
sm = (sizes * (mean_deltas * mean_deltas)).sum()
|
||||
combo_vars = (sv + sm) / combo_size
|
||||
combo_std = np.sqrt(combo_vars)
|
||||
|
||||
combo_stats = {
|
||||
"nobs": combo_size,
|
||||
"mean": combo_mean,
|
||||
"std": combo_std,
|
||||
"min": mins.min(),
|
||||
"max": maxs.max(),
|
||||
}
|
||||
return combo_stats
|
|
@@ -0,0 +1,119 @@
|
|||
import pandas as pd
|
||||
import ubelt as ub
|
||||
|
||||
|
||||
def benchmark_analysis(
|
||||
rows,
|
||||
xlabel,
|
||||
group_labels,
|
||||
basis,
|
||||
):
|
||||
# xlabel = "size"
|
||||
# Set these to empty lists if they are not used
|
||||
# group_labels = {
|
||||
# "col": ["input"],
|
||||
# "hue": ["impl"],
|
||||
# "size": [],
|
||||
# }
|
||||
# group_keys = {}
|
||||
# for gname, labels in group_labels.items():
|
||||
# group_keys[gname + "_key"] = ub.repr2(
|
||||
# ub.dict_isect(params, labels), compact=1, si=1
|
||||
# )
|
||||
# key = ub.repr2(params, compact=1, si=1)
|
||||
|
||||
from process_tracker.result_analysis import SkillTracker
|
||||
|
||||
RECORD_ALL = 0
|
||||
|
||||
USE_OPENSKILL = True
|
||||
|
||||
metric_key = "time" if RECORD_ALL else "min"
|
||||
|
||||
# The rows define a long-form pandas data array.
|
||||
# Data in long-form makes it very easy to use seaborn.
|
||||
data = pd.DataFrame(rows)
|
||||
data = data.sort_values(metric_key)
|
||||
|
||||
if RECORD_ALL:
|
||||
# Show the min / mean if we record all
|
||||
min_times = data.groupby("key").min().rename({"time": "min"}, axis=1)
|
||||
mean_times = (
|
||||
data.groupby("key")[["time"]].mean().rename({"time": "mean"}, axis=1)
|
||||
)
|
||||
stats_data = pd.concat([min_times, mean_times], axis=1)
|
||||
stats_data = stats_data.sort_values("min")
|
||||
else:
|
||||
stats_data = data
|
||||
|
||||
if USE_OPENSKILL:
|
||||
# Track the "skill" of each method
|
||||
# The idea is that each setting of parameters is a game, and each
|
||||
# "impl" is a player. We rank the players by which is fastest, and
|
||||
# update their ranking according to the Weng-Lin Bayes ranking model.
|
||||
        # This does not take into account the fact that some "games" (i.e.
        # parameter settings) are more important than others, but it should
        # be fairly robust on average.
|
||||
skillboard = SkillTracker(basis["impl"])
|
||||
|
||||
other_keys = sorted(
|
||||
set(stats_data.columns)
|
||||
- {"key", "impl", "min", "mean", "hue_key", "size_key", "style_key"}
|
||||
)
|
||||
for params, variants in stats_data.groupby(other_keys):
|
||||
variants = variants.sort_values("mean")
|
||||
ranking = variants["impl"].reset_index(drop=True)
|
||||
|
||||
mean_speedup = variants["mean"].max() / variants["mean"]
|
||||
stats_data.loc[mean_speedup.index, "mean_speedup"] = mean_speedup
|
||||
min_speedup = variants["min"].max() / variants["min"]
|
||||
stats_data.loc[min_speedup.index, "min_speedup"] = min_speedup
|
||||
|
||||
if USE_OPENSKILL:
|
||||
skillboard.observe(ranking)
|
||||
|
||||
print("Statistics:")
|
||||
print(stats_data)
|
||||
|
||||
if USE_OPENSKILL:
|
||||
win_probs = skillboard.predict_win()
|
||||
win_probs = ub.sorted_vals(win_probs, reverse=True)
|
||||
print(
|
||||
"Aggregated Rankings = {}".format(
|
||||
ub.repr2(win_probs, nl=1, precision=4, align=":")
|
||||
)
|
||||
)
|
||||
|
||||
plot = True
|
||||
if plot:
|
||||
        # kwplot.autosns works well for IPython and script execution
        # (not sure about notebooks); plain seaborn is used here.
|
||||
import seaborn as sns
|
||||
|
||||
sns.set()
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
plotkw = {}
|
||||
for gname, labels in group_labels.items():
|
||||
if labels:
|
||||
plotkw[gname] = gname + "_key"
|
||||
|
||||
# Your variables may change
|
||||
# ax = plt.figure().gca()
|
||||
col = plotkw.pop("col")
|
||||
facet = sns.FacetGrid(data, col=col, sharex=False, sharey=False)
|
||||
facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, marker="o", **plotkw)
|
||||
facet.add_legend()
|
||||
# sns.lineplot(data=data, )
|
||||
# ax.set_title('JSON Benchmarks')
|
||||
# ax.set_xlabel('Size')
|
||||
# ax.set_ylabel('Time')
|
||||
# ax.set_xscale('log')
|
||||
# ax.set_yscale('log')
|
||||
|
||||
try:
|
||||
__IPYTHON__
|
||||
except NameError:
|
||||
plt.show()
|
|
@@ -0,0 +1,82 @@
|
|||
"""
|
||||
Main definition of the benchmarks
|
||||
"""
|
||||
import scriptconfig as scfg
|
||||
import ubelt as ub
|
||||
|
||||
from json_benchmarks import analysis, measures
|
||||
|
||||
|
||||
class CoreConfig(scfg.Config):
|
||||
"""
|
||||
Benchmark JSON implementations
|
||||
"""
|
||||
|
||||
default = {
|
||||
"mode": scfg.Value(
|
||||
"all",
|
||||
position=1,
|
||||
choices=["all", "single", "run", "analyze"],
|
||||
help=ub.paragraph(
|
||||
"""
|
||||
By default all benchmarks are run, saved, and aggregated
|
||||
with any other existing benchmarks for analysis and
|
||||
visualization.
|
||||
|
||||
In "single" mode, other existing benchmarks are ignord.
|
||||
|
||||
In "run" mode, the benchmarks are run, but no analysis is done.
|
||||
|
||||
In "analyze" mode, no benchmarks are run, but any existing
|
||||
benchmarks are loaded for analysis and visualization.
|
||||
"""
|
||||
),
|
||||
),
|
||||
"cache_dir": scfg.Value(
|
||||
None,
|
||||
help=ub.paragraph(
|
||||
"""
|
||||
Location for benchmark cache.
|
||||
Defaults to $XDG_CACHE/ujson/benchmark_results/
|
||||
"""
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
def normalize(self):
|
||||
dpath = self["cache_dir"]
|
||||
if dpath is None:
|
||||
dpath = ub.Path.appdir("ujson/benchmark_results")
|
||||
dpath = ub.Path(dpath)
|
||||
self["cache_dir"] = dpath
|
||||
|
||||
|
||||
def main(cmdline=True, **kwargs):
|
||||
"""
|
||||
Example:
|
||||
>>> import sys, ubelt
|
||||
>>> sys.path.append(ubelt.expandpath('~/code/ultrajson'))
|
||||
>>> from json_benchmarks.core import * # NOQA
|
||||
>>> import kwplot
|
||||
>>> kwplot.autosns()
|
||||
>>> cmdline = False
|
||||
>>> kwargs = {}
|
||||
>>> main(cmdline, **kwargs)
|
||||
"""
|
||||
config = CoreConfig(cmdline=cmdline, data=kwargs)
|
||||
dpath = config["cache_dir"]
|
||||
print(f"dpath={dpath}")
|
||||
|
||||
run = config["mode"] in {"all", "single", "run"}
|
||||
if run:
|
||||
result_fpath = measures.benchmark_json()
|
||||
print(f"result_fpath = {result_fpath!r}")
|
||||
result_fpaths = [result_fpath]
|
||||
|
||||
agg = config["mode"] not in {"single"}
|
||||
if agg:
|
||||
result_fpaths = list(dpath.glob("benchmarks*.json"))
|
||||
|
||||
analyze = config["mode"] in {"all", "single", "analyze"}
|
||||
if analyze:
|
||||
analysis.analyze_results(result_fpaths)
|
|
@@ -0,0 +1,120 @@
|
|||
import random
|
||||
import sys
|
||||
|
||||
import ubelt as ub
|
||||
|
||||
|
||||
def json_test_data_generators():
|
||||
"""
|
||||
Generates data for benchmarks with various sizes
|
||||
|
||||
Returns:
|
||||
Dict[str, callable]:
|
||||
a mapping from test data name to its generator
|
||||
|
||||
Example:
|
||||
>>> data_lut = json_test_data_generators()
|
||||
>>> size = 2
|
||||
>>> keys = sorted(set(data_lut) - {'Complex object'})
|
||||
>>> for key in keys:
|
||||
>>> func = data_lut[key]
|
||||
>>> test_object = func(size)
|
||||
>>> print('key = {!r}'.format(key))
|
||||
>>> print('test_object = {!r}'.format(test_object))
|
||||
"""
|
||||
data_lut = {}
|
||||
|
||||
    def _register_data(name):
        def _wrap(func):
            data_lut[name] = func
            # return the function so the decorated name stays usable
            return func

        return _wrap
|
||||
|
||||
# seed if desired
|
||||
# rng = random.Random(0)
|
||||
rng = random
|
||||
|
||||
@_register_data("Array with doubles")
|
||||
def array_with_doubles(size):
|
||||
test_object = [sys.maxsize * rng.random() for _ in range(size)]
|
||||
return test_object
|
||||
|
||||
@_register_data("Array with UTF-8 strings")
|
||||
def array_with_utf8_strings(size):
|
||||
utf8_string = (
|
||||
"نظام الحكم سلطاني وراثي "
|
||||
"في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
|
||||
" الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
|
||||
)
|
||||
test_object = [utf8_string for _ in range(size)]
|
||||
return test_object
|
||||
|
||||
@_register_data("Medium complex object")
|
||||
def medium_complex_object(size):
|
||||
user = {
|
||||
"userId": 3381293,
|
||||
"age": 213,
|
||||
"username": "johndoe",
|
||||
"fullname": "John Doe the Second",
|
||||
"isAuthorized": True,
|
||||
"liked": 31231.31231202,
|
||||
"approval": 31.1471,
|
||||
"jobs": [1, 2],
|
||||
"currJob": None,
|
||||
}
|
||||
friends = [user, user, user, user, user, user, user, user]
|
||||
test_object = [[user, friends] for _ in range(size)]
|
||||
return test_object
|
||||
|
||||
@_register_data("Array with True values")
|
||||
def true_values(size):
|
||||
test_object = [True for _ in range(size)]
|
||||
return test_object
|
||||
|
||||
@_register_data("Array of Dict[str, int]")
|
||||
def array_of_dict_string_int(size):
|
||||
test_object = [
|
||||
{str(rng.random() * 20): int(rng.random() * 1000000)} for _ in range(size)
|
||||
]
|
||||
return test_object
|
||||
|
||||
@_register_data("Dict of List[Dict[str, int]]")
|
||||
def dict_of_list_dict_str_int(size):
|
||||
keys = set()
|
||||
while len(keys) < size:
|
||||
key = str(rng.random() * 20)
|
||||
keys.add(key)
|
||||
test_object = {
|
||||
key: [
|
||||
{str(rng.random() * 20): int(rng.random() * 1000000)}
|
||||
for _ in range(256)
|
||||
]
|
||||
for key in keys
|
||||
}
|
||||
return test_object
|
||||
|
||||
@_register_data("Complex object")
|
||||
def complex_object(size):
|
||||
import json
|
||||
|
||||
        # TODO: might be better to register this file with setup.py or
        # download it via some mechanism
|
||||
try:
|
||||
dpath = ub.Path(__file__).parent
|
||||
fpath = dpath / "sample.json"
|
||||
if not fpath.exists():
|
||||
                raise Exception("sample.json not found")
|
||||
except Exception:
|
||||
import ujson
|
||||
|
||||
dpath = ub.Path(ujson.__file__).parent / "tests"
|
||||
fpath = dpath / "sample.json"
|
||||
if not fpath.exists():
|
||||
                raise Exception("sample.json not found")
|
||||
with open(fpath) as f:
|
||||
test_object = json.load(f)
|
||||
if size is not None:
|
||||
test_object = [test_object] * size
|
||||
return test_object
|
||||
|
||||
return data_lut
|
|
@@ -0,0 +1,99 @@
|
|||
"""
|
||||
Define the json libraries we are considering
|
||||
"""
|
||||
|
||||
KNOWN_LIBRARIES = [
|
||||
{"modname": "ujson", "distname": "ujson"},
|
||||
# {"modname": "nujson", "distname": "nujson"},
|
||||
# {"modname": "orjson", "distname": "orjson"},
|
||||
# {"modname": "simplejson", "distname": "simplejson"},
|
||||
{"modname": "json", "distname": "<stdlib>"},
|
||||
# {"modname": "simdjson", "distname": "pysimdjson"},
|
||||
# {"modname": "cysimdjson", "distname": "cysimdjson"},
|
||||
# {"modname": "libpy_simdjson", "distname": "libpy-simdjson"},
|
||||
]
|
||||
|
||||
KNOWN_MODNAMES = [info["modname"] for info in KNOWN_LIBRARIES]
|
||||
|
||||
|
||||
# TODO:
|
||||
# def distname_to_modnames(distname):
|
||||
# # TODO: nice way to switch between a module's import name and its distribution name
|
||||
# # References:
|
||||
# # https://stackoverflow.com/questions/49764802/get-module-name-programmatically-with-only-pypi-package-name/49764960#49764960
|
||||
# import distlib.database
|
||||
# distlib.database.DistributionPath().get_distribution(distname)
|
||||
# # import importlib.metadata
|
||||
# # importlib.metadata.metadata(distname)
|
||||
# # importlib.util.find_spec(modname)
|
||||
# # import simdjson
|
||||
# # import pkg_resources
|
||||
# # pkg_resources.get_distribution('pysimdjson')
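#
# A possible sketch (untested here; assumes Python >= 3.10, where
# importlib.metadata.packages_distributions is available):
#
# def distname_to_modnames(distname):
#     import importlib.metadata
#     # packages_distributions() maps top-level import names to the
#     # distributions that provide them; invert it to go the other way.
#     mapping = importlib.metadata.packages_distributions()
#     return [mod for mod, dists in mapping.items() if distname in dists]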
|
||||
|
||||
|
||||
class Compatibility:
    """
    Expose a common API for all tested implementations
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def lut_dumps(module):
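        # The benchmark treats cysimdjson and simdjson as parse-only (no
        # usable dumps), so return None and let callers skip them.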
|
||||
if module.__name__ == "cysimdjson":
|
||||
return None
|
||||
elif module.__name__ == "simdjson":
|
||||
return None
|
||||
else:
|
||||
return getattr(module, "dumps", None)
|
||||
|
||||
@staticmethod
|
||||
def lut_loads(module):
|
||||
if module.__name__ == "cysimdjson":
|
||||
parser = module.JSONParser()
|
||||
return parser.loads
|
||||
else:
|
||||
return getattr(module, "loads", None)
|
||||
|
||||
|
||||
def available_json_impls():
|
||||
"""
|
||||
Return a dictionary of information about each json implementation
|
||||
|
||||
Example:
|
||||
        >>> import ubelt as ub
        >>> from json_benchmarks.libraries import *  # NOQA
|
||||
>>> json_impls = available_json_impls()
|
||||
>>> print('json_impls = {}'.format(ub.repr2(json_impls, nl=1)))
|
||||
"""
|
||||
import importlib
|
||||
|
||||
import pkg_resources
|
||||
|
||||
known_libinfo = KNOWN_LIBRARIES
|
||||
json_impls = {}
|
||||
for libinfo in known_libinfo:
|
||||
modname = libinfo["modname"]
|
||||
distname = libinfo["distname"]
|
||||
try:
|
||||
module = importlib.import_module(modname)
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
mod_version = getattr(module, "__version__", None)
|
||||
if distname == "<stdlib>":
|
||||
pkg_version = mod_version
|
||||
else:
|
||||
pkg_version = pkg_resources.get_distribution(distname).version
|
||||
if mod_version is not None:
|
||||
assert mod_version == pkg_version
|
||||
version = pkg_version
|
||||
            dumps = Compatibility.lut_dumps(module)
|
||||
            loads = Compatibility.lut_loads(module)
|
||||
impl_info = {
|
||||
"module": module,
|
||||
"modname": modname,
|
||||
"distname": distname,
|
||||
"version": version,
|
||||
"dumps": dumps,
|
||||
"loads": loads,
|
||||
}
|
||||
json_impls[modname] = impl_info
|
||||
return json_impls
|
|
@@ -0,0 +1,132 @@
|
|||
"""
|
||||
The definitions of the measurements we want to take
|
||||
"""
|
||||
import json
|
||||
|
||||
import scriptconfig as scfg
|
||||
import ubelt as ub
|
||||
|
||||
from json_benchmarks import libraries
|
||||
|
||||
|
||||
class MeasurementConfig(scfg.Config):
|
||||
default = {
|
||||
"disable": scfg.Value(
|
||||
[],
|
||||
choices=libraries.KNOWN_MODNAMES,
|
||||
help=ub.paragraph(
|
||||
"""
|
||||
Remove specified libraries from the benchmarks
|
||||
"""
|
||||
),
|
||||
),
|
||||
"factor": scfg.Value(
|
||||
1.0,
|
||||
help=ub.paragraph(
|
||||
"""
|
||||
Specify as a fraction to speed up benchmarks for development /
|
||||
testing
|
||||
"""
|
||||
),
|
||||
),
|
||||
"cache_dir": scfg.Value(
|
||||
None,
|
||||
help=ub.paragraph(
|
||||
"""
|
||||
Location for benchmark cache.
|
||||
Defaults to $XDG_CACHE/ujson/benchmark_results/
|
||||
"""
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
def normalize(self):
|
||||
dpath = self["cache_dir"]
|
||||
if dpath is None:
|
||||
dpath = ub.Path.appdir("ujson/benchmark_results")
|
||||
dpath = ub.Path(dpath)
|
||||
self["cache_dir"] = dpath
|
||||
|
||||
|
||||
def benchmark_json():
|
||||
from json_benchmarks import benchmarker, datagen, libraries
|
||||
|
||||
json_impls = libraries.available_json_impls()
|
||||
data_lut = datagen.json_test_data_generators()
|
||||
|
||||
# These are the parameters that we benchmark over
|
||||
common_basis = {
|
||||
"impl": list(json_impls.keys()),
|
||||
"func": ["dumps", "loads"],
|
||||
}
|
||||
sized_basis = {
|
||||
"input": [
|
||||
"Array with doubles",
|
||||
"Array with UTF-8 strings",
|
||||
# 'Medium complex object',
|
||||
"Array with True values",
|
||||
"Array of Dict[str, int]",
|
||||
# 'Dict of List[Dict[str, int]]',
|
||||
# 'Complex object'
|
||||
],
|
||||
"size": [1, 2, 4, 8, 16, 32, 128, 256, 512, 1024, 2048, 4096, 8192],
|
||||
}
|
||||
predefined_basis = {
|
||||
"input": ["Complex object"],
|
||||
"size": [None],
|
||||
}
|
||||
|
||||
basis = [
|
||||
ub.dict_union(common_basis, predefined_basis),
|
||||
ub.dict_union(common_basis, sized_basis),
|
||||
]
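
    # Each basis dict expands into the Cartesian product of its value lists,
    # so every combination of impl, func, input, and size gets timed.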
|
||||
|
||||
# The Benchmarker class is a new experimental API around timerit to
|
||||
# abstract away the details of timing a process over a grid of parameters,
|
||||
# serializing the results, and aggregating results from disparate runs.
|
||||
benchmark = benchmarker.Benchmarker(
|
||||
name="bench_json",
|
||||
num=100,
|
||||
bestof=10,
|
||||
verbose=3,
|
||||
basis=basis,
|
||||
)
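
    # For reference, a single cell of this grid measured with raw timerit
    # would look roughly like the sketch below (method and data are the same
    # objects prepared in the loop that follows):
    #
    #   import timerit
    #   ti = timerit.Timerit(num=100, bestof=10)
    #   for timer in ti:
    #       with timer:
    #           method(data)
    #   print(ti.min(), ti.mean())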
|
||||
|
||||
def is_blocked(params):
|
||||
if params["input"] == "Complex object":
|
||||
# Some libraries can't handle the complex object
|
||||
if params["impl"] in {"orjson", "libpy_simdjson"}:
|
||||
return True
|
||||
|
||||
# For each variation of your experiment, create a row.
|
||||
for params in benchmark.iter_params():
|
||||
if is_blocked(params):
|
||||
continue
|
||||
# Make any modifications you need to compute input kwargs for each
|
||||
# method here.
|
||||
impl_info = json_impls[params["impl"]]
|
||||
params["impl_version"] = impl_info["version"]
|
||||
method = impl_info[params["func"]]
|
||||
if method is None:
|
||||
# Not all libraries implement all methods
|
||||
continue
|
||||
py_data = data_lut[params["input"]](params["size"])
|
||||
if params["func"] == "dumps":
|
||||
data = py_data
|
||||
elif params["func"] == "loads":
|
||||
data = json.dumps(py_data)
|
||||
# Timerit will run some user-specified number of loops.
|
||||
# and compute time stats with similar methodology to timeit
|
||||
try:
|
||||
for timer in benchmark.measure():
|
||||
# Put any setup logic you dont want to time here.
|
||||
# ...
|
||||
with timer:
|
||||
# Put the logic you want to time here
|
||||
method(data)
|
||||
except Exception as ex:
|
||||
print(f"Failed to time: ex={ex}. Skipping")
|
||||
|
||||
dpath = ub.Path.appdir("ujson/benchmark_results").ensuredir()
|
||||
result_fpath = benchmark.dump_in_dpath(dpath)
|
||||
return result_fpath
|