mirror of https://github.com/ultrajson/ultrajson.git synced 2024-05-11 09:36:08 +02:00

Compare commits


33 Commits

| Author | SHA1 | Message | Date |
|---|---|---|---|
| Jon Crall | 2566b2e094 | Merge a580404b46 into 04daf02b94 | 2024-04-06 07:07:55 +03:00 |
| Hugo van Kemenade | 04daf02b94 | [pre-commit.ci] pre-commit autoupdate (#624) | 2024-04-01 21:02:10 +03:00 |
| Hugo van Kemenade | 362c88a8b5 | Use Black mirror | 2024-04-01 11:58:55 -06:00 |
| pre-commit-ci[bot] | e96c8cfee3 | [pre-commit.ci] pre-commit autoupdate (updates: [github.com/asottile/pyupgrade: v3.15.0 → v3.15.2](https://github.com/asottile/pyupgrade/compare/v3.15.0...v3.15.2), [github.com/psf/black: 23.12.1 → 24.3.0](https://github.com/psf/black/compare/23.12.1...24.3.0), [github.com/PyCQA/flake8: 6.1.0 → 7.0.0](https://github.com/PyCQA/flake8/compare/6.1.0...7.0.0)) | 2024-04-01 17:55:50 +00:00 |
| Hugo van Kemenade | fbf7afff00 | Update pypa/cibuildwheel action to v2.17.0 (#623) | 2024-04-01 08:47:36 +03:00 |
| renovate[bot] | 08f14e5938 | Update pypa/cibuildwheel action to v2.17.0 | 2024-04-01 01:26:13 +00:00 |
| joncrall | a580404b46 | Fixed bug | 2024-01-17 12:12:43 -05:00 |
| pre-commit-ci[bot] | fd951a31f1 | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-31 01:30:43 +00:00 |
| joncrall | ac5b143712 | stats for fix-encode-surrogates | 2022-05-30 21:29:51 -04:00 |
| joncrall | 2f3070d74f | wip | 2022-05-29 19:11:03 -04:00 |
| joncrall | 9196d05d0b | wip | 2022-05-29 19:10:51 -04:00 |
| joncrall | 10ed58b708 | wip | 2022-05-29 19:08:43 -04:00 |
| joncrall | 9358a546e1 | name fix | 2022-05-29 19:08:37 -04:00 |
| pre-commit-ci[bot] | 7dbb203450 | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-29 23:08:03 +00:00 |
| joncrall | 80d096015e | Fix cysimdjson | 2022-05-29 19:07:46 -04:00 |
| joncrall | b0bc25ab3c | Add simd libraries | 2022-05-29 18:56:24 -04:00 |
| pre-commit-ci[bot] | 2b2aedb89f | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-29 17:27:14 +00:00 |
| joncrall | bd592fdd3b | Refactor core into measures and analysis submodules | 2022-05-29 13:26:59 -04:00 |
| pre-commit-ci[bot] | eee2a5ff66 | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-29 03:04:19 +00:00 |
| joncrall | 03ae1b8545 | use aggregate mean std to plot errors | 2022-05-28 23:04:04 -04:00 |
| pre-commit-ci[bot] | 283b5e5f9b | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-29 02:12:18 +00:00 |
| joncrall | a2fbbf10af | Merge branch 'benchmark_stats_v2' of github.com:Erotemic/ultrajson into benchmark_stats_v2 | 2022-05-28 22:12:04 -04:00 |
| joncrall | 3b29b746e3 | wip | 2022-05-28 22:12:02 -04:00 |
| pre-commit-ci[bot] | 470f440f3f | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-29 01:56:40 +00:00 |
| joncrall | 78cbf7ea71 | tweaks | 2022-05-28 21:56:26 -04:00 |
| pre-commit-ci[bot] | 3159c00889 | [pre-commit.ci] auto fixes from pre-commit.com hooks (for more information, see https://pre-commit.ci) | 2022-05-27 14:19:03 +00:00 |
| joncrall | 4d0f705d6d | wip | 2022-05-27 10:15:48 -04:00 |
| joncrall | a89bc27ff5 | add support for loads and complex object bench | 2022-05-26 16:27:26 -04:00 |
| joncrall | 68c4a55284 | Reorganize as separate module | 2022-05-26 16:03:23 -04:00 |
| joncrall | daf8913cc2 | log scale | 2022-05-26 10:09:33 -04:00 |
| joncrall | d036df252f | Port datasets | 2022-05-26 07:36:27 -04:00 |
| joncrall | da6428296d | Working on benchmarks with details statistical analysis | 2022-05-23 00:52:30 -04:00 |
| joncrall | 2c47332ee3 | working on benchmark framework with t-test analysis | 2022-05-09 13:54:30 -04:00 |
16 changed files with 2670 additions and 6 deletions

View File

@@ -37,7 +37,7 @@ jobs:
# https://github.com/pypa/cibuildwheel
- name: Build wheels
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.17.0
with:
output-dir: dist
# Options are supplied via environment variables:
@@ -89,7 +89,7 @@ jobs:
# https://github.com/pypa/cibuildwheel
- name: Build wheels
uses: pypa/cibuildwheel@v2.16.5
uses: pypa/cibuildwheel@v2.17.0
with:
output-dir: dist
# Options are supplied via environment variables:

View File

@@ -1,12 +1,12 @@
repos:
- repo: https://github.com/asottile/pyupgrade
rev: v3.15.0
rev: v3.15.2
hooks:
- id: pyupgrade
args: [--py38-plus]
- repo: https://github.com/psf/black
rev: 23.12.1
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.3.0
hooks:
- id: black
args: [--target-version=py38]
@@ -17,7 +17,7 @@ repos:
- id: isort
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
rev: 7.0.0
hooks:
- id: flake8
additional_dependencies: [flake8-2020, flake8-implicit-str-concat]

View File

View File

@@ -0,0 +1,8 @@
if __name__ == "__main__":
"""
CommandLine:
python -m json_benchmarks
"""
from json_benchmarks import core
core.main()

json_benchmarks/analysis.py (new file, 116 lines)
View File

@@ -0,0 +1,116 @@
"""
The analysis of the measurements
"""
import scriptconfig as scfg
import ubelt as ub
class AnalysisConfig(scfg.Config):
default = {
"cache_dir": scfg.Value(
None,
help=ub.paragraph(
"""
Location for benchmark cache.
Defaults to $XDG_CACHE/ujson/benchmark_results/
"""
),
),
}
def normalize(self):
dpath = self["cache_dir"]
if dpath is None:
dpath = ub.Path.appdir("ujson/benchmark_results")
dpath = ub.Path(dpath)
self["cache_dir"] = dpath
def analyze_results(result_fpaths):
import json
from json_benchmarks import benchmarker
from json_benchmarks.benchmarker import util_stats
results = []
for fpath in ub.ProgIter(result_fpaths, desc="load results"):
data = json.loads(fpath.read_text())
for row in data["rows"]:
result = benchmarker.BenchmarkerResult.load(fpath)
results.extend(result.to_result_list())
RECORD_ALL = 0
metric_key = "time" if RECORD_ALL else "mean_time"
# results = benchmark.result.to_result_list()
analysis = benchmarker.result_analysis.ResultAnalysis(
results,
metrics=[metric_key],
params=["impl", "impl_version"],
metric_objectives={
"min_time": "min",
"mean_time": "min",
"time": "min",
},
)
analysis.analysis()
table = analysis.table
stats_table = util_stats.aggregate_stats(
table, suffix="_time", group_keys=["name", "impl_version"]
)
single_size = stats_table[
(stats_table["size"] == 256) | stats_table["size"].isnull()
]
# single_size_combo = aggregate_stats(single_size, None)
single_size_combo = util_stats.aggregate_stats(
single_size, suffix="_time", group_keys=["name", "impl_version"]
)
param_group = ["impl", "impl_version"]
single_size_combo["calls/sec"] = 1 / single_size_combo["mean_time"]
# _single_size_combo = single_size_combo.copy()
time_piv = single_size_combo.pivot(["input", "func"], param_group, "mean_time")
hz_piv = 1 / time_piv
# hzstr_piv = (1 / time_piv).applymap(lambda x: f"{x:,.02f}")
print("Table for size=256")
# print(hzstr_piv.to_markdown())
print(hz_piv.to_markdown(floatfmt=",.02f"))
print("")
print("Above metrics are in call/sec, larger is better.")
speedup_piv = hz_piv / hz_piv["json"].values
print(speedup_piv.to_markdown(floatfmt=",.02g"))
analysis.abalate(param_group)
# benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)
xlabel = "size"
# Set these to empty lists if they are not used
group_labels = {
"fig": ["input"],
"col": ["func"],
# "fig": [],
# "col": ["func" "input"],
"hue": ["impl", "impl_version"],
"size": [],
}
import kwplot
kwplot.autosns()
self = analysis # NOQA
data = stats_table
plots = analysis.plot(
xlabel,
metric_key,
group_labels,
xscale="log",
yscale="log",
data=data,
)
plots
kwplot.show_if_requested()
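
The AnalysisConfig defined at the top of this file is a small scriptconfig wrapper around the cache location. A minimal usage sketch, assuming scriptconfig's standard no-argument Config constructor (the printed path is just the default that normalize() resolves from the help text above):

from json_benchmarks.analysis import AnalysisConfig

config = AnalysisConfig()   # accept the defaults (cache_dir=None)
config.normalize()          # resolves cache_dir to the XDG cache app dir
print(config["cache_dir"])  # e.g. ~/.cache/ujson/benchmark_results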

View File

@@ -0,0 +1,68 @@
"""
A helper module for executing, serializing, combining, and comparing benchmarks
"""
__mkinit__ = """
# Autogenerate this file
mkinit ~/code/ultrajson/json_benchmarks/benchmarker/__init__.py -w
"""
__version__ = "0.1.0"
from json_benchmarks.benchmarker import (
benchmarker,
process_context,
result_analysis,
util_json,
util_stats,
visualize,
)
from json_benchmarks.benchmarker.benchmarker import (
Benchmarker,
BenchmarkerConfig,
BenchmarkerResult,
)
from json_benchmarks.benchmarker.process_context import ProcessContext
from json_benchmarks.benchmarker.result_analysis import (
DEFAULT_METRIC_TO_OBJECTIVE,
Result,
ResultAnalysis,
SkillTracker,
)
from json_benchmarks.benchmarker.util_json import (
ensure_json_serializable,
find_json_unserializable,
indexable_allclose,
)
from json_benchmarks.benchmarker.util_stats import (
aggregate_stats,
combine_stats,
combine_stats_arrs,
stats_dict,
)
from json_benchmarks.benchmarker.visualize import benchmark_analysis
__all__ = [
"Benchmarker",
"BenchmarkerConfig",
"BenchmarkerResult",
"DEFAULT_METRIC_TO_OBJECTIVE",
"ProcessContext",
"Result",
"ResultAnalysis",
"SkillTracker",
"aggregate_stats",
"benchmark_analysis",
"benchmarker",
"combine_stats",
"combine_stats_arrs",
"ensure_json_serializable",
"find_json_unserializable",
"indexable_allclose",
"process_context",
"result_analysis",
"stats_dict",
"util_json",
"util_stats",
"visualize",
]

View File

@@ -0,0 +1,233 @@
import json
from dataclasses import dataclass
import numpy as np
import timerit
import ubelt as ub
from json_benchmarks.benchmarker.process_context import ProcessContext
@dataclass
class BenchmarkerConfig:
name: str = None
num: int = 100
bestof: int = 10
class BenchmarkerResult:
"""
Serialization for a single benchmark result
"""
def __init__(self, context, rows):
self.context = context
self.rows = rows
def __json__(self):
data = {
"type": "benchmark_result",
"context": self.context,
"rows": self.rows,
}
return data
@classmethod
def from_json(cls, data):
assert data["type"] == "benchmark_result"
self = cls(data["context"], data["rows"])
return self
@classmethod
def load(cls, fpath):
with open(fpath) as file:
data = json.load(file)
self = cls.from_json(data)
return self
def to_result_list(self):
"""
Returns a list of result objects suitable for ResultAnalysis
Returns:
List[Result]
"""
from json_benchmarks.benchmarker import result_analysis
results = []
for row in self.rows:
result = result_analysis.Result(
name=row["name"],
metrics=row["metrics"],
params=row["params"].copy(),
)
machine = self.context["machine"]
assert not ub.dict_isect(result.params, machine)
result.params.update(machine)
results.append(result)
return results
class Benchmarker:
"""
Helper to organize the execution and serialization of a benchmark
Example:
>>> import numpy as np
>>> impl_lut = {
>>> 'numpy': np.sum,
>>> 'builtin': sum,
>>> }
>>> def data_lut(params):
>>> item = 42 if params['dtype'] == 'int' else 42.0
>>> data = [item] * params['size']
>>> return data
>>> basis = {
>>> 'impl': ['builtin', 'numpy'],
>>> 'size': [10, 10000],
>>> 'dtype': ['int', 'float'],
>>> }
>>> self = Benchmarker(name='demo', num=10, bestof=3, basis=basis)
>>> for params in self.iter_params():
>>> impl = impl_lut[params['impl']]
>>> data = data_lut(params)
>>> for timer in self.measure():
>>> with timer:
>>> impl(data)
>>> print('self.result = {}'.format(ub.repr2(self.result.__json__(), sort=0, nl=2, precision=8)))
>>> dpath = ub.Path.appdir('benchmarker/demo').ensuredir()
>>> self.dump_in_dpath(dpath)
"""
def __init__(self, basis={}, verbose=1, **kwargs):
self.basis = basis
self.config = BenchmarkerConfig(**kwargs)
self.ti = timerit.Timerit(
num=self.config.num,
bestof=self.config.bestof,
verbose=verbose,
)
self.context = ProcessContext(name=self.config.name)
self.rows = []
self.RECORD_ALL = 0
self.result = None
def dump_in_dpath(self, dpath):
dpath = ub.Path(dpath)
timestamp = self.context.obj["stop_timestamp"]
fname = f"benchmarks_{self.config.name}_{timestamp}.json"
fpath = dpath / fname
with open(fpath, "w") as file:
json.dump(self.result.__json__(), file)
return fpath
def iter_params(self):
self.context.start()
if isinstance(self.basis, dict):
grid_iter = ub.named_product(self.basis)
else:
grid_iter = ub.flatten([ub.named_product(b) for b in self.basis])
for params in grid_iter:
self.params = params
self.key = ub.repr2(params, compact=1, si=1)
yield params
obj = self.context.stop()
self.result = BenchmarkerResult(obj, self.rows)
def measure(self):
yield from self.ti.reset(self.key)
rows = self.rows
ti = self.ti
key = self.key
params = self.params
times = ti.robust_times()
if self.RECORD_ALL:
for time in times:
metrics = {
"time": time,
}
row = {
"name": key,
"metrics": metrics,
"params": params,
}
rows.append(row)
else:
from json_benchmarks.benchmarker import util_stats
times = np.array(ti.robust_times())
metrics = util_stats.stats_dict(times, "_time")
row = {
"metrics": metrics,
"params": params,
"name": key,
}
rows.append(row)
def _test_demo():
import numpy as np
from json_benchmarks.benchmarker import BenchmarkerResult, result_analysis
from json_benchmarks.benchmarker.benchmarker import Benchmarker
impl_lut = {
"numpy": np.sum,
"builtin": sum,
}
def data_lut(params):
item = 42 if params["dtype"] == "int" else 42.0
data = [item] * params["size"]
return data
basis = {
"impl": ["builtin", "numpy"],
"size": [10, 10000],
"dtype": ["int", "float"],
}
dpath = ub.Path.appdir("benchmarker/agg_demo").delete().ensuredir()
def run_one_benchmark():
self = Benchmarker(name="agg_demo", num=10, bestof=3, basis=basis)
for params in self.iter_params():
impl = impl_lut[params["impl"]]
data = data_lut(params)
for timer in self.measure():
with timer:
impl(data)
fpath = self.dump_in_dpath(dpath)
return fpath
# Run the benchmark multiple times
fpaths = []
for _ in range(5):
fpath = run_one_benchmark()
fpaths.append(fpath)
results = []
for fpath in fpaths:
data = json.loads(fpath.read_text())
for row in data["rows"]:
result = BenchmarkerResult.load(fpath)
results.extend(result.to_result_list())
analysis = result_analysis.ResultAnalysis(
results,
metrics=["min", "mean"],
params=["impl"],
metric_objectives={
"min": "min",
"mean": "min",
},
)
analysis.analysis()
# single_df = pd.DataFrame(data['rows'])
# context = data['context']
# single_df

View File

@@ -0,0 +1,123 @@
import platform
import socket
import sys
import ubelt as ub
class ProcessContext:
"""
Context manager to track the context under which a result was computed
Example:
>>> from json_benchmarks.benchmarker.process_context import * # NOQA
>>> self = ProcessContext()
>>> obj = self.start().stop()
>>> print('obj = {}'.format(ub.repr2(obj, nl=2)))
"""
def __init__(self, name=None, args=None, config=None):
if args is None:
args = sys.argv
self.obj = {
"type": "process_context",
"name": name,
"args": args,
"config": config,
"machine": None,
"start_timestamp": None,
"stop_timestamp": None,
}
def _timestamp(self):
import datetime
timestamp = (
datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat()
)
timestamp = timestamp.replace(":", "")
# timestamp = ub.timestamp()
return timestamp
def _hostinfo(self):
return {
"host": socket.gethostname(),
"user": ub.Path(ub.userhome()).name,
# 'cwd': os.getcwd(),
}
def _osinfo(self):
(
uname_system,
_,
uname_release,
uname_version,
_,
uname_processor,
) = platform.uname()
return {
"os_name": uname_system,
"os_release": uname_release,
"os_version": uname_version,
"arch": uname_processor,
}
def _pyinfo(self):
return {
"py_impl": platform.python_implementation(),
"py_version": sys.version.replace("\n", ""),
}
def _meminfo(self):
import psutil
# TODO: could collect memory info at start and stop and intermediate
# stages. Here we just want info that is static wrt to the machine.
# For now, just get the total available.
svmem_info = psutil.virtual_memory()
return {
"mem_total": svmem_info.total,
}
def _cpuinfo(self):
import cpuinfo
_cpu_info = cpuinfo.get_cpu_info()
cpu_info = {
"cpu_brand": _cpu_info["brand_raw"],
}
return cpu_info
def _machine(self):
return ub.dict_union(
self._hostinfo(),
self._meminfo(),
self._cpuinfo(),
self._osinfo(),
self._pyinfo(),
)
def start(self):
self.obj.update(
{
"machine": self._machine(),
"start_timestamp": self._timestamp(),
"stop_timestamp": None,
}
)
return self
def stop(self):
self.obj.update(
{
"stop_timestamp": self._timestamp(),
}
)
return self.obj
def __enter__(self):
return self.start()
def __exit__(self, a, b, c):
self.stop()

View File

@@ -0,0 +1,1089 @@
import itertools as it
import math
import warnings
import numpy as np
import pandas as pd
import scipy
import scipy.stats # NOQA
import ubelt as ub
# a list of common objectives
DEFAULT_METRIC_TO_OBJECTIVE = {
"time": "min",
"ap": "max",
"acc": "max",
"f1": "max",
"mcc": "max",
#
"loss": "min",
"brier": "min",
}
class Result(ub.NiceRepr):
"""
Storage of names, parameters, and quality metrics for a single experiment.
Attributes:
name (str | None):
Name of the experiment. Optional. This is unused in the analysis.
(i.e. names will never be used computationally. Use them for keys)
params (Dict[str, object]): configuration of the experiment.
This is a dictionary mapping a parameter name to its value.
metrics (Dict[str, float]): quantitative results of the experiment
This is a dictionary for each quality metric computed on this
result.
meta (Dict | None): any other metadata about this result.
This is unused in the analysis.
Example:
>>> self = Result.demo(rng=32)
>>> print('self = {}'.format(self))
self = <Result(name=53f57161,f1=0.33,acc=0.75,param1=1,param2=6.67,param3=a)>
Example:
>>> self = Result.demo(mode='alt', rng=32)
>>> print('self = {}'.format(self))
"""
def __init__(self, name, params, metrics, meta=None):
self.name = name
self.params = params
self.metrics = metrics
self.meta = meta
def to_dict(self):
row = ub.dict_union({"name": self.name}, self.metrics, self.params)
return row
def __nice__(self):
row = self.to_dict()
text = ub.repr2(row, compact=True, precision=2, sort=0)
return text
@classmethod
def demo(cls, mode="null", rng=None):
import string
import kwarray
import numpy as np
rng = kwarray.ensure_rng(rng)
if mode == "null":
# The null hypothesis should generally be true here,
# there is no relation between the results and parameters
demo_param_space = {
"param1": list(range(3)),
"param2": np.linspace(0, 10, 10),
"param3": list(string.ascii_lowercase[0:3]),
}
params = {k: rng.choice(b) for k, b in demo_param_space.items()}
metrics = {
"f1": rng.rand(),
"acc": rng.rand(),
}
elif mode == "alt":
# The alternative hypothesis should be true here: there is a
# relationship between the results and two of the params.
from scipy.special import expit
params = {
"u": rng.randint(0, 1 + 1),
"v": rng.randint(-1, 1 + 1),
"x": rng.randint(-2, 3 + 1),
"y": rng.randint(-1, 2 + 1),
"z": rng.randint(-0, 3 + 1),
}
noise = np.random.randn() * 1
r = 3 * params["x"] + params["y"] ** 2 + 0.3 * params["z"] ** 3
acc = expit(r / 20 + noise)
metrics = {
"acc": acc,
}
else:
raise KeyError(mode)
name = ub.hash_data(params)[0:8]
self = cls(name, params, metrics)
return self
class ResultAnalysis(ub.NiceRepr):
"""
Groups and runs stats on results
Runs statistical tests on sets of configuration-metrics pairs
Attributes:
results (List[Result]): list of results
ignore_metrics (Set[str]): metrics to ignore
ignore_params (Set[str]): parameters to ignore
metric_objectives (Dict[str, str]):
indicate if each metric should be maximized "max" or minimized
"min"
metrics (List[str]):
only consider these metrics
params (List[str]):
if given, only consider these params
abalation_orders (Set[int]):
The number of parameters to be held constant in each statistical
grouping. Defaults to 1, so it groups together results where 1
variable is held constant. Including 2 will include pairwise
settings of parameters to be held constant. Using -1 or -2 means
all but 1 or 2 parameters will be held constant, respectively.
default_objective (str):
assume max or min for unknown metrics
Example:
>>> self = ResultAnalysis.demo()
>>> self.analysis()
Example:
>>> self = ResultAnalysis.demo(num=5000, mode='alt')
>>> self.analysis()
Example:
>>> # Given a list of experiments, configs, and results
>>> # Create a ResultAnalysis object
>>> results = ResultAnalysis([
>>> Result('expt0', {'param1': 2, 'param3': 'b'}, {'f1': 0.75}),
>>> Result('expt1', {'param1': 0, 'param3': 'c'}, {'f1': 0.92}),
>>> Result('expt2', {'param1': 1, 'param3': 'b'}, {'f1': 0.77}),
>>> Result('expt3', {'param1': 1, 'param3': 'a'}, {'f1': 0.67}),
>>> Result('expt4', {'param1': 0, 'param3': 'c'}, {'f1': 0.98}),
>>> Result('expt5', {'param1': 2, 'param3': 'a'}, {'f1': 0.86}),
>>> Result('expt6', {'param1': 1, 'param3': 'c'}, {'f1': 0.77}),
>>> Result('expt7', {'param1': 1, 'param3': 'c'}, {'f1': 0.41}),
>>> Result('expt8', {'param1': 1, 'param3': 'a'}, {'f1': 0.64}),
>>> Result('expt9', {'param1': 0, 'param3': 'b'}, {'f1': 0.95}),
>>> ])
>>> # Calling the analysis method prints something like the following
>>> results.analysis()
PARAMETER 'param1' - f1
=======================
f1 mean std max min num best
param1
0 0.950 0.030000 0.98 0.92 3.0 0.98
2 0.805 0.077782 0.86 0.75 2.0 0.86
1 0.652 0.147377 0.77 0.41 5.0 0.77
ANOVA hypothesis (roughly): the param 'param1' has no effect on the metric
Reject this hypothesis if the p value is less than a threshold
Rank-ANOVA: p=0.0397
Mean-ANOVA: p=0.0277
Pairwise T-Tests
Is param1=0 about as good as param1=2?
ttest_ind: p=0.2058
Is param1=1 about as good as param1=2?
ttest_ind: p=0.1508
PARAMETER 'param3' - f1
=======================
f1 mean std max min num best
param3
c 0.770000 0.255734 0.98 0.41 4.0 0.98
b 0.823333 0.110151 0.95 0.75 3.0 0.95
a 0.723333 0.119304 0.86 0.64 3.0 0.86
ANOVA hypothesis (roughly): the param 'param3' has no effect on the metric
Reject this hypothesis if the p value is less than a threshold
Rank-ANOVA: p=0.5890
Mean-ANOVA: p=0.8145
Pairwise T-Tests
Is param3=b about as good as param3=c?
ttest_ind: p=0.7266
Is param3=a about as good as param3=b?
ttest_ind: p=0.3466
ttest_rel: p=0.3466
Is param3=a about as good as param3=c?
ttest_ind: p=0.7626
"""
def __init__(
self,
results,
metrics=None,
params=None,
ignore_params=None,
ignore_metrics=None,
metric_objectives=None,
abalation_orders={1},
default_objective="max",
p_threshold=0.05,
):
self.results = results
if ignore_metrics is None:
ignore_metrics = set()
if ignore_params is None:
ignore_params = set()
self.ignore_params = ignore_params
self.ignore_metrics = ignore_metrics
self.abalation_orders = abalation_orders
self.default_objective = default_objective
# encode if we want to maximize or minimize a metric
if metric_objectives is None:
metric_objectives = {}
self.metric_objectives = DEFAULT_METRIC_TO_OBJECTIVE.copy()
self.metric_objectives.update(metric_objectives)
self.params = params
self.metrics = metrics
self.statistics = None
self.p_threshold = p_threshold
self._description = {}
self._description["built"] = False
self._description["num_results"] = len(self.results)
def __nice__(self):
return ub.repr2(self._description, si=1, sv=1)
@classmethod
def demo(cls, num=10, mode="null", rng=None):
import kwarray
rng = kwarray.ensure_rng(rng)
results = [Result.demo(mode=mode, rng=rng) for _ in range(num)]
if mode == "null":
self = cls(results, metrics={"f1", "acc"})
else:
self = cls(results, metrics={"acc"})
return self
def run(self):
self.build()
self.report()
def analysis(self):
# alias for run
return self.run()
@ub.memoize_property
def table(self):
rows = [r.to_dict() for r in self.results]
table = pd.DataFrame(rows)
return table
def metric_table(self):
rows = [r.to_dict() for r in self.results]
table = pd.DataFrame(rows)
return table
@ub.memoize_property
def varied(self):
config_rows = [r.params for r in self.results]
sentinel = object()
# pd.DataFrame(config_rows).channels
varied = dict(ub.varied_values(config_rows, default=sentinel, min_variations=1))
# remove nans
varied = {
k: {v for v in vs if not (isinstance(v, float) and math.isnan(v))}
for k, vs in varied.items()
}
varied = {k: vs for k, vs in varied.items() if len(vs)}
return varied
def abalation_groups(self, param_group, k=2):
"""
Return groups where the specified parameter(s) are varied, but all
other non-ignored parameters are held the same.
Args:
param_group (str | List[str]):
One or more parameters that are allowed to vary
k (int):
minimum number of items a group must contain to be returned
Returns:
List[DataFrame]:
a list of subsets of the table in which all non-ignored
parameters other than the specified ones are held constant.
Example:
>>> self = ResultAnalysis.demo()
>>> param = 'param2'
>>> self.abalation_groups(param)
"""
if not ub.iterable(param_group):
param_group = [param_group]
table = self.table
config_rows = [r.params for r in self.results]
config_keys = list(map(set, config_rows))
# if self.params:
# config_keys = list(self.params)
if self.ignore_params:
config_keys = [c - self.ignore_params for c in config_keys]
isect_params = set.intersection(*config_keys)
other_params = sorted(isect_params - set(param_group))
groups = []
for key, group in table.groupby(other_params, dropna=False):
if len(group) >= k:
groups.append(group)
return groups
def _objective_is_ascending(self, metric_key):
"""
Args:
metric_key (str): the metric in question
Returns:
bool:
True if we should minimize the objective (lower is better)
False if we should maximize the objective (higher is better)
"""
objective = self.metric_objectives.get(metric_key, None)
if objective is None:
warnings.warn(f"warning assume {self.default_objective} for {metric_key=}")
objective = self.default_objective
ascending = objective == "min"
return ascending
def abalate(self, param_group):
"""
TODO:
rectify with test-group
Example:
>>> self = ResultAnalysis.demo(100)
>>> param = 'param2'
>>> # xdoctest: +REQUIRES(module:openskill)
>>> self.abalate(param)
>>> self = ResultAnalysis.demo()
>>> param_group = ['param2', 'param3']
>>> # xdoctest: +REQUIRES(module:openskill)
>>> self.abalate(param_group)
"""
if self.table is None:
self.table = self.build_table()
if not ub.iterable(param_group):
param_group = [param_group]
# For hashable generic dictionary
from collections import namedtuple
gd = namedtuple("config", param_group)
# from types import SimpleNamespace
param_unique_vals_ = (
self.table[param_group].drop_duplicates().to_dict("records")
)
param_unique_vals = [gd(**d) for d in param_unique_vals_]
# param_unique_vals = {p: self.table[p].unique().tolist() for p in param_group}
score_improvements = ub.ddict(list)
scored_obs = []
skillboard = SkillTracker(param_unique_vals)
groups = self.abalation_groups(param_group, k=2)
for group in groups:
for metric_key in self.metrics:
ascending = self._objective_is_ascending(metric_key)
group = group.sort_values(metric_key, ascending=ascending)
subgroups = group.groupby(param_group)
if ascending:
best_idx = subgroups[metric_key].idxmax()
else:
best_idx = subgroups[metric_key].idxmin()
best_group = group.loc[best_idx]
best_group = best_group.sort_values(metric_key, ascending=ascending)
for x1, x2 in it.product(best_group.index, best_group.index):
if x1 != x2:
r1 = best_group.loc[x1]
r2 = best_group.loc[x2]
k1 = gd(**r1[param_group])
k2 = gd(**r2[param_group])
diff = r1[metric_key] - r2[metric_key]
score_improvements[(k1, k2, metric_key)].append(diff)
# metric_vals = best_group[metric_key].values
# diffs = metric_vals[None, :] - metric_vals[:, None]
best_group.set_index(param_group)
# best_group[param_group]
# best_group[metric_key].diff()
scored_ranking = best_group[param_group + [metric_key]].reset_index(
drop=True
)
scored_obs.append(scored_ranking)
ranking = [
gd(**d) for d in scored_ranking[param_group].to_dict("records")
]
skillboard.observe(ranking)
print(
"skillboard.ratings = {}".format(
ub.repr2(skillboard.ratings, nl=1, align=":")
)
)
win_probs = skillboard.predict_win()
print(f"win_probs = {ub.repr2(win_probs, nl=1)}")
for key, improves in score_improvements.items():
k1, k2, metric_key = key
improves = np.array(improves)
pos_delta = improves[improves > 0]
print(
f"\nWhen {k1} is better than {k2}, the improvement in {metric_key} is"
)
print(pd.DataFrame([pd.Series(pos_delta).describe().T]))
return scored_obs
def test_group(self, param_group, metric_key):
"""
Get stats for a particular metric / constant group
Args:
param_group (List[str]): group of parameters to hold constant.
metric_key (str): The metric to test.
Returns:
dict
# TODO : document these stats clearly and accurately
Example:
>>> self = ResultAnalysis.demo(num=100)
>>> print(self.table)
>>> param_group = ['param2', 'param1']
>>> metric_key = 'f1'
>>> stats_row = self.test_group(param_group, metric_key)
>>> print('stats_row = {}'.format(ub.repr2(stats_row, nl=2, sort=0, precision=2)))
"""
param_group_name = ",".join(param_group)
stats_row = {
"param_name": param_group_name,
"metric": metric_key,
}
# param_values = varied[param_name]
# stats_row['param_values'] = param_values
ascending = self._objective_is_ascending(metric_key)
# Find all items with this particular param value
value_to_metric_group = {}
value_to_metric_stats = {}
value_to_metric = {}
varied_cols = sorted(self.varied.keys())
# Not sure if this is the right name, these are the other param keys
# that we are not directly investigating, but might have an impact.
# We use these to select comparable rows for pairwise t-tests
nuisance_cols = sorted(set(self.varied.keys()) - set(param_group))
for param_value, group in self.table.groupby(param_group):
metric_group = group[["name", metric_key] + varied_cols]
metric_vals = metric_group[metric_key]
metric_vals = metric_vals.dropna()
if len(metric_vals) > 0:
metric_stats = metric_vals.describe()
value_to_metric_stats[param_value] = metric_stats
value_to_metric_group[param_value] = metric_group
value_to_metric[param_value] = metric_vals.values
moments = pd.DataFrame(value_to_metric_stats).T
moments = moments.sort_values("mean", ascending=ascending)
moments.index.name = param_group_name
moments.columns.name = metric_key
ranking = moments["mean"].index.to_list()
param_to_rank = ub.invert_dict(dict(enumerate(ranking)))
# Determine a set of value pairs to do pairwise comparisons on
value_pairs = ub.oset()
# value_pairs.update(
# map(frozenset, ub.iter_window(moments.index, 2)))
value_pairs.update(
map(
frozenset,
ub.iter_window(
moments.sort_values("mean", ascending=ascending).index, 2
),
)
)
# https://en.wikipedia.org/wiki/Kruskal%E2%80%93Wallis_one-way_analysis_of_variance
# If the researcher can make the assumptions of an identically
# shaped and scaled distribution for all groups, except for any
# difference in medians, then the null hypothesis is that the
# medians of all groups are equal, and the alternative
# hypothesis is that at least one population median of one
# group is different from the population median of at least one
# other group.
try:
anova_krus_result = scipy.stats.kruskal(*value_to_metric.values())
except ValueError:
anova_krus_result = scipy.stats.stats.KruskalResult(np.nan, np.nan)
# https://en.wikipedia.org/wiki/One-way_analysis_of_variance
# The One-Way ANOVA tests the null hypothesis, which states
# that samples in all groups are drawn from populations with
# the same mean values
if len(value_to_metric) > 1:
anova_1way_result = scipy.stats.f_oneway(*value_to_metric.values())
else:
anova_1way_result = scipy.stats.stats.F_onewayResult(np.nan, np.nan)
stats_row["anova_rank_H"] = anova_krus_result.statistic
stats_row["anova_rank_p"] = anova_krus_result.pvalue
stats_row["anova_mean_F"] = anova_1way_result.statistic
stats_row["anova_mean_p"] = anova_1way_result.pvalue
stats_row["moments"] = moments
pair_stats_list = []
for pair in value_pairs:
pair_stats = {}
param_val1, param_val2 = pair
metric_vals1 = value_to_metric[param_val1]
metric_vals2 = value_to_metric[param_val2]
rank1 = param_to_rank[param_val1]
rank2 = param_to_rank[param_val2]
pair_stats["winner"] = param_val1 if rank1 < rank2 else param_val2
pair_stats["value1"] = param_val1
pair_stats["value2"] = param_val2
pair_stats["n1"] = len(metric_vals1)
pair_stats["n2"] = len(metric_vals2)
TEST_ONLY_FOR_DIFFERENCE = True
if TEST_ONLY_FOR_DIFFERENCE:
if ascending:
# We want to minimize the metric
alternative = "less" if rank1 < rank2 else "greater"
else:
# We want to maximize the metric
alternative = "greater" if rank1 < rank2 else "less"
else:
alternative = "two-sided"
ind_kw = dict(
equal_var=False,
alternative=alternative,
)
ttest_ind_result = scipy.stats.ttest_ind(
metric_vals1, metric_vals2, **ind_kw
)
if 0:
from benchmarker.benchmarker import stats_dict
stats1 = stats_dict(metric_vals1)
stats2 = stats_dict(metric_vals2)
scipy.stats.ttest_ind_from_stats(
stats1["mean"],
stats1["std"],
stats1["nobs"],
stats2["mean"],
stats2["std"],
stats2["nobs"],
**ind_kw,
)
# metric_vals1, metric_vals2, equal_var=False)
scipy.stats.ttest_ind_from_stats
pair_stats["ttest_ind"] = ttest_ind_result
# Do relative checks, need to find comparable subgroups
metric_group1 = value_to_metric_group[param_val1]
metric_group2 = value_to_metric_group[param_val2]
nuisance_vals1 = metric_group1[nuisance_cols]
nuisance_vals2 = metric_group2[nuisance_cols]
nk_to_group1 = dict(list(nuisance_vals1.groupby(nuisance_cols)))
nk_to_group2 = dict(list(nuisance_vals2.groupby(nuisance_cols)))
common = set(nk_to_group1) & set(nk_to_group2)
comparable_indexes1 = []
comparable_indexes2 = []
if common:
for nk in common:
group1 = nk_to_group1[nk]
group2 = nk_to_group2[nk]
# TODO: Not sure if taking the product of everything within
# the comparable group is correct or not. I think it is ok.
for i, j in it.product(group1.index, group2.index):
comparable_indexes1.append(i)
comparable_indexes2.append(j)
comparable_groups1 = metric_group1.loc[comparable_indexes1, metric_key]
comparable_groups2 = metric_group2.loc[comparable_indexes2, metric_key]
# Does this need to have the values aligned?
# I think that is the case given my understanding of paired
# t-tests, but the docs need a PR to make that clearer.
ttest_rel_result = scipy.stats.ttest_rel(
comparable_groups1, comparable_groups2
)
pair_stats["n_common"] = len(common)
pair_stats["ttest_rel"] = ttest_rel_result
pair_stats_list.append(pair_stats)
stats_row["pairwise"] = pair_stats_list
return stats_row
def build(self):
import itertools as it
if len(self.results) < 2:
raise Exception("need at least 2 results")
varied = self.varied.copy()
if self.ignore_params:
for k in self.ignore_params:
varied.pop(k, None)
if self.params:
varied = ub.dict_isect(varied, self.params)
# Experimental:
# Find Auto-abalation groups
# TODO: when the group size is -1, instead of showing all of the group
# settings, for each group setting do the k=1 analysis within that group
varied_param_names = list(varied.keys())
num_varied_params = len(varied)
held_constant_orders = {
num_varied_params + i if i < 0 else i for i in self.abalation_orders
}
held_constant_orders = [i for i in held_constant_orders if i > 0]
held_constant_groups = []
for k in held_constant_orders:
held_constant_groups.extend(
list(map(list, it.combinations(varied_param_names, k)))
)
if self.metrics is None:
avail_metrics = set.intersection(
*[set(r.metrics.keys()) for r in self.results]
)
metrics_of_interest = sorted(avail_metrics - set(self.ignore_metrics))
else:
metrics_of_interest = self.metrics
self.metrics_of_interest = metrics_of_interest
self._description["metrics_of_interest"] = metrics_of_interest
self._description["num_groups"] = len(held_constant_groups)
# Analyze the impact of each parameter
self.statistics = statistics = []
for param_group in held_constant_groups:
for metric_key in metrics_of_interest:
stats_row = self.test_group(param_group, metric_key)
statistics.append(stats_row)
self.stats_table = pd.DataFrame(
[
ub.dict_diff(d, {"pairwise", "param_values", "moments"})
for d in self.statistics
]
)
if len(self.stats_table):
self.stats_table = self.stats_table.sort_values("anova_rank_p")
self._description["built"] = True
def report(self):
stat_groups = ub.group_items(self.statistics, key=lambda x: x["param_name"])
stat_groups_items = list(stat_groups.items())
# Modify this order to change the grouping pattern
grid = ub.named_product(
{
"stat_group_item": stat_groups_items,
"metrics": self.metrics_of_interest,
}
)
for grid_item in grid:
self._report_one(grid_item)
print(self.stats_table)
def _report_one(self, grid_item):
p_threshold = self.p_threshold
metric_key = grid_item["metrics"]
stat_groups_item = grid_item["stat_group_item"]
param_name, stat_group = stat_groups_item
stats_row = ub.group_items(stat_group, key=lambda x: x["metric"])[metric_key][0]
title = f"PARAMETER: {param_name} - METRIC: {metric_key}"
print("\n\n")
print(title)
print("=" * len(title))
print(stats_row["moments"])
anova_rank_p = stats_row["anova_rank_p"]
anova_mean_p = stats_row["anova_mean_p"]
# Roughly speaking
print("")
print(f"ANOVA: If p is low, the param {param_name!r} might have an effect")
print(
ub.color_text(
f" Rank-ANOVA: p={anova_rank_p:0.8f}",
"green" if anova_rank_p < p_threshold else None,
)
)
print(
ub.color_text(
f" Mean-ANOVA: p={anova_mean_p:0.8f}",
"green" if anova_mean_p < p_threshold else None,
)
)
print("")
print("Pairwise T-Tests")
for pairstat in stats_row["pairwise"]:
# Is this backwards?
value1 = pairstat["value1"]
value2 = pairstat["value2"]
winner = pairstat["winner"]
if value2 == winner:
value1, value2 = value2, value1
print(
f" If p is low, {param_name}={value1} may outperform {param_name}={value2}."
)
if "ttest_ind" in pairstat:
ttest_ind_result = pairstat["ttest_ind"]
print(
ub.color_text(
f" ttest_ind: p={ttest_ind_result.pvalue:0.8f}",
"green" if ttest_ind_result.pvalue < p_threshold else None,
)
)
if "ttest_rel" in pairstat:
n_common = pairstat["n_common"]
ttest_rel_result = pairstat["ttest_rel"]
print(
ub.color_text(
f" ttest_rel: p={ttest_rel_result.pvalue:0.8f}, n_pairs={n_common}",
"green" if ttest_rel_result.pvalue < p_threshold else None,
)
)
def conclusions(self):
conclusions = []
for stat in self.statistics:
param_name = stat["param_name"]
metric = stat["metric"]
for pairstat in stat["pairwise"]:
value1 = pairstat["value1"]
value2 = pairstat["value2"]
winner = pairstat["winner"]
if value2 == winner:
value1, value2 = value2, value1
pvalue = stat = pairstat["ttest_ind"].pvalue
txt = f"p={pvalue:0.8f}, If p is low, {param_name}={value1} may outperform {value2} on {metric}."
conclusions.append(txt)
return conclusions
def plot(self, xlabel, metric_key, group_labels, data=None, **kwargs):
"""
Args:
group_labels (dict):
Tells seaborn what attributes to use to distinguish curves like
hue, size, marker. Also can contain "col" for use with
FacetGrid, and "fig" to separate different configurations into
different figures.
Returns:
List[Dict]:
A list for each figure containing info about that figure for any
postprocessing.
Example:
>>> self = ResultAnalysis.demo(num=1000, mode='alt')
>>> self.analysis()
>>> print('self = {}'.format(self))
>>> print('self.varied = {}'.format(ub.repr2(self.varied, nl=1)))
>>> # xdoctest: +REQUIRES(module:kwplot)
>>> import kwplot
>>> kwplot.autosns()
>>> xlabel = 'x'
>>> metric_key = 'acc'
>>> group_labels = {
>>> 'fig': ['u'],
>>> 'col': ['y', 'v'],
>>> 'hue': ['z'],
>>> 'size': [],
>>> }
>>> kwargs = {'xscale': 'log', 'yscale': 'log'}
>>> self.plot(xlabel, metric_key, group_labels, **kwargs)
"""
print("Init seaborn and pyplot")
import seaborn as sns
sns.set()
from matplotlib import pyplot as plt # NOQA
print("Starting plot")
if data is None:
data = self.table
data = data.sort_values(metric_key)
print("Compute group labels")
for gname, labels in group_labels.items():
if len(labels):
new_col = []
for row in data[labels].to_dict("records"):
item = ub.repr2(row, compact=1, si=1)
new_col.append(item)
gkey = gname + "_key"
data[gkey] = new_col
plot_kws = {
"x": xlabel,
"y": metric_key,
}
for gname, labels in group_labels.items():
if labels:
plot_kws[gname] = gname + "_key"
# Your variables may change
# ax = plt.figure().gca()
fig_params = plot_kws.pop("fig", [])
facet_kws = {
"sharex": True,
"sharey": True,
}
# facet_kws['col'] = plot_kws.pop("col", None)
# facet_kws['row'] = plot_kws.pop("row", None)
# if not facet_kws['row']:
# facet_kws['col_wrap'] = 5
plot_kws["row"] = plot_kws.get("row", None)
# if not plot_kws['row']:
# plot_kws['col_wrap'] = 5
if not fig_params:
groups = [("", data)]
else:
groups = data.groupby(fig_params)
if "marker" not in plot_kws:
plot_kws["marker"] = "o"
# We will want to overwrite this with our own std estimate
plot_kws["ci"] = "sd"
# err_style='band',
# err_kws=None,
# Use a consistent pallete across plots
unique_hues = data["hue_key"].unique()
palette = ub.dzip(unique_hues, sns.color_palette(n_colors=len(unique_hues)))
plot_kws["palette"] = palette
# kwplot.close_figures()
plots = []
base_fnum = 1
print("Start plots")
# hack
hack_groups = [(k, v) for k, v in groups if k != "input=Complex object"]
for fnum, (fig_key, group) in enumerate(hack_groups, start=base_fnum):
# TODO: seaborn doesn't give us any option to reuse an existing
# figure or even specify what its handle should be. A patch should
# be submitted to add that feature, but in the meantime work around
# it and use the figures they give us.
# fig = plt.figure(fnum)
# fig.clf()
facet = sns.relplot(
data=group,
kind="line",
# kind="scatter",
facet_kws=facet_kws,
**plot_kws,
)
from json_benchmarks.benchmarker.util_stats import aggregate_stats
# print(f'facet._col_var={facet._col_var}')
if facet._col_var is not None:
facet_data_groups = dict(list(facet.data.groupby(facet._col_var)))
else:
facet_data_groups = None
# facet_data_group_iter = iter(facet_data_groups.keys())
for ax in facet.axes.ravel():
col_key = ax.get_title().split("=", 1)[-1].strip()
# col_key = next(facet_data_group_iter)
if facet_data_groups is not None:
col_data = facet_data_groups[col_key]
else:
col_data = facet.data
col_data["mean_time"]
col_data["std_time"]
xlabel = plot_kws["x"]
ylabel = plot_kws["y"]
subgroups = col_data.groupby(plot_kws["hue"])
for subgroup_key, subgroup in subgroups:
# combine stds in multiple groups on the x and manually draw errors
suffix = "_" + ylabel.partition("_")[2]
if "mean_" in ylabel:
std_label = ylabel.replace("mean_", "std_")
combo_group = aggregate_stats(
subgroup, suffix=suffix, group_keys=[plot_kws["x"]]
)
_xdata = combo_group[xlabel].values
_ydata_mean = combo_group[ylabel].values
_ydata_std = combo_group[std_label].values
std_label = ylabel.replace("mean_", "std_")
# Plot bars 3 standard deviations from the mean to
# get a 99.7% interval
num_std = 3
y_data_min = _ydata_mean - num_std * _ydata_std
y_data_max = _ydata_mean + num_std * _ydata_std
spread_alpha = 0.3
color = palette[subgroup_key]
ax.fill_between(
_xdata,
y_data_min,
y_data_max,
alpha=spread_alpha,
color=color,
zorder=1,
)
# zorder=0)
xscale = kwargs.get("xscale", None)
yscale = kwargs.get("yscale", None)
for ax in facet.axes.ravel():
if xscale is not None:
try:
ax.set_xscale(xscale)
except ValueError:
pass
if yscale is not None:
try:
ax.set_yscale(yscale)
except ValueError:
pass
fig = facet.figure
fig.suptitle(fig_key)
fig.tight_layout()
# facet = sns.FacetGrid(group, **facet_kws)
# facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, **plot_kws)
# facet.add_legend()
plot = {
"fig": fig,
"facet": facet,
}
plots.append(plot)
# if fnum >= 1:
# break
# print("Adjust plots")
# for plot in plots:
# xscale = kwargs.get("xscale", None)
# yscale = kwargs.get("yscale", None)
# facet = plot["facet"]
# facet_data_groups = dict(list(facet.data.groupby(facet._col_var)))
# facet_data_group_iter = iter(facet_data_groups.keys())
# for ax in facet.axes.ravel():
# if xscale is not None:
# try:
# ax.set_xscale(xscale)
# except ValueError:
# pass
# if yscale is not None:
# try:
# ax.set_yscale(yscale)
# except ValueError:
# pass
print("Finish")
return plots
class SkillTracker:
"""
Wrapper around openskill
Args:
player_ids (List[T]):
a list of ids (usually ints) used to represent each player
Example:
>>> # xdoctest: +REQUIRES(module:openskill)
>>> self = SkillTracker([1, 2, 3, 4, 5])
>>> self.observe([2, 3]) # Player 2 beat player 3.
>>> self.observe([1, 2, 5, 3]) # Player 3 didnt play this round.
>>> self.observe([2, 3, 4, 5, 1]) # Everyone played, player 2 won.
>>> win_probs = self.predict_win()
>>> print('win_probs = {}'.format(ub.repr2(win_probs, nl=1, precision=2)))
win_probs = {
1: 0.20,
2: 0.21,
3: 0.19,
4: 0.20,
5: 0.20,
}
Requirements:
openskill
"""
def __init__(self, player_ids):
import openskill
self.player_ids = player_ids
self.ratings = {m: openskill.Rating() for m in player_ids}
# self.observations = []
def predict_win(self):
"""
Estimate the probability that a particular player will win given the
current ratings.
Returns:
Dict[T, float]: mapping from player ids to win probabilities
"""
from openskill import predict_win
teams = [[p] for p in list(self.ratings.keys())]
ratings = [[r] for r in self.ratings.values()]
probs = predict_win(ratings)
win_probs = {team[0]: prob for team, prob in zip(teams, probs)}
return win_probs
def observe(self, ranking):
"""
After simulating a round, pass the ranked order of who won
(winner is first, loser is last) to this function, and it
updates the rankings.
Args:
ranking (List[T]):
ranking of all the players that played in this round
winners are at the front (0-th place) of the list.
"""
import openskill
# self.observations.append(ranking)
ratings = self.ratings
team_standings = [[r] for r in ub.take(ratings, ranking)]
# new_values = openskill.rate(team_standings) # Not inplace
# new_ratings = [openskill.Rating(*new[0]) for new in new_values]
new_team_ratings = openskill.rate(team_standings)
new_ratings = [new[0] for new in new_team_ratings]
ratings.update(ub.dzip(ranking, new_ratings))

View File

@@ -0,0 +1,240 @@
import copy
import json
import pathlib
from collections import OrderedDict
import numpy as np
import ubelt as ub
def ensure_json_serializable(dict_, normalize_containers=False, verbose=0):
"""
Attempt to convert common types (e.g. numpy) into something JSON compliant
Convert numpy and tuples into lists
Args:
normalize_containers (bool, default=False):
if True, normalizes dict containers to be standard python
structures.
Example:
>>> data = ub.ddict(lambda: int)
>>> data['foo'] = ub.ddict(lambda: int)
>>> data['bar'] = np.array([1, 2, 3])
>>> data['foo']['a'] = 1
>>> data['foo']['b'] = (1, np.array([1, 2, 3]), {3: np.int32(3), 4: np.float16(1.0)})
>>> dict_ = data
>>> print(ub.repr2(data, nl=-1))
>>> assert list(find_json_unserializable(data))
>>> result = ensure_json_serializable(data, normalize_containers=True)
>>> print(ub.repr2(result, nl=-1))
>>> assert not list(find_json_unserializable(result))
>>> assert type(result) is dict
"""
dict_ = copy.deepcopy(dict_)
def _norm_container(c):
if isinstance(c, dict):
# Cast to a normal dictionary
if isinstance(c, OrderedDict):
if type(c) is not OrderedDict:
c = OrderedDict(c)
else:
if type(c) is not dict:
c = dict(c)
return c
walker = ub.IndexableWalker(dict_)
for prefix, value in walker:
if isinstance(value, tuple):
new_value = list(value)
walker[prefix] = new_value
elif isinstance(value, np.ndarray):
new_value = value.tolist()
walker[prefix] = new_value
elif isinstance(value, (np.integer)):
new_value = int(value)
walker[prefix] = new_value
elif isinstance(value, (np.floating)):
new_value = float(value)
walker[prefix] = new_value
elif isinstance(value, (np.complexfloating)):
new_value = complex(value)
walker[prefix] = new_value
elif isinstance(value, pathlib.Path):
new_value = str(value)
walker[prefix] = new_value
elif hasattr(value, "__json__"):
new_value = value.__json__()
walker[prefix] = new_value
elif normalize_containers:
if isinstance(value, dict):
new_value = _norm_container(value)
walker[prefix] = new_value
if normalize_containers:
# normalize the outer layer
dict_ = _norm_container(dict_)
return dict_
def find_json_unserializable(data, quickcheck=False):
"""
Recurse through json datastructure and find any component that
causes a serialization error. Record the location of these errors
in the datastructure as we recurse through the call tree.
Args:
data (object): data that should be json serializable
quickcheck (bool): if True, check the entire datastructure assuming
it's OK before doing the python-based recursive logic.
Returns:
List[Dict]: list of "bad part" dictionaries containing items
'value' - the value that caused the serialization error
'loc' - which contains a list of key/indexes that can be used
to look up the location of the unserializable value.
If the "loc" is a list, then it indicates a rare case where
a key in a dictionary is causing the serialization error.
Example:
>>> part = ub.ddict(lambda: int)
>>> part['foo'] = ub.ddict(lambda: int)
>>> part['bar'] = np.array([1, 2, 3])
>>> part['foo']['a'] = 1
>>> # Create a dictionary with two unserializable parts
>>> data = [1, 2, {'nest1': [2, part]}, {frozenset({'badkey'}): 3, 2: 4}]
>>> parts = list(find_json_unserializable(data))
>>> print('parts = {}'.format(ub.repr2(parts, nl=1)))
>>> # Check expected structure of bad parts
>>> assert len(parts) == 2
>>> part = parts[1]
>>> assert list(part['loc']) == [2, 'nest1', 1, 'bar']
>>> # We can use the "loc" to find the bad value
>>> for part in parts:
>>> # "loc" is a list of directions containing which keys/indexes
>>> # to traverse at each descent into the data structure.
>>> directions = part['loc']
>>> curr = data
>>> special_flag = False
>>> for key in directions:
>>> if isinstance(key, list):
>>> # special case for bad keys
>>> special_flag = True
>>> break
>>> else:
>>> # normal case for bad values
>>> curr = curr[key]
>>> if special_flag:
>>> assert part['data'] in curr.keys()
>>> assert part['data'] is key[1]
>>> else:
>>> assert part['data'] is curr
"""
needs_check = True
if quickcheck:
try:
# Might be a more efficient way to do this check. We duplicate a lot of
# work by doing the check for unserializable data this way.
json.dumps(data)
except Exception:
# If there is unserializable data, find out where it is.
# is_serializable = False
pass
else:
# is_serializable = True
needs_check = False
if needs_check:
# mode = 'new'
# if mode == 'new':
scalar_types = (int, float, str, type(None))
container_types = (tuple, list, dict)
serializable_types = scalar_types + container_types
walker = ub.IndexableWalker(data)
for prefix, value in walker:
*root, key = prefix
if not isinstance(key, scalar_types):
# Special case where a dict key is the error value
# Purposely make loc non-hashable so it's not confused with
# an address. All we can know in this case is that the key
# is at this level; there is no concept of where.
yield {"loc": root + [[".keys", key]], "data": key}
elif not isinstance(value, serializable_types):
yield {"loc": prefix, "data": value}
def indexable_allclose(dct1, dct2, return_info=False):
"""
Walks through two nested data structures and ensures that everything is
roughly the same.
Args:
dct1: a nested indexable item
dct2: a nested indexable item
Example:
>>> dct1 = {
>>> 'foo': [1.222222, 1.333],
>>> 'bar': 1,
>>> 'baz': [],
>>> }
>>> dct2 = {
>>> 'foo': [1.22222, 1.333],
>>> 'bar': 1,
>>> 'baz': [],
>>> }
>>> assert indexable_allclose(dct1, dct2)
"""
walker1 = ub.IndexableWalker(dct1)
walker2 = ub.IndexableWalker(dct2)
flat_items1 = [
(path, value)
for path, value in walker1
if not isinstance(value, walker1.indexable_cls) or len(value) == 0
]
flat_items2 = [
(path, value)
for path, value in walker2
if not isinstance(value, walker1.indexable_cls) or len(value) == 0
]
flat_items1 = sorted(flat_items1)
flat_items2 = sorted(flat_items2)
if len(flat_items1) != len(flat_items2):
info = {"faillist": ["length mismatch"]}
final_flag = False
else:
passlist = []
faillist = []
for t1, t2 in zip(flat_items1, flat_items2):
p1, v1 = t1
p2, v2 = t2
assert p1 == p2
flag = v1 == v2
if not flag:
if (
isinstance(v1, float)
and isinstance(v2, float)
and np.isclose(v1, v2)
):
flag = True
if flag:
passlist.append(p1)
else:
faillist.append((p1, v1, v2))
final_flag = len(faillist) == 0
info = {
"passlist": passlist,
"faillist": faillist,
}
if return_info:
return final_flag, info
else:
return final_flag

View File

@@ -0,0 +1,235 @@
import numpy as np
import ubelt as ub
def __tabulate_issue():
# MWE for tabulate issue
# The decimals are not aligned when using "," in the floatfmt
import tabulate
data = [
[
13213.2,
3213254.23,
432432.231,
],
[432432.0, 432.3, 3.2],
]
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=",.02f"))
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=".02f"))
def __groupby_issue():
# MWE of an issue with pandas groupby
import pandas as pd
data = pd.DataFrame(
[
{"p1": "a", "p2": 1, "p3": 0},
{"p1": "a", "p2": 1, "p3": 0},
{"p1": "a", "p2": 2, "p3": 0},
{"p1": "b", "p2": 2, "p3": 0},
{"p1": "b", "p2": 1, "p3": 0},
{"p1": "b", "p2": 1, "p3": 0},
{"p1": "b", "p2": 1, "p3": 0},
]
)
by = "p1"
key = list(data.groupby(by))[0][0]
result = {"by": by, "key": key, "type(key)": type(key)}
print(f"result = {ub.repr2(result, nl=1)}")
assert not ub.iterable(
key
), "`by` is specified as a scalar, so getting `key` as a scalar makes sense"
by = ["p1"]
key = list(data.groupby(by))[0][0]
result = {"by": by, "key": key, "type(key)": type(key)}
print(f"result = {ub.repr2(result, nl=1)}")
assert not ub.iterable(key), (
"`by` is specified as a list of scalars (with one element), but we "
"still get `key` as a scalar. This does not make sense"
)
by = ["p1", "p2"]
key = list(data.groupby(by))[0][0]
result = {"by": by, "key": key, "type(key)": type(key)}
print(f"result = {ub.repr2(result, nl=1)}")
assert ub.iterable(key), (
"`by` is specified as a list of scalars (with multiple elements), "
"and we still get `key` as a tuple of values. This makes sense"
)
def aggregate_stats(data, suffix="", group_keys=None):
"""
Given columns interpreted as containing stats, aggregate those stats
within each group. For each aggregated row, any non-group, non-stat column
with a consistent value across the group is kept as-is;
otherwise that column is set to None.
Args:
data (DataFrame):
a data frame with columns: 'mean', 'std', 'min', 'max', and 'nobs'
(possibly with a suffix)
suffix (str):
if the nobs, std, mean, min, and max have a suffix, specify it
group_keys (List[str]):
columns to group by before aggregating; defaults to all non-stat columns
Returns:
DataFrame:
New dataframe where grouped rows have been aggregated into a single
row.
Example:
>>> import sys, ubelt
>>> sys.path.append(ubelt.expandpath('~/code/ultrajson'))
>>> from json_benchmarks.benchmarker.util_stats import * # NOQA
>>> import pandas as pd
>>> data = pd.DataFrame([
>>> #
>>> {'mean': 8, 'std': 1, 'min': 0, 'max': 1, 'nobs': 2, 'p1': 'a', 'p2': 1},
>>> {'mean': 6, 'std': 2, 'min': 0, 'max': 1, 'nobs': 3, 'p1': 'a', 'p2': 1},
>>> {'mean': 7, 'std': 3, 'min': 0, 'max': 2, 'nobs': 5, 'p1': 'a', 'p2': 2},
>>> {'mean': 5, 'std': 4, 'min': 0, 'max': 3, 'nobs': 7, 'p1': 'a', 'p2': 1},
>>> #
>>> {'mean': 3, 'std': 1, 'min': 0, 'max': 20, 'nobs': 6, 'p1': 'b', 'p2': 1},
>>> {'mean': 0, 'std': 2, 'min': 0, 'max': 20, 'nobs': 26, 'p1': 'b', 'p2': 2},
>>> {'mean': 9, 'std': 3, 'min': 0, 'max': 20, 'nobs': 496, 'p1': 'b', 'p2': 1},
>>> #
>>> {'mean': 5, 'std': 0, 'min': 0, 'max': 1, 'nobs': 2, 'p1': 'c', 'p2': 2},
>>> {'mean': 5, 'std': 0, 'min': 0, 'max': 1, 'nobs': 7, 'p1': 'c', 'p2': 2},
>>> #
>>> {'mean': 5, 'std': 2, 'min': 0, 'max': 2, 'nobs': 7, 'p1': 'd', 'p2': 2},
>>> #
>>> {'mean': 5, 'std': 2, 'min': 0, 'max': 2, 'nobs': 7, 'p1': 'e', 'p2': 1},
>>> ])
>>> print(data)
>>> new_data = aggregate_stats(data)
>>> print(new_data)
>>> new_data1 = aggregate_stats(data, group_keys=['p1'])
>>> print(new_data1)
>>> new_data2 = aggregate_stats(data, group_keys=['p2'])
>>> print(new_data2)
"""
import pandas as pd
# Stats groupings
raw_stats_cols = ["nobs", "std", "mean", "max", "min"]
stats_cols = [c + suffix for c in raw_stats_cols]
mapper = dict(zip(stats_cols, raw_stats_cols))
unmapper = dict(zip(raw_stats_cols, stats_cols))
non_stats_cols = list(ub.oset(data.columns) - stats_cols)
if group_keys is None:
group_keys = non_stats_cols
non_group_keys = list(ub.oset(non_stats_cols) - group_keys)
new_rows = []
for group_vals, group in list(data.groupby(group_keys)):
# hack, is this a pandas bug in 1.4.1? Is it fixed? (Not in 1.4.2)
if isinstance(group_keys, list) and len(group_keys) == 1:
# For some reason, when we specify group keys as a list of one
# element, we get a squeezed value out
group_vals = (group_vals,)
stat_data = group[stats_cols].rename(mapper, axis=1)
new_stats = combine_stats_arrs(stat_data)
new_time_stats = ub.map_keys(unmapper, new_stats)
new_row = ub.dzip(group_keys, group_vals)
if non_group_keys:
for k in non_group_keys:
unique_vals = group[k].unique()
if len(unique_vals) == 1:
new_row[k] = unique_vals[0]
else:
new_row[k] = None
new_row.update(new_time_stats)
new_rows.append(new_row)
new_data = pd.DataFrame(new_rows)
return new_data
def stats_dict(data, suffix=""):
stats = {
"nobs" + suffix: len(data),
"mean" + suffix: data.mean(),
"std" + suffix: data.std(),
"min" + suffix: data.min(),
"max" + suffix: data.max(),
}
return stats
def combine_stats(s1, s2):
"""
Helper for combining mean and standard deviation of multiple measurements
Args:
s1 (dict): stats dict containing nobs, mean, std, min, and max
s2 (dict): stats dict containing nobs, mean, std, min, and max
Example:
>>> basis = {
>>> 'nobs1': [1, 10, 100, 10000],
>>> 'nobs2': [1, 10, 100, 10000],
>>> }
>>> for params in ub.named_product(basis):
>>> data1 = np.random.rand(params['nobs1'])
>>> data2 = np.random.rand(params['nobs2'])
>>> data3 = np.hstack([data1, data2])
>>> s1 = stats_dict(data1)
>>> s2 = stats_dict(data2)
>>> s3 = stats_dict(data3)
>>> # Check that our combo works
>>> combo_s3 = combine_stats(s1, s2)
>>> compare = pd.DataFrame({'raw': s3, 'combo': combo_s3})
>>> print(compare)
>>> assert np.allclose(compare.raw, compare.combo)
References:
.. [SO7753002] https://stackoverflow.com/questions/7753002/adding-combining-standard-deviations
.. [SO2971315] https://math.stackexchange.com/questions/2971315/how-do-i-combine-standard-deviations-of-two-groups
"""
stats = [s1, s2]
data = {
"nobs": np.array([s["nobs"] for s in stats]),
"mean": np.array([s["mean"] for s in stats]),
"std": np.array([s["std"] for s in stats]),
"min": np.array([s["min"] for s in stats]),
"max": np.array([s["max"] for s in stats]),
}
return combine_stats_arrs(data)
def combine_stats_arrs(data):
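    """
    Combine groups of stats arrays into a single set of aggregate stats.
    A sketch of the pooled formulas implemented below (ddof=0, matching the
    TODO note): with group sizes n_i, means m_i, and stds s_i,
        combo_mean = sum(n_i * m_i) / sum(n_i)
        combo_var = (sum(n_i * s_i ** 2) + sum(n_i * (m_i - combo_mean) ** 2)) / sum(n_i)
    Args:
        data (dict): arrays keyed by 'nobs', 'mean', 'std', 'min', and 'max'
    Returns:
        dict: the combined 'nobs', 'mean', 'std', 'min', and 'max'
    """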
sizes = data["nobs"]
means = data["mean"]
stds = data["std"]
mins = data["min"]
maxs = data["max"]
varis = stds * stds
# TODO: ddof
# https://github.com/Erotemic/misc/blob/28cf797b9b0f8bd82e3ebee2f6d0a688ecee2838/learn/stats.py#L128
combo_size = sizes.sum()
combo_mean = (sizes * means).sum() / combo_size
mean_deltas = means - combo_mean
sv = (sizes * varis).sum()
sm = (sizes * (mean_deltas * mean_deltas)).sum()
combo_vars = (sv + sm) / combo_size
combo_std = np.sqrt(combo_vars)
combo_stats = {
"nobs": combo_size,
"mean": combo_mean,
"std": combo_std,
"min": mins.min(),
"max": maxs.max(),
}
return combo_stats


@@ -0,0 +1,119 @@
import pandas as pd
import ubelt as ub
def benchmark_analysis(
rows,
xlabel,
group_labels,
basis,
):
# xlabel = "size"
# Set these to empty lists if they are not used
# group_labels = {
# "col": ["input"],
# "hue": ["impl"],
# "size": [],
# }
# group_keys = {}
# for gname, labels in group_labels.items():
# group_keys[gname + "_key"] = ub.repr2(
# ub.dict_isect(params, labels), compact=1, si=1
# )
# key = ub.repr2(params, compact=1, si=1)
from process_tracker.result_analysis import SkillTracker
    RECORD_ALL = 0
    USE_OPENSKILL = True
metric_key = "time" if RECORD_ALL else "min"
# The rows define a long-form pandas data array.
# Data in long-form makes it very easy to use seaborn.
data = pd.DataFrame(rows)
data = data.sort_values(metric_key)
if RECORD_ALL:
# Show the min / mean if we record all
min_times = data.groupby("key").min().rename({"time": "min"}, axis=1)
mean_times = (
data.groupby("key")[["time"]].mean().rename({"time": "mean"}, axis=1)
)
stats_data = pd.concat([min_times, mean_times], axis=1)
stats_data = stats_data.sort_values("min")
else:
stats_data = data
if USE_OPENSKILL:
# Track the "skill" of each method
# The idea is that each setting of parameters is a game, and each
# "impl" is a player. We rank the players by which is fastest, and
# update their ranking according to the Weng-Lin Bayes ranking model.
        # This does not take into account that some "games" (i.e. parameter
        # settings) are more important than others, but it should be fairly
        # robust on average.
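        # A condensed sketch of the lifecycle used below (player names and
        # probabilities are hypothetical):
        #
        #   skillboard = SkillTracker(['ujson', 'json'])
        #   skillboard.observe(['ujson', 'json'])   # ujson ranked fastest
        #   skillboard.predict_win()  # -> e.g. {'ujson': 0.6, 'json': 0.4}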
skillboard = SkillTracker(basis["impl"])
other_keys = sorted(
set(stats_data.columns)
- {"key", "impl", "min", "mean", "hue_key", "size_key", "style_key"}
)
for params, variants in stats_data.groupby(other_keys):
variants = variants.sort_values("mean")
ranking = variants["impl"].reset_index(drop=True)
mean_speedup = variants["mean"].max() / variants["mean"]
stats_data.loc[mean_speedup.index, "mean_speedup"] = mean_speedup
min_speedup = variants["min"].max() / variants["min"]
stats_data.loc[min_speedup.index, "min_speedup"] = min_speedup
if USE_OPENSKILL:
skillboard.observe(ranking)
print("Statistics:")
print(stats_data)
if USE_OPENSKILL:
win_probs = skillboard.predict_win()
win_probs = ub.sorted_vals(win_probs, reverse=True)
print(
"Aggregated Rankings = {}".format(
ub.repr2(win_probs, nl=1, precision=4, align=":")
)
)
plot = True
if plot:
# import seaborn as sns
# kwplot autosns works well for IPython and script execution.
# not sure about notebooks.
import seaborn as sns
sns.set()
from matplotlib import pyplot as plt
plotkw = {}
for gname, labels in group_labels.items():
if labels:
plotkw[gname] = gname + "_key"
# Your variables may change
# ax = plt.figure().gca()
col = plotkw.pop("col")
facet = sns.FacetGrid(data, col=col, sharex=False, sharey=False)
facet.map_dataframe(sns.lineplot, x=xlabel, y=metric_key, marker="o", **plotkw)
facet.add_legend()
# sns.lineplot(data=data, )
# ax.set_title('JSON Benchmarks')
# ax.set_xlabel('Size')
# ax.set_ylabel('Time')
# ax.set_xscale('log')
# ax.set_yscale('log')
try:
__IPYTHON__
except NameError:
plt.show()
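# A hedged usage sketch (argument shapes inferred from the commented hints at
# the top of this function): ``rows`` holds long-form measurement dicts with
# at least 'impl', 'min', and 'mean' entries, the swept parameter columns, and
# the '<group>_key' columns named via ``group_labels``, e.g.
#
#   benchmark_analysis(
#       rows,
#       xlabel="size",
#       group_labels={"col": ["input"], "hue": ["impl"], "size": []},
#       basis={"impl": ["ujson", "json"]},
#   )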

json_benchmarks/core.py

@@ -0,0 +1,82 @@
"""
Main definition of the benchmarks
"""
import scriptconfig as scfg
import ubelt as ub
from json_benchmarks import analysis, measures
class CoreConfig(scfg.Config):
"""
Benchmark JSON implementations
"""
default = {
"mode": scfg.Value(
"all",
position=1,
choices=["all", "single", "run", "analyze"],
help=ub.paragraph(
"""
By default all benchmarks are run, saved, and aggregated
with any other existing benchmarks for analysis and
visualization.
                In "single" mode, other existing benchmarks are ignored.
In "run" mode, the benchmarks are run, but no analysis is done.
In "analyze" mode, no benchmarks are run, but any existing
benchmarks are loaded for analysis and visualization.
"""
),
),
"cache_dir": scfg.Value(
None,
help=ub.paragraph(
"""
Location for benchmark cache.
Defaults to $XDG_CACHE/ujson/benchmark_results/
"""
),
),
}
def normalize(self):
dpath = self["cache_dir"]
if dpath is None:
dpath = ub.Path.appdir("ujson/benchmark_results")
dpath = ub.Path(dpath)
self["cache_dir"] = dpath
def main(cmdline=True, **kwargs):
"""
Example:
>>> import sys, ubelt
>>> sys.path.append(ubelt.expandpath('~/code/ultrajson'))
>>> from json_benchmarks.core import * # NOQA
>>> import kwplot
>>> kwplot.autosns()
>>> cmdline = False
>>> kwargs = {}
>>> main(cmdline, **kwargs)
"""
config = CoreConfig(cmdline=cmdline, data=kwargs)
dpath = config["cache_dir"]
print(f"dpath={dpath}")
run = config["mode"] in {"all", "single", "run"}
if run:
result_fpath = measures.benchmark_json()
print(f"result_fpath = {result_fpath!r}")
result_fpaths = [result_fpath]
agg = config["mode"] not in {"single"}
if agg:
result_fpaths = list(dpath.glob("benchmarks*.json"))
analyze = config["mode"] in {"all", "single", "analyze"}
if analyze:
analysis.analyze_results(result_fpaths)
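# A hedged usage sketch mirroring the doctest above: passing config values as
# keyword arguments avoids the CLI, e.g. running only the analysis stage over
# previously cached results:
#
#   from json_benchmarks import core
#   core.main(cmdline=False, mode="analyze")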

json_benchmarks/datagen.py

@@ -0,0 +1,120 @@
import random
import sys
import ubelt as ub
def json_test_data_generators():
"""
Generates data for benchmarks with various sizes
Returns:
Dict[str, callable]:
a mapping from test data name to its generator
Example:
>>> data_lut = json_test_data_generators()
>>> size = 2
>>> keys = sorted(set(data_lut) - {'Complex object'})
>>> for key in keys:
>>> func = data_lut[key]
>>> test_object = func(size)
>>> print('key = {!r}'.format(key))
>>> print('test_object = {!r}'.format(test_object))
"""
data_lut = {}
def _register_data(name):
def _wrap(func):
data_lut[name] = func
return _wrap
# seed if desired
# rng = random.Random(0)
rng = random
@_register_data("Array with doubles")
def array_with_doubles(size):
test_object = [sys.maxsize * rng.random() for _ in range(size)]
return test_object
@_register_data("Array with UTF-8 strings")
def array_with_utf8_strings(size):
utf8_string = (
"نظام الحكم سلطاني وراثي "
"في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
" الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
)
test_object = [utf8_string for _ in range(size)]
return test_object
@_register_data("Medium complex object")
def medium_complex_object(size):
user = {
"userId": 3381293,
"age": 213,
"username": "johndoe",
"fullname": "John Doe the Second",
"isAuthorized": True,
"liked": 31231.31231202,
"approval": 31.1471,
"jobs": [1, 2],
"currJob": None,
}
friends = [user, user, user, user, user, user, user, user]
test_object = [[user, friends] for _ in range(size)]
return test_object
@_register_data("Array with True values")
def true_values(size):
test_object = [True for _ in range(size)]
return test_object
@_register_data("Array of Dict[str, int]")
def array_of_dict_string_int(size):
test_object = [
{str(rng.random() * 20): int(rng.random() * 1000000)} for _ in range(size)
]
return test_object
@_register_data("Dict of List[Dict[str, int]]")
def dict_of_list_dict_str_int(size):
keys = set()
while len(keys) < size:
key = str(rng.random() * 20)
keys.add(key)
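        # Note: each key maps to a fixed 256-element list of single-item
        # dicts; ``size`` only controls the number of top-level keys.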
test_object = {
key: [
{str(rng.random() * 20): int(rng.random() * 1000000)}
for _ in range(256)
]
for key in keys
}
return test_object
@_register_data("Complex object")
def complex_object(size):
import json
        # TODO: might be better to register this file with setup.py or
# download it via some mechanism
        try:
            dpath = ub.Path(__file__).parent
            fpath = dpath / "sample.json"
            if not fpath.exists():
                raise FileNotFoundError(fpath)
        except Exception:
            # Fall back to the copy of sample.json shipped with the ujson tests
            import ujson
            dpath = ub.Path(ujson.__file__).parent / "tests"
            fpath = dpath / "sample.json"
            if not fpath.exists():
                raise FileNotFoundError(fpath)
with open(fpath) as f:
test_object = json.load(f)
if size is not None:
test_object = [test_object] * size
return test_object
return data_lut

json_benchmarks/libraries.py

@@ -0,0 +1,99 @@
"""
Define the json libraries we are considering
"""
KNOWN_LIBRARIES = [
{"modname": "ujson", "distname": "ujson"},
# {"modname": "nujson", "distname": "nujson"},
# {"modname": "orjson", "distname": "orjson"},
# {"modname": "simplejson", "distname": "simplejson"},
{"modname": "json", "distname": "<stdlib>"},
# {"modname": "simdjson", "distname": "pysimdjson"},
# {"modname": "cysimdjson", "distname": "cysimdjson"},
# {"modname": "libpy_simdjson", "distname": "libpy-simdjson"},
]
KNOWN_MODNAMES = [info["modname"] for info in KNOWN_LIBRARIES]
# TODO:
# def distname_to_modnames(distname):
# # TODO: nice way to switch between a module's import name and it's distribution name
# # References:
# # https://stackoverflow.com/questions/49764802/get-module-name-programmatically-with-only-pypi-package-name/49764960#49764960
# import distlib.database
# distlib.database.DistributionPath().get_distribution(distname)
# # import importlib.metadata
# # importlib.metadata.metadata(distname)
# # importlib.util.find_spec(modname)
# # import simdjson
# # import pkg_resources
# # pkg_resources.get_distribution('pysimdjson')
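# One possible approach (untested here) for the TODO above, on Python >= 3.10:
# ``importlib.metadata.packages_distributions()`` maps top-level module names
# to the distributions that provide them, which could be inverted to go from
# a distribution name back to its module name(s), e.g.
#
#   import importlib.metadata
#   mod_to_dists = importlib.metadata.packages_distributions()
#   # e.g. mod_to_dists.get("simdjson") would include "pysimdjson"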
class Compatability:
"""
    Expose a common API for all tested implementations
"""
@staticmethod
def lut_dumps(module):
if module.__name__ == "cysimdjson":
return None
elif module.__name__ == "simdjson":
return None
else:
return getattr(module, "dumps", None)
@staticmethod
def lut_loads(module):
if module.__name__ == "cysimdjson":
parser = module.JSONParser()
return parser.loads
else:
return getattr(module, "loads", None)
def available_json_impls():
"""
Return a dictionary of information about each json implementation
Example:
>>> from json_benchmarks.libraries import * # NOQA
        >>> import ubelt as ub
        >>> json_impls = available_json_impls()
>>> print('json_impls = {}'.format(ub.repr2(json_impls, nl=1)))
"""
import importlib
import pkg_resources
known_libinfo = KNOWN_LIBRARIES
json_impls = {}
for libinfo in known_libinfo:
modname = libinfo["modname"]
distname = libinfo["distname"]
try:
module = importlib.import_module(modname)
        except ImportError:
            # The library is not installed; skip it entirely
            pass
else:
mod_version = getattr(module, "__version__", None)
if distname == "<stdlib>":
pkg_version = mod_version
else:
pkg_version = pkg_resources.get_distribution(distname).version
if mod_version is not None:
assert mod_version == pkg_version
version = pkg_version
dumps = Compatability.lut_dumps(module)
loads = Compatability.lut_loads(module)
impl_info = {
"module": module,
"modname": modname,
"distname": distname,
"version": version,
"dumps": dumps,
"loads": loads,
}
json_impls[modname] = impl_info
return json_impls

json_benchmarks/measures.py

@@ -0,0 +1,132 @@
"""
The definitions of the measurements we want to take
"""
import json
import scriptconfig as scfg
import ubelt as ub
from json_benchmarks import libraries
class MeasurementConfig(scfg.Config):
default = {
"disable": scfg.Value(
[],
choices=libraries.KNOWN_MODNAMES,
help=ub.paragraph(
"""
Remove specified libraries from the benchmarks
"""
),
),
"factor": scfg.Value(
1.0,
help=ub.paragraph(
"""
Specify as a fraction to speed up benchmarks for development /
testing
"""
),
),
"cache_dir": scfg.Value(
None,
help=ub.paragraph(
"""
Location for benchmark cache.
Defaults to $XDG_CACHE/ujson/benchmark_results/
"""
),
),
}
def normalize(self):
dpath = self["cache_dir"]
if dpath is None:
dpath = ub.Path.appdir("ujson/benchmark_results")
dpath = ub.Path(dpath)
self["cache_dir"] = dpath
def benchmark_json():
from json_benchmarks import benchmarker, datagen, libraries
json_impls = libraries.available_json_impls()
data_lut = datagen.json_test_data_generators()
# These are the parameters that we benchmark over
common_basis = {
"impl": list(json_impls.keys()),
"func": ["dumps", "loads"],
}
sized_basis = {
"input": [
"Array with doubles",
"Array with UTF-8 strings",
# 'Medium complex object',
"Array with True values",
"Array of Dict[str, int]",
# 'Dict of List[Dict[str, int]]',
# 'Complex object'
],
"size": [1, 2, 4, 8, 16, 32, 128, 256, 512, 1024, 2048, 4096, 8192],
}
predefined_basis = {
"input": ["Complex object"],
"size": [None],
}
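    # Note: a size of None means the predefined "Complex object" sample file
    # is used as-is (see datagen.complex_object) rather than being replicated.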
basis = [
ub.dict_union(common_basis, predefined_basis),
ub.dict_union(common_basis, sized_basis),
]
# The Benchmarker class is a new experimental API around timerit to
# abstract away the details of timing a process over a grid of parameters,
# serializing the results, and aggregating results from disparate runs.
benchmark = benchmarker.Benchmarker(
name="bench_json",
num=100,
bestof=10,
verbose=3,
basis=basis,
)
    def is_blocked(params):
        if params["input"] == "Complex object":
            # Some libraries can't handle the complex object
            if params["impl"] in {"orjson", "libpy_simdjson"}:
                return True
        return False
# For each variation of your experiment, create a row.
for params in benchmark.iter_params():
if is_blocked(params):
continue
# Make any modifications you need to compute input kwargs for each
# method here.
impl_info = json_impls[params["impl"]]
params["impl_version"] = impl_info["version"]
method = impl_info[params["func"]]
if method is None:
# Not all libraries implement all methods
continue
py_data = data_lut[params["input"]](params["size"])
if params["func"] == "dumps":
data = py_data
elif params["func"] == "loads":
data = json.dumps(py_data)
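        # For "loads" the input is serialized once up front with the stdlib
        # json module, so only decoding work is timed inside the loop below.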
        # Timerit will run some user-specified number of loops and compute
        # time stats with a methodology similar to timeit's.
try:
for timer in benchmark.measure():
                # Put any setup logic you don't want to time here.
# ...
with timer:
# Put the logic you want to time here
method(data)
except Exception as ex:
print(f"Failed to time: ex={ex}. Skipping")
dpath = ub.Path.appdir("ujson/benchmark_results").ensuredir()
result_fpath = benchmark.dump_in_dpath(dpath)
return result_fpath
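# A hedged usage sketch: calling ``benchmark_json()`` directly runs the full
# parameter grid defined above, writes the results as a ``benchmarks*.json``
# file under ``$XDG_CACHE/ujson/benchmark_results`` (via ``dump_in_dpath``),
# and returns that path, which ``core.main`` then hands to the analysis stage.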