mirror of
https://github.com/ultrajson/ultrajson.git
synced 2024-06-04 20:56:15 +02:00
Merge branch 'benchmark_stats_v2' of github.com:Erotemic/ultrajson into benchmark_stats_v2
This commit is contained in:
commit
a2fbbf10af
|
@ -9,35 +9,65 @@ mkinit ~/code/ultrajson/json_benchmarks/benchmarker/__init__.py -w
|
|||
|
||||
__version__ = "0.1.0"
|
||||
|
||||
from json_benchmarks.benchmarker import aggregate
|
||||
from json_benchmarks.benchmarker import benchmarker
|
||||
from json_benchmarks.benchmarker import process_context
|
||||
from json_benchmarks.benchmarker import result_analysis
|
||||
from json_benchmarks.benchmarker import util_json
|
||||
from json_benchmarks.benchmarker import util_stats
|
||||
from json_benchmarks.benchmarker import visualize
|
||||
|
||||
from json_benchmarks.benchmarker.aggregate import (demo, demo_data,)
|
||||
from json_benchmarks.benchmarker.benchmarker import (Benchmarker,
|
||||
BenchmarkerConfig,
|
||||
BenchmarkerResult,)
|
||||
from json_benchmarks.benchmarker.process_context import (ProcessContext,)
|
||||
from json_benchmarks.benchmarker import (
|
||||
aggregate,
|
||||
benchmarker,
|
||||
process_context,
|
||||
result_analysis,
|
||||
util_json,
|
||||
util_stats,
|
||||
visualize,
|
||||
)
|
||||
from json_benchmarks.benchmarker.aggregate import demo, demo_data
|
||||
from json_benchmarks.benchmarker.benchmarker import (
|
||||
Benchmarker,
|
||||
BenchmarkerConfig,
|
||||
BenchmarkerResult,
|
||||
)
|
||||
from json_benchmarks.benchmarker.process_context import ProcessContext
|
||||
from json_benchmarks.benchmarker.result_analysis import (
|
||||
DEFAULT_METRIC_TO_OBJECTIVE, Result, ResultAnalysis, SkillTracker,)
|
||||
from json_benchmarks.benchmarker.util_json import (ensure_json_serializable,
|
||||
find_json_unserializable,
|
||||
indexable_allclose,)
|
||||
from json_benchmarks.benchmarker.util_stats import (aggregate_stats,
|
||||
combine_stats,
|
||||
combine_stats_arrs,
|
||||
stats_dict,)
|
||||
from json_benchmarks.benchmarker.visualize import (benchmark_analysis,)
|
||||
DEFAULT_METRIC_TO_OBJECTIVE,
|
||||
Result,
|
||||
ResultAnalysis,
|
||||
SkillTracker,
|
||||
)
|
||||
from json_benchmarks.benchmarker.util_json import (
|
||||
ensure_json_serializable,
|
||||
find_json_unserializable,
|
||||
indexable_allclose,
|
||||
)
|
||||
from json_benchmarks.benchmarker.util_stats import (
|
||||
aggregate_stats,
|
||||
combine_stats,
|
||||
combine_stats_arrs,
|
||||
stats_dict,
|
||||
)
|
||||
from json_benchmarks.benchmarker.visualize import benchmark_analysis
|
||||
|
||||
__all__ = ['Benchmarker', 'BenchmarkerConfig', 'BenchmarkerResult',
|
||||
'DEFAULT_METRIC_TO_OBJECTIVE', 'ProcessContext', 'Result',
|
||||
'ResultAnalysis', 'SkillTracker', 'aggregate', 'aggregate_stats',
|
||||
'benchmark_analysis', 'benchmarker', 'combine_stats',
|
||||
'combine_stats_arrs', 'demo', 'demo_data',
|
||||
'ensure_json_serializable', 'find_json_unserializable',
|
||||
'indexable_allclose', 'process_context', 'result_analysis',
|
||||
'stats_dict', 'util_json', 'util_stats', 'visualize']
|
||||
__all__ = [
|
||||
"Benchmarker",
|
||||
"BenchmarkerConfig",
|
||||
"BenchmarkerResult",
|
||||
"DEFAULT_METRIC_TO_OBJECTIVE",
|
||||
"ProcessContext",
|
||||
"Result",
|
||||
"ResultAnalysis",
|
||||
"SkillTracker",
|
||||
"aggregate",
|
||||
"aggregate_stats",
|
||||
"benchmark_analysis",
|
||||
"benchmarker",
|
||||
"combine_stats",
|
||||
"combine_stats_arrs",
|
||||
"demo",
|
||||
"demo_data",
|
||||
"ensure_json_serializable",
|
||||
"find_json_unserializable",
|
||||
"indexable_allclose",
|
||||
"process_context",
|
||||
"result_analysis",
|
||||
"stats_dict",
|
||||
"util_json",
|
||||
"util_stats",
|
||||
"visualize",
|
||||
]
|
||||
|
|
|
@ -159,6 +159,7 @@ class Benchmarker:
|
|||
rows.append(row)
|
||||
else:
|
||||
from json_benchmarks.benchmarker import util_stats
|
||||
|
||||
times = np.array(ti.robust_times())
|
||||
metrics = util_stats.stats_dict(times, "_time")
|
||||
row = {
|
||||
|
|
|
@ -823,6 +823,7 @@ class ResultAnalysis(ub.NiceRepr):
|
|||
"""
|
||||
print('Init seaborn and pyplot')
|
||||
import seaborn as sns
|
||||
|
||||
sns.set()
|
||||
from matplotlib import pyplot as plt # NOQA
|
||||
|
||||
|
@ -918,9 +919,9 @@ class ResultAnalysis(ub.NiceRepr):
|
|||
|
||||
print('Adjust plots')
|
||||
for plot in plots:
|
||||
xscale = kwargs.get('xscale', None)
|
||||
yscale = kwargs.get('yscale', None)
|
||||
for ax in plot['facet'].axes.ravel():
|
||||
xscale = kwargs.get("xscale", None)
|
||||
yscale = kwargs.get("yscale", None)
|
||||
for ax in plot["facet"].axes.ravel():
|
||||
if xscale is not None:
|
||||
try:
|
||||
ax.set_xscale(xscale)
|
||||
|
|
|
@ -1,68 +1,68 @@
|
|||
import ubelt as ub
|
||||
import numpy as np
|
||||
import ubelt as ub
|
||||
|
||||
|
||||
def __tabulate_issue():
|
||||
# MWE for tabulate issue
|
||||
# The decimals are not aligned when using "," in the floatfmt
|
||||
import tabulate
|
||||
|
||||
data = [
|
||||
[13213.2, 3213254.23, 432432.231,],
|
||||
[432432., 432.3, 3.2]
|
||||
[
|
||||
13213.2,
|
||||
3213254.23,
|
||||
432432.231,
|
||||
],
|
||||
[432432.0, 432.3, 3.2],
|
||||
]
|
||||
print(tabulate.tabulate(data, headers=['a', 'b'], floatfmt=',.02f'))
|
||||
print(tabulate.tabulate(data, headers=['a', 'b'], floatfmt='.02f'))
|
||||
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=",.02f"))
|
||||
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=".02f"))
|
||||
|
||||
|
||||
def __groupby_issue():
|
||||
# MWE of an issue with pandas groupby
|
||||
import pandas as pd
|
||||
data = pd.DataFrame([
|
||||
{'p1': 'a', 'p2': 1, 'p3': 0},
|
||||
{'p1': 'a', 'p2': 1, 'p3': 0},
|
||||
{'p1': 'a', 'p2': 2, 'p3': 0},
|
||||
{'p1': 'b', 'p2': 2, 'p3': 0},
|
||||
{'p1': 'b', 'p2': 1, 'p3': 0},
|
||||
{'p1': 'b', 'p2': 1, 'p3': 0},
|
||||
{'p1': 'b', 'p2': 1, 'p3': 0},
|
||||
])
|
||||
|
||||
by = 'p1'
|
||||
data = pd.DataFrame(
|
||||
[
|
||||
{"p1": "a", "p2": 1, "p3": 0},
|
||||
{"p1": "a", "p2": 1, "p3": 0},
|
||||
{"p1": "a", "p2": 2, "p3": 0},
|
||||
{"p1": "b", "p2": 2, "p3": 0},
|
||||
{"p1": "b", "p2": 1, "p3": 0},
|
||||
{"p1": "b", "p2": 1, "p3": 0},
|
||||
{"p1": "b", "p2": 1, "p3": 0},
|
||||
]
|
||||
)
|
||||
|
||||
by = "p1"
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {
|
||||
'by': by,
|
||||
'key': key,
|
||||
'type(key)': type(key)
|
||||
}
|
||||
print('result = {}'.format(ub.repr2(result, nl=1)))
|
||||
result = {"by": by, "key": key, "type(key)": type(key)}
|
||||
print(f"result = {ub.repr2(result, nl=1)}")
|
||||
assert not ub.iterable(
|
||||
key
|
||||
), "`by` is specified as a scalar, so getting `key` as a scalar makes sense"
|
||||
|
||||
by = ["p1"]
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {"by": by, "key": key, "type(key)": type(key)}
|
||||
print(f"result = {ub.repr2(result, nl=1)}")
|
||||
assert not ub.iterable(key), (
|
||||
'`by` is specified as a scalar, so getting `key` as a scalar makes sense')
|
||||
"`by` is specified as a list of scalars (with one element), but we "
|
||||
"still get `key` as a scalar. This does not make sense"
|
||||
)
|
||||
|
||||
by = ['p1']
|
||||
by = ["p1", "p2"]
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {
|
||||
'by': by,
|
||||
'key': key,
|
||||
'type(key)': type(key)
|
||||
}
|
||||
print('result = {}'.format(ub.repr2(result, nl=1)))
|
||||
assert not ub.iterable(key), (
|
||||
'`by` is specified as a list of scalars (with one element), but we '
|
||||
'still get `key` as a scalar. This does not make sense')
|
||||
|
||||
by = ['p1', 'p2']
|
||||
key = list(data.groupby(by))[0][0]
|
||||
result = {
|
||||
'by': by,
|
||||
'key': key,
|
||||
'type(key)': type(key)
|
||||
}
|
||||
print('result = {}'.format(ub.repr2(result, nl=1)))
|
||||
result = {"by": by, "key": key, "type(key)": type(key)}
|
||||
print(f"result = {ub.repr2(result, nl=1)}")
|
||||
assert ub.iterable(key), (
|
||||
'`by` is specified as a list of scalars (with multiple elements), '
|
||||
'and we still get `key` as a tuple of values. This makes sense')
|
||||
"`by` is specified as a list of scalars (with multiple elements), "
|
||||
"and we still get `key` as a tuple of values. This makes sense"
|
||||
)
|
||||
|
||||
|
||||
def aggregate_stats(data, suffix='', group_keys=None):
|
||||
def aggregate_stats(data, suffix="", group_keys=None):
|
||||
"""
|
||||
Given columns interpreted as containing stats, aggregate those stats
|
||||
within each group. For each row, any non-group, non-stat column
|
||||
|
|
|
@ -40,9 +40,9 @@ class JSONBenchmarkConfig(scfg.Config):
|
|||
|
||||
In "analyze" mode, no benchmarks are run, but any existing
|
||||
benchmarks are loaded for analysis and visualization.
|
||||
""")
|
||||
"""
|
||||
),
|
||||
),
|
||||
|
||||
"disable": scfg.Value(
|
||||
[],
|
||||
choices=KNOWN_LIBRARIES,
|
||||
|
@ -82,6 +82,7 @@ class JSONBenchmarkConfig(scfg.Config):
|
|||
|
||||
def available_json_impls():
|
||||
import importlib
|
||||
|
||||
known_modnames = KNOWN_LIBRARIES
|
||||
json_impls = {}
|
||||
for libname in known_modnames:
|
||||
|
@ -206,7 +207,9 @@ def analyze_results(result_fpaths):
|
|||
|
||||
single_size = table[(table["size"] == 256) | table["size"].isnull()]
|
||||
# single_size_combo = aggregate_stats(single_size, None)
|
||||
single_size_combo = util_stats.aggregate_stats(single_size, suffix='_time', group_keys=["name"])
|
||||
single_size_combo = util_stats.aggregate_stats(
|
||||
single_size, suffix="_time", group_keys=["name"]
|
||||
)
|
||||
|
||||
param_group = ["impl", "impl_version"]
|
||||
single_size_combo["calls/sec"] = 1 / single_size_combo["mean_time"]
|
||||
|
@ -216,16 +219,16 @@ def analyze_results(result_fpaths):
|
|||
# )
|
||||
time_piv = single_size_combo.pivot(["input", "func"], param_group, "mean_time")
|
||||
|
||||
hz_piv = (1 / time_piv)
|
||||
hz_piv = 1 / time_piv
|
||||
# hzstr_piv = (1 / time_piv).applymap(lambda x: f"{x:,.02f}")
|
||||
print("Table for size=256")
|
||||
# print(hzstr_piv.to_markdown())
|
||||
print(hz_piv.to_markdown(floatfmt=',.02f'))
|
||||
print(hz_piv.to_markdown(floatfmt=",.02f"))
|
||||
print("")
|
||||
print("Above metrics are in call/sec, larger is better.")
|
||||
|
||||
speedup_piv = hz_piv / hz_piv['json'].values
|
||||
print(speedup_piv.to_markdown(floatfmt=',.02g'))
|
||||
speedup_piv = hz_piv / hz_piv["json"].values
|
||||
print(speedup_piv.to_markdown(floatfmt=",.02g"))
|
||||
|
||||
analysis.abalate(param_group)
|
||||
# benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)
|
||||
|
@ -241,12 +244,16 @@ def analyze_results(result_fpaths):
|
|||
"size": [],
|
||||
}
|
||||
import kwplot
|
||||
|
||||
kwplot.autosns()
|
||||
self = analysis
|
||||
|
||||
plots = analysis.plot(
|
||||
xlabel, metric_key, group_labels,
|
||||
xscale='log', yscale='log',
|
||||
xlabel,
|
||||
metric_key,
|
||||
group_labels,
|
||||
xscale="log",
|
||||
yscale="log",
|
||||
)
|
||||
plots
|
||||
kwplot.show_if_requested()
|
||||
|
@ -267,16 +274,16 @@ def main(cmdline=True, **kwargs):
|
|||
config = JSONBenchmarkConfig(cmdline=cmdline, data=kwargs)
|
||||
dpath = config["cache_dir"]
|
||||
|
||||
run = config['mode'] in {'all', 'single', 'run'}
|
||||
run = config["mode"] in {"all", "single", "run"}
|
||||
if run:
|
||||
result_fpath = benchmark_json()
|
||||
print(f"result_fpath = {result_fpath!r}")
|
||||
result_fpaths = [result_fpath]
|
||||
|
||||
agg = config['mode'] not in {'single'}
|
||||
agg = config["mode"] not in {"single"}
|
||||
if agg:
|
||||
result_fpaths = list(dpath.glob("benchmarks*.json"))
|
||||
|
||||
analyze = config['mode'] in {'all', 'single', 'analyze'}
|
||||
analyze = config["mode"] in {"all", "single", "analyze"}
|
||||
if analyze:
|
||||
analyze_results(result_fpaths)
|
||||
|
|
Loading…
Reference in New Issue