
Merge branch 'benchmark_stats_v2' of github.com:Erotemic/ultrajson into benchmark_stats_v2

joncrall 2022-05-28 22:12:04 -04:00
commit a2fbbf10af
GPG Key ID: BE04D092BDD81C0D (no known key found for this signature in database)
5 changed files with 128 additions and 89 deletions

View File

@@ -9,35 +9,65 @@ mkinit ~/code/ultrajson/json_benchmarks/benchmarker/__init__.py -w
__version__ = "0.1.0"
from json_benchmarks.benchmarker import aggregate
from json_benchmarks.benchmarker import benchmarker
from json_benchmarks.benchmarker import process_context
from json_benchmarks.benchmarker import result_analysis
from json_benchmarks.benchmarker import util_json
from json_benchmarks.benchmarker import util_stats
from json_benchmarks.benchmarker import visualize
from json_benchmarks.benchmarker.aggregate import (demo, demo_data,)
from json_benchmarks.benchmarker.benchmarker import (Benchmarker,
BenchmarkerConfig,
BenchmarkerResult,)
from json_benchmarks.benchmarker.process_context import (ProcessContext,)
from json_benchmarks.benchmarker import (
aggregate,
benchmarker,
process_context,
result_analysis,
util_json,
util_stats,
visualize,
)
from json_benchmarks.benchmarker.aggregate import demo, demo_data
from json_benchmarks.benchmarker.benchmarker import (
Benchmarker,
BenchmarkerConfig,
BenchmarkerResult,
)
from json_benchmarks.benchmarker.process_context import ProcessContext
from json_benchmarks.benchmarker.result_analysis import (
DEFAULT_METRIC_TO_OBJECTIVE, Result, ResultAnalysis, SkillTracker,)
from json_benchmarks.benchmarker.util_json import (ensure_json_serializable,
find_json_unserializable,
indexable_allclose,)
from json_benchmarks.benchmarker.util_stats import (aggregate_stats,
combine_stats,
combine_stats_arrs,
stats_dict,)
from json_benchmarks.benchmarker.visualize import (benchmark_analysis,)
DEFAULT_METRIC_TO_OBJECTIVE,
Result,
ResultAnalysis,
SkillTracker,
)
from json_benchmarks.benchmarker.util_json import (
ensure_json_serializable,
find_json_unserializable,
indexable_allclose,
)
from json_benchmarks.benchmarker.util_stats import (
aggregate_stats,
combine_stats,
combine_stats_arrs,
stats_dict,
)
from json_benchmarks.benchmarker.visualize import benchmark_analysis
__all__ = ['Benchmarker', 'BenchmarkerConfig', 'BenchmarkerResult',
'DEFAULT_METRIC_TO_OBJECTIVE', 'ProcessContext', 'Result',
'ResultAnalysis', 'SkillTracker', 'aggregate', 'aggregate_stats',
'benchmark_analysis', 'benchmarker', 'combine_stats',
'combine_stats_arrs', 'demo', 'demo_data',
'ensure_json_serializable', 'find_json_unserializable',
'indexable_allclose', 'process_context', 'result_analysis',
'stats_dict', 'util_json', 'util_stats', 'visualize']
__all__ = [
"Benchmarker",
"BenchmarkerConfig",
"BenchmarkerResult",
"DEFAULT_METRIC_TO_OBJECTIVE",
"ProcessContext",
"Result",
"ResultAnalysis",
"SkillTracker",
"aggregate",
"aggregate_stats",
"benchmark_analysis",
"benchmarker",
"combine_stats",
"combine_stats_arrs",
"demo",
"demo_data",
"ensure_json_serializable",
"find_json_unserializable",
"indexable_allclose",
"process_context",
"result_analysis",
"stats_dict",
"util_json",
"util_stats",
"visualize",
]

View File

@@ -159,6 +159,7 @@ class Benchmarker:
rows.append(row)
else:
from json_benchmarks.benchmarker import util_stats
times = np.array(ti.robust_times())
metrics = util_stats.stats_dict(times, "_time")
row = {

View File

@@ -823,6 +823,7 @@ class ResultAnalysis(ub.NiceRepr):
"""
print('Init seaborn and pyplot')
import seaborn as sns
sns.set()
from matplotlib import pyplot as plt # NOQA
@@ -918,9 +919,9 @@ class ResultAnalysis(ub.NiceRepr):
print('Adjust plots')
for plot in plots:
xscale = kwargs.get('xscale', None)
yscale = kwargs.get('yscale', None)
for ax in plot['facet'].axes.ravel():
xscale = kwargs.get("xscale", None)
yscale = kwargs.get("yscale", None)
for ax in plot["facet"].axes.ravel():
if xscale is not None:
try:
ax.set_xscale(xscale)

View File

@@ -1,68 +1,68 @@
import ubelt as ub
import numpy as np
import ubelt as ub
def __tabulate_issue():
# MWE for tabulate issue
# The decimals are not aligned when using "," in the floatfmt
import tabulate
data = [
[13213.2, 3213254.23, 432432.231,],
[432432., 432.3, 3.2]
[
13213.2,
3213254.23,
432432.231,
],
[432432.0, 432.3, 3.2],
]
print(tabulate.tabulate(data, headers=['a', 'b'], floatfmt=',.02f'))
print(tabulate.tabulate(data, headers=['a', 'b'], floatfmt='.02f'))
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=",.02f"))
print(tabulate.tabulate(data, headers=["a", "b"], floatfmt=".02f"))
def __groupby_issue():
# MWE of an issue with pandas groupby
import pandas as pd
data = pd.DataFrame([
{'p1': 'a', 'p2': 1, 'p3': 0},
{'p1': 'a', 'p2': 1, 'p3': 0},
{'p1': 'a', 'p2': 2, 'p3': 0},
{'p1': 'b', 'p2': 2, 'p3': 0},
{'p1': 'b', 'p2': 1, 'p3': 0},
{'p1': 'b', 'p2': 1, 'p3': 0},
{'p1': 'b', 'p2': 1, 'p3': 0},
])
by = 'p1'
data = pd.DataFrame(
[
{"p1": "a", "p2": 1, "p3": 0},
{"p1": "a", "p2": 1, "p3": 0},
{"p1": "a", "p2": 2, "p3": 0},
{"p1": "b", "p2": 2, "p3": 0},
{"p1": "b", "p2": 1, "p3": 0},
{"p1": "b", "p2": 1, "p3": 0},
{"p1": "b", "p2": 1, "p3": 0},
]
)
by = "p1"
key = list(data.groupby(by))[0][0]
result = {
'by': by,
'key': key,
'type(key)': type(key)
}
print('result = {}'.format(ub.repr2(result, nl=1)))
result = {"by": by, "key": key, "type(key)": type(key)}
print(f"result = {ub.repr2(result, nl=1)}")
assert not ub.iterable(
key
), "`by` is specified as a scalar, so getting `key` as a scalar makes sense"
by = ["p1"]
key = list(data.groupby(by))[0][0]
result = {"by": by, "key": key, "type(key)": type(key)}
print(f"result = {ub.repr2(result, nl=1)}")
assert not ub.iterable(key), (
'`by` is specified as a scalar, so getting `key` as a scalar makes sense')
"`by` is specified as a list of scalars (with one element), but we "
"still get `key` as a scalar. This does not make sense"
)
by = ['p1']
by = ["p1", "p2"]
key = list(data.groupby(by))[0][0]
result = {
'by': by,
'key': key,
'type(key)': type(key)
}
print('result = {}'.format(ub.repr2(result, nl=1)))
assert not ub.iterable(key), (
'`by` is specified as a list of scalars (with one element), but we '
'still get `key` as a scalar. This does not make sense')
by = ['p1', 'p2']
key = list(data.groupby(by))[0][0]
result = {
'by': by,
'key': key,
'type(key)': type(key)
}
print('result = {}'.format(ub.repr2(result, nl=1)))
result = {"by": by, "key": key, "type(key)": type(key)}
print(f"result = {ub.repr2(result, nl=1)}")
assert ub.iterable(key), (
'`by` is specified as a list of scalars (with multiple elements), '
'and we still get `key` as a tuple of values. This makes sense')
"`by` is specified as a list of scalars (with multiple elements), "
"and we still get `key` as a tuple of values. This makes sense"
)
def aggregate_stats(data, suffix='', group_keys=None):
def aggregate_stats(data, suffix="", group_keys=None):
"""
Given columns interpreted as containing stats, aggregate those stats
within each group. For each row, any non-group, non-stat column

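The hunk above ends mid-docstring, but the call site later in this commit, `util_stats.aggregate_stats(single_size, suffix="_time", group_keys=["name"])`, shows the intended usage. A minimal sketch under stated assumptions: the input rows and every stat column name except `mean_time` (which the benchmarks hunk reads back) are hypothetical, as is what happens to the non-group, non-stat `impl` column.

```python
# Hedged sketch of aggregate_stats usage; column names other than "mean_time"
# are assumptions about what stats_dict(times, "_time") produces.
import pandas as pd
from json_benchmarks.benchmarker import util_stats

table = pd.DataFrame([
    {"name": "dumps", "impl": "ujson", "nobs_time": 100, "mean_time": 1.2e-5, "std_time": 2.0e-6},
    {"name": "dumps", "impl": "json",  "nobs_time": 100, "mean_time": 4.8e-5, "std_time": 5.0e-6},
    {"name": "loads", "impl": "ujson", "nobs_time": 100, "mean_time": 1.9e-5, "std_time": 3.0e-6},
])
# Aggregate the *_time stat columns within each "name" group.
combo = util_stats.aggregate_stats(table, suffix="_time", group_keys=["name"])
print(combo)
```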
View File

@@ -40,9 +40,9 @@ class JSONBenchmarkConfig(scfg.Config):
In "analyze" mode, no benchmarks are run, but any existing
benchmarks are loaded for analysis and visualization.
""")
"""
),
),
"disable": scfg.Value(
[],
choices=KNOWN_LIBRARIES,
@@ -82,6 +82,7 @@ class JSONBenchmarkConfig(scfg.Config):
def available_json_impls():
import importlib
known_modnames = KNOWN_LIBRARIES
json_impls = {}
for libname in known_modnames:
@@ -206,7 +207,9 @@ def analyze_results(result_fpaths):
single_size = table[(table["size"] == 256) | table["size"].isnull()]
# single_size_combo = aggregate_stats(single_size, None)
single_size_combo = util_stats.aggregate_stats(single_size, suffix='_time', group_keys=["name"])
single_size_combo = util_stats.aggregate_stats(
single_size, suffix="_time", group_keys=["name"]
)
param_group = ["impl", "impl_version"]
single_size_combo["calls/sec"] = 1 / single_size_combo["mean_time"]
@@ -216,16 +219,16 @@ def analyze_results(result_fpaths):
# )
time_piv = single_size_combo.pivot(["input", "func"], param_group, "mean_time")
hz_piv = (1 / time_piv)
hz_piv = 1 / time_piv
# hzstr_piv = (1 / time_piv).applymap(lambda x: f"{x:,.02f}")
print("Table for size=256")
# print(hzstr_piv.to_markdown())
print(hz_piv.to_markdown(floatfmt=',.02f'))
print(hz_piv.to_markdown(floatfmt=",.02f"))
print("")
print("Above metrics are in call/sec, larger is better.")
speedup_piv = hz_piv / hz_piv['json'].values
print(speedup_piv.to_markdown(floatfmt=',.02g'))
speedup_piv = hz_piv / hz_piv["json"].values
print(speedup_piv.to_markdown(floatfmt=",.02g"))
analysis.abalate(param_group)
# benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)
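As a quick sanity check on the two pivot tables in the hunk above: `hz_piv` is just the reciprocal of the mean time (calls per second), and `speedup_piv` divides every column by the stdlib `json` column, so `json` always reads as 1.0. A minimal sketch with made-up numbers; the committed code selects the `json` column through its `(impl, impl_version)` MultiIndex via `hz_piv["json"].values`, while this flat-column version uses `.div(..., axis=0)` instead.

```python
# Illustrative numbers only, not measured results.
import pandas as pd

mean_time = pd.DataFrame(
    {"ujson": [0.5e-3, 0.8e-3], "json": [2.0e-3, 2.4e-3]},
    index=["dumps", "loads"],
)
hz = 1 / mean_time                    # calls/sec, larger is better
speedup = hz.div(hz["json"], axis=0)  # rate relative to stdlib json
print(speedup.to_markdown(floatfmt=",.02f"))  # json column is 1.00 by construction
```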
@@ -241,12 +244,16 @@ def analyze_results(result_fpaths):
"size": [],
}
import kwplot
kwplot.autosns()
self = analysis
plots = analysis.plot(
xlabel, metric_key, group_labels,
xscale='log', yscale='log',
xlabel,
metric_key,
group_labels,
xscale="log",
yscale="log",
)
plots
kwplot.show_if_requested()
@@ -267,16 +274,16 @@ def main(cmdline=True, **kwargs):
config = JSONBenchmarkConfig(cmdline=cmdline, data=kwargs)
dpath = config["cache_dir"]
run = config['mode'] in {'all', 'single', 'run'}
run = config["mode"] in {"all", "single", "run"}
if run:
result_fpath = benchmark_json()
print(f"result_fpath = {result_fpath!r}")
result_fpaths = [result_fpath]
agg = config['mode'] not in {'single'}
agg = config["mode"] not in {"single"}
if agg:
result_fpaths = list(dpath.glob("benchmarks*.json"))
analyze = config['mode'] in {'all', 'single', 'analyze'}
analyze = config["mode"] in {"all", "single", "analyze"}
if analyze:
analyze_results(result_fpaths)
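For reference, the `mode` handling in `main()` resolves into three independent phases. A small sketch derived only from the three conditions shown in this hunk (config plumbing omitted):

```python
# Derived directly from the conditions in main(); no other behavior assumed.
for mode in ["all", "single", "run", "analyze"]:
    run = mode in {"all", "single", "run"}          # run new benchmarks
    agg = mode not in {"single"}                    # glob existing benchmarks*.json results
    analyze = mode in {"all", "single", "analyze"}  # analyze and visualize results
    print(f"{mode}: run={run}, aggregate={agg}, analyze={analyze}")
```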