Mirror of https://github.com/ultrajson/ultrajson.git (synced 2024-05-25 11:06:19 +02:00)

Refactor core into measures and analysis submodules

This commit is contained in:
parent eee2a5ff66
commit bd592fdd3b
@@ -0,0 +1,112 @@
"""
The analysis of the measurements
"""
import scriptconfig as scfg
import ubelt as ub


class AnalysisConfig(scfg.Config):
    default = {
        "cache_dir": scfg.Value(
            None,
            help=ub.paragraph(
                """
                Location for benchmark cache.
                Defaults to $XDG_CACHE/ujson/benchmark_results/
                """
            ),
        ),
    }

    def normalize(self):
        dpath = self["cache_dir"]
        if dpath is None:
            dpath = ub.Path.appdir("ujson/benchmark_results")
        dpath = ub.Path(dpath)
        self["cache_dir"] = dpath


def analyze_results(result_fpaths):
    from json_benchmarks.benchmarker import util_stats
    from json_benchmarks import benchmarker
    import json

    results = []
    for fpath in ub.ProgIter(result_fpaths, desc="load results"):
        data = json.loads(fpath.read_text())
        for row in data["rows"]:
            result = benchmarker.BenchmarkerResult.load(fpath)
            results.extend(result.to_result_list())

    RECORD_ALL = 0
    metric_key = "time" if RECORD_ALL else "mean_time"

    # results = benchmark.result.to_result_list()

    analysis = benchmarker.result_analysis.ResultAnalysis(
        results,
        metrics=[metric_key],
        params=["impl"],
        metric_objectives={
            "min_time": "min",
            "mean_time": "min",
            "time": "min",
        },
    )
    analysis.analysis()

    table = analysis.table
    stats_table = util_stats.aggregate_stats(table, suffix="_time", group_keys=["name"])

    single_size = stats_table[
        (stats_table["size"] == 256) | stats_table["size"].isnull()
    ]
    # single_size_combo = aggregate_stats(single_size, None)
    single_size_combo = util_stats.aggregate_stats(
        single_size, suffix="_time", group_keys=["name"]
    )

    param_group = ["impl", "impl_version"]
    single_size_combo["calls/sec"] = 1 / single_size_combo["mean_time"]
    # _single_size_combo = single_size_combo.copy()
    time_piv = single_size_combo.pivot(["input", "func"], param_group, "mean_time")

    hz_piv = 1 / time_piv
    # hzstr_piv = (1 / time_piv).applymap(lambda x: f"{x:,.02f}")
    print("Table for size=256")
    # print(hzstr_piv.to_markdown())
    print(hz_piv.to_markdown(floatfmt=",.02f"))
    print("")
    print("Above metrics are in call/sec, larger is better.")

    speedup_piv = hz_piv / hz_piv["json"].values
    print(speedup_piv.to_markdown(floatfmt=",.02g"))

    analysis.abalate(param_group)
    # benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)

    xlabel = "size"
    # Set these to empty lists if they are not used
    group_labels = {
        "fig": ["input"],
        "col": ["func"],
        # "fig": [],
        # "col": ["func" "input"],
        "hue": ["impl", "impl_version"],
        "size": [],
    }
    import kwplot
    kwplot.autosns()
    self = analysis  # NOQA

    data = stats_table
    plots = analysis.plot(
        xlabel,
        metric_key,
        group_labels,
        xscale="log",
        yscale="log",
        data=data,
    )
    plots
    kwplot.show_if_requested()

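Editor's note on the reciprocal pivot used in analyze_results above: mean_time is seconds per call, so 1 / time gives calls per second, and dividing by the stdlib json column turns that into a speedup factor. Below is a self-contained sketch of that arithmetic with made-up numbers; it is illustrative only and not part of the commit (the real table comes from ResultAnalysis / aggregate_stats).

import pandas as pd

# Illustrative numbers only; real rows come from the saved benchmark results.
rows = pd.DataFrame({
    "input": ["Array with doubles"] * 3 + ["Array of Dict[str, int]"] * 3,
    "func": ["dumps"] * 6,
    "impl": ["json", "ujson", "orjson"] * 2,
    "mean_time": [4.0e-5, 1.0e-5, 5.0e-6, 8.0e-5, 3.0e-5, 1.5e-5],
})
time_piv = rows.pivot(index=["input", "func"], columns="impl", values="mean_time")
hz_piv = 1 / time_piv                              # seconds per call -> calls per second
speedup_piv = hz_piv.div(hz_piv["json"], axis=0)   # normalize against stdlib json
print(hz_piv.to_markdown(floatfmt=",.02f"))
print(speedup_piv.to_markdown(floatfmt=",.02g"))
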
@@ -9,65 +9,33 @@ mkinit ~/code/ultrajson/json_benchmarks/benchmarker/__init__.py -w
__version__ = "0.1.0"

from json_benchmarks.benchmarker import (
    aggregate,
    benchmarker,
    process_context,
    result_analysis,
    util_json,
    util_stats,
    visualize,
)
from json_benchmarks.benchmarker.aggregate import demo, demo_data
from json_benchmarks.benchmarker.benchmarker import (
    Benchmarker,
    BenchmarkerConfig,
    BenchmarkerResult,
)
from json_benchmarks.benchmarker.process_context import ProcessContext
from json_benchmarks.benchmarker.result_analysis import (
    DEFAULT_METRIC_TO_OBJECTIVE,
    Result,
    ResultAnalysis,
    SkillTracker,
)
from json_benchmarks.benchmarker.util_json import (
    ensure_json_serializable,
    find_json_unserializable,
    indexable_allclose,
)
from json_benchmarks.benchmarker.util_stats import (
    aggregate_stats,
    combine_stats,
    combine_stats_arrs,
    stats_dict,
)
from json_benchmarks.benchmarker.visualize import benchmark_analysis
from json_benchmarks.benchmarker import benchmarker
from json_benchmarks.benchmarker import process_context
from json_benchmarks.benchmarker import result_analysis
from json_benchmarks.benchmarker import util_json
from json_benchmarks.benchmarker import util_stats
from json_benchmarks.benchmarker import visualize

__all__ = [
    "Benchmarker",
    "BenchmarkerConfig",
    "BenchmarkerResult",
    "DEFAULT_METRIC_TO_OBJECTIVE",
    "ProcessContext",
    "Result",
    "ResultAnalysis",
    "SkillTracker",
    "aggregate",
    "aggregate_stats",
    "benchmark_analysis",
    "benchmarker",
    "combine_stats",
    "combine_stats_arrs",
    "demo",
    "demo_data",
    "ensure_json_serializable",
    "find_json_unserializable",
    "indexable_allclose",
    "process_context",
    "result_analysis",
    "stats_dict",
    "util_json",
    "util_stats",
    "visualize",
]
from json_benchmarks.benchmarker.benchmarker import (Benchmarker,
                                                     BenchmarkerConfig,
                                                     BenchmarkerResult,)
from json_benchmarks.benchmarker.process_context import (ProcessContext,)
from json_benchmarks.benchmarker.result_analysis import (
    DEFAULT_METRIC_TO_OBJECTIVE, Result, ResultAnalysis, SkillTracker,)
from json_benchmarks.benchmarker.util_json import (ensure_json_serializable,
                                                   find_json_unserializable,
                                                   indexable_allclose,)
from json_benchmarks.benchmarker.util_stats import (aggregate_stats,
                                                    combine_stats,
                                                    combine_stats_arrs,
                                                    stats_dict,)
from json_benchmarks.benchmarker.visualize import (benchmark_analysis,)

__all__ = ['Benchmarker', 'BenchmarkerConfig', 'BenchmarkerResult',
           'DEFAULT_METRIC_TO_OBJECTIVE', 'ProcessContext', 'Result',
           'ResultAnalysis', 'SkillTracker', 'aggregate_stats',
           'benchmark_analysis', 'benchmarker', 'combine_stats',
           'combine_stats_arrs', 'ensure_json_serializable',
           'find_json_unserializable', 'indexable_allclose', 'process_context',
           'result_analysis', 'stats_dict', 'util_json', 'util_stats',
           'visualize']

@@ -1,74 +0,0 @@
import json

import pandas as pd
import ubelt as ub


def demo_data():
    import numpy as np

    from json_benchmarks.benchmarker.benchmarker import Benchmarker

    impl_lut = {
        "numpy": np.sum,
        "builtin": sum,
    }

    def data_lut(params):
        item = 42 if params["dtype"] == "int" else 42.0
        data = [item] * params["size"]
        return data

    basis = {
        "impl": ["builtin", "numpy"],
        "size": [10, 10000],
        "dtype": ["int", "float"],
    }

    dpath = ub.Path.appdir("benchmarker/agg_demo").delete().ensuredir()

    def run_one_benchmark():
        self = Benchmarker(name="agg_demo", num=10, bestof=3, basis=basis)
        for params in self.iter_params():
            impl = impl_lut[params["impl"]]
            data = data_lut(params)
            for timer in self.measure():
                with timer:
                    impl(data)
        fpath = self.dump_in_dpath(dpath)
        return fpath

    # Run the benchmark multiple times
    fpaths = []
    for _ in range(5):
        fpath = run_one_benchmark()
        fpaths.append(fpath)

    return fpaths


def demo():
    from json_benchmarks.benchmarker import BenchmarkerResult, result_analysis

    fpaths = demo_data()

    results = []
    for fpath in fpaths:
        data = json.loads(fpath.read_text())
        for row in data["rows"]:
            result = BenchmarkerResult.load(fpath)
            results.extend(result.to_result_list())

    analysis = result_analysis.ResultAnalysis(
        results,
        metrics=["min", "mean"],
        params=["impl"],
        metric_objectives={
            "min": "min",
            "mean": "min",
        },
    )
    analysis.analysis()
    # single_df = pd.DataFrame(data['rows'])
    # context = data['context']
    # single_df

@@ -168,3 +168,65 @@ class Benchmarker:
                "name": key,
            }
            rows.append(row)


def _test_demo():
    from json_benchmarks.benchmarker import BenchmarkerResult, result_analysis
    from json_benchmarks.benchmarker.benchmarker import Benchmarker
    import numpy as np

    impl_lut = {
        "numpy": np.sum,
        "builtin": sum,
    }

    def data_lut(params):
        item = 42 if params["dtype"] == "int" else 42.0
        data = [item] * params["size"]
        return data

    basis = {
        "impl": ["builtin", "numpy"],
        "size": [10, 10000],
        "dtype": ["int", "float"],
    }

    dpath = ub.Path.appdir("benchmarker/agg_demo").delete().ensuredir()

    def run_one_benchmark():
        self = Benchmarker(name="agg_demo", num=10, bestof=3, basis=basis)
        for params in self.iter_params():
            impl = impl_lut[params["impl"]]
            data = data_lut(params)
            for timer in self.measure():
                with timer:
                    impl(data)
        fpath = self.dump_in_dpath(dpath)
        return fpath

    # Run the benchmark multiple times
    fpaths = []
    for _ in range(5):
        fpath = run_one_benchmark()
        fpaths.append(fpath)

    results = []
    for fpath in fpaths:
        data = json.loads(fpath.read_text())
        for row in data["rows"]:
            result = BenchmarkerResult.load(fpath)
            results.extend(result.to_result_list())

    analysis = result_analysis.ResultAnalysis(
        results,
        metrics=["min", "mean"],
        params=["impl"],
        metric_objectives={
            "min": "min",
            "mean": "min",
        },
    )
    analysis.analysis()
    # single_df = pd.DataFrame(data['rows'])
    # context = data['context']
    # single_df

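Editor's note: the basis dictionaries in _test_demo (and in benchmark_json later in this commit) define a parameter grid, and each benchmark row corresponds to one point in the Cartesian product of the listed values. A small sketch of that expansion using ubelt's named_product helper; Benchmarker.iter_params presumably does something equivalent internally, so this is for illustration only.

import ubelt as ub

basis = {
    "impl": ["builtin", "numpy"],
    "size": [10, 10000],
    "dtype": ["int", "float"],
}
# Expands to 2 * 2 * 2 = 8 parameter dictionaries, one per grid point.
for params in ub.named_product(basis):
    print(params)
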
@@ -935,8 +935,12 @@ class ResultAnalysis(ub.NiceRepr):
                _ydata_mean = combo_group[ylabel].values
                _ydata_std = combo_group[std_label].values
                std_label = ylabel.replace("mean_", "std_")
                y_data_min = _ydata_mean - _ydata_std
                y_data_max = _ydata_mean + _ydata_std

                # Plot bars 3 standard deviations from the mean to
                # get a 99.7% interval
                num_std = 3
                y_data_min = _ydata_mean - num_std * _ydata_std
                y_data_max = _ydata_mean + num_std * _ydata_std
                spread_alpha = 0.3
                color = palette[subgroup_key]
                ax.fill_between(

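Editor's note: the num_std = 3 band introduced here leans on the empirical rule that, for roughly normal timing noise, mean ± 3·std covers about 99.7% of samples. A standalone fill_between sketch with synthetic numbers (illustrative only; it is not the library's plotting code):

import numpy as np
import matplotlib.pyplot as plt

# Synthetic data standing in for the per-group mean/std columns used above.
x = np.arange(1, 11)
mean = 1.0 / x
std = 0.05 * mean

num_std = 3  # +/- 3 sigma covers ~99.7% of a normal distribution
lo = mean - num_std * std
hi = mean + num_std * std

fig, ax = plt.subplots()
ax.plot(x, mean, color="C0")
ax.fill_between(x, lo, hi, color="C0", alpha=0.3)
plt.show()
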
@@ -1,24 +1,14 @@
"""
Main definition of the benchmarks
"""
import json

import scriptconfig as scfg
import ubelt as ub

from json_benchmarks import benchmarker, datagen
from json_benchmarks.benchmarker import util_stats

KNOWN_LIBRARIES = [
    "ujson",
    "nujson",
    "orjson",
    "simplejson",
    "json",
]
from json_benchmarks import measures
from json_benchmarks import analysis


class JSONBenchmarkConfig(scfg.Config):
class CoreConfig(scfg.Config):
    """
    Benchmark JSON implementations
    """

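Editor's note: CoreConfig, like AnalysisConfig and MeasurementConfig elsewhere in this commit, follows the usual scriptconfig pattern: defaults are declared as scfg.Value entries and normalize() coerces or fills in values. A minimal standalone sketch of that pattern; DemoConfig and its key are hypothetical and not part of the commit.

import scriptconfig as scfg
import ubelt as ub


class DemoConfig(scfg.Config):
    # Hypothetical config that mirrors the pattern used by the configs in this refactor.
    default = {
        "cache_dir": scfg.Value(None, help="where to cache results"),
    }

    def normalize(self):
        # Resolve a platform-appropriate default and coerce to ub.Path
        dpath = self["cache_dir"]
        if dpath is None:
            dpath = ub.Path.appdir("demo_app/cache")
        self["cache_dir"] = ub.Path(dpath)


config = DemoConfig(data={})
config.normalize()
print(config["cache_dir"])
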
@@ -43,24 +33,7 @@ class JSONBenchmarkConfig(scfg.Config):
                """
            ),
        ),
        "disable": scfg.Value(
            [],
            choices=KNOWN_LIBRARIES,
            help=ub.paragraph(
                """
                Remove specified libraries from the benchmarks
                """
            ),
        ),
        "factor": scfg.Value(
            1.0,
            help=ub.paragraph(
                """
                Specify as a fraction to speed up benchmarks for development /
                testing
                """
            ),
        ),

        "cache_dir": scfg.Value(
            None,
            help=ub.paragraph(

@@ -80,190 +53,6 @@ class JSONBenchmarkConfig(scfg.Config):
        self["cache_dir"] = dpath


def available_json_impls():
    import importlib

    known_modnames = KNOWN_LIBRARIES
    json_impls = {}
    for libname in known_modnames:
        try:
            module = importlib.import_module(libname)
        except ImportError:
            pass
        else:
            json_impls[libname] = {
                "module": module,
                "version": module.__version__,
            }
    return json_impls


def benchmark_json():
    json_impls = available_json_impls()

    data_lut = datagen.json_test_data_generators()

    # These are the parameters that we benchmark over
    common_basis = {
        "impl": list(json_impls.keys()),
        "func": ["dumps", "loads"],
    }
    sized_basis = {
        "input": [
            "Array with doubles",
            "Array with UTF-8 strings",
            # 'Medium complex object',
            "Array with True values",
            "Array of Dict[str, int]",
            # 'Dict of List[Dict[str, int]]',
            # 'Complex object'
        ],
        "size": [1, 2, 4, 8, 16, 32, 128, 256, 512],
        # 1024, 2048, 4096, 8192, 12288],
    }
    predefined_basis = {
        "input": ["Complex object"],
        "size": [None],
    }

    basis = [
        ub.dict_union(common_basis, predefined_basis),
        ub.dict_union(common_basis, sized_basis),
    ]

    # The Benchmarker class is a new experimental API around timerit to
    # abstract away the details of timing a process over a grid of parameters,
    # serializing the results, and aggregating results from disparate runs.
    benchmark = benchmarker.Benchmarker(
        name="bench_json",
        num=1000,
        bestof=100,
        verbose=3,
        basis=basis,
    )

    def is_blocked(params):
        if params["input"] == "Complex object" and params["impl"] == "orjson":
            return True

    # For each variation of your experiment, create a row.
    for params in benchmark.iter_params():
        if is_blocked(params):
            continue
        # Make any modifications you need to compute input kwargs for each
        # method here.
        impl_info = json_impls[params["impl"]]
        params["impl_version"] = impl_info["version"]
        module = impl_info["module"]
        if params["func"] == "dumps":
            method = module.dumps
            data = data_lut[params["input"]](params["size"])
        elif params["func"] == "loads":
            method = module.loads
            to_encode = data_lut[params["input"]](params["size"])
            data = json.dumps(to_encode)
        # Timerit will run some user-specified number of loops.
        # and compute time stats with similar methodology to timeit
        for timer in benchmark.measure():
            # Put any setup logic you dont want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(data)

    dpath = ub.Path.appdir("ujson/benchmark_results").ensuredir()
    result_fpath = benchmark.dump_in_dpath(dpath)
    return result_fpath


def analyze_results(result_fpaths):
    import json

    results = []
    for fpath in ub.ProgIter(result_fpaths, desc="load results"):
        data = json.loads(fpath.read_text())
        for row in data["rows"]:
            result = benchmarker.BenchmarkerResult.load(fpath)
            results.extend(result.to_result_list())

    RECORD_ALL = 0
    metric_key = "time" if RECORD_ALL else "mean_time"

    # results = benchmark.result.to_result_list()

    analysis = benchmarker.result_analysis.ResultAnalysis(
        results,
        metrics=[metric_key],
        params=["impl"],
        metric_objectives={
            "min_time": "min",
            "mean_time": "min",
            "time": "min",
        },
    )
    analysis.analysis()

    table = analysis.table
    stats_table = util_stats.aggregate_stats(table, suffix="_time", group_keys=["name"])

    single_size = stats_table[
        (stats_table["size"] == 256) | stats_table["size"].isnull()
    ]
    # single_size_combo = aggregate_stats(single_size, None)
    single_size_combo = util_stats.aggregate_stats(
        single_size, suffix="_time", group_keys=["name"]
    )

    param_group = ["impl", "impl_version"]
    single_size_combo["calls/sec"] = 1 / single_size_combo["mean_time"]
    # _single_size_combo = single_size_combo.copy()
    # _single_size_combo["calls/sec"] = _single_size_combo["calls/sec"].apply(
    #
    # )
    time_piv = single_size_combo.pivot(["input", "func"], param_group, "mean_time")

    hz_piv = 1 / time_piv
    # hzstr_piv = (1 / time_piv).applymap(lambda x: f"{x:,.02f}")
    print("Table for size=256")
    # print(hzstr_piv.to_markdown())
    print(hz_piv.to_markdown(floatfmt=",.02f"))
    print("")
    print("Above metrics are in call/sec, larger is better.")

    speedup_piv = hz_piv / hz_piv["json"].values
    print(speedup_piv.to_markdown(floatfmt=",.02g"))

    analysis.abalate(param_group)
    # benchmark_analysis(rows, xlabel, group_labels, basis, RECORD_ALL)

    xlabel = "size"
    # Set these to empty lists if they are not used
    group_labels = {
        "fig": ["input"],
        "col": ["func"],
        # "fig": [],
        # "col": ["func" "input"],
        "hue": ["impl", "impl_version"],
        "size": [],
    }
    import kwplot

    kwplot.autosns()
    self = analysis

    data = stats_table
    plots = analysis.plot(
        xlabel,
        metric_key,
        group_labels,
        xscale="log",
        yscale="log",
        data=data,
    )
    plots
    kwplot.show_if_requested()


def main(cmdline=True, **kwargs):
    """
    Example:

@@ -276,12 +65,12 @@ def main(cmdline=True, **kwargs):
        >>> kwargs = {}
        >>> main(cmdline, **kwargs)
    """
    config = JSONBenchmarkConfig(cmdline=cmdline, data=kwargs)
    config = CoreConfig(cmdline=cmdline, data=kwargs)
    dpath = config["cache_dir"]

    run = config["mode"] in {"all", "single", "run"}
    if run:
        result_fpath = benchmark_json()
        result_fpath = measures.benchmark_json()
        print(f"result_fpath = {result_fpath!r}")
        result_fpaths = [result_fpath]

@@ -291,4 +80,4 @@ def main(cmdline=True, **kwargs):

    analyze = config["mode"] in {"all", "single", "analyze"}
    if analyze:
        analyze_results(result_fpaths)
        analysis.analyze_results(result_fpaths)

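Editor's note: after the refactor, the run/analyze split in main() reduces to two calls into the new submodules. A sketch of driving them directly, assuming the json_benchmarks package and its optional plotting dependencies are installed (this is not part of the commit):

from json_benchmarks import measures, analysis

result_fpath = measures.benchmark_json()    # time the grid and dump a JSON result file
analysis.analyze_results([result_fpath])    # reload the dump, print tables, and plot
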
@@ -0,0 +1,67 @@
"""
Define the json libraries we are considering
"""

KNOWN_LIBRARIES = [
    {'modname': "ujson", 'distname': 'ujson'},
    {'modname': "nujson", 'distname': 'nujson'},
    {'modname': "orjson", 'distname': 'orjson'},
    {'modname': "simplejson", 'distname': 'simplejson'},
    {'modname': "json", 'distname': "<stdlib>"},
    {'modname': "simdjson", 'distname': 'pysimdjson'},
]

KNOWN_MODNAMES = [info['modname'] for info in KNOWN_LIBRARIES]


# TODO:
# def distname_to_modnames(distname):
#     # TODO: nice way to switch between a module's import name and its distribution name
#     # References:
#     # https://stackoverflow.com/questions/49764802/get-module-name-programmatically-with-only-pypi-package-name/49764960#49764960
#     import distlib.database
#     distlib.database.DistributionPath().get_distribution(distname)
#     # import importlib.metadata
#     # importlib.metadata.metadata(distname)
#     # importlib.util.find_spec(modname)
#     # import simdjson
#     # import pkg_resources
#     # pkg_resources.get_distribution('pysimdjson')


def available_json_impls():
    """
    Return a dictionary of information about each json implementation

    Example:
        >>> from json_benchmarks.libraries import *  # NOQA
        >>> import ubelt as ub
        >>> json_impls = available_json_impls()
        >>> print('json_impls = {}'.format(ub.repr2(json_impls, nl=1)))
    """
    import importlib
    known_libinfo = KNOWN_LIBRARIES
    json_impls = {}
    for libinfo in known_libinfo:
        modname = libinfo['modname']
        distname = libinfo['distname']
        try:
            module = importlib.import_module(modname)
        except ImportError:
            pass
        else:
            import pkg_resources
            mod_version = getattr(module, '__version__', None)
            if distname == '<stdlib>':
                pkg_version = mod_version
            else:
                pkg_version = pkg_resources.get_distribution(distname).version
            if mod_version is not None:
                assert mod_version == pkg_version
            version = pkg_version
            json_impls[modname] = {
                "module": module,
                "modname": modname,
                "distname": distname,
                "version": version,
            }
    return json_impls

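Editor's note: the TODO above asks for a clean mapping between a module's import name and its distribution name. For the version-lookup half of available_json_impls, one stdlib-only alternative to pkg_resources is importlib.metadata (Python 3.8+). A small sketch; the helper name impl_version is hypothetical and not in the commit.

import importlib
import importlib.metadata  # Python 3.8+


def impl_version(modname, distname):
    # Prefer the installed distribution's version; fall back to the module's
    # own __version__ for "<stdlib>" entries like the builtin json module.
    module = importlib.import_module(modname)
    if distname == "<stdlib>":
        return getattr(module, "__version__", None)
    return importlib.metadata.version(distname)


print(impl_version("json", "<stdlib>"))
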
@@ -0,0 +1,126 @@
"""
The definitions of the measurements we want to take
"""
import scriptconfig as scfg
import ubelt as ub
import json
from json_benchmarks import libraries


class MeasurementConfig(scfg.Config):
    default = {
        "disable": scfg.Value(
            [],
            choices=libraries.KNOWN_MODNAMES,
            help=ub.paragraph(
                """
                Remove specified libraries from the benchmarks
                """
            ),
        ),
        "factor": scfg.Value(
            1.0,
            help=ub.paragraph(
                """
                Specify as a fraction to speed up benchmarks for development /
                testing
                """
            ),
        ),
        "cache_dir": scfg.Value(
            None,
            help=ub.paragraph(
                """
                Location for benchmark cache.
                Defaults to $XDG_CACHE/ujson/benchmark_results/
                """
            ),
        ),
    }

    def normalize(self):
        dpath = self["cache_dir"]
        if dpath is None:
            dpath = ub.Path.appdir("ujson/benchmark_results")
        dpath = ub.Path(dpath)
        self["cache_dir"] = dpath


def benchmark_json():
    from json_benchmarks import benchmarker
    from json_benchmarks import datagen
    from json_benchmarks import libraries

    json_impls = libraries.available_json_impls()
    data_lut = datagen.json_test_data_generators()

    # These are the parameters that we benchmark over
    common_basis = {
        "impl": list(json_impls.keys()),
        "func": ["dumps", "loads"],
    }
    sized_basis = {
        "input": [
            "Array with doubles",
            "Array with UTF-8 strings",
            # 'Medium complex object',
            "Array with True values",
            "Array of Dict[str, int]",
            # 'Dict of List[Dict[str, int]]',
            # 'Complex object'
        ],
        "size": [1, 2, 4, 8, 16, 32, 128, 256, 512, 1024, 2048, 4096, 8192, 12288],
    }
    predefined_basis = {
        "input": ["Complex object"],
        "size": [None],
    }

    basis = [
        ub.dict_union(common_basis, predefined_basis),
        ub.dict_union(common_basis, sized_basis),
    ]

    # The Benchmarker class is a new experimental API around timerit to
    # abstract away the details of timing a process over a grid of parameters,
    # serializing the results, and aggregating results from disparate runs.
    benchmark = benchmarker.Benchmarker(
        name="bench_json",
        num=1000,
        bestof=100,
        verbose=3,
        basis=basis,
    )

    def is_blocked(params):
        if params["input"] == "Complex object" and params["impl"] == "orjson":
            return True

    # For each variation of your experiment, create a row.
    for params in benchmark.iter_params():
        if is_blocked(params):
            continue
        # Make any modifications you need to compute input kwargs for each
        # method here.
        impl_info = json_impls[params["impl"]]
        params["impl_version"] = impl_info["version"]
        module = impl_info["module"]
        if params["func"] == "dumps":
            method = module.dumps
            data = data_lut[params["input"]](params["size"])
        elif params["func"] == "loads":
            method = module.loads
            to_encode = data_lut[params["input"]](params["size"])
            data = json.dumps(to_encode)
        # Timerit will run some user-specified number of loops
        # and compute time stats with a methodology similar to timeit.
        for timer in benchmark.measure():
            # Put any setup logic you don't want to time here.
            # ...
            with timer:
                # Put the logic you want to time here
                method(data)

    dpath = ub.Path.appdir("ujson/benchmark_results").ensuredir()
    result_fpath = benchmark.dump_in_dpath(dpath)
    return result_fpath

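Editor's note: the comment above describes Benchmarker as a wrapper around timerit. For reference, the underlying timerit measurement loop looks like this; a minimal sketch independent of the Benchmarker API and not part of the commit.

import timerit

# The outer loop repeats `num` times, only the `with timer` block is timed,
# and `bestof` keeps the best of several consecutive runs (timeit-style).
ti = timerit.Timerit(num=100, bestof=10, verbose=1)
for timer in ti:
    data = list(range(1000))  # untimed setup
    with timer:
        sum(data)             # timed body
print(f"min = {ti.min():.3g} s, mean = {ti.mean():.3g} s")
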