mirror of
https://github.com/ultrajson/ultrajson.git
synced 2024-05-29 23:16:07 +02:00
495 lines
14 KiB
Python
495 lines
14 KiB
Python
# coding=UTF-8
|
|
import json
|
|
import os
|
|
import platform
|
|
import random
|
|
import sys
|
|
import timeit
|
|
from collections import defaultdict
|
|
|
|
import ujson
|
|
|
|
# Will be set by "main" if user requests them
|
|
simplejson = None
|
|
nujson = None
|
|
orjson = None
|
|
|
|
USER = {
|
|
"userId": 3381293,
|
|
"age": 213,
|
|
"username": "johndoe",
|
|
"fullname": "John Doe the Second",
|
|
"isAuthorized": True,
|
|
"liked": 31231.31231202,
|
|
"approval": 31.1471,
|
|
"jobs": [1, 2],
|
|
"currJob": None,
|
|
}
|
|
FRIENDS = [USER, USER, USER, USER, USER, USER, USER, USER]
|
|
|
|
decode_data = None
|
|
test_object = None
|
|
|
|
benchmark_results = []
|
|
|
|
|
|
benchmark_registry = defaultdict(dict)
|
|
|
|
|
|
# =============================================================================
|
|
# Benchmark registration
|
|
# =============================================================================
|
|
|
|
|
|
def register_benchmark(libname, testname):
|
|
def _wrap(func):
|
|
benchmark_registry[testname][libname] = func
|
|
return func
|
|
|
|
return _wrap
|
|
|
|
|
|
# =============================================================================
|
|
# Logging benchmarking results.
|
|
# =============================================================================
|
|
def results_new_benchmark(name):
|
|
benchmark_results.append((name, {}, {}))
|
|
print(name)
|
|
|
|
|
|
def results_record_result(callback, is_encode, count):
|
|
callback_name = callback.__name__
|
|
library = callback_name.split("_")[-1]
|
|
try:
|
|
results = timeit.repeat(
|
|
f"{callback_name}()",
|
|
f"from __main__ import {callback_name}",
|
|
repeat=10,
|
|
number=count,
|
|
)
|
|
except (TypeError, json.decoder.JSONDecodeError):
|
|
return
|
|
result = count / min(results)
|
|
benchmark_results[-1][1 if is_encode else 2][library] = result
|
|
|
|
print(
|
|
"{} {}: {:,.02f} calls/sec".format(
|
|
library, "encode" if is_encode else "decode", result
|
|
)
|
|
)
|
|
|
|
|
|
def results_output_table(libraries):
|
|
uname_system, _, uname_release, uname_version, _, uname_processor = platform.uname()
|
|
print()
|
|
print("### Test machine")
|
|
print()
|
|
print(uname_system, uname_release, uname_processor, uname_version)
|
|
print()
|
|
print("### Versions")
|
|
print()
|
|
print(
|
|
"- {} {}".format(
|
|
platform.python_implementation(), sys.version.replace("\n", "")
|
|
)
|
|
)
|
|
for libname in libraries:
|
|
module = globals()[libname]
|
|
print(f"- {libname:<12} : {module.__version__}")
|
|
print()
|
|
|
|
column_widths = [max(len(r[0]) for r in benchmark_results)]
|
|
for library in libraries:
|
|
column_widths.append(max(10, len(library)))
|
|
|
|
columns = [" " * (width + 2) for width in column_widths]
|
|
for i, library in enumerate(libraries):
|
|
columns[i + 1] = (" " + library).ljust(column_widths[i + 1] + 2)
|
|
print("|{}|".format("|".join(columns)))
|
|
|
|
line = (
|
|
"|"
|
|
+ "-" * (column_widths[0] + 2)
|
|
+ "|"
|
|
+ ":|".join("-" * (width + 1) for width in column_widths[1:])
|
|
+ ":|"
|
|
)
|
|
print(line)
|
|
|
|
for name, encodes, decodes in benchmark_results:
|
|
columns = [" " * (width + 2) for width in column_widths]
|
|
columns[0] = (" " + name).ljust(column_widths[0] + 2)
|
|
print("|{}|".format("|".join(columns)))
|
|
|
|
columns = [None] * len(column_widths)
|
|
columns[0] = " encode".ljust(column_widths[0] + 2)
|
|
for i, library in enumerate(libraries):
|
|
if library in encodes:
|
|
columns[i + 1] = f"{encodes[library]:,.0f} ".rjust(
|
|
column_widths[i + 1] + 2
|
|
)
|
|
else:
|
|
columns[i + 1] = " " * (column_widths[i + 1] + 2)
|
|
print("|{}|".format("|".join(columns)))
|
|
|
|
if decodes:
|
|
columns = [None] * len(column_widths)
|
|
columns[0] = " decode".ljust(column_widths[0] + 2)
|
|
for i, library in enumerate(libraries):
|
|
if library in decodes:
|
|
columns[i + 1] = f"{decodes[library]:,.0f} ".rjust(
|
|
column_widths[i + 1] + 2
|
|
)
|
|
else:
|
|
columns[i + 1] = " " * (column_widths[i + 1] + 2)
|
|
print("|{}|".format("|".join(columns)))
|
|
|
|
print()
|
|
print("Above metrics are in call/sec, larger is better.")
|
|
|
|
|
|
# =============================================================================
|
|
# JSON encoding.
|
|
# =============================================================================
|
|
|
|
_testname = "dumps"
|
|
|
|
|
|
@register_benchmark("json", _testname)
|
|
def dumps_with_json():
|
|
json.dumps(test_object)
|
|
|
|
|
|
@register_benchmark("nujson", _testname)
|
|
def dumps_with_nujson():
|
|
nujson.dumps(test_object)
|
|
|
|
|
|
@register_benchmark("orjson", _testname)
|
|
def dumps_with_orjson():
|
|
orjson.dumps(test_object)
|
|
|
|
|
|
@register_benchmark("simplejson", _testname)
|
|
def dumps_with_simplejson():
|
|
simplejson.dumps(test_object)
|
|
|
|
|
|
@register_benchmark("ujson", _testname)
|
|
def dumps_with_ujson():
|
|
ujson.dumps(test_object, ensure_ascii=False)
|
|
|
|
|
|
# =============================================================================
|
|
# JSON encoding with sort_keys=True.
|
|
# =============================================================================
|
|
|
|
_testname = "dumps-sort_keys=True"
|
|
|
|
|
|
@register_benchmark("json", _testname)
|
|
def dumps_sorted_with_json():
|
|
json.dumps(test_object, sort_keys=True)
|
|
|
|
|
|
@register_benchmark("simplejson", _testname)
|
|
def dumps_sorted_with_simplejson():
|
|
simplejson.dumps(test_object, sort_keys=True)
|
|
|
|
|
|
@register_benchmark("nujson", _testname)
|
|
def dumps_sorted_with_nujson():
|
|
nujson.dumps(test_object, sort_keys=True)
|
|
|
|
|
|
@register_benchmark("orjson", _testname)
|
|
def dumps_sorted_with_orjson():
|
|
orjson.dumps(test_object, sort_keys=True)
|
|
|
|
|
|
@register_benchmark("ujson", _testname)
|
|
def dumps_sorted_with_ujson():
|
|
ujson.dumps(test_object, ensure_ascii=False, sort_keys=True)
|
|
|
|
|
|
# =============================================================================
|
|
# JSON decoding.
|
|
# =============================================================================
|
|
|
|
_testname = "loads"
|
|
|
|
|
|
@register_benchmark("json", _testname)
|
|
def loads_with_json():
|
|
json.loads(decode_data)
|
|
|
|
|
|
@register_benchmark("nujson", _testname)
|
|
def loads_with_nujson():
|
|
nujson.loads(decode_data)
|
|
|
|
|
|
@register_benchmark("orjson", _testname)
|
|
def loads_with_orjson():
|
|
orjson.loads(decode_data)
|
|
|
|
|
|
@register_benchmark("simplejson", _testname)
|
|
def loads_with_simplejson():
|
|
simplejson.loads(decode_data)
|
|
|
|
|
|
@register_benchmark("ujson", _testname)
|
|
def loads_with_ujson():
|
|
ujson.loads(decode_data)
|
|
|
|
|
|
# =============================================================================
|
|
# Benchmarks.
|
|
# =============================================================================
|
|
def run_decode(count, libraries):
|
|
_testname = "loads"
|
|
for libname in libraries:
|
|
func = benchmark_registry[_testname][libname]
|
|
results_record_result(func, False, count)
|
|
|
|
|
|
def run_encode(count, libraries):
|
|
_testname = "dumps"
|
|
for libname in libraries:
|
|
func = benchmark_registry[_testname][libname]
|
|
results_record_result(func, True, count)
|
|
|
|
|
|
def run_encode_sort_keys(count, libraries):
|
|
_testname = "dumps-sort_keys=True"
|
|
for libname in libraries:
|
|
func = benchmark_registry[_testname][libname]
|
|
results_record_result(func, True, count)
|
|
|
|
|
|
def benchmark_array_doubles(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Array with 256 doubles")
|
|
COUNT = max(int(10000 * factor), 1)
|
|
|
|
test_object = []
|
|
for x in range(256):
|
|
test_object.append(sys.maxsize * random.random())
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
def benchmark_array_utf8_strings(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Array with 256 UTF-8 strings")
|
|
COUNT = max(int(2000 * factor), 1)
|
|
|
|
test_object = []
|
|
for x in range(256):
|
|
test_object.append(
|
|
"نظام الحكم سلطاني وراثي "
|
|
"في الذكور من ذرية السيد تركي بن سعيد بن سلطان ويشترط فيمن يختار لولاية"
|
|
" الحكم من بينهم ان يكون مسلما رشيدا عاقلا ًوابنا شرعيا لابوين عمانيين "
|
|
)
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
def benchmark_array_byte_strings(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Array with 256 strings")
|
|
COUNT = max(int(10000 * factor), 1)
|
|
|
|
test_object = []
|
|
for x in range(256):
|
|
test_object.append("A pretty long string which is in a list")
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
def benchmark_medium_complex_object(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Medium complex object")
|
|
COUNT = max(int(5000 * factor), 1)
|
|
|
|
test_object = [
|
|
[USER, FRIENDS],
|
|
[USER, FRIENDS],
|
|
[USER, FRIENDS],
|
|
[USER, FRIENDS],
|
|
[USER, FRIENDS],
|
|
[USER, FRIENDS],
|
|
]
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
def benchmark_array_true_values(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Array with 256 True values")
|
|
COUNT = max(int(50000 * factor), 1)
|
|
|
|
test_object = []
|
|
for x in range(256):
|
|
test_object.append(True)
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
def benchmark_array_of_dict_string_int_pairs(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Array with 256 dict{string, int} pairs")
|
|
COUNT = max(int(5000 * factor), 1)
|
|
|
|
test_object = []
|
|
for x in range(256):
|
|
test_object.append({str(random.random() * 20): int(random.random() * 1000000)})
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
def benchmark_dict_of_arrays_of_dict_string_int_pairs(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Dict with 256 arrays with 256 dict{string, int} pairs")
|
|
COUNT = max(int(50 * factor), 1)
|
|
|
|
test_object = {}
|
|
for y in range(256):
|
|
arrays = []
|
|
for x in range(256):
|
|
arrays.append({str(random.random() * 20): int(random.random() * 1000000)})
|
|
test_object[str(random.random() * 20)] = arrays
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
results_new_benchmark(
|
|
"Dict with 256 arrays with 256 dict{string, int} pairs, outputting sorted keys"
|
|
)
|
|
run_encode_sort_keys(COUNT, libraries)
|
|
|
|
test_object = None
|
|
|
|
|
|
def benchmark_complex_object(libraries, factor=1):
|
|
global decode_data, test_object
|
|
results_new_benchmark("Complex object")
|
|
COUNT = int(100 * factor)
|
|
|
|
with open(os.path.join(os.path.dirname(__file__), "sample.json")) as f:
|
|
test_object = json.load(f)
|
|
run_encode(COUNT, libraries)
|
|
|
|
decode_data = json.dumps(test_object)
|
|
test_object = None
|
|
run_decode(COUNT, libraries)
|
|
|
|
decode_data = None
|
|
|
|
|
|
# =============================================================================
|
|
# Main.
|
|
# =============================================================================
|
|
|
|
|
|
def main():
|
|
import argparse
|
|
import importlib
|
|
|
|
parser = argparse.ArgumentParser(
|
|
prog="ujson-benchmarks",
|
|
description="Benchmark ujson against other json implementations",
|
|
)
|
|
|
|
known_libraries = [
|
|
"ujson",
|
|
"nujson",
|
|
"orjson",
|
|
"simplejson",
|
|
"json",
|
|
]
|
|
|
|
parser.add_argument(
|
|
"--disable",
|
|
nargs="+",
|
|
choices=known_libraries,
|
|
help=("Remove specified libraries from the benchmarks"),
|
|
default=[],
|
|
)
|
|
|
|
parser.add_argument(
|
|
"--factor",
|
|
type=float,
|
|
default=1.0,
|
|
help=("Specify as a fraction speed up benchmarks for development / testing"),
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
disabled_libraires = set(args.disable)
|
|
enabled_libraries = {}
|
|
for libname in known_libraries:
|
|
if libname not in disabled_libraires:
|
|
try:
|
|
module = importlib.import_module(libname)
|
|
except ImportError:
|
|
raise ImportError(f"{libname} is not available")
|
|
else:
|
|
enabled_libraries[libname] = module
|
|
|
|
# Ensure the modules are avilable in a the global scope
|
|
for libname, module in enabled_libraries.items():
|
|
print(f"Enabled {libname} benchmarks")
|
|
globals()[libname] = module
|
|
|
|
libraries = list(enabled_libraries.keys())
|
|
factor = args.factor
|
|
benchmark_array_doubles(libraries, factor)
|
|
benchmark_array_utf8_strings(libraries, factor)
|
|
benchmark_array_byte_strings(libraries, factor)
|
|
benchmark_medium_complex_object(libraries, factor)
|
|
benchmark_array_true_values(libraries, factor)
|
|
benchmark_array_of_dict_string_int_pairs(libraries, factor)
|
|
benchmark_dict_of_arrays_of_dict_string_int_pairs(libraries, factor)
|
|
benchmark_complex_object(libraries, factor)
|
|
|
|
results_output_table(libraries)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|