# Mirror of https://github.com/ultrajson/ultrajson.git (synced 2024-06-10 22:36:06 +02:00)
# File: ultrajson/tests/test_ujson.py (1240 lines, 35 KiB, Python)


import datetime as dt
import decimal
import enum
2020-03-15 09:51:19 +01:00
import io
import json
import math
2022-06-09 23:14:50 +02:00
import os.path
2020-03-08 09:46:49 +01:00
import re
2022-06-09 23:14:50 +02:00
import subprocess
import sys
import uuid
2020-03-08 18:35:01 +01:00
from collections import OrderedDict
from pathlib import Path
2012-06-11 19:41:56 +02:00
2020-03-08 09:46:49 +01:00
import pytest
2012-06-11 19:41:56 +02:00
import ujson
2012-06-12 00:27:04 +02:00
2012-06-11 19:41:56 +02:00
2020-03-08 09:46:49 +01:00
def assert_almost_equal(a, b):
    """Assert that ``a`` and ``b`` agree to seven decimal places.

    The absolute difference is rounded to 7 decimals and must equal zero;
    otherwise an ``AssertionError`` is raised.
    """
    assert round(abs(a - b), 7) == 0
2012-06-11 19:41:56 +02:00
2020-03-08 11:50:17 +01:00
def test_encode_decimal():
    """A ``decimal.Decimal`` round-trips through encode/decode to a float."""
    sut = decimal.Decimal("1337.1337")
    encoded = ujson.encode(sut)
    decoded = ujson.decode(encoded)
    assert decoded == 1337.1337
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_string_conversion():
    """Check string escaping for each combination of escaping options."""
    test_input = "A string \\ / \b \f \n \r \t </script> &"
    not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
    html_encoded = (
        '"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
    )
    not_slashes_escaped = '"A string \\\\ / \\b \\f \\n \\r \\t </script> &"'

    def helper(expected_output, **encode_kwargs):
        """Encode ``test_input`` and compare against ``expected_output``."""
        output = ujson.encode(test_input, **encode_kwargs)
        assert output == expected_output
        # The round-trip checks only run when forward slashes are escaped.
        if encode_kwargs.get("escape_forward_slashes", True):
            assert test_input == json.loads(output)
            assert test_input == ujson.decode(output)

    # Default behavior assumes encode_html_chars=False.
    helper(not_html_encoded, ensure_ascii=True)
    helper(not_html_encoded, ensure_ascii=False)

    # Make sure explicit encode_html_chars=False works.
    helper(not_html_encoded, ensure_ascii=True, encode_html_chars=False)
    helper(not_html_encoded, ensure_ascii=False, encode_html_chars=False)

    # Make sure explicit encode_html_chars=True does the encoding.
    helper(html_encoded, ensure_ascii=True, encode_html_chars=True)
    helper(html_encoded, ensure_ascii=False, encode_html_chars=True)

    # Do not escape forward slashes if disabled.
    helper(not_slashes_escaped, escape_forward_slashes=False)
2020-03-08 11:50:17 +01:00
def test_write_escaped_string():
    """With ``encode_html_chars=True``, ``<``, ``>`` and ``&`` become \\uXXXX escapes."""
    assert "\"\\u003cimg src='\\u0026amp;'\\/\\u003e\"" == ujson.dumps(
        "<img src='&amp;'/>", encode_html_chars=True
    )
2020-03-08 11:50:17 +01:00
def test_double_long_issue():
    """A large negative integer round-trips through both json and ujson."""
    sut = {"a": -4342969734183514}

    # Reference behaviour from the standard library.
    encoded = json.dumps(sut)
    decoded = json.loads(encoded)
    assert sut == decoded

    encoded = ujson.encode(sut)
    decoded = ujson.decode(encoded)
    assert sut == decoded
2020-03-08 11:50:17 +01:00
def test_double_long_decimal_issue():
    """A float with many significant digits round-trips through json and ujson."""
    sut = {"a": -12345678901234.56789012}

    # Reference behaviour from the standard library.
    encoded = json.dumps(sut)
    decoded = json.loads(encoded)
    assert sut == decoded

    encoded = ujson.encode(sut)
    decoded = ujson.decode(encoded)
    assert sut == decoded
2020-11-11 16:54:57 +01:00
# NOTE: can't match exponents -9 to -5; Python 0-pads
@pytest.mark.parametrize("val", [1e-10, 1e-4, 1e10, 1e15, 1e16, 1e30])
def test_encode_float_string_rep(val):
    """ujson and json produce the same text for these float values."""
    assert ujson.dumps(val) == json.dumps(val)
2020-03-08 11:50:17 +01:00
def test_encode_decode_long_decimal():
    """Encoding then decoding a long decimal must not raise (no value check)."""
    sut = {"a": -528656961.4399388}
    encoded = ujson.dumps(sut)
    ujson.decode(encoded)
2020-03-08 11:50:17 +01:00
def test_decimal_decode_test():
    """A simple float survives a round trip to within seven decimal places."""
    sut = {"a": 4.56}
    encoded = ujson.encode(sut)
    decoded = ujson.decode(encoded)
    assert_almost_equal(sut["a"], decoded["a"])
2020-03-08 11:50:17 +01:00
def test_encode_double_conversion():
    """Encoded pi decodes (via json and ujson) to pi at five decimal places."""
    test_input = math.pi
    output = ujson.encode(test_input)
    assert round(test_input, 5) == round(json.loads(output), 5)
    assert round(test_input, 5) == round(ujson.decode(output), 5)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_double_neg_conversion():
    """Encoded -pi decodes (via json and ujson) to -pi at five decimal places."""
    test_input = -math.pi
    output = ujson.encode(test_input)

    assert round(test_input, 5) == round(json.loads(output), 5)
    assert round(test_input, 5) == round(ujson.decode(output), 5)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_array_of_nested_arrays():
    """A list of twenty deeply nested empty lists round-trips unchanged."""
    test_input = [[[[]]]] * 20
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    # assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_array_of_doubles():
    """A list of forty identical floats round-trips unchanged."""
    test_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    # assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_string_conversion2():
    """Backslash, slash and control characters are escaped with default options."""
    test_input = "A string \\ / \b \f \n \r \t"
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
    assert test_input == ujson.decode(output)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_control_escaping():
    """Control character 0x19 round-trips and is encoded exactly as json does."""
    test_input = "\x19"
    enc = ujson.encode(test_input)
    dec = ujson.decode(enc)
    assert test_input == dec
    assert enc == json.dumps(test_input)
2020-03-08 09:46:49 +01:00
# Characters outside of the Basic Multilingual Plane (larger than 16 bits)
# are represented as \UXXXXXXXX in Python but should be encoded as
# \uXXXX\uXXXX in JSON.
def test_encode_unicode_bmp():
    """Non-BMP characters match json's output, both escaped and unescaped."""
    s = "\U0001f42e\U0001f42e\U0001F42D\U0001F42D"  # 🐮🐮🐭🐭
    encoded = ujson.dumps(s)
    encoded_json = json.dumps(s)

    # Each character becomes a 12-character \uXXXX\uXXXX pair, plus two quotes.
    assert len(encoded) == len(s) * 12 + 2
    assert encoded == encoded_json
    decoded = ujson.loads(encoded)
    assert s == decoded

    # With ensure_ascii=False the characters are emitted unescaped.
    encoded = ujson.dumps(s, ensure_ascii=False)
    encoded_json = json.dumps(s, ensure_ascii=False)
    assert len(encoded) == len(s) + 2  # original length + quotes
    assert encoded == encoded_json
    decoded = ujson.loads(encoded)
    assert s == decoded
2020-03-08 11:50:17 +01:00
def test_encode_symbols():
    """BMP symbols match json's output, both escaped and unescaped."""
    s = "\u273f\u2661\u273f"  # ✿♡✿
    encoded = ujson.dumps(s)
    encoded_json = json.dumps(s)
    assert len(encoded) == len(s) * 6 + 2  # 6 characters + quotes
    assert encoded == encoded_json
    decoded = ujson.loads(encoded)
    assert s == decoded

    # With ensure_ascii=False the characters are emitted unescaped.
    encoded = ujson.dumps(s, ensure_ascii=False)
    encoded_json = json.dumps(s, ensure_ascii=False)
    assert len(encoded) == len(s) + 2  # original length + quotes
    assert encoded == encoded_json
    decoded = ujson.loads(encoded)
    assert s == decoded
2020-03-08 11:50:17 +01:00
def test_encode_long_neg_conversion():
    """The value -2**63 round-trips and is encoded exactly as json does."""
    test_input = -9223372036854775808
    output = ujson.encode(test_input)

    assert test_input == json.loads(output)
    assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_list_conversion():
    """A flat list of ints round-trips via json and ujson decoding."""
    test_input = [1, 2, 3, 4]
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    assert test_input == ujson.decode(output)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_dict_conversion():
    """A flat string-keyed dict round-trips via json and ujson decoding."""
    test_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    assert test_input == ujson.decode(output)
2020-03-08 09:46:49 +01:00
2020-06-12 15:08:27 +02:00
@pytest.mark.skipif(
    hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
def test_encode_dict_values_ref_counting():
    """Encoding a dict leaves the reference count of its value unchanged."""
    import gc

    gc.collect()
    value = ["abc"]
    data = {"1": value}
    ref_count = sys.getrefcount(value)
    ujson.dumps(data)
    assert ref_count == sys.getrefcount(value)
2020-06-12 15:08:27 +02:00
@pytest.mark.skipif(
    hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
@pytest.mark.parametrize("key", ["key", b"key", 1, True, False, None])
@pytest.mark.parametrize("sort_keys", [False, True])
def test_encode_dict_key_ref_counting(key, sort_keys):
    """Encoding a dict leaves the reference count of its key unchanged."""
    import gc

    gc.collect()
    data = {key: "abc"}
    ref_count = sys.getrefcount(key)
    ujson.dumps(data, sort_keys=sort_keys)
    assert ref_count == sys.getrefcount(key)
2020-03-08 11:50:17 +01:00
def test_encode_to_utf8():
    """Non-ASCII text with ``ensure_ascii=False`` matches json in both directions."""
    test_input = b"\xe6\x97\xa5\xd1\x88".decode("utf-8")
    enc = ujson.encode(test_input, ensure_ascii=False)
    dec = ujson.decode(enc)
    assert enc == json.dumps(test_input, ensure_ascii=False)
    assert dec == json.loads(enc)
@pytest.mark.parametrize(
    "test_input",
    [
        # Expected texts use four spaces per level to match indent=4 below.
        '{\n    "obj": 31337\n}',
        "{}",
        "[]",
        '{\n    "a": {}\n}',
        "[\n    []\n]",
    ],
)
def test_encode_indent(test_input):
    """Decoding then re-encoding with ``indent=4`` reproduces the input text."""
    obj = ujson.decode(test_input)
    output = ujson.encode(obj, indent=4)
    assert test_input == output
    assert output == json.dumps(obj, indent=4)
2020-09-17 14:39:38 +02:00
2020-03-08 11:50:17 +01:00
def test_decode_from_unicode():
    """Decoding the same document from ``str`` and from UTF-8 ``bytes`` agrees."""
    test_input = '{"obj": 31337}'
    dec1 = ujson.decode(test_input)
    # Compare against bytes input: wrapping a str in str() would be a no-op
    # and would only compare the call with itself.
    dec2 = ujson.decode(test_input.encode("utf-8"))
    assert dec1 == dec2
2020-03-08 11:50:17 +01:00
def test_encode_recursion_max():
    """Encoding two objects whose ``toDict`` results reference each other raises."""
    # 8 is the max recursion depth

    class O2:
        member = 0

        def toDict(self):
            return {"member": self.member}

    class O1:
        member = 0

        def toDict(self):
            return {"member": self.member}

    # Build a reference cycle: O1 -> O2 -> O1.
    test_input = O1()
    test_input.member = O2()
    test_input.member.member = test_input

    with pytest.raises(OverflowError):
        ujson.encode(test_input)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_decode_dict():
    """Empty and populated JSON objects decode to the matching dicts."""
    test_input = "{}"
    obj = ujson.decode(test_input)
    assert {} == obj

    test_input = '{"one": 1, "two": 2, "three": 3}'
    obj = ujson.decode(test_input)
    assert {"one": 1, "two": 2, "three": 3} == obj
2020-03-08 11:50:17 +01:00
def test_encode_unicode_4_bytes_utf8_fail():
    """Encoding this byte sequence with ``reject_bytes=False`` raises OverflowError."""
    test_input = b"\xfd\xbf\xbf\xbf\xbf\xbf"
    with pytest.raises(OverflowError):
        ujson.encode(test_input, reject_bytes=False)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_encode_null_character():
    """Strings containing NUL round-trip and are encoded exactly as json does."""
    test_input = "31337 \x00 1337"
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)

    test_input = "\x00"
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)

    assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ")
2020-03-08 11:50:17 +01:00
def test_decode_null_character():
    """An escaped NUL inside a JSON string decodes the same as with json."""
    test_input = '"31337 \\u0000 31337"'
    assert ujson.decode(test_input) == json.loads(test_input)
2020-03-08 09:46:49 +01:00
2020-03-08 11:50:17 +01:00
def test_dump_to_file():
    """``ujson.dump`` writes compact JSON text into a ``StringIO``."""
    f = io.StringIO()
    ujson.dump([1, 2, 3], f)
    assert "[1,2,3]" == f.getvalue()
2020-03-08 11:50:17 +01:00
def test_dump_to_file_like_object():
    """``ujson.dump`` works with any object that provides ``write``."""

    class FileLike:
        """Minimal writable object that accumulates everything written."""

        def __init__(self):
            self.written = ""

        def write(self, chunk):
            self.written += chunk

    f = FileLike()
    ujson.dump([1, 2, 3], f)
    assert "[1,2,3]" == f.written
2020-03-08 11:50:17 +01:00
def test_dump_file_args_error():
    """Passing a plain string as the file argument to ``dump`` raises TypeError."""
    with pytest.raises(TypeError):
        ujson.dump([], "")
2020-03-08 11:50:17 +01:00
def test_load_file():
    """``ujson.load`` parses JSON text read from a ``StringIO``."""
    f = io.StringIO("[1,2,3,4]")
    assert [1, 2, 3, 4] == ujson.load(f)
2020-03-08 11:50:17 +01:00
def test_load_file_like_object():
    """``ujson.load`` works with any object that provides ``read``."""

    class FileLike:
        """Minimal readable object: yields the document once, then ``None``."""

        def read(self):
            try:
                self.end
            except AttributeError:
                # First call: remember that the content was handed out.
                self.end = True
                return "[1,2,3,4]"

    f = FileLike()
    assert [1, 2, 3, 4] == ujson.load(f)
2020-03-08 11:50:17 +01:00
def test_load_file_args_error():
    """Passing a plain string as the file argument to ``load`` raises TypeError."""
    with pytest.raises(TypeError):
        ujson.load("[]")
def test_version():
    """``ujson.__version__`` starts with ``MAJOR.MINOR`` or ``MAJOR.MINOR.PATCH``."""
    assert re.search(
        r"^\d+\.\d+(\.\d+)?", ujson.__version__
    ), "ujson.__version__ must be a string like '1.4.0'"
2020-03-08 11:50:17 +01:00
def test_decode_number_with32bit_sign_bit():
    """Integers around the 32-bit sign-bit boundary decode to the exact value."""
    # Numbers that fit within 32 bits but would have the sign bit set
    # (2**31 <= x < 2**32), plus 2**32 itself, which no longer fits.
    values = (3590016419, 2**31, 2**32, 2**32 - 1)

    for value in values:
        doc = '{"id": %s}' % value
        assert ujson.decode(doc)["id"] == value
2020-03-08 11:50:17 +01:00
def test_encode_big_escape():
    """Repeatedly encoding a multi-megabyte non-ASCII bytes value must not raise."""
    # The input is loop-invariant, so build it once.
    base = "\u00e5".encode()
    test_input = base * 1024 * 1024 * 2

    for _ in range(10):
        ujson.encode(test_input, reject_bytes=False)
2012-06-11 19:41:56 +02:00
2020-03-08 11:50:17 +01:00
def test_decode_big_escape():
    """Repeatedly decoding a multi-megabyte non-ASCII JSON string must not raise."""
    # The input is loop-invariant, so build it once.
    base = "\u00e5".encode()
    quote = b'"'
    test_input = quote + (base * 1024 * 1024 * 2) + quote

    for _ in range(10):
        ujson.decode(test_input)
2012-06-11 19:41:56 +02:00
2020-03-08 11:50:17 +01:00
def test_to_dict():
    """An object with ``toDict`` is encoded via that method, not ``__json__``."""
    d = {"key": 31337}

    class DictTest:
        def toDict(self):
            return d

        def __json__(self):
            return '"json defined"'  # Fallback and shouldn't be called.

    o = DictTest()
    output = ujson.encode(o)
    dec = ujson.decode(output)
    assert dec == d
2012-06-11 19:41:56 +02:00
2020-03-08 09:46:49 +01:00
def test_object_with_json():
    """A string returned by ``__json__`` is embedded as raw JSON."""
    # If __json__ returns a string, then that string
    # will be used as a raw JSON snippet in the object.
    output_text = "this is the correct output"

    class JSONTest:
        def __json__(self):
            return '"' + output_text + '"'

    d = {"key": JSONTest()}
    output = ujson.encode(d)
    dec = ujson.decode(output)
    assert dec == {"key": output_text}
2012-06-11 19:41:56 +02:00
2020-03-08 09:46:49 +01:00
def test_object_with_complex_json():
    """A nested JSON document returned by ``__json__`` is embedded as raw JSON."""
    # If __json__ returns a string, then that string
    # will be used as a raw JSON snippet in the object.
    obj = {"foo": ["bar", "baz"]}

    class JSONTest:
        def __json__(self):
            return ujson.encode(obj)

    d = {"key": JSONTest()}
    output = ujson.encode(d)
    dec = ujson.decode(output)
    assert dec == {"key": obj}
2012-06-11 19:41:56 +02:00
2020-03-08 09:46:49 +01:00
def test_object_with_json_type_error():
    """A non-string return value from ``__json__`` raises TypeError."""
    # __json__ must return a string, otherwise it should raise an error.
    for return_value in (None, 1234, 12.34, True, {}):

        class JSONTest:
            def __json__(self):
                return return_value

        d = {"key": JSONTest()}
        with pytest.raises(TypeError):
            ujson.encode(d)
2020-03-08 09:46:49 +01:00
def test_object_with_json_attribute_error():
    """An AttributeError raised inside ``__json__`` propagates to the caller."""
    # If __json__ raises an error, make sure python actually raises it.

    class JSONTest:
        def __json__(self):
            raise AttributeError

    d = {"key": JSONTest()}
    with pytest.raises(AttributeError):
        ujson.encode(d)
2020-03-08 11:50:17 +01:00
def test_decode_array_empty():
    """An empty JSON array decodes to an empty list."""
    test_input = "[]"
    obj = ujson.decode(test_input)
    assert [] == obj
def test_encode_surrogate_characters():
    """Lone surrogates are encoded as escapes, or verbatim without ensure_ascii."""
    assert ujson.dumps("\udc7f") == r'"\udc7f"'

    # Surrogates as str keys and values, escaped output.
    out = r'{"\ud800":"\udfff"}'
    assert ujson.dumps({"\ud800": "\udfff"}) == out
    assert ujson.dumps({"\ud800": "\udfff"}, sort_keys=True) == out

    # The same surrogates supplied as bytes keys and values.
    o = {b"\xed\xa0\x80": b"\xed\xbf\xbf"}
    assert ujson.dumps(o, reject_bytes=False) == out
    assert ujson.dumps(o, reject_bytes=False, sort_keys=True) == out

    # With ensure_ascii=False the surrogates are emitted unescaped.
    out2 = '{"\ud800":"\udfff"}'
    assert ujson.dumps({"\ud800": "\udfff"}, ensure_ascii=False) == out2
    assert (
        ujson.dumps({"\ud800": "\udfff"}, ensure_ascii=False, sort_keys=True)
        == out2
    )
@pytest.mark.parametrize(
    "test_input, expected",
    [
        # Normal cases
        (r'"\uD83D\uDCA9"', "\U0001F4A9"),
        (r'"a\uD83D\uDCA9b"', "a\U0001F4A9b"),
        # Unpaired surrogates
        (r'"\uD800"', "\uD800"),
        (r'"a\uD800b"', "a\uD800b"),
        (r'"\uDEAD"', "\uDEAD"),
        (r'"a\uDEADb"', "a\uDEADb"),
        (r'"\uD83D\uD83D\uDCA9"', "\uD83D\U0001F4A9"),
        (r'"\uDCA9\uD83D\uDCA9"', "\uDCA9\U0001F4A9"),
        (r'"\uD83D\uDCA9\uD83D"', "\U0001F4A9\uD83D"),
        (r'"\uD83D\uDCA9\uDCA9"', "\U0001F4A9\uDCA9"),
        (r'"\uD83D \uDCA9"', "\uD83D \uDCA9"),
        # No decoding of actual surrogate characters (rather than escaped ones)
        ('"\uD800"', "\uD800"),
        ('"\uDEAD"', "\uDEAD"),
        ('"\uD800a\uDEAD"', "\uD800a\uDEAD"),
        ('"\uD83D\uDCA9"', "\uD83D\uDCA9"),
    ],
)
def test_decode_surrogate_characters(test_input, expected):
    """Surrogates decode identically from str, from bytes, and with json."""
    assert ujson.loads(test_input) == expected
    assert ujson.loads(test_input.encode("utf-8", "surrogatepass")) == expected

    # Ensure that this matches stdlib's behaviour
    assert json.loads(test_input) == expected
2020-03-08 11:50:17 +01:00
def test_sort_keys():
    """``sort_keys=True`` emits dict keys in sorted order."""
    data = {"a": 1, "c": 1, "b": 1, "e": 1, "f": 1, "d": 1}
    sorted_keys = ujson.dumps(data, sort_keys=True)
    assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}'
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input",
    [
        "[31337]",  # array one item
        "18446744073709551615",  # long unsigned value
        "9223372036854775807",  # big value
        "-9223372036854775808",  # small value
        "{}\n\t ",  # trailing whitespaces
    ],
)
def test_decode_no_assert(test_input):
    """Decoding these documents must not raise (results are not checked)."""
    ujson.decode(test_input)
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ("31337", 31337),
        ("-31337", -31337),
        ("100000000000000000000.0", 1e20),
    ],
)
def test_decode(test_input, expected):
    """Numeric documents decode to the expected Python number."""
    assert ujson.decode(test_input) == expected
@pytest.mark.parametrize(
    "test_input",
    [
        "1337E40",
        "1.337E40",
        "1337E+9",
        "1.337e+40",
        "1337e40",
        "1.337E-4",
        "1.337e-4",
    ],
)
def test_decode_numeric_int_exp(test_input):
    """Numbers in exponent notation decode to the same value as with json."""
    output = ujson.decode(test_input)
    assert output == json.loads(test_input)
2022-06-16 05:24:48 +02:00
@pytest.mark.parametrize(
    "i",
    [
        -(10**25),  # very negative
        -(2**64),  # too large in magnitude for a uint64
        -(2**63) - 1,  # too small for a int64
        2**64,  # too large for a uint64
        10**25,  # very positive
    ],
)
@pytest.mark.parametrize("mode", ["encode", "decode"])
def test_encode_decode_big_int(i, mode):
    """Big ints encode/decode like json, bare and inside a list or dict."""
    # Test ints that are too large to be represented by a C integer type
    for python_object in (i, [i], {"i": i}):
        json_string = json.dumps(python_object, separators=(",", ":"))
        if mode == "encode":
            if hasattr(sys, "pypy_version_info"):
                # https://foss.heptapod.net/pypy/pypy/-/issues/3765
                pytest.skip("PyPy can't serialise big ints")
            assert ujson.encode(python_object) == json_string
            if isinstance(python_object, dict):
                assert ujson.encode(python_object, sort_keys=True) == json_string
        else:
            assert ujson.decode(json_string) == python_object
2022-06-16 05:24:48 +02:00
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ('{{1337:""}}', ujson.JSONDecodeError),  # broken dict key type leak test
        ('{{"key":"}', ujson.JSONDecodeError),  # broken dict leak test
        ("[[[true", ujson.JSONDecodeError),  # broken list leak test
    ],
)
def test_decode_range_raises(test_input, expected):
    """Decoding a malformed document raises on each of 1000 repeated attempts."""
    for _ in range(1000):
        with pytest.raises(expected):
            ujson.decode(test_input)
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ("fdsa sda v9sa fdsa", ujson.JSONDecodeError),  # gibberish
        ("[", ujson.JSONDecodeError),  # broken array start
        ("{", ujson.JSONDecodeError),  # broken object start
        ("]", ujson.JSONDecodeError),  # broken array end
        ("}", ujson.JSONDecodeError),  # broken object end
        ('{"one":1,}', ujson.JSONDecodeError),  # object trailing comma fail
        ('"TESTING', ujson.JSONDecodeError),  # string unterminated
        ('"TESTING\\"', ujson.JSONDecodeError),  # string bad escape
        ("tru", ujson.JSONDecodeError),  # true broken
        ("fa", ujson.JSONDecodeError),  # false broken
        ("n", ujson.JSONDecodeError),  # null broken
        ("{{{{31337}}}}", ujson.JSONDecodeError),  # dict with no key
        ('{{{{"key"}}}}', ujson.JSONDecodeError),  # dict with no colon or value
        ('{{{{"key":}}}}', ujson.JSONDecodeError),  # dict with no value
        ("[31337,]", ujson.JSONDecodeError),  # array trailing comma fail
        ("[,31337]", ujson.JSONDecodeError),  # array leading comma fail
        ("[,]", ujson.JSONDecodeError),  # array only comma fail
        ("[]]", ujson.JSONDecodeError),  # array unmatched bracket fail
        ("{}\n\t a", ujson.JSONDecodeError),  # with trailing non whitespaces
        ('{"age", 44}', ujson.JSONDecodeError),  # read bad object syntax
    ],
)
def test_decode_raises(test_input, expected):
    """Each malformed document raises the expected decode error."""
    with pytest.raises(expected):
        ujson.decode(test_input)
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ("[", ujson.JSONDecodeError),  # array depth too big
        ("{", ujson.JSONDecodeError),  # object depth too big
    ],
)
def test_decode_raises_for_long_input(test_input, expected):
    """A document made of 1024 * 1024 opening brackets raises a decode error."""
    with pytest.raises(expected):
        ujson.decode(test_input * (1024 * 1024))
def test_decode_exception_is_value_error():
    """``JSONDecodeError`` is a distinct subclass of ``ValueError``."""
    assert issubclass(ujson.JSONDecodeError, ValueError)
    assert ujson.JSONDecodeError is not ValueError
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input, expected",
    [
        (True, "true"),
        (False, "false"),
        (None, "null"),
        ([True, False, None], "[true,false,null]"),
        ((True, False, None), "[true,false,null]"),
    ],
)
def test_dumps(test_input, expected):
    """Booleans, ``None`` and sequences of them serialise to the expected text."""
    assert ujson.dumps(test_input) == expected
class SomeObject:
    """Helper object whose ``repr`` returns a fixed message or raises.

    ``message`` is returned from ``__repr__``; if a truthy ``exception`` is
    supplied, ``__repr__`` raises it instead.
    """

    def __init__(self, message, exception=None):
        self._message = message
        self._exception = exception

    def __repr__(self):
        if self._exception:
            raise self._exception
        return self._message
@pytest.mark.parametrize(
    "test_input, expected_exception, expected_message",
    [
        (set(), TypeError, "set() is not JSON serializable"),
        ({1, 2, 3}, TypeError, "{1, 2, 3} is not JSON serializable"),
        (SomeObject("Some Object"), TypeError, "Some Object is not JSON serializable"),
        (SomeObject("\ud800"), UnicodeEncodeError, None),
        (SomeObject(None, KeyboardInterrupt), KeyboardInterrupt, None),
    ],
)
def test_dumps_raises(test_input, expected_exception, expected_message):
    """Unserialisable inputs raise the expected exception (and message, if given)."""
    with pytest.raises(expected_exception) as e:
        ujson.dumps(test_input)
    if expected_message:
        assert str(e.value) == expected_message
@pytest.mark.parametrize(
    "test_input, expected_exception",
    [
        (float("nan"), OverflowError),
        (float("inf"), OverflowError),
        (-float("inf"), OverflowError),
    ],
)
def test_encode_raises_allow_nan(test_input, expected_exception):
    """NaN and infinities raise when ``allow_nan=False``."""
    with pytest.raises(expected_exception):
        ujson.dumps(test_input, allow_nan=False)
2014-10-28 23:06:02 +01:00
2013-05-18 01:21:43 +02:00
def test_nan_inf_support():
    """Bare NaN/Infinity tokens decode to floats; quoted ones stay strings."""
    # Test ported from pandas
    text = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
    data = ujson.loads(text)
    expected = [
        "a",
        float("nan"),
        "NaN",
        float("inf"),
        "Infinity",
        -float("inf"),
        "-Infinity",
    ]
    for a, b in zip(data, expected):
        # NaN never compares equal to itself, so it is checked with isnan.
        assert a == b or (math.isnan(a) and math.isnan(b))
2022-04-04 20:22:24 +02:00
def test_special_singletons():
    """Top-level ``Infinity``, ``-Infinity``, ``NaN`` and ``null`` decode correctly."""
    pos_inf = ujson.loads("Infinity")
    neg_inf = ujson.loads("-Infinity")
    nan = ujson.loads("NaN")
    null = ujson.loads("null")

    assert math.isinf(pos_inf) and pos_inf > 0
    assert math.isinf(neg_inf) and neg_inf < 0
    assert math.isnan(nan)
    assert null is None
@pytest.mark.parametrize(
    "test_input, expected_message",
    [
        ("n", "Unexpected character .* 'null'"),
        ("N", "Unexpected character .*'NaN'"),
        ("NA", "Unexpected character .* 'NaN'"),
        ("Na N", "Unexpected character .* 'NaN'"),
        ("nan", "Unexpected character .* 'null'"),
        ("none", "Unexpected character .* 'null'"),
        ("i", "Expected object or value"),
        ("I", "Unexpected character .* 'Infinity'"),
        ("Inf", "Unexpected character .* 'Infinity'"),
        ("InfinitY", "Unexpected character .* 'Infinity'"),
        ("-i", "Trailing data"),
        ("-I", "Unexpected character .* '-Infinity'"),
        ("-Inf", "Unexpected character .* '-Infinity'"),
        ("-InfinitY", "Unexpected character .* '-Infinity'"),
        ("- i", "Trailing data"),
        ("- I", "Trailing data"),
        ("- Inf", "Trailing data"),
        ("- InfinitY", "Trailing data"),
    ],
)
def test_incomplete_special_inputs(test_input, expected_message):
    """Truncated or misspelled special tokens raise with the expected message."""
    with pytest.raises(ujson.JSONDecodeError, match=expected_message):
        ujson.loads(test_input)
@pytest.mark.parametrize(
    "test_input, expected_message",
    [
        ("NaNaNaN", "Trailing data"),
        ("Infinity and Beyond", "Trailing data"),
        ("-Infinity-and-Beyond", "Trailing data"),
        ("NaN!", "Trailing data"),
        ("Infinity!", "Trailing data"),
        ("-Infinity!", "Trailing data"),
    ],
)
def test_overcomplete_special_inputs(test_input, expected_message):
    """Special tokens followed by extra characters raise "Trailing data"."""
    with pytest.raises(ujson.JSONDecodeError, match=expected_message):
        ujson.loads(test_input)
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input",
    [
        # Each literal lists a key exactly once: repeating an identical key in
        # a dict display still produces a single-entry dict.
        {"key1": "value1"},  # ASCII key
        {"بن": "value1"},  # non-ASCII (Arabic) key
    ],
)
def test_encode_no_assert(test_input):
    """Encoding these dicts must not raise (output is not checked)."""
    ujson.encode(test_input)
@pytest.mark.parametrize(
    "test_input, expected",
    [
        (1.0, "1.0"),
        (OrderedDict([(1, 1), (0, 0), (8, 8), (2, 2)]), '{"1":1,"0":0,"8":8,"2":2}'),
        ({"a": float("NaN")}, '{"a":NaN}'),
        ({"a": float("inf")}, '{"a":Infinity}'),
        ({"a": -float("inf")}, '{"a":-Infinity}'),
    ],
)
def test_encode(test_input, expected):
    """Floats, ordered int-keyed dicts and non-finite values encode as expected."""
    assert ujson.encode(test_input) == expected
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input",
    [
        [
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
            9223372036854775807,
        ],
        [
            18446744073709551615,
            18446744073709551615,
            18446744073709551615,
        ],
    ],
)
def test_encode_list_long_conversion(test_input):
    """Lists of 2**63 - 1 and 2**64 - 1 round-trip via json and ujson decoding."""
    output = ujson.encode(test_input)
    assert test_input == json.loads(output)
    assert test_input == ujson.decode(output)
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input",
    [
        9223372036854775807,
        18446744073709551615,
    ],
)
def test_encode_long_conversion(test_input):
    """2**63 - 1 and 2**64 - 1 round-trip and are encoded exactly as json does."""
    output = ujson.encode(test_input)

    assert test_input == json.loads(output)
    assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)
2020-09-08 20:12:50 +02:00
@pytest.mark.parametrize(
    "test_input",
    [
        [[[[]]]],
        31337,
        -31337,
        None,
        True,
        False,
    ],
)
def test_encode_decode(test_input):
    """Simple values round-trip and are encoded exactly as json does."""
    output = ujson.encode(test_input)

    assert test_input == json.loads(output)
    assert output == json.dumps(test_input)
    assert test_input == ujson.decode(output)
@pytest.mark.parametrize(
    "test_input",
    [
        "Räksmörgås اسامة بن محمد بن عوض بن لادن",
        "\xe6\x97\xa5\xd1\x88",
        "\xf0\x90\x8d\x86",  # surrogate pair
        "\xf0\x91\x80\xb0TRAILINGNORMAL",  # 4 bytes UTF8
        "\xf3\xbf\xbf\xbfTRAILINGNORMAL",  # 4 bytes UTF8 highest
    ],
)
def test_encode_unicode(test_input):
    """Non-ASCII strings encode like json and decode back consistently."""
    enc = ujson.encode(test_input)
    dec = ujson.decode(enc)

    assert enc == json.dumps(test_input)
    assert dec == json.loads(enc)
2020-03-08 18:35:01 +01:00
@pytest.mark.parametrize(
    "test_input, expected",
    [
        ("-1.1234567893", -1.1234567893),
        ("-1.234567893", -1.234567893),
        ("-1.34567893", -1.34567893),
        ("-1.4567893", -1.4567893),
        ("-1.567893", -1.567893),
        ("-1.67893", -1.67893),
        ("-1.7893", -1.7893),
        ("-1.893", -1.893),
        ("-1.3", -1.3),
        ("1.1234567893", 1.1234567893),
        ("1.234567893", 1.234567893),
        ("1.34567893", 1.34567893),
        ("1.4567893", 1.4567893),
        ("1.567893", 1.567893),
        ("1.67893", 1.67893),
        ("1.7893", 1.7893),
        ("1.893", 1.893),
        ("1.3", 1.3),
        ("true", True),
        ("false", False),
        ("null", None),
        (" [ true, false,null] ", [True, False, None]),
    ],
)
def test_loads(test_input, expected):
    """Floats of varying precision and the JSON literals load as expected."""
    assert ujson.loads(test_input) == expected
2015-04-09 11:29:10 +02:00
2020-05-08 17:40:03 +02:00
def test_reject_bytes_default():
    """By default, ``bytes`` values are rejected with TypeError."""
    data = {"a": b"b"}
    with pytest.raises(TypeError):
        ujson.dumps(data)
2020-05-08 17:40:03 +02:00
def test_reject_bytes_true():
    """With ``reject_bytes=True``, ``bytes`` values are rejected with TypeError."""
    data = {"a": b"b"}
    with pytest.raises(TypeError):
        ujson.dumps(data, reject_bytes=True)
def test_reject_bytes_false():
    """With ``reject_bytes=False``, ``bytes`` values are encoded as strings."""
    data = {"a": b"b"}
    assert ujson.dumps(data, reject_bytes=False) == '{"a":"b"}'
def test_encode_special_keys():
    """``None``/``True``/``False`` keys become "null"/"true"/"false" strings."""
    data = {None: 0, True: 1, False: 2}
    assert ujson.dumps(data) == '{"null":0,"true":1,"false":2}'

    data = {None: 0}
    assert ujson.dumps(data, sort_keys=True) == '{"null":0}'

    data = {True: 1, False: 2}
    assert ujson.dumps(data, sort_keys=True) == '{"false":2,"true":1}'
2020-10-30 21:48:27 +01:00
def test_default_function():
    """The ``default`` callback converts, raises, or recurses as expected."""
    iso8601_time_format = "%Y-%m-%dT%H:%M:%S.%f"

    class CustomObject:
        pass

    class UnjsonableObject:
        pass

    def default(value):
        """Convert datetimes and UUIDs; reject CustomObject; else pass through."""
        if isinstance(value, dt.datetime):
            return value.strftime(iso8601_time_format)
        elif isinstance(value, uuid.UUID):
            return value.hex
        elif isinstance(value, CustomObject):
            raise ValueError("invalid value")
        # Returning the value unchanged hands the same object back to ujson.
        return value

    now = dt.datetime.now()
    expected_output = '"%s"' % now.strftime(iso8601_time_format)
    assert ujson.dumps(now, default=default) == expected_output

    uuid4 = uuid.uuid4()
    expected_output = '"%s"' % uuid4.hex
    assert ujson.dumps(uuid4, default=default) == expected_output

    custom_obj = CustomObject()
    with pytest.raises(ValueError, match="invalid value"):
        ujson.dumps(custom_obj, default=default)

    unjsonable_obj = UnjsonableObject()
    with pytest.raises(TypeError, match="maximum recursion depth exceeded"):
        ujson.dumps(unjsonable_obj, default=default)
@pytest.mark.parametrize("indent", list(range(65537, 65542)))
def test_dump_huge_indent(indent):
    """Encoding with an indent just above 65536 must not raise."""
    ujson.encode({"a": True}, indent=indent)
@pytest.mark.parametrize("first_length", list(range(2, 7)))
@pytest.mark.parametrize("second_length", list(range(10919, 10924)))
def test_dump_long_string(first_length, second_length):
    """Encoding a short string followed by a long run of NULs must not raise."""
    ujson.dumps(["a" * first_length, "\x00" * second_length])
def test_dump_indented_nested_list():
    """Dump a progressively deeper nested list, indenting by the current depth.

    Only successful completion is checked; the output is not inspected.
    """
    root = []
    innermost = root
    for depth in range(20):
        # Append a new level and descend into it for the next iteration.
        child = list(range(depth))
        innermost.append(child)
        innermost = child
        ujson.dumps(root, indent=depth)
@pytest.mark.parametrize("indent", [0, 1, 2, 4, 5, 8, 49])
def test_issue_334(indent):
    """Round-trip the issue 334 reproducer file through loads and dumps.

    The JSON document is read from ``334-reproducer.json`` next to this
    module and re-encoded with each indent width; only successful
    completion is checked.
    """
    reproducer = Path(__file__).with_name("334-reproducer.json")
    raw = reproducer.read_bytes()
    document = ujson.loads(raw)
    ujson.dumps(document, indent=indent)
@pytest.mark.skipif(
    hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
def test_default_ref_counting():
    """No object produced through the ``default`` callback may stay alive.

    Each ``convert()`` call yields a fresh instance with a smaller value
    until it finally returns ``0``; afterwards the garbage collector must
    not track any instance of the helper class.
    """

    class DefaultRefCountingClass:
        def __init__(self, value):
            self._value = value

        def convert(self):
            # Bottom of the chain: hand back a plain, serialisable integer.
            if not self._value > 1:
                return 0
            return type(self)(self._value - 1)

    import gc

    gc.collect()
    ujson.dumps(DefaultRefCountingClass(3), default=lambda obj: obj.convert())
    leaked = any(
        type(tracked).__name__ == "DefaultRefCountingClass"
        for tracked in gc.get_objects()
    )
    assert not leaked
@pytest.mark.parametrize("sort_keys", [False, True])
def test_obj_str_exception(sort_keys):
    """An exception raised by a key's ``__str__`` propagates out of ``dumps``.

    The key's reference count must be the same before and after the failed
    call. Where ``sys.getrefcount`` is missing, a stub returning ``0`` is
    used, so the count comparison passes trivially.
    """

    class Obj:
        def __str__(self):
            raise NotImplementedError

    key = Obj()
    getrefcount = getattr(sys, "getrefcount", lambda x: 0)
    baseline = getrefcount(key)
    with pytest.raises(NotImplementedError):
        ujson.dumps({key: 1}, sort_keys=sort_keys)
    assert getrefcount(key) == baseline
2022-06-09 23:14:50 +02:00
def no_memory_leak(func_code, n=None):
    """Run the sibling ``memory.py`` script in a subprocess and require exit 0.

    ``func_code`` is a Python expression spliced into a snippet that binds
    it to ``func``; the snippet is passed to the script as its first
    argument. When ``n`` is given, its string form is appended as a second
    argument.
    NOTE(review): presumably ``memory.py`` calls ``func`` repeatedly and
    exits non-zero on memory growth -- confirm against that script.
    """
    script = os.path.join(os.path.dirname(__file__), "memory.py")
    command = [
        sys.executable,
        script,
        f"import functools, ujson; func = {func_code}",
    ]
    if n is not None:
        command.append(str(n))
    completed = subprocess.run(command)
    assert completed.returncode == 0
@pytest.mark.skipif(
    hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
@pytest.mark.parametrize("input", ['["a" * 11000, b""]'])
def test_no_memory_leak_encoding_errors(input):
    """Run the leak check on ``ujson.dumps`` called with the given input.

    ``input`` is source text, not a value: it is spliced into the code
    string that the subprocess evaluates.
    """
    func_code = f"functools.partial(ujson.dumps, {input})"
    no_memory_leak(func_code)
@pytest.mark.parametrize(
    "separators, expected",
    [
        (None, '{"a":0,"b":1}'),
        ((",", ":"), '{"a":0,"b":1}'),
        ((", ", ": "), '{"a": 0, "b": 1}'),
        # And some weird values, even though they produce invalid JSON
        (("\u203d", "\u00a1"), '{"a"\u00a10\u203d"b"\u00a11}'),
        (("i\x00", "k\x00"), '{"a"k\x000i\x00"b"k\x001}'),
        (("\udc80", "\udc81"), '{"a"\udc810\udc80"b"\udc811}'),
    ],
)
def test_separators(separators, expected):
    """The ``separators`` pair is inserted verbatim between items and keys."""
    encoded = ujson.dumps({"a": 0, "b": 1}, separators=separators)
    assert encoded == expected
@pytest.mark.parametrize(
    "separators, expected_exception",
    [
        (True, TypeError),
        (0, TypeError),
        (b"", TypeError),
        ((), ValueError),
        ((",",), ValueError),
        ((",", ":", "x"), ValueError),
        ((True, 0), TypeError),
        ((",", True), TypeError),
        ((True, ":"), TypeError),
        ((b",", b":"), TypeError),
    ],
)
def test_separators_errors(separators, expected_exception):
    """Each invalid ``separators`` argument raises the exception paired with it."""
    document = {"a": 0, "b": 1}
    with pytest.raises(expected_exception):
        ujson.dumps(document, separators=separators)
def test_loads_bytes_like():
    """``ujson.loads`` accepts bytes, bytearray and (off PyPy) memoryview.

    On PyPy a memoryview argument is expected to raise a ``TypeError``
    whose message mentions PyPy.
    """
    assert ujson.loads(b"123") == 123

    running_on_pypy = hasattr(sys, "pypy_version_info")
    if running_on_pypy:
        with pytest.raises(TypeError, match="PyPy"):
            ujson.loads(memoryview(b"{}"))
    else:
        decoded = ujson.loads(memoryview(b'["a", "b", "c"]'))
        assert decoded == ["a", "b", "c"]

    assert ujson.loads(bytearray(b"99")) == 99
    # Non-ASCII text, encoded with str.encode()'s default codec.
    assert ujson.loads('"🦄🐳"'.encode()) == "🦄🐳"
@pytest.mark.skipif(
    hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
def test_loads_bytes_like_refcounting():
    """``ujson.loads`` leaves the input buffer's reference count unchanged.

    Checked once for a successful decode and once for a decode that fails
    with ``ValueError``.
    """
    import gc

    gc.collect()

    # Successful decode.
    payload = b'{"a": 99}'
    baseline = sys.getrefcount(payload)
    assert ujson.loads(payload) == {"a": 99}
    assert sys.getrefcount(payload) == baseline

    # Failing decode.
    payload = b'{"a": invalid}'
    baseline = sys.getrefcount(payload)
    with pytest.raises(ValueError):
        ujson.loads(payload)
    assert sys.getrefcount(payload) == baseline
def test_loads_non_c_contiguous():
    """A non-C-contiguous memoryview is rejected with ``TypeError``.

    The same bytes decode normally once copied into a ``bytes`` object.
    """
    # Put a filler byte after every payload byte, then view every second
    # byte: the view yields the payload but is strided over its buffer.
    interleaved = b"".join(bytes([byte]) + b"_" for byte in b"[1, 2, 3]")
    strided = memoryview(interleaved)[::2]
    assert not strided.c_contiguous

    assert ujson.loads(bytes(strided)) == [1, 2, 3]
    with pytest.raises(TypeError):
        ujson.loads(strided)
@pytest.mark.parametrize(
    "enum_classes, value, expected",
    [
        ((enum.IntEnum,), 42, "42"),
        ((float, enum.Enum), 3.1416, "3.1416"),
    ],
)
def test_enum(enum_classes, value, expected):
    """An enum member with an int or float base dumps as its plain number."""

    class MyEnum(*enum_classes):
        FOO = value

    member = MyEnum.FOO
    assert ujson.dumps(member) == expected
"""
The following checks are not part of the standard test suite.
They can be run manually as follows:
python -c 'from tests.test_ujson import check_foo; check_foo()'
"""
def check_decode_decimal_no_int_overflow():
    """Decode a float that is followed by a string of roughly 4 GiB.

    Manual check only: requires enough free RAM to hold a ~4GB string in
    memory. The decoded float must equal the literal in the input.
    """
    filler = "a" * (2**32 - 5)
    document = r'[0.123456789,"{}"]'.format(filler)
    decoded = ujson.decode(document)
    assert decoded[0] == 0.123456789
2012-06-11 19:41:56 +02:00
"""
def test_decode_numeric_int_frc_overflow():
    input = "X.Y"
    raise NotImplementedError("Implement this test!")

def test_decode_string_unicode_escape():
    input = "\u3131"
    raise NotImplementedError("Implement this test!")

def test_decode_string_unicode_broken_escape():
    input = "\u3131"
    raise NotImplementedError("Implement this test!")

def test_decode_string_unicode_invalid_escape():
    input = "\u3131"
    raise NotImplementedError("Implement this test!")

def test_decode_string_utf8():
    input = "someutfcharacters"
    raise NotImplementedError("Implement this test!")
"""
"""
# Use this to look for memory leaks
if __name__ == '__main__':
    import unittest
    from guppy import hpy
    hp = hpy()
    hp.setrelheap()
    while True:
        try:
            unittest.main()
        except SystemExit:
            pass
        heap = hp.heapu()
        print(heap)
"""