1
0
Fork 0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-05-06 12:56:15 +02:00
ultrajson/tests/test_ujson.py
2020-11-11 14:41:51 +01:00

858 lines
23 KiB
Python

import decimal
import io
import json
import math
import re
import sys
from collections import OrderedDict
import pytest
import ujson
def assert_almost_equal(a, b):
assert round(abs(a - b), 7) == 0
def test_encode_decimal():
sut = decimal.Decimal("1337.1337")
encoded = ujson.encode(sut)
decoded = ujson.decode(encoded)
assert decoded == 1337.1337
def test_encode_string_conversion():
test_input = "A string \\ / \b \f \n \r \t </script> &"
not_html_encoded = '"A string \\\\ \\/ \\b \\f \\n \\r \\t <\\/script> &"'
html_encoded = (
'"A string \\\\ \\/ \\b \\f \\n \\r \\t \\u003c\\/script\\u003e \\u0026"'
)
not_slashes_escaped = '"A string \\\\ / \\b \\f \\n \\r \\t </script> &"'
def helper(expected_output, **encode_kwargs):
output = ujson.encode(test_input, **encode_kwargs)
assert output == expected_output
if encode_kwargs.get("escape_forward_slashes", True):
assert test_input == json.loads(output)
assert test_input == ujson.decode(output)
# Default behavior assumes encode_html_chars=False.
helper(not_html_encoded, ensure_ascii=True)
helper(not_html_encoded, ensure_ascii=False)
# Make sure explicit encode_html_chars=False works.
helper(not_html_encoded, ensure_ascii=True, encode_html_chars=False)
helper(not_html_encoded, ensure_ascii=False, encode_html_chars=False)
# Make sure explicit encode_html_chars=True does the encoding.
helper(html_encoded, ensure_ascii=True, encode_html_chars=True)
helper(html_encoded, ensure_ascii=False, encode_html_chars=True)
# Do escape forward slashes if disabled.
helper(not_slashes_escaped, escape_forward_slashes=False)
def test_write_escaped_string():
assert "\"\\u003cimg src='\\u0026amp;'\\/\\u003e\"" == ujson.dumps(
"<img src='&amp;'/>", encode_html_chars=True
)
def test_double_long_issue():
sut = {"a": -4342969734183514}
encoded = json.dumps(sut)
decoded = json.loads(encoded)
assert sut == decoded
encoded = ujson.encode(sut)
decoded = ujson.decode(encoded)
assert sut == decoded
def test_double_long_decimal_issue():
sut = {"a": -12345678901234.56789012}
encoded = json.dumps(sut)
decoded = json.loads(encoded)
assert sut == decoded
encoded = ujson.encode(sut)
decoded = ujson.decode(encoded)
assert sut == decoded
@pytest.mark.parametrize("val", [1e10, 1e15, 1e16, 1e20])
def test_encode_float_string_rep(val):
assert ujson.dumps(val) == json.dumps(val)
def test_encode_decode_long_decimal():
sut = {"a": -528656961.4399388}
encoded = ujson.dumps(sut)
ujson.decode(encoded)
def test_decimal_decode_test():
sut = {"a": 4.56}
encoded = ujson.encode(sut)
decoded = ujson.decode(encoded)
assert_almost_equal(sut["a"], decoded["a"])
def test_encode_double_conversion():
test_input = math.pi
output = ujson.encode(test_input)
assert round(test_input, 5) == round(json.loads(output), 5)
assert round(test_input, 5) == round(ujson.decode(output), 5)
def test_encode_double_neg_conversion():
test_input = -math.pi
output = ujson.encode(test_input)
assert round(test_input, 5) == round(json.loads(output), 5)
assert round(test_input, 5) == round(ujson.decode(output), 5)
def test_encode_array_of_nested_arrays():
test_input = [[[[]]]] * 20
output = ujson.encode(test_input)
assert test_input == json.loads(output)
# assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
def test_encode_array_of_doubles():
test_input = [31337.31337, 31337.31337, 31337.31337, 31337.31337] * 10
output = ujson.encode(test_input)
assert test_input == json.loads(output)
# assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
def test_encode_string_conversion2():
test_input = "A string \\ / \b \f \n \r \t"
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert output == '"A string \\\\ \\/ \\b \\f \\n \\r \\t"'
assert test_input == ujson.decode(output)
def test_encode_control_escaping():
test_input = "\x19"
enc = ujson.encode(test_input)
dec = ujson.decode(enc)
assert test_input == dec
assert enc == json.dumps(test_input)
# Characters outside of Basic Multilingual Plane(larger than
# 16 bits) are represented as \UXXXXXXXX in python but should be encoded
# as \uXXXX\uXXXX in json.
def test_encode_unicode_bmp():
s = "\U0001f42e\U0001f42e\U0001F42D\U0001F42D" # 🐮🐮🐭🐭
encoded = ujson.dumps(s)
encoded_json = json.dumps(s)
if len(s) == 4:
assert len(encoded) == len(s) * 12 + 2
else:
assert len(encoded) == len(s) * 6 + 2
assert encoded == encoded_json
decoded = ujson.loads(encoded)
assert s == decoded
# ujson outputs a UTF-8 encoded str object
encoded = ujson.dumps(s, ensure_ascii=False)
# json outputs an unicode object
encoded_json = json.dumps(s, ensure_ascii=False)
assert len(encoded) == len(s) + 2 # original length + quotes
assert encoded == encoded_json
decoded = ujson.loads(encoded)
assert s == decoded
def test_encode_symbols():
s = "\u273f\u2661\u273f" # ✿♡✿
encoded = ujson.dumps(s)
encoded_json = json.dumps(s)
assert len(encoded) == len(s) * 6 + 2 # 6 characters + quotes
assert encoded == encoded_json
decoded = ujson.loads(encoded)
assert s == decoded
# ujson outputs a UTF-8 encoded str object
encoded = ujson.dumps(s, ensure_ascii=False)
# json outputs an unicode object
encoded_json = json.dumps(s, ensure_ascii=False)
assert len(encoded) == len(s) + 2 # original length + quotes
assert encoded == encoded_json
decoded = ujson.loads(encoded)
assert s == decoded
def test_encode_long_neg_conversion():
test_input = -9223372036854775808
output = ujson.encode(test_input)
json.loads(output)
ujson.decode(output)
assert test_input == json.loads(output)
assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
def test_encode_list_conversion():
test_input = [1, 2, 3, 4]
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert test_input == ujson.decode(output)
def test_encode_dict_conversion():
test_input = {"k1": 1, "k2": 2, "k3": 3, "k4": 4}
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert test_input == ujson.decode(output)
assert test_input == ujson.decode(output)
@pytest.mark.skipif(
hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
def test_encode_dict_values_ref_counting():
import gc
gc.collect()
value = ["abc"]
data = {"1": value}
ref_count = sys.getrefcount(value)
ujson.dumps(data)
assert ref_count == sys.getrefcount(value)
@pytest.mark.skipif(
hasattr(sys, "pypy_version_info"), reason="PyPy uses incompatible GC"
)
def test_encode_dict_key_ref_counting():
import gc
gc.collect()
key = "key"
data = {key: "abc"}
ref_count = sys.getrefcount(key)
ujson.dumps(data)
assert ref_count == sys.getrefcount(key)
def test_encode_to_utf8():
test_input = b"\xe6\x97\xa5\xd1\x88".decode("utf-8")
enc = ujson.encode(test_input, ensure_ascii=False)
dec = ujson.decode(enc)
assert enc == json.dumps(test_input, ensure_ascii=False)
assert dec == json.loads(enc)
def test_encode_indent():
test_input = '{\n "obj": 31337\n}'
obj = ujson.decode(test_input)
output = ujson.encode(obj, indent=4)
assert test_input == output
def test_decode_from_unicode():
test_input = '{"obj": 31337}'
dec1 = ujson.decode(test_input)
dec2 = ujson.decode(str(test_input))
assert dec1 == dec2
def test_encode_recursion_max():
# 8 is the max recursion depth
class O2:
member = 0
def toDict(self):
return {"member": self.member}
class O1:
member = 0
def toDict(self):
return {"member": self.member}
test_input = O1()
test_input.member = O2()
test_input.member.member = test_input
with pytest.raises(OverflowError):
ujson.encode(test_input)
def test_decode_dict():
test_input = "{}"
obj = ujson.decode(test_input)
assert {} == obj
test_input = '{"one": 1, "two": 2, "three": 3}'
obj = ujson.decode(test_input)
assert {"one": 1, "two": 2, "three": 3} == obj
def test_encode_unicode_4_bytes_utf8_fail():
test_input = b"\xfd\xbf\xbf\xbf\xbf\xbf"
with pytest.raises(OverflowError):
ujson.encode(test_input, reject_bytes=False)
def test_encode_null_character():
test_input = "31337 \x00 1337"
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
test_input = "\x00"
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ")
def test_decode_null_character():
test_input = '"31337 \\u0000 31337"'
assert ujson.decode(test_input) == json.loads(test_input)
def test_dump_to_file():
f = io.StringIO()
ujson.dump([1, 2, 3], f)
assert "[1,2,3]" == f.getvalue()
def test_dump_to_file_like_object():
class FileLike:
def __init__(self):
self.bytes = ""
def write(self, bytes):
self.bytes += bytes
f = FileLike()
ujson.dump([1, 2, 3], f)
assert "[1,2,3]" == f.bytes
def test_dump_file_args_error():
with pytest.raises(TypeError):
ujson.dump([], "")
def test_load_file():
f = io.StringIO("[1,2,3,4]")
assert [1, 2, 3, 4] == ujson.load(f)
def test_load_file_like_object():
class FileLike:
def read(self):
try:
self.end
except AttributeError:
self.end = True
return "[1,2,3,4]"
f = FileLike()
assert [1, 2, 3, 4] == ujson.load(f)
def test_load_file_args_error():
with pytest.raises(TypeError):
ujson.load("[]")
def test_version():
assert re.search(
r"^\d+\.\d+(\.\d+)?", ujson.__version__
), "ujson.__version__ must be a string like '1.4.0'"
def test_decode_number_with32bit_sign_bit():
# Test that numbers that fit within 32 bits but would have the
# sign bit set (2**31 <= x < 2**32) are decoded properly.
docs = (
'{"id": 3590016419}',
'{"id": %s}' % 2 ** 31,
'{"id": %s}' % 2 ** 32,
'{"id": %s}' % ((2 ** 32) - 1),
)
results = (3590016419, 2 ** 31, 2 ** 32, 2 ** 32 - 1)
for doc, result in zip(docs, results):
assert ujson.decode(doc)["id"] == result
def test_encode_big_escape():
for x in range(10):
base = "\u00e5".encode()
test_input = base * 1024 * 1024 * 2
ujson.encode(test_input, reject_bytes=False)
def test_decode_big_escape():
for x in range(10):
base = "\u00e5".encode()
quote = b'"'
test_input = quote + (base * 1024 * 1024 * 2) + quote
ujson.decode(test_input)
def test_to_dict():
d = {"key": 31337}
class DictTest:
def toDict(self):
return d
def __json__(self):
return '"json defined"' # Fallback and shouldn't be called.
o = DictTest()
output = ujson.encode(o)
dec = ujson.decode(output)
assert dec == d
def test_object_with_json():
# If __json__ returns a string, then that string
# will be used as a raw JSON snippet in the object.
output_text = "this is the correct output"
class JSONTest:
def __json__(self):
return '"' + output_text + '"'
d = {"key": JSONTest()}
output = ujson.encode(d)
dec = ujson.decode(output)
assert dec == {"key": output_text}
def test_object_with_complex_json():
# If __json__ returns a string, then that string
# will be used as a raw JSON snippet in the object.
obj = {"foo": ["bar", "baz"]}
class JSONTest:
def __json__(self):
return ujson.encode(obj)
d = {"key": JSONTest()}
output = ujson.encode(d)
dec = ujson.decode(output)
assert dec == {"key": obj}
def test_object_with_json_type_error():
# __json__ must return a string, otherwise it should raise an error.
for return_value in (None, 1234, 12.34, True, {}):
class JSONTest:
def __json__(self):
return return_value
d = {"key": JSONTest()}
with pytest.raises(TypeError):
ujson.encode(d)
def test_object_with_json_attribute_error():
# If __json__ raises an error, make sure python actually raises it.
class JSONTest:
def __json__(self):
raise AttributeError
d = {"key": JSONTest()}
with pytest.raises(AttributeError):
ujson.encode(d)
def test_decode_array_empty():
test_input = "[]"
obj = ujson.decode(test_input)
assert [] == obj
def test_encoding_invalid_unicode_character():
s = "\udc7f"
with pytest.raises(UnicodeEncodeError):
ujson.dumps(s)
def test_sort_keys():
data = {"a": 1, "c": 1, "b": 1, "e": 1, "f": 1, "d": 1}
sorted_keys = ujson.dumps(data, sort_keys=True)
assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}'
@pytest.mark.parametrize(
"test_input",
[
"[31337]", # array one item
"18446744073709551615", # long unsigned value
"9223372036854775807", # big value
"-9223372036854775808", # small value
"{}\n\t ", # trailing whitespaces
],
)
def test_decode_no_assert(test_input):
ujson.decode(test_input)
@pytest.mark.parametrize(
"test_input, expected",
[
("31337", 31337),
("-31337", -31337),
],
)
def test_decode(test_input, expected):
assert ujson.decode(test_input) == expected
@pytest.mark.parametrize(
"test_input",
[
"1337E40",
"1.337E40",
"1337E+9",
"1.337e+40",
"1337E40",
"1337e40",
"1.337E-4",
"1.337e-4",
],
)
def test_decode_numeric_int_exp(test_input):
output = ujson.decode(test_input)
assert output == json.loads(test_input)
@pytest.mark.parametrize(
"test_input, expected",
[
('{{1337:""}}', ValueError), # broken dict key type leak test
('{{"key":"}', ValueError), # broken dict leak test
('{{"key":"}', ValueError), # broken dict leak test
("[[[true", ValueError), # broken list leak test
],
)
def test_decode_range_raises(test_input, expected):
for x in range(1000):
with pytest.raises(ValueError):
ujson.decode(test_input)
@pytest.mark.parametrize(
"test_input, expected",
[
("fdsa sda v9sa fdsa", ValueError), # jibberish
("[", ValueError), # broken array start
("{", ValueError), # broken object start
("]", ValueError), # broken array end
("}", ValueError), # broken object end
('{"one":1,}', ValueError), # object trailing comma fail
('"TESTING', ValueError), # string unterminated
('"TESTING\\"', ValueError), # string bad escape
("tru", ValueError), # true broken
("fa", ValueError), # false broken
("n", ValueError), # null broken
("{{{{31337}}}}", ValueError), # dict with no key
('{{{{"key"}}}}', ValueError), # dict with no colon or value
('{{{{"key":}}}}', ValueError), # dict with no value
("[31337,]", ValueError), # array trailing comma fail
("[,31337]", ValueError), # array leading comma fail
("[,]", ValueError), # array only comma fail
("[]]", ValueError), # array unmatched bracket fail
("18446744073709551616", ValueError), # too big value
("-90223372036854775809", ValueError), # too small value
("18446744073709551616", ValueError), # very too big value
("-90223372036854775809", ValueError), # very too small value
("{}\n\t a", ValueError), # with trailing non whitespaces
("[18446744073709551616]", ValueError), # array with big int
('{"age", 44}', ValueError), # read bad object syntax
],
)
def test_decode_raises(test_input, expected):
with pytest.raises(expected):
ujson.decode(test_input)
@pytest.mark.parametrize(
"test_input, expected",
[
("[", ValueError), # array depth too big
("{", ValueError), # object depth too big
],
)
def test_decode_raises_for_long_input(test_input, expected):
with pytest.raises(expected):
ujson.decode(test_input * (1024 * 1024))
@pytest.mark.parametrize(
"test_input, expected",
[
(True, "true"),
(False, "false"),
(None, "null"),
([True, False, None], "[true,false,null]"),
((True, False, None), "[true,false,null]"),
],
)
def test_dumps(test_input, expected):
assert ujson.dumps(test_input) == expected
class SomeObject:
def __repr__(self):
return "Some Object"
@pytest.mark.parametrize(
"test_input, expected_exception, expected_message",
[
(set(), TypeError, "set() is not JSON serializable"),
({1, 2, 3}, TypeError, "{1, 2, 3} is not JSON serializable"),
(SomeObject(), TypeError, "Some Object is not JSON serializable"),
],
)
def test_dumps_raises(test_input, expected_exception, expected_message):
with pytest.raises(expected_exception) as e:
ujson.dumps(test_input)
assert str(e.value) == expected_message
@pytest.mark.parametrize(
"test_input, expected_exception",
[
(float("nan"), OverflowError),
(float("inf"), OverflowError),
(-float("inf"), OverflowError),
(12839128391289382193812939, OverflowError),
],
)
def test_encode_raises_allow_nan(test_input, expected_exception):
with pytest.raises(expected_exception):
ujson.dumps(test_input, allow_nan=False)
@pytest.mark.parametrize(
"test_input",
[
{
"key1": "value1",
"key1": "value1",
"key1": "value1",
"key1": "value1",
"key1": "value1",
"key1": "value1",
},
{
"بن": "value1",
"بن": "value1",
"بن": "value1",
"بن": "value1",
"بن": "value1",
"بن": "value1",
"بن": "value1",
},
],
)
def test_encode_no_assert(test_input):
ujson.encode(test_input)
@pytest.mark.parametrize(
"test_input, expected",
[
(1.0, "1.0"),
(OrderedDict([(1, 1), (0, 0), (8, 8), (2, 2)]), '{"1":1,"0":0,"8":8,"2":2}'),
({"a": float("NaN")}, '{"a":NaN}'),
({"a": float("inf")}, '{"a":Inf}'),
({"a": -float("inf")}, '{"a":-Inf}'),
],
)
def test_encode(test_input, expected):
assert ujson.encode(test_input) == expected
@pytest.mark.parametrize(
"test_input",
[
[
9223372036854775807,
9223372036854775807,
9223372036854775807,
9223372036854775807,
9223372036854775807,
9223372036854775807,
],
[
18446744073709551615,
18446744073709551615,
18446744073709551615,
],
],
)
def test_encode_list_long_conversion(test_input):
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert test_input == ujson.decode(output)
@pytest.mark.parametrize(
"test_input",
[
9223372036854775807,
18446744073709551615,
],
)
def test_encode_long_conversion(test_input):
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
@pytest.mark.parametrize(
"test_input",
[
[[[[]]]],
31337,
-31337,
None,
True,
False,
],
)
def test_encode_decode(test_input):
output = ujson.encode(test_input)
assert test_input == json.loads(output)
assert output == json.dumps(test_input)
assert test_input == ujson.decode(output)
@pytest.mark.parametrize(
"test_input",
[
"Räksmörgås اسامة بن محمد بن عوض بن لادن",
"\xe6\x97\xa5\xd1\x88",
"\xf0\x90\x8d\x86", # surrogate pair
"\xf0\x91\x80\xb0TRAILINGNORMAL", # 4 bytes UTF8
"\xf3\xbf\xbf\xbfTRAILINGNORMAL", # 4 bytes UTF8 highest
],
)
def test_encode_unicode(test_input):
enc = ujson.encode(test_input)
dec = ujson.decode(enc)
assert enc == json.dumps(test_input)
assert dec == json.loads(enc)
@pytest.mark.parametrize(
"test_input, expected",
[
("-1.1234567893", -1.1234567893),
("-1.234567893", -1.234567893),
("-1.34567893", -1.34567893),
("-1.4567893", -1.4567893),
("-1.567893", -1.567893),
("-1.67893", -1.67893),
("-1.7893", -1.7893),
("-1.893", -1.893),
("-1.3", -1.3),
("1.1234567893", 1.1234567893),
("1.234567893", 1.234567893),
("1.34567893", 1.34567893),
("1.4567893", 1.4567893),
("1.567893", 1.567893),
("1.67893", 1.67893),
("1.7893", 1.7893),
("1.893", 1.893),
("1.3", 1.3),
("true", True),
("false", False),
("null", None),
(" [ true, false,null] ", [True, False, None]),
],
)
def test_loads(test_input, expected):
assert ujson.loads(test_input) == expected
def test_reject_bytes_default():
data = {"a": b"b"}
with pytest.raises(TypeError):
ujson.dumps(data)
def test_reject_bytes_true():
data = {"a": b"b"}
with pytest.raises(TypeError):
ujson.dumps(data, reject_bytes=True)
def test_reject_bytes_false():
data = {"a": b"b"}
assert ujson.dumps(data, reject_bytes=False) == '{"a":"b"}'
def test_encode_none_key():
data = {None: None}
assert ujson.dumps(data) == '{"null":null}'
"""
def test_decode_numeric_int_frc_overflow():
input = "X.Y"
raise NotImplementedError("Implement this test!")
def test_decode_string_unicode_escape():
input = "\u3131"
raise NotImplementedError("Implement this test!")
def test_decode_string_unicode_broken_escape():
input = "\u3131"
raise NotImplementedError("Implement this test!")
def test_decode_string_unicode_invalid_escape():
input = "\u3131"
raise NotImplementedError("Implement this test!")
def test_decode_string_utf8():
input = "someutfcharacters"
raise NotImplementedError("Implement this test!")
"""
"""
# Use this to look for memory leaks
if __name__ == '__main__':
import unittest
from guppy import hpy
hp = hpy()
hp.setrelheap()
while True:
try:
unittest.main()
except SystemExit:
pass
heap = hp.heapu()
print(heap)
"""