1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-11-26 10:23:45 +01:00

Speed-up and cleanup objToJSON

* Use PyDict_Next() to iterate over dicts.
* Use macros to access lists, tuples, bytes.
* Avoid calling PyErr_Occurred() if not necessary.
* Fix a memory leak when encoding very large ints.
* Delete dead and duplicate code.

Also,

* Raise TypeError if toDict() returns a non-dict instead of silently
  converting it to null.
This commit is contained in:
Eugene Toder 2023-12-02 07:34:55 -05:00 committed by Brénainn Woodsend
parent a08b75b970
commit eda5ecd2c2
2 changed files with 87 additions and 145 deletions

@ -59,8 +59,6 @@ typedef struct __TypeContext
Py_ssize_t size;
PyObject *itemValue;
PyObject *itemName;
PyObject *attrList;
PyObject *iterator;
union
{
@ -78,13 +76,6 @@ typedef struct __TypeContext
// Avoid infinite loop caused by the default function
#define DEFAULT_FN_MAX_DEPTH 3
struct PyDictIterState
{
PyObject *keys;
size_t i;
size_t sz;
};
//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__)
#define PRINTMARK()
@ -102,11 +93,7 @@ static void *PyLongToUINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, siz
static void *PyLongToINTSTR(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
PyObject *obj = PyNumber_ToBase(_obj, 10);
if (!obj)
{
return NULL;
}
PyObject *obj = GET_TC(tc)->rawJSONValue;
*_outLen = PyUnicode_GET_LENGTH(obj);
return PyUnicode_1BYTE_DATA(obj);
}
@ -121,8 +108,8 @@ static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, si
static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{
PyObject *obj = (PyObject *) _obj;
*_outLen = PyBytes_Size(obj);
return PyBytes_AsString(obj);
*_outLen = PyBytes_GET_SIZE(obj);
return PyBytes_AS_STRING(obj);
}
static char *PyUnicodeToUTF8Raw(JSOBJ _obj, size_t *_outLen, PyObject **pBytesObj)
@ -151,8 +138,8 @@ static char *PyUnicodeToUTF8Raw(JSOBJ _obj, size_t *_outLen, PyObject **pBytesOb
return NULL;
}
*_outLen = PyBytes_Size(bytesObj);
return PyBytes_AsString(bytesObj);
*_outLen = PyBytes_GET_SIZE(bytesObj);
return PyBytes_AS_STRING(bytesObj);
}
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
@ -175,16 +162,12 @@ static void *PyRawJSONToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, si
static int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc)
{
PyObject *item;
if (GET_TC(tc)->index >= GET_TC(tc)->size)
{
return 0;
}
item = PyTuple_GetItem (obj, GET_TC(tc)->index);
GET_TC(tc)->itemValue = item;
GET_TC(tc)->itemValue = PyTuple_GET_ITEM(obj, GET_TC(tc)->index);
GET_TC(tc)->index ++;
return 1;
}
@ -198,11 +181,6 @@ static JSOBJ Tuple_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
return GET_TC(tc)->itemValue;
}
static char *Tuple_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
return NULL;
}
static int List_iterNext(JSOBJ obj, JSONTypeContext *tc)
{
if (GET_TC(tc)->index >= GET_TC(tc)->size)
@ -211,7 +189,7 @@ static int List_iterNext(JSOBJ obj, JSONTypeContext *tc)
return 0;
}
GET_TC(tc)->itemValue = PyList_GetItem (obj, GET_TC(tc)->index);
GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index);
GET_TC(tc)->index ++;
return 1;
}
@ -225,11 +203,6 @@ static JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
return GET_TC(tc)->itemValue;
}
static char *List_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
return NULL;
}
//=============================================================================
// Dict iteration functions
// itemName might converted to string (PyObject_Str). Do refCounting
@ -272,28 +245,18 @@ static int Dict_convertKey(PyObject** pkey)
static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
{
PyObject* itemNameTmp;
Py_CLEAR(GET_TC(tc)->itemName);
if (!(GET_TC(tc)->itemName = PyIter_Next(GET_TC(tc)->iterator)))
if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName,
&GET_TC(tc)->itemValue))
{
PRINTMARK();
return 0;
}
if (!(GET_TC(tc)->itemValue = PyDict_GetItem(GET_TC(tc)->dictObj, GET_TC(tc)->itemName)))
{
PRINTMARK();
return 0;
}
itemNameTmp = GET_TC(tc)->itemName;
if (Dict_convertKey(&GET_TC(tc)->itemName) < 0)
{
GET_TC(tc)->itemName = NULL; // itemName is not owned at this point
return -1;
}
Py_DECREF(itemNameTmp);
PRINTMARK();
return 1;
}
@ -301,7 +264,6 @@ static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
static void Dict_iterEnd(JSOBJ obj, JSONTypeContext *tc)
{
Py_CLEAR(GET_TC(tc)->itemName);
Py_CLEAR(GET_TC(tc)->iterator);
Py_DECREF(GET_TC(tc)->dictObj);
PRINTMARK();
}
@ -313,8 +275,8 @@ static JSOBJ Dict_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
static char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
*outLen = PyBytes_Size(GET_TC(tc)->itemName);
return PyBytes_AsString(GET_TC(tc)->itemName);
*outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AS_STRING(GET_TC(tc)->itemName);
}
static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
@ -346,10 +308,10 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
}
// Obtain the value for each key, and pack a list of (key, value) 2-tuples.
nitems = PyList_Size(items);
nitems = PyList_GET_SIZE(items);
for (i = 0; i < nitems; i++)
{
key = PyList_GetItem(items, i);
key = PyList_GET_ITEM(items, i);
value = PyDict_GetItem(GET_TC(tc)->dictObj, key);
if (Dict_convertKey(&key) < 0)
@ -380,9 +342,9 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
return 0;
}
item = PyList_GetItem(GET_TC(tc)->newObj, GET_TC(tc)->index);
GET_TC(tc)->itemName = PyTuple_GetItem(item, 0);
GET_TC(tc)->itemValue = PyTuple_GetItem(item, 1);
item = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index);
GET_TC(tc)->itemName = PyTuple_GET_ITEM(item, 0);
GET_TC(tc)->itemValue = PyTuple_GET_ITEM(item, 1);
GET_TC(tc)->index++;
return 1;
@ -402,17 +364,6 @@ static void SortedDict_iterEnd(JSOBJ obj, JSONTypeContext *tc)
PRINTMARK();
}
static JSOBJ SortedDict_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
{
return GET_TC(tc)->itemValue;
}
static char *SortedDict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
*outLen = PyBytes_Size(GET_TC(tc)->itemName);
return PyBytes_AsString(GET_TC(tc)->itemName);
}
static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc)
{
pc->dictObj = dictObj;
@ -420,23 +371,20 @@ static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder
{
pc->iterEnd = SortedDict_iterEnd;
pc->iterNext = SortedDict_iterNext;
pc->iterGetValue = SortedDict_iterGetValue;
pc->iterGetName = SortedDict_iterGetName;
pc->index = 0;
}
else
{
pc->iterEnd = Dict_iterEnd;
pc->iterNext = Dict_iterNext;
}
pc->iterGetValue = Dict_iterGetValue;
pc->iterGetName = Dict_iterGetName;
pc->iterator = PyObject_GetIter(dictObj);
}
pc->index = 0;
}
static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObjectEncoder *enc)
{
PyObject *obj, *objRepr, *exc, *defaultFn, *newObj;
PyObject *obj, *objRepr, *defaultFn, *newObj;
int level = 0;
TypeContext *pc;
PRINTMARK();
@ -461,8 +409,6 @@ static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObject
pc->dictObj = NULL;
pc->itemValue = NULL;
pc->itemName = NULL;
pc->iterator = NULL;
pc->attrList = NULL;
pc->index = 0;
pc->size = 0;
pc->longValue = 0;
@ -488,39 +434,34 @@ BEGIN:
pc->PyTypeToJSON = PyLongToINT64;
tc->type = JT_LONG;
GET_TC(tc)->longValue = PyLong_AsLongLong(obj);
exc = PyErr_Occurred();
if (!exc)
if (!(GET_TC(tc)->longValue == -1 && PyErr_Occurred()))
{
return;
}
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError))
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
{
goto INVALID;
}
PyErr_Clear();
pc->PyTypeToJSON = PyLongToUINT64;
tc->type = JT_ULONG;
GET_TC(tc)->unsignedLongValue = PyLong_AsUnsignedLongLong(obj);
exc = PyErr_Occurred();
}
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError))
if (!(GET_TC(tc)->unsignedLongValue == (unsigned long long)-1 && PyErr_Occurred()))
{
PyErr_Clear();
pc->PyTypeToJSON = PyLongToINTSTR;
tc->type = JT_RAW;
// Overwritten by PyLong_* due to the union, which would lead to a DECREF in endTypeContext.
GET_TC(tc)->rawJSONValue = NULL;
return;
}
if (exc)
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
{
PRINTMARK();
goto INVALID;
}
PyErr_Clear();
GET_TC(tc)->rawJSONValue = PyNumber_ToBase(obj, 10);
if (!GET_TC(tc)->rawJSONValue)
{
goto INVALID;
}
pc->PyTypeToJSON = PyLongToINTSTR;
tc->type = JT_RAW;
return;
}
else
@ -529,7 +470,7 @@ BEGIN:
PRINTMARK();
if (enc->rejectBytes)
{
PyErr_Format (PyExc_TypeError, "reject_bytes is on and '%s' is bytes", PyBytes_AsString(obj));
PyErr_Format (PyExc_TypeError, "reject_bytes is on and '%s' is bytes", PyBytes_AS_STRING(obj));
goto INVALID;
}
else
@ -577,9 +518,8 @@ ISITERABLE:
pc->iterEnd = List_iterEnd;
pc->iterNext = List_iterNext;
pc->iterGetValue = List_iterGetValue;
pc->iterGetName = List_iterGetName;
GET_TC(tc)->index = 0;
GET_TC(tc)->size = PyList_Size( (PyObject *) obj);
GET_TC(tc)->size = PyList_GET_SIZE( (PyObject *) obj);
return;
}
else
@ -590,9 +530,8 @@ ISITERABLE:
pc->iterEnd = Tuple_iterEnd;
pc->iterNext = Tuple_iterNext;
pc->iterGetValue = Tuple_iterGetValue;
pc->iterGetName = Tuple_iterGetName;
GET_TC(tc)->index = 0;
GET_TC(tc)->size = PyTuple_Size( (PyObject *) obj);
GET_TC(tc)->size = PyTuple_GET_SIZE( (PyObject *) obj);
GET_TC(tc)->itemValue = NULL;
return;
@ -600,12 +539,7 @@ ISITERABLE:
if (UNLIKELY(PyObject_HasAttrString(obj, "toDict")))
{
PyObject* toDictFunc = PyObject_GetAttrString(obj, "toDict");
PyObject* tuple = PyTuple_New(0);
PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL);
Py_DECREF(tuple);
Py_DECREF(toDictFunc);
PyObject* toDictResult = PyObject_CallMethod(obj, "toDict", NULL);
if (toDictResult == NULL)
{
goto INVALID;
@ -613,9 +547,10 @@ ISITERABLE:
if (!PyDict_Check(toDictResult))
{
PyErr_Format(PyExc_TypeError, "toDict() should return a dict, got %s",
Py_TYPE(toDictResult)->tp_name);
Py_DECREF(toDictResult);
tc->type = JT_NULL;
return;
goto INVALID;
}
PRINTMARK();
@ -626,27 +561,17 @@ ISITERABLE:
else
if (UNLIKELY(PyObject_HasAttrString(obj, "__json__")))
{
PyObject* toJSONFunc = PyObject_GetAttrString(obj, "__json__");
PyObject* tuple = PyTuple_New(0);
PyObject* toJSONResult = PyObject_Call(toJSONFunc, tuple, NULL);
Py_DECREF(tuple);
Py_DECREF(toJSONFunc);
PyObject* toJSONResult = PyObject_CallMethod(obj, "__json__", NULL);
if (toJSONResult == NULL)
{
goto INVALID;
}
if (PyErr_Occurred())
{
Py_DECREF(toJSONResult);
goto INVALID;
}
if (!PyBytes_Check(toJSONResult) && !PyUnicode_Check(toJSONResult))
{
PyErr_Format(PyExc_TypeError, "__json__() should return str or bytes, got %s",
Py_TYPE(toJSONResult)->tp_name);
Py_DECREF(toJSONResult);
PyErr_Format (PyExc_TypeError, "expected string");
goto INVALID;
}
@ -657,7 +582,6 @@ ISITERABLE:
return;
}
DEFAULT:
if (defaultFn)
{
// Break infinite loop
@ -694,7 +618,7 @@ DEFAULT:
PyObject* str = PyUnicode_AsEncodedString(objRepr, NULL, "strict");
if (str)
{
PyErr_Format (PyExc_TypeError, "%s is not JSON serializable", PyBytes_AsString(str));
PyErr_Format (PyExc_TypeError, "%s is not JSON serializable", PyBytes_AS_STRING(str));
}
Py_XDECREF(str);
Py_DECREF(objRepr);
@ -889,26 +813,18 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "expected tuple or None as separator");
return NULL;
}
if (PyTuple_Size (oseparators) != 2)
if (PyTuple_GET_SIZE(oseparators) != 2)
{
PyErr_SetString(PyExc_ValueError, "expected tuple of size 2 as separator");
return NULL;
}
oseparatorsItem = PyTuple_GetItem(oseparators, 0);
if (PyErr_Occurred())
{
return NULL;
}
oseparatorsItem = PyTuple_GET_ITEM(oseparators, 0);
if (!PyUnicode_Check(oseparatorsItem))
{
PyErr_SetString(PyExc_TypeError, "expected str as item separator");
return NULL;
}
oseparatorsKey = PyTuple_GetItem(oseparators, 1);
if (PyErr_Occurred())
{
return NULL;
}
oseparatorsKey = PyTuple_GET_ITEM(oseparators, 1);
if (!PyUnicode_Check(oseparatorsKey))
{
PyErr_SetString(PyExc_TypeError, "expected str as key separator");
@ -957,10 +873,12 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
Py_XDECREF(separatorsItemBytes);
Py_XDECREF(separatorsKeyBytes);
if (encoder.errorMsg && !PyErr_Occurred())
if (encoder.errorMsg)
{
// If there is an error message and we don't already have a Python exception, set one.
if (!PyErr_Occurred())
PyErr_Format(PyExc_OverflowError, "%s", encoder.errorMsg);
return NULL;
}
if (PyErr_Occurred())

@ -198,8 +198,8 @@ def test_encode_symbols():
assert s == decoded
def test_encode_long_neg_conversion():
test_input = -9223372036854775808
@pytest.mark.parametrize("test_input", [-1, -9223372036854775808, 18446744073709551615])
def test_encode_special_longs(test_input):
output = ujson.encode(test_input)
json.loads(output)
@ -495,6 +495,30 @@ def test_object_with_json_attribute_error():
ujson.encode(d)
def test_object_with_to_dict_type_error():
# toDict must return a dict, otherwise it should raise an error.
for return_value in (None, 1234, 12.34, True, "json"):
class JSONTest:
def toDict(self):
return return_value
d = {"key": JSONTest()}
with pytest.raises(TypeError):
ujson.encode(d)
def test_object_with_to_dict_attribute_error():
# If toDict raises an error, make sure python actually raises it.
class JSONTest:
def toDict(self):
raise AttributeError
d = {"key": JSONTest()}
with pytest.raises(AttributeError):
ujson.encode(d)
def test_decode_array_empty():
test_input = "[]"
obj = ujson.decode(test_input)