1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-11-26 10:23:45 +01:00

Speed up and clean up objToJSON

* Use PyDict_Next() to iterate over dicts.
* Use macros to access lists, tuples, bytes.
* Avoid calling PyErr_Occurred() if not necessary.
* Fix a memory leak when encoding very large ints.
* Delete dead and duplicate code.

Also,

* Raise TypeError if toDict() returns a non-dict instead of silently
  converting it to null.
This commit is contained in:
Eugene Toder 2023-12-02 07:34:55 -05:00 committed by Brénainn Woodsend
parent a08b75b970
commit eda5ecd2c2
2 changed files with 87 additions and 145 deletions

@ -59,8 +59,6 @@ typedef struct __TypeContext
Py_ssize_t size; Py_ssize_t size;
PyObject *itemValue; PyObject *itemValue;
PyObject *itemName; PyObject *itemName;
PyObject *attrList;
PyObject *iterator;
union union
{ {
@ -78,13 +76,6 @@ typedef struct __TypeContext
// Avoid infinite loop caused by the default function // Avoid infinite loop caused by the default function
#define DEFAULT_FN_MAX_DEPTH 3 #define DEFAULT_FN_MAX_DEPTH 3
// Bookkeeping record for walking a dict: one PyObject pointer plus two
// size_t counters.
// NOTE(review): presumably `keys` is the key sequence, `i` the current
// position and `sz` the total count — no user of this struct is visible in
// this view, so confirm before relying on that reading.
struct PyDictIterState
{
PyObject *keys;
size_t i;
size_t sz;
};
//#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__) //#define PRINTMARK() fprintf(stderr, "%s: MARK(%d)\n", __FILE__, __LINE__)
#define PRINTMARK() #define PRINTMARK()
@ -102,11 +93,7 @@ static void *PyLongToUINT64(JSOBJ _obj, JSONTypeContext *tc, void *outValue, siz
static void *PyLongToINTSTR(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) static void *PyLongToINTSTR(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{ {
PyObject *obj = PyNumber_ToBase(_obj, 10); PyObject *obj = GET_TC(tc)->rawJSONValue;
if (!obj)
{
return NULL;
}
*_outLen = PyUnicode_GET_LENGTH(obj); *_outLen = PyUnicode_GET_LENGTH(obj);
return PyUnicode_1BYTE_DATA(obj); return PyUnicode_1BYTE_DATA(obj);
} }
@ -121,8 +108,8 @@ static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, si
static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
{ {
PyObject *obj = (PyObject *) _obj; PyObject *obj = (PyObject *) _obj;
*_outLen = PyBytes_Size(obj); *_outLen = PyBytes_GET_SIZE(obj);
return PyBytes_AsString(obj); return PyBytes_AS_STRING(obj);
} }
static char *PyUnicodeToUTF8Raw(JSOBJ _obj, size_t *_outLen, PyObject **pBytesObj) static char *PyUnicodeToUTF8Raw(JSOBJ _obj, size_t *_outLen, PyObject **pBytesObj)
@ -151,8 +138,8 @@ static char *PyUnicodeToUTF8Raw(JSOBJ _obj, size_t *_outLen, PyObject **pBytesOb
return NULL; return NULL;
} }
*_outLen = PyBytes_Size(bytesObj); *_outLen = PyBytes_GET_SIZE(bytesObj);
return PyBytes_AsString(bytesObj); return PyBytes_AS_STRING(bytesObj);
} }
static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen)
@ -175,16 +162,12 @@ static void *PyRawJSONToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, si
static int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc) static int Tuple_iterNext(JSOBJ obj, JSONTypeContext *tc)
{ {
PyObject *item;
if (GET_TC(tc)->index >= GET_TC(tc)->size) if (GET_TC(tc)->index >= GET_TC(tc)->size)
{ {
return 0; return 0;
} }
item = PyTuple_GetItem (obj, GET_TC(tc)->index); GET_TC(tc)->itemValue = PyTuple_GET_ITEM(obj, GET_TC(tc)->index);
GET_TC(tc)->itemValue = item;
GET_TC(tc)->index ++; GET_TC(tc)->index ++;
return 1; return 1;
} }
@ -198,11 +181,6 @@ static JSOBJ Tuple_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
return GET_TC(tc)->itemValue; return GET_TC(tc)->itemValue;
} }
// iterGetName callback installed for tuple iteration.
// Ignores obj, tc and outLen and always returns NULL; *outLen is left
// untouched.
static char *Tuple_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
return NULL;
}
static int List_iterNext(JSOBJ obj, JSONTypeContext *tc) static int List_iterNext(JSOBJ obj, JSONTypeContext *tc)
{ {
if (GET_TC(tc)->index >= GET_TC(tc)->size) if (GET_TC(tc)->index >= GET_TC(tc)->size)
@ -211,7 +189,7 @@ static int List_iterNext(JSOBJ obj, JSONTypeContext *tc)
return 0; return 0;
} }
GET_TC(tc)->itemValue = PyList_GetItem (obj, GET_TC(tc)->index); GET_TC(tc)->itemValue = PyList_GET_ITEM(obj, GET_TC(tc)->index);
GET_TC(tc)->index ++; GET_TC(tc)->index ++;
return 1; return 1;
} }
@ -225,11 +203,6 @@ static JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
return GET_TC(tc)->itemValue; return GET_TC(tc)->itemValue;
} }
// iterGetName callback installed for list iteration.
// Ignores obj, tc and outLen and always returns NULL; *outLen is left
// untouched.
static char *List_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
return NULL;
}
//============================================================================= //=============================================================================
// Dict iteration functions // Dict iteration functions
// itemName might be converted to string (PyObject_Str). Do refCounting // itemName might be converted to string (PyObject_Str). Do refCounting
@ -272,28 +245,18 @@ static int Dict_convertKey(PyObject** pkey)
static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
{ {
PyObject* itemNameTmp;
Py_CLEAR(GET_TC(tc)->itemName); Py_CLEAR(GET_TC(tc)->itemName);
if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName,
if (!(GET_TC(tc)->itemName = PyIter_Next(GET_TC(tc)->iterator))) &GET_TC(tc)->itemValue))
{ {
PRINTMARK(); PRINTMARK();
return 0; return 0;
} }
if (!(GET_TC(tc)->itemValue = PyDict_GetItem(GET_TC(tc)->dictObj, GET_TC(tc)->itemName)))
{
PRINTMARK();
return 0;
}
itemNameTmp = GET_TC(tc)->itemName;
if (Dict_convertKey(&GET_TC(tc)->itemName) < 0) if (Dict_convertKey(&GET_TC(tc)->itemName) < 0)
{ {
GET_TC(tc)->itemName = NULL; // itemName is not owned at this point
return -1; return -1;
} }
Py_DECREF(itemNameTmp);
PRINTMARK(); PRINTMARK();
return 1; return 1;
} }
@ -301,7 +264,6 @@ static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
static void Dict_iterEnd(JSOBJ obj, JSONTypeContext *tc) static void Dict_iterEnd(JSOBJ obj, JSONTypeContext *tc)
{ {
Py_CLEAR(GET_TC(tc)->itemName); Py_CLEAR(GET_TC(tc)->itemName);
Py_CLEAR(GET_TC(tc)->iterator);
Py_DECREF(GET_TC(tc)->dictObj); Py_DECREF(GET_TC(tc)->dictObj);
PRINTMARK(); PRINTMARK();
} }
@ -313,8 +275,8 @@ static JSOBJ Dict_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
static char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) static char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{ {
*outLen = PyBytes_Size(GET_TC(tc)->itemName); *outLen = PyBytes_GET_SIZE(GET_TC(tc)->itemName);
return PyBytes_AsString(GET_TC(tc)->itemName); return PyBytes_AS_STRING(GET_TC(tc)->itemName);
} }
static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc) static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
@ -346,10 +308,10 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
} }
// Obtain the value for each key, and pack a list of (key, value) 2-tuples. // Obtain the value for each key, and pack a list of (key, value) 2-tuples.
nitems = PyList_Size(items); nitems = PyList_GET_SIZE(items);
for (i = 0; i < nitems; i++) for (i = 0; i < nitems; i++)
{ {
key = PyList_GetItem(items, i); key = PyList_GET_ITEM(items, i);
value = PyDict_GetItem(GET_TC(tc)->dictObj, key); value = PyDict_GetItem(GET_TC(tc)->dictObj, key);
if (Dict_convertKey(&key) < 0) if (Dict_convertKey(&key) < 0)
@ -380,9 +342,9 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
return 0; return 0;
} }
item = PyList_GetItem(GET_TC(tc)->newObj, GET_TC(tc)->index); item = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index);
GET_TC(tc)->itemName = PyTuple_GetItem(item, 0); GET_TC(tc)->itemName = PyTuple_GET_ITEM(item, 0);
GET_TC(tc)->itemValue = PyTuple_GetItem(item, 1); GET_TC(tc)->itemValue = PyTuple_GET_ITEM(item, 1);
GET_TC(tc)->index++; GET_TC(tc)->index++;
return 1; return 1;
@ -402,17 +364,6 @@ static void SortedDict_iterEnd(JSOBJ obj, JSONTypeContext *tc)
PRINTMARK(); PRINTMARK();
} }
// iterGetValue callback for the sorted-dict iteration path.
// Returns the itemValue currently stored in the type context; obj is unused.
static JSOBJ SortedDict_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
{
return GET_TC(tc)->itemValue;
}
// iterGetName callback for the sorted-dict iteration path.
// Writes the byte length of the type context's itemName to *outLen and
// returns a pointer to its character data; obj is unused.
// Both accessors are PyBytes_* calls, so itemName is expected to be a bytes
// object by the time this runs.
static char *SortedDict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
{
*outLen = PyBytes_Size(GET_TC(tc)->itemName);
return PyBytes_AsString(GET_TC(tc)->itemName);
}
static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc) static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc)
{ {
pc->dictObj = dictObj; pc->dictObj = dictObj;
@ -420,23 +371,20 @@ static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder
{ {
pc->iterEnd = SortedDict_iterEnd; pc->iterEnd = SortedDict_iterEnd;
pc->iterNext = SortedDict_iterNext; pc->iterNext = SortedDict_iterNext;
pc->iterGetValue = SortedDict_iterGetValue;
pc->iterGetName = SortedDict_iterGetName;
pc->index = 0;
} }
else else
{ {
pc->iterEnd = Dict_iterEnd; pc->iterEnd = Dict_iterEnd;
pc->iterNext = Dict_iterNext; pc->iterNext = Dict_iterNext;
pc->iterGetValue = Dict_iterGetValue;
pc->iterGetName = Dict_iterGetName;
pc->iterator = PyObject_GetIter(dictObj);
} }
pc->iterGetValue = Dict_iterGetValue;
pc->iterGetName = Dict_iterGetName;
pc->index = 0;
} }
static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObjectEncoder *enc) static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObjectEncoder *enc)
{ {
PyObject *obj, *objRepr, *exc, *defaultFn, *newObj; PyObject *obj, *objRepr, *defaultFn, *newObj;
int level = 0; int level = 0;
TypeContext *pc; TypeContext *pc;
PRINTMARK(); PRINTMARK();
@ -461,8 +409,6 @@ static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObject
pc->dictObj = NULL; pc->dictObj = NULL;
pc->itemValue = NULL; pc->itemValue = NULL;
pc->itemName = NULL; pc->itemName = NULL;
pc->iterator = NULL;
pc->attrList = NULL;
pc->index = 0; pc->index = 0;
pc->size = 0; pc->size = 0;
pc->longValue = 0; pc->longValue = 0;
@ -488,39 +434,34 @@ BEGIN:
pc->PyTypeToJSON = PyLongToINT64; pc->PyTypeToJSON = PyLongToINT64;
tc->type = JT_LONG; tc->type = JT_LONG;
GET_TC(tc)->longValue = PyLong_AsLongLong(obj); GET_TC(tc)->longValue = PyLong_AsLongLong(obj);
if (!(GET_TC(tc)->longValue == -1 && PyErr_Occurred()))
exc = PyErr_Occurred();
if (!exc)
{ {
return;
}
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError))
{
PyErr_Clear();
pc->PyTypeToJSON = PyLongToUINT64;
tc->type = JT_ULONG;
GET_TC(tc)->unsignedLongValue = PyLong_AsUnsignedLongLong(obj);
exc = PyErr_Occurred();
}
if (exc && PyErr_ExceptionMatches(PyExc_OverflowError))
{
PyErr_Clear();
pc->PyTypeToJSON = PyLongToINTSTR;
tc->type = JT_RAW;
// Overwritten by PyLong_* due to the union, which would lead to a DECREF in endTypeContext.
GET_TC(tc)->rawJSONValue = NULL;
return; return;
} }
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
if (exc)
{ {
PRINTMARK();
goto INVALID; goto INVALID;
} }
PyErr_Clear();
pc->PyTypeToJSON = PyLongToUINT64;
tc->type = JT_ULONG;
GET_TC(tc)->unsignedLongValue = PyLong_AsUnsignedLongLong(obj);
if (!(GET_TC(tc)->unsignedLongValue == (unsigned long long)-1 && PyErr_Occurred()))
{
return;
}
if (!PyErr_ExceptionMatches(PyExc_OverflowError))
{
goto INVALID;
}
PyErr_Clear();
GET_TC(tc)->rawJSONValue = PyNumber_ToBase(obj, 10);
if (!GET_TC(tc)->rawJSONValue)
{
goto INVALID;
}
pc->PyTypeToJSON = PyLongToINTSTR;
tc->type = JT_RAW;
return; return;
} }
else else
@ -529,7 +470,7 @@ BEGIN:
PRINTMARK(); PRINTMARK();
if (enc->rejectBytes) if (enc->rejectBytes)
{ {
PyErr_Format (PyExc_TypeError, "reject_bytes is on and '%s' is bytes", PyBytes_AsString(obj)); PyErr_Format (PyExc_TypeError, "reject_bytes is on and '%s' is bytes", PyBytes_AS_STRING(obj));
goto INVALID; goto INVALID;
} }
else else
@ -577,9 +518,8 @@ ISITERABLE:
pc->iterEnd = List_iterEnd; pc->iterEnd = List_iterEnd;
pc->iterNext = List_iterNext; pc->iterNext = List_iterNext;
pc->iterGetValue = List_iterGetValue; pc->iterGetValue = List_iterGetValue;
pc->iterGetName = List_iterGetName;
GET_TC(tc)->index = 0; GET_TC(tc)->index = 0;
GET_TC(tc)->size = PyList_Size( (PyObject *) obj); GET_TC(tc)->size = PyList_GET_SIZE( (PyObject *) obj);
return; return;
} }
else else
@ -590,9 +530,8 @@ ISITERABLE:
pc->iterEnd = Tuple_iterEnd; pc->iterEnd = Tuple_iterEnd;
pc->iterNext = Tuple_iterNext; pc->iterNext = Tuple_iterNext;
pc->iterGetValue = Tuple_iterGetValue; pc->iterGetValue = Tuple_iterGetValue;
pc->iterGetName = Tuple_iterGetName;
GET_TC(tc)->index = 0; GET_TC(tc)->index = 0;
GET_TC(tc)->size = PyTuple_Size( (PyObject *) obj); GET_TC(tc)->size = PyTuple_GET_SIZE( (PyObject *) obj);
GET_TC(tc)->itemValue = NULL; GET_TC(tc)->itemValue = NULL;
return; return;
@ -600,12 +539,7 @@ ISITERABLE:
if (UNLIKELY(PyObject_HasAttrString(obj, "toDict"))) if (UNLIKELY(PyObject_HasAttrString(obj, "toDict")))
{ {
PyObject* toDictFunc = PyObject_GetAttrString(obj, "toDict"); PyObject* toDictResult = PyObject_CallMethod(obj, "toDict", NULL);
PyObject* tuple = PyTuple_New(0);
PyObject* toDictResult = PyObject_Call(toDictFunc, tuple, NULL);
Py_DECREF(tuple);
Py_DECREF(toDictFunc);
if (toDictResult == NULL) if (toDictResult == NULL)
{ {
goto INVALID; goto INVALID;
@ -613,9 +547,10 @@ ISITERABLE:
if (!PyDict_Check(toDictResult)) if (!PyDict_Check(toDictResult))
{ {
PyErr_Format(PyExc_TypeError, "toDict() should return a dict, got %s",
Py_TYPE(toDictResult)->tp_name);
Py_DECREF(toDictResult); Py_DECREF(toDictResult);
tc->type = JT_NULL; goto INVALID;
return;
} }
PRINTMARK(); PRINTMARK();
@ -626,27 +561,17 @@ ISITERABLE:
else else
if (UNLIKELY(PyObject_HasAttrString(obj, "__json__"))) if (UNLIKELY(PyObject_HasAttrString(obj, "__json__")))
{ {
PyObject* toJSONFunc = PyObject_GetAttrString(obj, "__json__"); PyObject* toJSONResult = PyObject_CallMethod(obj, "__json__", NULL);
PyObject* tuple = PyTuple_New(0);
PyObject* toJSONResult = PyObject_Call(toJSONFunc, tuple, NULL);
Py_DECREF(tuple);
Py_DECREF(toJSONFunc);
if (toJSONResult == NULL) if (toJSONResult == NULL)
{ {
goto INVALID; goto INVALID;
} }
if (PyErr_Occurred())
{
Py_DECREF(toJSONResult);
goto INVALID;
}
if (!PyBytes_Check(toJSONResult) && !PyUnicode_Check(toJSONResult)) if (!PyBytes_Check(toJSONResult) && !PyUnicode_Check(toJSONResult))
{ {
PyErr_Format(PyExc_TypeError, "__json__() should return str or bytes, got %s",
Py_TYPE(toJSONResult)->tp_name);
Py_DECREF(toJSONResult); Py_DECREF(toJSONResult);
PyErr_Format (PyExc_TypeError, "expected string");
goto INVALID; goto INVALID;
} }
@ -657,7 +582,6 @@ ISITERABLE:
return; return;
} }
DEFAULT:
if (defaultFn) if (defaultFn)
{ {
// Break infinite loop // Break infinite loop
@ -694,7 +618,7 @@ DEFAULT:
PyObject* str = PyUnicode_AsEncodedString(objRepr, NULL, "strict"); PyObject* str = PyUnicode_AsEncodedString(objRepr, NULL, "strict");
if (str) if (str)
{ {
PyErr_Format (PyExc_TypeError, "%s is not JSON serializable", PyBytes_AsString(str)); PyErr_Format (PyExc_TypeError, "%s is not JSON serializable", PyBytes_AS_STRING(str));
} }
Py_XDECREF(str); Py_XDECREF(str);
Py_DECREF(objRepr); Py_DECREF(objRepr);
@ -889,26 +813,18 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyErr_SetString(PyExc_TypeError, "expected tuple or None as separator"); PyErr_SetString(PyExc_TypeError, "expected tuple or None as separator");
return NULL; return NULL;
} }
if (PyTuple_Size (oseparators) != 2) if (PyTuple_GET_SIZE(oseparators) != 2)
{ {
PyErr_SetString(PyExc_ValueError, "expected tuple of size 2 as separator"); PyErr_SetString(PyExc_ValueError, "expected tuple of size 2 as separator");
return NULL; return NULL;
} }
oseparatorsItem = PyTuple_GetItem(oseparators, 0); oseparatorsItem = PyTuple_GET_ITEM(oseparators, 0);
if (PyErr_Occurred())
{
return NULL;
}
if (!PyUnicode_Check(oseparatorsItem)) if (!PyUnicode_Check(oseparatorsItem))
{ {
PyErr_SetString(PyExc_TypeError, "expected str as item separator"); PyErr_SetString(PyExc_TypeError, "expected str as item separator");
return NULL; return NULL;
} }
oseparatorsKey = PyTuple_GetItem(oseparators, 1); oseparatorsKey = PyTuple_GET_ITEM(oseparators, 1);
if (PyErr_Occurred())
{
return NULL;
}
if (!PyUnicode_Check(oseparatorsKey)) if (!PyUnicode_Check(oseparatorsKey))
{ {
PyErr_SetString(PyExc_TypeError, "expected str as key separator"); PyErr_SetString(PyExc_TypeError, "expected str as key separator");
@ -957,10 +873,12 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
Py_XDECREF(separatorsItemBytes); Py_XDECREF(separatorsItemBytes);
Py_XDECREF(separatorsKeyBytes); Py_XDECREF(separatorsKeyBytes);
if (encoder.errorMsg && !PyErr_Occurred()) if (encoder.errorMsg)
{ {
// If there is an error message and we don't already have a Python exception, set one. // If there is an error message and we don't already have a Python exception, set one.
PyErr_Format (PyExc_OverflowError, "%s", encoder.errorMsg); if (!PyErr_Occurred())
PyErr_Format(PyExc_OverflowError, "%s", encoder.errorMsg);
return NULL;
} }
if (PyErr_Occurred()) if (PyErr_Occurred())

@ -198,8 +198,8 @@ def test_encode_symbols():
assert s == decoded assert s == decoded
def test_encode_long_neg_conversion(): @pytest.mark.parametrize("test_input", [-1, -9223372036854775808, 18446744073709551615])
test_input = -9223372036854775808 def test_encode_special_longs(test_input):
output = ujson.encode(test_input) output = ujson.encode(test_input)
json.loads(output) json.loads(output)
@ -495,6 +495,30 @@ def test_object_with_json_attribute_error():
ujson.encode(d) ujson.encode(d)
def test_object_with_to_dict_type_error():
    """encode() must raise TypeError whenever toDict() hands back a non-dict."""
    non_dict_results = (None, 1234, 12.34, True, "json")
    for bad_result in non_dict_results:

        class JSONTest:
            def toDict(self):
                # Read the loop variable at call time; encode() runs within
                # the same iteration, so each value is exercised in turn.
                return bad_result

        payload = {"key": JSONTest()}
        with pytest.raises(TypeError):
            ujson.encode(payload)
def test_object_with_to_dict_attribute_error():
    """An exception raised inside toDict() must surface from encode() unchanged."""

    class JSONTest:
        def toDict(self):
            raise AttributeError

    payload = {"key": JSONTest()}
    with pytest.raises(AttributeError):
        ujson.encode(payload)
def test_decode_array_empty(): def test_decode_array_empty():
test_input = "[]" test_input = "[]"
obj = ujson.decode(test_input) obj = ujson.decode(test_input)