From 381f248e03c10c70eeb4d424f4b990b4c0994210 Mon Sep 17 00:00:00 2001 From: Eugene Toder Date: Tue, 5 Dec 2023 23:56:01 -0500 Subject: [PATCH] Speed up dumps with sorted keys Do not create a list of tuples with (converted key, value) upfront. Instead, convert keys and fetch values during iteration. Also, if sorting fails, preserve the original exception instead of overwriting it with a less informative ValueError. This is the same behavior as the standard library's json module. --- python/objToJSON.c | 121 ++++++++++++++------------------------------ tests/test_ujson.py | 7 +++ 2 files changed, 45 insertions(+), 83 deletions(-) diff --git a/python/objToJSON.c b/python/objToJSON.c index 20e9d2e..b754819 100644 --- a/python/objToJSON.c +++ b/python/objToJSON.c @@ -209,52 +209,48 @@ static JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc) // itemValue is borrowed from object (which is dict). No refCounting //============================================================================= -static int Dict_convertKey(PyObject** pkey) +static PyObject* Dict_convertKey(PyObject* key) { - PyObject* key = *pkey; if (PyUnicode_Check(key)) { - *pkey = PyUnicode_AsEncodedString(key, NULL, "surrogatepass"); - return 1; + return PyUnicode_AsEncodedString(key, NULL, "surrogatepass"); } if (PyBytes_Check(key)) { Py_INCREF(key); - return 1; + return key; } if (UNLIKELY(PyBool_Check(key))) { - *pkey = PyBytes_FromString(key == Py_True ? 
"true" : "false"); } if (UNLIKELY(key == Py_None)) { - *pkey = PyBytes_FromString("null"); - return 1; + return PyBytes_FromString("null"); } - key = PyObject_Str(key); - if (!key) + PyObject* keystr = PyObject_Str(key); + if (!keystr) { PRINTMARK(); - return -1; + return NULL; } - *pkey = PyUnicode_AsEncodedString(key, NULL, "surrogatepass"); - Py_DECREF(key); - return 1; + key = PyUnicode_AsEncodedString(keystr, NULL, "surrogatepass"); + Py_DECREF(keystr); + return key; } static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) { - Py_CLEAR(GET_TC(tc)->itemName); - if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName, - &GET_TC(tc)->itemValue)) + PyObject* key; + if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &key, &GET_TC(tc)->itemValue)) { PRINTMARK(); return 0; } - if (Dict_convertKey(&GET_TC(tc)->itemName) < 0) + Py_XDECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = Dict_convertKey(key); + if (!GET_TC(tc)->itemName) { - GET_TC(tc)->itemName = NULL; // itemName is not owned at this point return -1; } PRINTMARK(); @@ -281,59 +277,25 @@ static char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc) { - PyObject *items = NULL, *item = NULL, *key = NULL, *value = NULL; - Py_ssize_t i, nitems; - // Upon first call, obtain a list of the keys and sort them. This follows the same logic as the // standard library's _json.c sort_keys handler. if (GET_TC(tc)->newObj == NULL) { // Obtain the list of keys from the dictionary. - items = PyMapping_Keys(GET_TC(tc)->dictObj); - if (items == NULL) + PyObject *keys = PyDict_Keys(GET_TC(tc)->dictObj); + if (keys == NULL) { - goto error; + return -1; } - else if (!PyList_Check(items)) - { - PyErr_SetString(PyExc_ValueError, "keys must return list"); - goto error; - } - // Sort the list. 
- if (PyList_Sort(items) < 0) + if (PyList_Sort(keys) < 0) { - PyErr_SetString(PyExc_ValueError, "unorderable keys"); - goto error; + Py_DECREF(keys); + return -1; } - - // Obtain the value for each key, and pack a list of (key, value) 2-tuples. - nitems = PyList_GET_SIZE(items); - for (i = 0; i < nitems; i++) - { - key = PyList_GET_ITEM(items, i); - value = PyDict_GetItem(GET_TC(tc)->dictObj, key); - - if (Dict_convertKey(&key) < 0) - { - key = NULL; // key is not owned at this point - goto error; - } - item = PyTuple_Pack(2, key, value); - if (item == NULL) - { - goto error; - } - if (PyList_SetItem(items, i, item)) - { - goto error; - } - Py_DECREF(key); - } - - // Store the sorted list of tuples in the newObj slot. - GET_TC(tc)->newObj = items; - GET_TC(tc)->size = nitems; + // Store the sorted list of keys in the newObj slot. + GET_TC(tc)->newObj = keys; + GET_TC(tc)->size = PyList_GET_SIZE(keys); } if (GET_TC(tc)->index >= GET_TC(tc)->size) @@ -342,26 +304,20 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc) return 0; } - item = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index); - GET_TC(tc)->itemName = PyTuple_GET_ITEM(item, 0); - GET_TC(tc)->itemValue = PyTuple_GET_ITEM(item, 1); + PyObject* key = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index); + Py_XDECREF(GET_TC(tc)->itemName); + GET_TC(tc)->itemName = Dict_convertKey(key); + if (!GET_TC(tc)->itemName) + { + return -1; + } + GET_TC(tc)->itemValue = PyDict_GetItem(GET_TC(tc)->dictObj, key); + if (!GET_TC(tc)->itemValue) + { + return -1; + } GET_TC(tc)->index++; return 1; - -error: - Py_XDECREF(item); - Py_XDECREF(key); - Py_XDECREF(value); - Py_XDECREF(items); - return -1; -} - -static void SortedDict_iterEnd(JSOBJ obj, JSONTypeContext *tc) -{ - GET_TC(tc)->itemName = NULL; - GET_TC(tc)->itemValue = NULL; - Py_DECREF(GET_TC(tc)->dictObj); - PRINTMARK(); } static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc) @@ -369,14 +325,13 @@ static void 
SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder pc->dictObj = dictObj; if (enc->sortKeys) { - pc->iterEnd = SortedDict_iterEnd; pc->iterNext = SortedDict_iterNext; } else { - pc->iterEnd = Dict_iterEnd; pc->iterNext = Dict_iterNext; } + pc->iterEnd = Dict_iterEnd; pc->iterGetValue = Dict_iterGetValue; pc->iterGetName = Dict_iterGetName; pc->index = 0; diff --git a/tests/test_ujson.py b/tests/test_ujson.py index 277c2cd..506666d 100644 --- a/tests/test_ujson.py +++ b/tests/test_ujson.py @@ -576,6 +576,13 @@ def test_sort_keys(): assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}' +def test_sort_keys_unordered(): + data = {"a": 1, 1: 2, None: 3} + assert ujson.dumps(data) == '{"a":1,"1":2,"null":3}' + with pytest.raises(TypeError): + ujson.dumps(data, sort_keys=True) + + @pytest.mark.parametrize( "test_input", [