1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-11-22 23:52:08 +01:00

Speed up dumps with sorted keys

Do not create a list of (converted key, value) tuples up front.
Instead, convert keys and fetch values during iteration.

Also, if sorting fails, preserve the original exception instead of
overwriting it with a less informative ValueError. This is the same
behavior as the standard library's json module.
This commit is contained in:
Eugene Toder 2023-12-05 23:56:01 -05:00 committed by Brénainn Woodsend
parent eda5ecd2c2
commit 381f248e03
2 changed files with 45 additions and 83 deletions

@ -209,52 +209,48 @@ static JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
// itemValue is borrowed from object (which is dict). No refCounting // itemValue is borrowed from object (which is dict). No refCounting
//============================================================================= //=============================================================================
static int Dict_convertKey(PyObject** pkey) static PyObject* Dict_convertKey(PyObject* key)
{ {
PyObject* key = *pkey;
if (PyUnicode_Check(key)) if (PyUnicode_Check(key))
{ {
*pkey = PyUnicode_AsEncodedString(key, NULL, "surrogatepass"); return PyUnicode_AsEncodedString(key, NULL, "surrogatepass");
return 1;
} }
if (PyBytes_Check(key)) if (PyBytes_Check(key))
{ {
Py_INCREF(key); Py_INCREF(key);
return 1; return key;
} }
if (UNLIKELY(PyBool_Check(key))) if (UNLIKELY(PyBool_Check(key)))
{ {
*pkey = PyBytes_FromString(key == Py_True ? "true" : "false"); return PyBytes_FromString(key == Py_True ? "true" : "false");
return 1;
} }
if (UNLIKELY(key == Py_None)) if (UNLIKELY(key == Py_None))
{ {
*pkey = PyBytes_FromString("null"); return PyBytes_FromString("null");
return 1;
} }
key = PyObject_Str(key); PyObject* keystr = PyObject_Str(key);
if (!key) if (!keystr)
{ {
PRINTMARK(); PRINTMARK();
return -1; return NULL;
} }
*pkey = PyUnicode_AsEncodedString(key, NULL, "surrogatepass"); key = PyUnicode_AsEncodedString(keystr, NULL, "surrogatepass");
Py_DECREF(key); Py_DECREF(keystr);
return 1; return key;
} }
static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc) static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
{ {
Py_CLEAR(GET_TC(tc)->itemName); PyObject* key;
if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName, if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &key, &GET_TC(tc)->itemValue))
&GET_TC(tc)->itemValue))
{ {
PRINTMARK(); PRINTMARK();
return 0; return 0;
} }
if (Dict_convertKey(&GET_TC(tc)->itemName) < 0) Py_XDECREF(GET_TC(tc)->itemName);
GET_TC(tc)->itemName = Dict_convertKey(key);
if (!GET_TC(tc)->itemName)
{ {
GET_TC(tc)->itemName = NULL; // itemName is not owned at this point
return -1; return -1;
} }
PRINTMARK(); PRINTMARK();
@ -281,59 +277,25 @@ static char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc) static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
{ {
PyObject *items = NULL, *item = NULL, *key = NULL, *value = NULL;
Py_ssize_t i, nitems;
// Upon first call, obtain a list of the keys and sort them. This follows the same logic as the // Upon first call, obtain a list of the keys and sort them. This follows the same logic as the
// standard library's _json.c sort_keys handler. // standard library's _json.c sort_keys handler.
if (GET_TC(tc)->newObj == NULL) if (GET_TC(tc)->newObj == NULL)
{ {
// Obtain the list of keys from the dictionary. // Obtain the list of keys from the dictionary.
items = PyMapping_Keys(GET_TC(tc)->dictObj); PyObject *keys = PyDict_Keys(GET_TC(tc)->dictObj);
if (items == NULL) if (keys == NULL)
{ {
goto error; return -1;
} }
else if (!PyList_Check(items))
{
PyErr_SetString(PyExc_ValueError, "keys must return list");
goto error;
}
// Sort the list. // Sort the list.
if (PyList_Sort(items) < 0) if (PyList_Sort(keys) < 0)
{ {
PyErr_SetString(PyExc_ValueError, "unorderable keys"); Py_DECREF(keys);
goto error; return -1;
} }
// Store the sorted list of keys in the newObj slot.
// Obtain the value for each key, and pack a list of (key, value) 2-tuples. GET_TC(tc)->newObj = keys;
nitems = PyList_GET_SIZE(items); GET_TC(tc)->size = PyList_GET_SIZE(keys);
for (i = 0; i < nitems; i++)
{
key = PyList_GET_ITEM(items, i);
value = PyDict_GetItem(GET_TC(tc)->dictObj, key);
if (Dict_convertKey(&key) < 0)
{
key = NULL; // key is not owned at this point
goto error;
}
item = PyTuple_Pack(2, key, value);
if (item == NULL)
{
goto error;
}
if (PyList_SetItem(items, i, item))
{
goto error;
}
Py_DECREF(key);
}
// Store the sorted list of tuples in the newObj slot.
GET_TC(tc)->newObj = items;
GET_TC(tc)->size = nitems;
} }
if (GET_TC(tc)->index >= GET_TC(tc)->size) if (GET_TC(tc)->index >= GET_TC(tc)->size)
@ -342,26 +304,20 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
return 0; return 0;
} }
item = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index); PyObject* key = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index);
GET_TC(tc)->itemName = PyTuple_GET_ITEM(item, 0); Py_XDECREF(GET_TC(tc)->itemName);
GET_TC(tc)->itemValue = PyTuple_GET_ITEM(item, 1); GET_TC(tc)->itemName = Dict_convertKey(key);
GET_TC(tc)->index++; if (!GET_TC(tc)->itemName)
return 1; {
error:
Py_XDECREF(item);
Py_XDECREF(key);
Py_XDECREF(value);
Py_XDECREF(items);
return -1; return -1;
} }
GET_TC(tc)->itemValue = PyDict_GetItem(GET_TC(tc)->dictObj, key);
static void SortedDict_iterEnd(JSOBJ obj, JSONTypeContext *tc) if (!GET_TC(tc)->itemValue)
{ {
GET_TC(tc)->itemName = NULL; return -1;
GET_TC(tc)->itemValue = NULL; }
Py_DECREF(GET_TC(tc)->dictObj); GET_TC(tc)->index++;
PRINTMARK(); return 1;
} }
static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc) static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc)
@ -369,14 +325,13 @@ static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder
pc->dictObj = dictObj; pc->dictObj = dictObj;
if (enc->sortKeys) if (enc->sortKeys)
{ {
pc->iterEnd = SortedDict_iterEnd;
pc->iterNext = SortedDict_iterNext; pc->iterNext = SortedDict_iterNext;
} }
else else
{ {
pc->iterEnd = Dict_iterEnd;
pc->iterNext = Dict_iterNext; pc->iterNext = Dict_iterNext;
} }
pc->iterEnd = Dict_iterEnd;
pc->iterGetValue = Dict_iterGetValue; pc->iterGetValue = Dict_iterGetValue;
pc->iterGetName = Dict_iterGetName; pc->iterGetName = Dict_iterGetName;
pc->index = 0; pc->index = 0;

@ -576,6 +576,13 @@ def test_sort_keys():
assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}' assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}'
def test_sort_keys_unordered():
data = {"a": 1, 1: 2, None: 3}
assert ujson.dumps(data) == '{"a":1,"1":2,"null":3}'
with pytest.raises(TypeError):
ujson.dumps(data, sort_keys=True)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"test_input", "test_input",
[ [