mirror of
https://github.com/ultrajson/ultrajson.git
synced 2024-11-22 23:52:08 +01:00
Speed up dumps with sorted keys
Do not create a list of (converted key, value) tuples up front. Instead, convert keys and fetch values during iteration. Also, if sorting fails, preserve the original exception (e.g. the TypeError raised for unorderable keys) instead of overwriting it with a less informative ValueError. This matches the behavior of the standard library's json module.
This commit is contained in:
parent
eda5ecd2c2
commit
381f248e03
@ -209,52 +209,48 @@ static JSOBJ List_iterGetValue(JSOBJ obj, JSONTypeContext *tc)
|
|||||||
// itemValue is borrowed from object (which is dict). No refCounting
|
// itemValue is borrowed from object (which is dict). No refCounting
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
|
|
||||||
static int Dict_convertKey(PyObject** pkey)
|
static PyObject* Dict_convertKey(PyObject* key)
|
||||||
{
|
{
|
||||||
PyObject* key = *pkey;
|
|
||||||
if (PyUnicode_Check(key))
|
if (PyUnicode_Check(key))
|
||||||
{
|
{
|
||||||
*pkey = PyUnicode_AsEncodedString(key, NULL, "surrogatepass");
|
return PyUnicode_AsEncodedString(key, NULL, "surrogatepass");
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
if (PyBytes_Check(key))
|
if (PyBytes_Check(key))
|
||||||
{
|
{
|
||||||
Py_INCREF(key);
|
Py_INCREF(key);
|
||||||
return 1;
|
return key;
|
||||||
}
|
}
|
||||||
if (UNLIKELY(PyBool_Check(key)))
|
if (UNLIKELY(PyBool_Check(key)))
|
||||||
{
|
{
|
||||||
*pkey = PyBytes_FromString(key == Py_True ? "true" : "false");
|
return PyBytes_FromString(key == Py_True ? "true" : "false");
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
if (UNLIKELY(key == Py_None))
|
if (UNLIKELY(key == Py_None))
|
||||||
{
|
{
|
||||||
*pkey = PyBytes_FromString("null");
|
return PyBytes_FromString("null");
|
||||||
return 1;
|
|
||||||
}
|
}
|
||||||
key = PyObject_Str(key);
|
PyObject* keystr = PyObject_Str(key);
|
||||||
if (!key)
|
if (!keystr)
|
||||||
{
|
{
|
||||||
PRINTMARK();
|
PRINTMARK();
|
||||||
return -1;
|
return NULL;
|
||||||
}
|
}
|
||||||
*pkey = PyUnicode_AsEncodedString(key, NULL, "surrogatepass");
|
key = PyUnicode_AsEncodedString(keystr, NULL, "surrogatepass");
|
||||||
Py_DECREF(key);
|
Py_DECREF(keystr);
|
||||||
return 1;
|
return key;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
|
static int Dict_iterNext(JSOBJ obj, JSONTypeContext *tc)
|
||||||
{
|
{
|
||||||
Py_CLEAR(GET_TC(tc)->itemName);
|
PyObject* key;
|
||||||
if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &GET_TC(tc)->itemName,
|
if (!PyDict_Next(GET_TC(tc)->dictObj, &GET_TC(tc)->index, &key, &GET_TC(tc)->itemValue))
|
||||||
&GET_TC(tc)->itemValue))
|
|
||||||
{
|
{
|
||||||
PRINTMARK();
|
PRINTMARK();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (Dict_convertKey(&GET_TC(tc)->itemName) < 0)
|
Py_XDECREF(GET_TC(tc)->itemName);
|
||||||
|
GET_TC(tc)->itemName = Dict_convertKey(key);
|
||||||
|
if (!GET_TC(tc)->itemName)
|
||||||
{
|
{
|
||||||
GET_TC(tc)->itemName = NULL; // itemName is not owned at this point
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
PRINTMARK();
|
PRINTMARK();
|
||||||
@ -281,59 +277,25 @@ static char *Dict_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
|
|||||||
|
|
||||||
static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
|
static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
|
||||||
{
|
{
|
||||||
PyObject *items = NULL, *item = NULL, *key = NULL, *value = NULL;
|
|
||||||
Py_ssize_t i, nitems;
|
|
||||||
|
|
||||||
// Upon first call, obtain a list of the keys and sort them. This follows the same logic as the
|
// Upon first call, obtain a list of the keys and sort them. This follows the same logic as the
|
||||||
// standard library's _json.c sort_keys handler.
|
// standard library's _json.c sort_keys handler.
|
||||||
if (GET_TC(tc)->newObj == NULL)
|
if (GET_TC(tc)->newObj == NULL)
|
||||||
{
|
{
|
||||||
// Obtain the list of keys from the dictionary.
|
// Obtain the list of keys from the dictionary.
|
||||||
items = PyMapping_Keys(GET_TC(tc)->dictObj);
|
PyObject *keys = PyDict_Keys(GET_TC(tc)->dictObj);
|
||||||
if (items == NULL)
|
if (keys == NULL)
|
||||||
{
|
{
|
||||||
goto error;
|
return -1;
|
||||||
}
|
}
|
||||||
else if (!PyList_Check(items))
|
|
||||||
{
|
|
||||||
PyErr_SetString(PyExc_ValueError, "keys must return list");
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort the list.
|
// Sort the list.
|
||||||
if (PyList_Sort(items) < 0)
|
if (PyList_Sort(keys) < 0)
|
||||||
{
|
{
|
||||||
PyErr_SetString(PyExc_ValueError, "unorderable keys");
|
Py_DECREF(keys);
|
||||||
goto error;
|
return -1;
|
||||||
}
|
}
|
||||||
|
// Store the sorted list of keys in the newObj slot.
|
||||||
// Obtain the value for each key, and pack a list of (key, value) 2-tuples.
|
GET_TC(tc)->newObj = keys;
|
||||||
nitems = PyList_GET_SIZE(items);
|
GET_TC(tc)->size = PyList_GET_SIZE(keys);
|
||||||
for (i = 0; i < nitems; i++)
|
|
||||||
{
|
|
||||||
key = PyList_GET_ITEM(items, i);
|
|
||||||
value = PyDict_GetItem(GET_TC(tc)->dictObj, key);
|
|
||||||
|
|
||||||
if (Dict_convertKey(&key) < 0)
|
|
||||||
{
|
|
||||||
key = NULL; // key is not owned at this point
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
item = PyTuple_Pack(2, key, value);
|
|
||||||
if (item == NULL)
|
|
||||||
{
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (PyList_SetItem(items, i, item))
|
|
||||||
{
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
Py_DECREF(key);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Store the sorted list of tuples in the newObj slot.
|
|
||||||
GET_TC(tc)->newObj = items;
|
|
||||||
GET_TC(tc)->size = nitems;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (GET_TC(tc)->index >= GET_TC(tc)->size)
|
if (GET_TC(tc)->index >= GET_TC(tc)->size)
|
||||||
@ -342,26 +304,20 @@ static int SortedDict_iterNext(JSOBJ obj, JSONTypeContext *tc)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
item = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index);
|
PyObject* key = PyList_GET_ITEM(GET_TC(tc)->newObj, GET_TC(tc)->index);
|
||||||
GET_TC(tc)->itemName = PyTuple_GET_ITEM(item, 0);
|
Py_XDECREF(GET_TC(tc)->itemName);
|
||||||
GET_TC(tc)->itemValue = PyTuple_GET_ITEM(item, 1);
|
GET_TC(tc)->itemName = Dict_convertKey(key);
|
||||||
GET_TC(tc)->index++;
|
if (!GET_TC(tc)->itemName)
|
||||||
return 1;
|
{
|
||||||
|
|
||||||
error:
|
|
||||||
Py_XDECREF(item);
|
|
||||||
Py_XDECREF(key);
|
|
||||||
Py_XDECREF(value);
|
|
||||||
Py_XDECREF(items);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
GET_TC(tc)->itemValue = PyDict_GetItem(GET_TC(tc)->dictObj, key);
|
||||||
static void SortedDict_iterEnd(JSOBJ obj, JSONTypeContext *tc)
|
if (!GET_TC(tc)->itemValue)
|
||||||
{
|
{
|
||||||
GET_TC(tc)->itemName = NULL;
|
return -1;
|
||||||
GET_TC(tc)->itemValue = NULL;
|
}
|
||||||
Py_DECREF(GET_TC(tc)->dictObj);
|
GET_TC(tc)->index++;
|
||||||
PRINTMARK();
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc)
|
static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder *enc)
|
||||||
@ -369,14 +325,13 @@ static void SetupDictIter(PyObject *dictObj, TypeContext *pc, JSONObjectEncoder
|
|||||||
pc->dictObj = dictObj;
|
pc->dictObj = dictObj;
|
||||||
if (enc->sortKeys)
|
if (enc->sortKeys)
|
||||||
{
|
{
|
||||||
pc->iterEnd = SortedDict_iterEnd;
|
|
||||||
pc->iterNext = SortedDict_iterNext;
|
pc->iterNext = SortedDict_iterNext;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
pc->iterEnd = Dict_iterEnd;
|
|
||||||
pc->iterNext = Dict_iterNext;
|
pc->iterNext = Dict_iterNext;
|
||||||
}
|
}
|
||||||
|
pc->iterEnd = Dict_iterEnd;
|
||||||
pc->iterGetValue = Dict_iterGetValue;
|
pc->iterGetValue = Dict_iterGetValue;
|
||||||
pc->iterGetName = Dict_iterGetName;
|
pc->iterGetName = Dict_iterGetName;
|
||||||
pc->index = 0;
|
pc->index = 0;
|
||||||
|
@ -576,6 +576,13 @@ def test_sort_keys():
|
|||||||
assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}'
|
assert sorted_keys == '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}'
|
||||||
|
|
||||||
|
|
||||||
|
def test_sort_keys_unordered():
|
||||||
|
data = {"a": 1, 1: 2, None: 3}
|
||||||
|
assert ujson.dumps(data) == '{"a":1,"1":2,"null":3}'
|
||||||
|
with pytest.raises(TypeError):
|
||||||
|
ujson.dumps(data, sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"test_input",
|
"test_input",
|
||||||
[
|
[
|
||||||
|
Loading…
Reference in New Issue
Block a user