1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-12-04 06:38:23 +01:00

Add separators encoding parameter

Closes #283
This commit is contained in:
JustAnotherArchivist 2022-06-27 22:54:49 +00:00 committed by Brénainn Woodsend
parent b02bb27eac
commit 8a946e5830
4 changed files with 132 additions and 15 deletions

@ -268,6 +268,13 @@ typedef struct __JSONObjectEncoder
If true, bytes are rejected. */
int rejectBytes;
/*
Configuration for item and key separators, e.g. "," and ":" for a compact representation or ", " and ": " to match the Python standard library's defaults. */
size_t itemSeparatorLength;
const char *itemSeparatorChars;
size_t keySeparatorLength;
const char *keySeparatorChars;
/*
Private pointer to be used by the caller. Passed as encoder_prv in JSONTypeContext */
void *prv;

@ -677,8 +677,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
if (name)
{
// 2 extra for the colon and optional space after it
Buffer_Reserve(enc, RESERVE_STRING(cbName) + 2);
Buffer_Reserve(enc, RESERVE_STRING(cbName) + enc->keySeparatorLength);
Buffer_AppendCharUnchecked(enc, '\"');
if (enc->forceASCII)
@ -698,11 +697,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
Buffer_AppendCharUnchecked(enc, '\"');
Buffer_AppendCharUnchecked (enc, ':');
if (enc->indent)
{
Buffer_AppendCharUnchecked (enc, ' ');
}
Buffer_memcpy(enc, enc->keySeparatorChars, enc->keySeparatorLength);
}
tc.encoder_prv = enc->prv;
@ -741,12 +736,12 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
while (enc->iterNext(obj, &tc))
{
// The extra 2 bytes cover the comma and (optional) newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
// The extra 1 byte covers the optional newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + enc->itemSeparatorLength + 1);
if (count > 0)
{
Buffer_AppendCharUnchecked (enc, ',');
Buffer_memcpy(enc, enc->itemSeparatorChars, enc->itemSeparatorLength);
}
Buffer_AppendIndentNewlineUnchecked (enc);
@ -786,8 +781,8 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
while ((res = enc->iterNext(obj, &tc)))
{
// The extra 2 bytes cover the comma and optional newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + 2);
// The extra 1 byte covers the optional newline.
Buffer_Reserve (enc, enc->indent * (enc->level + 1) + enc->itemSeparatorLength + 1);
if(res < 0)
{
@ -799,7 +794,7 @@ static void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t c
if (count > 0)
{
Buffer_AppendCharUnchecked (enc, ',');
Buffer_memcpy(enc, enc->itemSeparatorChars, enc->itemSeparatorLength);
}
Buffer_AppendIndentNewlineUnchecked (enc);

@ -794,7 +794,7 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", NULL };
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "allow_nan", "reject_bytes", "default", "separators", NULL };
char buffer[65536];
char *ret;
@ -806,6 +806,11 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyObject *oescapeForwardSlashes = NULL;
PyObject *osortKeys = NULL;
PyObject *odefaultFn = NULL;
PyObject *oseparators = NULL;
PyObject *oseparatorsItem = NULL;
PyObject *separatorsItemBytes = NULL;
PyObject *oseparatorsKey = NULL;
PyObject *separatorsKeyBytes = NULL;
int allowNan = -1;
int orejectBytes = -1;
size_t retLen;
@ -834,13 +839,17 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
0, //indent
1, //allowNan
1, //rejectBytes
0, //itemSeparatorLength
NULL, //itemSeparatorChars
0, //keySeparatorLength
NULL, //keySeparatorChars
NULL, //prv
};
PRINTMARK();
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiiiOO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &allowNan, &orejectBytes, &odefaultFn, &oseparators))
{
return NULL;
}
@ -887,6 +896,69 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
encoder.rejectBytes = orejectBytes;
}
if (oseparators != NULL && oseparators != Py_None)
{
if (!PyTuple_Check(oseparators))
{
PyErr_SetString(PyExc_TypeError, "expected tuple or None as separator");
return NULL;
}
if (PyTuple_Size (oseparators) != 2)
{
PyErr_SetString(PyExc_ValueError, "expected tuple of size 2 as separator");
return NULL;
}
oseparatorsItem = PyTuple_GetItem(oseparators, 0);
if (PyErr_Occurred())
{
return NULL;
}
if (!PyUnicode_Check(oseparatorsItem))
{
PyErr_SetString(PyExc_TypeError, "expected str as item separator");
return NULL;
}
oseparatorsKey = PyTuple_GetItem(oseparators, 1);
if (PyErr_Occurred())
{
return NULL;
}
if (!PyUnicode_Check(oseparatorsKey))
{
PyErr_SetString(PyExc_TypeError, "expected str as key separator");
return NULL;
}
encoder.itemSeparatorChars = PyUnicodeToUTF8Raw(oseparatorsItem, &encoder.itemSeparatorLength, &separatorsItemBytes);
if (encoder.itemSeparatorChars == NULL)
{
PyErr_SetString(PyExc_ValueError, "item separator malformed");
goto ERROR;
}
encoder.keySeparatorChars = PyUnicodeToUTF8Raw(oseparatorsKey, &encoder.keySeparatorLength, &separatorsKeyBytes);
if (encoder.keySeparatorChars == NULL)
{
PyErr_SetString(PyExc_ValueError, "key separator malformed");
goto ERROR;
}
}
else
{
// Default to most compact representation
encoder.itemSeparatorChars = ",";
encoder.itemSeparatorLength = 1;
if (encoder.indent)
{
// Extra space when indentation is in use
encoder.keySeparatorChars = ": ";
encoder.keySeparatorLength = 2;
}
else
{
encoder.keySeparatorChars = ":";
encoder.keySeparatorLength = 1;
}
}
encoder.d2s = NULL;
dconv_d2s_init(&encoder.d2s, DCONV_D2S_EMIT_TRAILING_DECIMAL_POINT | DCONV_D2S_EMIT_TRAILING_ZERO_AFTER_POINT | DCONV_D2S_EMIT_POSITIVE_EXPONENT_SIGN,
csInf, csNan, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);
@ -896,6 +968,8 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PRINTMARK();
dconv_d2s_free(&encoder.d2s);
Py_XDECREF(separatorsItemBytes);
Py_XDECREF(separatorsKeyBytes);
if (encoder.errorMsg && !PyErr_Occurred())
{
@ -923,6 +997,11 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PRINTMARK();
return newobj;
ERROR:
Py_XDECREF(separatorsItemBytes);
Py_XDECREF(separatorsKeyBytes);
return NULL;
}
PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs)

@ -1088,6 +1088,42 @@ def test_no_memory_leak_encoding_errors(input):
no_memory_leak(f"functools.partial(ujson.dumps, {input})")
@pytest.mark.parametrize(
    "separators, expected",
    [
        # Omitted separators and the explicit compact pair give the same output.
        (None, '{"a":0,"b":1}'),
        ((",", ":"), '{"a":0,"b":1}'),
        # Separators with a trailing space.
        ((", ", ": "), '{"a": 0, "b": 1}'),
        # Unusual separators are copied into the output as-is, even though the
        # resulting document is no longer valid JSON: non-ASCII characters,
        # embedded NUL characters, and lone surrogates.
        (("\u203d", "\u00a1"), '{"a"\u00a10\u203d"b"\u00a11}'),
        (("i\x00", "k\x00"), '{"a"k\x000i\x00"b"k\x001}'),
        (("\udc80", "\udc81"), '{"a"\udc810\udc80"b"\udc811}'),
    ],
)
def test_separators(separators, expected):
    """Check that ``ujson.dumps`` emits the requested item and key separators.

    ``separators`` is either ``None`` or an ``(item_separator, key_separator)``
    tuple of strings; ``expected`` is the exact encoded text for the fixed
    two-key dictionary below.
    """
    payload = {"a": 0, "b": 1}
    encoded = ujson.dumps(payload, separators=separators)
    assert encoded == expected
@pytest.mark.parametrize(
    "separators, expected_exception",
    [
        # Values that are neither None nor a tuple.
        (True, TypeError),
        (0, TypeError),
        (b"", TypeError),
        # Tuples whose length is not exactly two.
        ((), ValueError),
        ((",",), ValueError),
        ((",", ":", "x"), ValueError),
        # Two-element tuples with at least one element that is not a str.
        ((True, 0), TypeError),
        ((",", True), TypeError),
        ((True, ":"), TypeError),
        ((b",", b":"), TypeError),
    ],
)
def test_separators_errors(separators, expected_exception):
    """Check that invalid ``separators`` arguments make ``ujson.dumps`` raise.

    Each case pairs an invalid ``separators`` value with the exception type
    that the call is expected to raise.
    """
    payload = {"a": 0, "b": 1}
    with pytest.raises(expected_exception):
        ujson.dumps(payload, separators=separators)
"""
def test_decode_numeric_int_frc_overflow():
input = "X.Y"