From f090103b31849257ab64d2edc895d1934bfbee1a Mon Sep 17 00:00:00 2001 From: joncrall Date: Sun, 3 Apr 2022 15:33:01 -0400 Subject: [PATCH] NaN and Inf in loads - Port of Pandas #30295 --- lib/ultrajson.h | 4 ++++ lib/ultrajsondec.c | 58 +++++++++++++++++++++++++++++++++++++++++---- python/JSONtoObj.c | 12 ++++++++++ tests/test_ujson.py | 8 +++++++ 4 files changed, 78 insertions(+), 4 deletions(-) diff --git a/lib/ultrajson.h b/lib/ultrajson.h index 51136ea..1547aac 100644 --- a/lib/ultrajson.h +++ b/lib/ultrajson.h @@ -166,6 +166,8 @@ enum JSTYPES JT_ARRAY, // Array structure JT_OBJECT, // Key/Value structure JT_INVALID, // Internal, do not return nor expect + JT_POS_INF, // Positive infinity + JT_NEG_INF, // Negative infinity }; typedef void * JSOBJ; @@ -321,6 +323,8 @@ typedef struct __JSONObjectDecoder JSOBJ (*newTrue)(void *prv); JSOBJ (*newFalse)(void *prv); JSOBJ (*newNull)(void *prv); + JSOBJ (*newPosInf)(void *prv); + JSOBJ (*newNegInf)(void *prv); JSOBJ (*newObject)(void *prv); JSOBJ (*newArray)(void *prv); JSOBJ (*newInt)(void *prv, JSINT32 value); diff --git a/lib/ultrajsondec.c b/lib/ultrajsondec.c index 05b1452..2fad562 100644 --- a/lib/ultrajsondec.c +++ b/lib/ultrajsondec.c @@ -98,10 +98,16 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds JSUINT64 overflowLimit = LLONG_MAX; - if (*(offset) == '-') - { - offset ++; - intNeg = -1; + if (*(offset) == 'I') { + goto DECODE_INF; + } else if (*(offset) == 'N') { + goto DECODE_NAN; + } else if (*(offset) == '-') { + offset++; + intNeg = -1; + if (*(offset) == 'I') { + goto DECODE_INF; + } overflowLimit = LLONG_MIN; } @@ -189,6 +195,48 @@ BREAK_INT_LOOP: { return ds->dec->newInt(ds->prv, (JSINT32) (intValue * intNeg)); } + +DECODE_NAN: + offset++; + if (*(offset++) != 'a') goto SET_NAN_ERROR; + if (*(offset++) != 'N') goto SET_NAN_ERROR; + + ds->lastType = JT_NULL; + ds->start = offset; + return ds->dec->newNull(ds->prv); + +SET_NAN_ERROR: + return SetError(ds, -1, 
"Unexpected character found when decoding 'NaN'"); + +DECODE_INF: + offset++; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'f') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 'n') goto SET_INF_ERROR; + if (*(offset++) != 'i') goto SET_INF_ERROR; + if (*(offset++) != 't') goto SET_INF_ERROR; + if (*(offset++) != 'y') goto SET_INF_ERROR; + + ds->start = offset; + + if (intNeg == 1) { + ds->lastType = JT_POS_INF; + return ds->dec->newPosInf(ds->prv); + } else { + ds->lastType = JT_NEG_INF; + return ds->dec->newNegInf(ds->prv); + } + +SET_INF_ERROR: + if (intNeg == 1) { + const char *msg = "Unexpected character found when decoding 'Infinity'"; + return SetError(ds, -1, msg); + } else { + const char *msg = "Unexpected character found when decoding '-Infinity'"; + return SetError(ds, -1, msg); + } + } static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds) @@ -732,6 +780,8 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds) case '7': case '8': case '9': + case 'I': + case 'N': case '-': return decode_numeric (ds); diff --git a/python/JSONtoObj.c b/python/JSONtoObj.c index f2a1a2c..a5085ef 100644 --- a/python/JSONtoObj.c +++ b/python/JSONtoObj.c @@ -79,6 +79,16 @@ static JSOBJ Object_newNull(void *prv) Py_RETURN_NONE; } +static JSOBJ Object_newPosInf(void *prv) /* newPosInf decoder callback: returns a Python float built from Py_HUGE_VAL; file-local like the sibling Object_new* callbacks */ +{ + return PyFloat_FromDouble(Py_HUGE_VAL); +} + +static JSOBJ Object_newNegInf(void *prv) /* newNegInf decoder callback: returns a Python float built from -Py_HUGE_VAL; file-local like the sibling Object_new* callbacks */ +{ + return PyFloat_FromDouble(-Py_HUGE_VAL); +} + static JSOBJ Object_newObject(void *prv) { return PyDict_New(); @@ -129,6 +139,8 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs) Object_newTrue, Object_newFalse, Object_newNull, + Object_newPosInf, + Object_newNegInf, Object_newObject, Object_newArray, Object_newInteger, diff --git a/tests/test_ujson.py b/tests/test_ujson.py index ddc065f..d1c4573 100644 --- a/tests/test_ujson.py +++ b/tests/test_ujson.py @@ -670,6 +670,14 @@ def 
test_encode_raises_allow_nan(test_input, expected_exception): ujson.dumps(test_input, allow_nan=False) +def test_nan_inf_support(): + text = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]' + data = ujson.loads(text)  # bare NaN is decoded through newNull, so it loads as None + expected = ["a", None, "NaN", float('inf'), "Infinity", -float('inf'), "-Infinity"] + for a, b in zip(data, expected): + assert a == b or a is b + + @pytest.mark.parametrize( "test_input", [