1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-12-04 19:08:21 +01:00

NaN and Inf in loads - Port of Pandas #30295

This commit is contained in:
joncrall 2022-04-03 15:33:01 -04:00
parent b3c96d0bd9
commit f090103b31
No known key found for this signature in database
GPG Key ID: BE04D092BDD81C0D
4 changed files with 78 additions and 4 deletions

@ -166,6 +166,8 @@ enum JSTYPES
JT_ARRAY, // Array structure
JT_OBJECT, // Key/Value structure
JT_INVALID, // Internal, do not return nor expect
JT_POS_INF, // Positive infinity
JT_NEG_INF, // Negative infinity
};
typedef void * JSOBJ;
@ -321,6 +323,8 @@ typedef struct __JSONObjectDecoder
JSOBJ (*newTrue)(void *prv);
JSOBJ (*newFalse)(void *prv);
JSOBJ (*newNull)(void *prv);
JSOBJ (*newPosInf)(void *prv);
JSOBJ (*newNegInf)(void *prv);
JSOBJ (*newObject)(void *prv);
JSOBJ (*newArray)(void *prv);
JSOBJ (*newInt)(void *prv, JSINT32 value);

@ -98,10 +98,16 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_numeric (struct DecoderState *ds
JSUINT64 overflowLimit = LLONG_MAX;
if (*(offset) == '-')
{
offset ++;
if (*(offset) == 'I') {
goto DECODE_INF;
} else if (*(offset) == 'N') {
goto DECODE_NAN;
} else if (*(offset) == '-') {
offset++;
intNeg = -1;
if (*(offset) == 'I') {
goto DECODE_INF;
}
overflowLimit = LLONG_MIN;
}
@ -189,6 +195,48 @@ BREAK_INT_LOOP:
{
return ds->dec->newInt(ds->prv, (JSINT32) (intValue * intNeg));
}
DECODE_NAN:
offset++;
if (*(offset++) != 'a') goto SET_NAN_ERROR;
if (*(offset++) != 'N') goto SET_NAN_ERROR;
ds->lastType = JT_NULL;
ds->start = offset;
return ds->dec->newNull(ds->prv);
SET_NAN_ERROR:
return SetError(ds, -1, "Unexpected character found when decoding 'NaN'");
DECODE_INF:
offset++;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'f') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 'n') goto SET_INF_ERROR;
if (*(offset++) != 'i') goto SET_INF_ERROR;
if (*(offset++) != 't') goto SET_INF_ERROR;
if (*(offset++) != 'y') goto SET_INF_ERROR;
ds->start = offset;
if (intNeg == 1) {
ds->lastType = JT_POS_INF;
return ds->dec->newPosInf(ds->prv);
} else {
ds->lastType = JT_NEG_INF;
return ds->dec->newNegInf(ds->prv);
}
SET_INF_ERROR:
if (intNeg == 1) {
const char *msg = "Unexpected character found when decoding 'Infinity'";
return SetError(ds, -1, msg);
} else {
const char *msg = "Unexpected character found when decoding '-Infinity'";
return SetError(ds, -1, msg);
}
}
static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_true ( struct DecoderState *ds)
@ -732,6 +780,8 @@ static FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_any(struct DecoderState *ds)
case '7':
case '8':
case '9':
case 'I':
case 'N':
case '-':
return decode_numeric (ds);

@ -79,6 +79,16 @@ static JSOBJ Object_newNull(void *prv)
Py_RETURN_NONE;
}
/*
 * Decoder callback that builds the Python value for a bare `Infinity` token.
 *
 * prv: decoder private context; not used by this callback.
 *
 * Returns the result of PyFloat_FromDouble(Py_HUGE_VAL), i.e. a Python float
 * holding positive infinity (NULL if the float could not be created).
 *
 * Declared static to match the other Object_new* callbacks in this file; it
 * is only reached through the decoder callback table.
 */
static JSOBJ Object_newPosInf(void *prv)
{
  return PyFloat_FromDouble(Py_HUGE_VAL);
}
/*
 * Decoder callback that builds the Python value for a bare `-Infinity` token.
 *
 * prv: decoder private context; not used by this callback.
 *
 * Returns the result of PyFloat_FromDouble(-Py_HUGE_VAL), i.e. a Python float
 * holding negative infinity (NULL if the float could not be created).
 *
 * Declared static to match the other Object_new* callbacks in this file; it
 * is only reached through the decoder callback table.
 */
static JSOBJ Object_newNegInf(void *prv)
{
  return PyFloat_FromDouble(-Py_HUGE_VAL);
}
static JSOBJ Object_newObject(void *prv)
{
return PyDict_New();
@ -129,6 +139,8 @@ PyObject* JSONToObj(PyObject* self, PyObject *args, PyObject *kwargs)
Object_newTrue,
Object_newFalse,
Object_newNull,
Object_newPosInf,
Object_newNegInf,
Object_newObject,
Object_newArray,
Object_newInteger,

@ -670,6 +670,14 @@ def test_encode_raises_allow_nan(test_input, expected_exception):
ujson.dumps(test_input, allow_nan=False)
def test_nan_inf_support():
    """Bare NaN / Infinity / -Infinity tokens decode; quoted forms stay strings."""
    text = '["a", NaN, "NaN", Infinity, "Infinity", -Infinity, "-Infinity"]'
    data = ujson.loads(text)
    # The decoder maps a bare NaN token to null, so None is expected here.
    # Comparing against float("nan") could never succeed anyway: NaN is not
    # equal to itself and a freshly built NaN is never the identical object.
    # NOTE(review): if NaN is later decoded to a float, switch this entry to a
    # math.isnan() check.
    expected = [
        "a",
        None,
        "NaN",
        float("inf"),
        "Infinity",
        float("-inf"),
        "-Infinity",
    ]
    # zip() stops at the shorter sequence, so pin the length explicitly.
    assert len(data) == len(expected)
    for actual, wanted in zip(data, expected):
        # Guard against a string such as "Infinity" being returned as a float
        # (or the reverse) before comparing values.
        assert type(actual) is type(wanted)
        assert actual == wanted
@pytest.mark.parametrize(
"test_input",
[