1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-11-23 12:32:02 +01:00

new reject_bytes option to raise on bytes

raise TypeError when encountering bytes in ujson.dumps() to prevent
unexpected Unicode exceptions in production.
Fixes #264
This commit is contained in:
Eric Le Lay 2017-06-11 11:58:10 +02:00
parent 7d0f4fb7e9
commit ad280fd99e
4 changed files with 29 additions and 5 deletions

@ -261,6 +261,10 @@ typedef struct __JSONObjectEncoder
Configuration for spaces of indent */
int indent;
/*
If true, bytes are rejected. */
int rejectBytes;
/*
Private pointer to be used by the caller. Passed as encoder_prv in JSONTypeContext */
void *prv;

@ -540,8 +540,16 @@ static void Object_beginTypeContext (JSOBJ _obj, JSONTypeContext *tc, JSONObject
if (PyString_Check(obj))
{
PRINTMARK();
pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8;
return;
if (enc->rejectBytes)
{
PyErr_Format (PyExc_TypeError, "reject_bytes is on and '%s' is bytes", PyString_AS_STRING(obj));
goto INVALID;
}
else
{
pc->PyTypeToJSON = PyStringToUTF8; tc->type = JT_UTF8;
return;
}
}
else
if (PyUnicode_Check(obj))
@ -745,7 +753,7 @@ static char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", NULL };
static char *kwlist[] = { "obj", "ensure_ascii", "encode_html_chars", "escape_forward_slashes", "sort_keys", "indent", "reject_bytes", NULL };
char buffer[65536];
char *ret;
@ -755,6 +763,7 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
PyObject *oencodeHTMLChars = NULL;
PyObject *oescapeForwardSlashes = NULL;
PyObject *osortKeys = NULL;
PyObject *orejectBytes = NULL;
JSONObjectEncoder encoder =
{
@ -779,13 +788,14 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
1, //escapeForwardSlashes
0, //sortKeys
0, //indent
0, //rejectBytes
NULL, //prv
};
PRINTMARK();
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOi", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent))
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|OOOOiO", kwlist, &oinput, &oensureAscii, &oencodeHTMLChars, &oescapeForwardSlashes, &osortKeys, &encoder.indent, &orejectBytes))
{
return NULL;
}
@ -810,6 +820,11 @@ PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
encoder.sortKeys = 1;
}
if (orejectBytes != NULL && PyObject_IsTrue(orejectBytes))
{
encoder.rejectBytes = 1;
}
dconv_d2s_init(DCONV_D2S_EMIT_TRAILING_DECIMAL_POINT | DCONV_D2S_EMIT_TRAILING_ZERO_AFTER_POINT,
NULL, NULL, 'e', DCONV_DECIMAL_IN_SHORTEST_LOW, DCONV_DECIMAL_IN_SHORTEST_HIGH, 0, 0);

@ -53,7 +53,7 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs);
PyObject* JSONFileToObj(PyObject* self, PyObject *args, PyObject *kwargs);
#define ENCODER_HELP_TEXT "Use ensure_ascii=false to output UTF-8. Pass in double_precision to alter the maximum digit precision of doubles. Set encode_html_chars=True to encode < > & as unicode escape sequences. Set escape_forward_slashes=False to prevent escaping / characters."
/* Shared help text appended to the docstrings of the encoder entry points
   (e.g. "encode" in ujsonMethods). Lists the keyword options they accept. */
#define ENCODER_HELP_TEXT "Use ensure_ascii=false to output UTF-8. Pass in double_precision to alter the maximum digit precision of doubles. Set encode_html_chars=True to encode < > & as unicode escape sequences. Set escape_forward_slashes=False to prevent escaping / characters. Set reject_bytes=True to raise TypeError on bytes."
static PyMethodDef ujsonMethods[] = {
{"encode", (PyCFunction) objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON. " ENCODER_HELP_TEXT},

@ -818,6 +818,11 @@ class UltraJSONTests(unittest.TestCase):
sortedKeys = ujson.dumps(data, sort_keys=True)
self.assertEqual(sortedKeys, '{"a":1,"b":1,"c":1,"d":1,"e":1,"f":1}')
def test_reject_bytes_mode(self):
    # A dict whose value is a bytes object (built via str.encode).
    payload = {"a": "b".encode('utf8')}

    # Default mode: the bytes value is serialised like a regular string.
    encoded = ujson.dumps(payload)
    self.assertEqual(encoded, '{"a":"b"}')

    # With reject_bytes enabled the same input must raise TypeError.
    self.assertRaises(TypeError, ujson.dumps, payload, reject_bytes=True)
"""
def test_decodeNumericIntFrcOverflow(self):
input = "X.Y"