mirror of
https://github.com/ultrajson/ultrajson.git
synced 2024-11-24 05:12:02 +01:00
- Added forceASCII option to the encoder to control whether output is forced to be ASCII (<128) or not.
- Added ensure_ascii kwarg to encode/dumps (True is the default). Use ensure_ascii=False to allow UTF-8 strings to be output as-is, which should be faster and more space efficient. - Bumped version
This commit is contained in:
parent
6738e59af1
commit
e962ae68e2
@ -662,11 +662,15 @@ char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
PyObject* objToJSON(PyObject* self, PyObject *arg)
|
PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
|
static char *kwlist[] = { "ensure_ascii", NULL};
|
||||||
|
|
||||||
char buffer[65536];
|
char buffer[65536];
|
||||||
char *ret;
|
char *ret;
|
||||||
PyObject *newobj;
|
PyObject *newobj;
|
||||||
|
PyObject *oinput = NULL;
|
||||||
|
PyObject *oensureAscii = NULL;
|
||||||
|
|
||||||
JSONObjectEncoder encoder =
|
JSONObjectEncoder encoder =
|
||||||
{
|
{
|
||||||
@ -687,9 +691,24 @@ PyObject* objToJSON(PyObject* self, PyObject *arg)
|
|||||||
PyObject_Free, //JSPFN_FREE free;
|
PyObject_Free, //JSPFN_FREE free;
|
||||||
-1, //recursionMax
|
-1, //recursionMax
|
||||||
5, //default decimal precision
|
5, //default decimal precision
|
||||||
|
1, //forceAscii
|
||||||
};
|
};
|
||||||
|
|
||||||
|
PRINTMARK();
|
||||||
|
|
||||||
ret = JSON_EncodeObject (arg, &encoder, buffer, sizeof (buffer));
|
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &oinput, &oensureAscii))
|
||||||
|
{
|
||||||
|
return PyErr_Format(PyExc_TypeError, "Expected object, **kw ensure_ascii true/false");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii))
|
||||||
|
{
|
||||||
|
encoder.forceASCII = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
PRINTMARK();
|
||||||
|
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer));
|
||||||
|
PRINTMARK();
|
||||||
|
|
||||||
|
|
||||||
if (encoder.errorMsg)
|
if (encoder.errorMsg)
|
||||||
@ -710,10 +729,12 @@ PyObject* objToJSON(PyObject* self, PyObject *arg)
|
|||||||
encoder.free (ret);
|
encoder.free (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PRINTMARK();
|
||||||
|
|
||||||
return newobj;
|
return newobj;
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs)
|
||||||
{
|
{
|
||||||
PyObject *data;
|
PyObject *data;
|
||||||
PyObject *file;
|
PyObject *file;
|
||||||
@ -721,6 +742,8 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
|||||||
PyObject *write;
|
PyObject *write;
|
||||||
PyObject *argtuple;
|
PyObject *argtuple;
|
||||||
|
|
||||||
|
PRINTMARK();
|
||||||
|
|
||||||
if (!PyArg_ParseTuple (args, "OO", &data, &file)) {
|
if (!PyArg_ParseTuple (args, "OO", &data, &file)) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
@ -739,11 +762,14 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
string = objToJSON (self, data);
|
argtuple = PyTuple_Pack(1, data);
|
||||||
|
|
||||||
|
string = objToJSON (self, argtuple, kwargs);
|
||||||
|
|
||||||
if (string == NULL)
|
if (string == NULL)
|
||||||
{
|
{
|
||||||
Py_XDECREF(write);
|
Py_XDECREF(write);
|
||||||
|
Py_XDECREF(argtuple);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -763,6 +789,11 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
|||||||
Py_XDECREF(write);
|
Py_XDECREF(write);
|
||||||
Py_XDECREF(argtuple);
|
Py_XDECREF(argtuple);
|
||||||
Py_XDECREF(string);
|
Py_XDECREF(string);
|
||||||
|
|
||||||
|
PRINTMARK();
|
||||||
|
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -214,6 +214,14 @@ class UltraJSONTests(TestCase):
|
|||||||
self.assertEquals(int(expected), json.loads(output))
|
self.assertEquals(int(expected), json.loads(output))
|
||||||
self.assertEquals(int(expected), ujson.decode(output))
|
self.assertEquals(int(expected), ujson.decode(output))
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def test_encodeToUTF8(self):
    """Encoding with ensure_ascii=False must match the json module and round-trip."""
    # Raw UTF-8 bytes for U+65E5 followed by U+0448.
    input = "\xe6\x97\xa5\xd1\x88"
    enc = ujson.encode(input, ensure_ascii=False)
    dec = ujson.decode(enc)
    # The reference encoder must also be told not to escape; with its default
    # ensure_ascii=True it emits \uXXXX sequences and could never compare equal.
    self.assertEquals(enc, json.dumps(input, encoding="utf-8", ensure_ascii=False))
    self.assertEquals(dec, json.loads(enc))
|
||||||
|
|
||||||
|
|
||||||
def test_encodeRecursionMax(self):
|
def test_encodeRecursionMax(self):
|
||||||
# 8 is the max recursion depth
|
# 8 is the max recursion depth
|
||||||
|
@ -16,11 +16,11 @@ PyObject* JSONFileToObj(PyObject* self, PyObject *file);
|
|||||||
|
|
||||||
|
|
||||||
static PyMethodDef ujsonMethods[] = {
|
static PyMethodDef ujsonMethods[] = {
|
||||||
{"encode", objToJSON, METH_O, "Converts arbitrary object recursivly into JSON"},
|
{"encode", objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"},
|
||||||
{"decode", JSONToObj, METH_O, "Converts JSON as string to dict object structure"},
|
{"decode", JSONToObj, METH_O, "Converts JSON as string to dict object structure"},
|
||||||
{"dumps", objToJSON, METH_O, "Converts arbitrary object recursivly into JSON"},
|
{"dumps", objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"},
|
||||||
{"loads", JSONToObj, METH_O, "Converts JSON as string to dict object structure"},
|
{"loads", JSONToObj, METH_O, "Converts JSON as string to dict object structure"},
|
||||||
{"dump", objToJSONFile, METH_VARARGS, "Converts arbitrary object recursively into JSON file"},
|
{"dump", objToJSONFile, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON file. Use ensure_ascii=false to output UTF-8"},
|
||||||
{"load", JSONFileToObj, METH_O, "Converts JSON as file to dict object structure"},
|
{"load", JSONFileToObj, METH_O, "Converts JSON as file to dict object structure"},
|
||||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||||
};
|
};
|
||||||
|
@ -1 +1 @@
|
|||||||
#define UJSON_VERSION "1.6"
|
#define UJSON_VERSION "1.7"
|
||||||
|
10
ultrajson.h
10
ultrajson.h
@ -165,6 +165,10 @@ typedef void *(*JSPFN_MALLOC)(size_t size);
|
|||||||
typedef void (*JSPFN_FREE)(void *pptr);
|
typedef void (*JSPFN_FREE)(void *pptr);
|
||||||
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
|
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
Signature of the string escaping routine stored in JSONObjectEncoder::EscapeString.
NOTE(review): enc is declared as void * here, presumably because JSONObjectEncoder is
not declared yet at this point -- confirm against the functions assigned to the field. */
typedef int (*JSPFN_ESCAPESTRING)(JSOBJ obj, void *enc, const char *io, const char *end);
|
||||||
|
|
||||||
typedef struct __JSONObjectEncoder
|
typedef struct __JSONObjectEncoder
|
||||||
{
|
{
|
||||||
void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
|
void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
|
||||||
@ -224,6 +228,10 @@ typedef struct __JSONObjectEncoder
|
|||||||
Configuration for max decimals of double floating point numbers to encode (0-9) */
|
Configuration for max decimals of double floating point numbers to encode (0-9) */
|
||||||
int doublePrecision;
|
int doublePrecision;
|
||||||
|
|
||||||
|
/*
|
||||||
|
If true, output will be ASCII with all characters above 127 encoded as \uXXXX. If false, strings are passed through in whatever encoding they arrive in (normally UTF-8) */
|
||||||
|
int forceASCII;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Set to an error message if an error occurred */
|
Set to an error message if an error occurred */
|
||||||
@ -237,6 +245,8 @@ typedef struct __JSONObjectEncoder
|
|||||||
int heap;
|
int heap;
|
||||||
int level;
|
int level;
|
||||||
|
|
||||||
|
JSPFN_ESCAPESTRING EscapeString;
|
||||||
|
|
||||||
} JSONObjectEncoder;
|
} JSONObjectEncoder;
|
||||||
|
|
||||||
|
|
||||||
|
103
ultrajsonenc.c
103
ultrajsonenc.c
@ -52,17 +52,20 @@ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights rese
|
|||||||
|
|
||||||
static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
|
static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
|
||||||
static const char g_hexChars[] = "0123456789abcdef";
|
static const char g_hexChars[] = "0123456789abcdef";
|
||||||
static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\";
|
static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/";
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
FIXME: While this is fine dandy and working it's a magic value mess which probably only the author understands.
|
FIXME: While this is fine dandy and working it's a magic value mess which probably only the author understands.
|
||||||
Needs a cleanup and more documentation */
|
Needs a cleanup and more documentation */
|
||||||
static const JSUINT8 g_utf8LengthLookup[256] =
|
|
||||||
|
/*
|
||||||
|
Table for pure ASCII output, escaping all characters above 127 as \uXXXX */
|
||||||
|
static const JSUINT8 g_asciiOutputTable[256] =
|
||||||
{
|
{
|
||||||
/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30,
|
/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30,
|
||||||
/* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
/* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||||
/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24,
|
||||||
/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||||
/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1,
|
/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1,
|
||||||
@ -78,6 +81,7 @@ static const JSUINT8 g_utf8LengthLookup[256] =
|
|||||||
/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
|
/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message)
|
static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message)
|
||||||
{
|
{
|
||||||
enc->errorMsg = message;
|
enc->errorMsg = message;
|
||||||
@ -121,6 +125,71 @@ FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (c
|
|||||||
*(outputOffset++) = g_hexChars[(value & 0x000f) >> 0];
|
*(outputOffset++) = g_hexChars[(value & 0x000f) >> 0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
Appends the bytes in [io, end) to the output at enc->offset, escaped for use inside a
JSON string literal, and advances enc->offset past what was written.

'"', '\\' and the control characters \b, \f, \n, \r and \t are written as their
two-character escapes. Every other byte below 0x20, including an embedded NUL, is
written as \u00XX. All remaining bytes ('/' and everything above 127 included) are
copied through unchanged; nothing is validated or re-encoded.

The input is bounded by end, not by the first NUL byte: an embedded NUL does not
truncate the string and no byte at or beyond end is read.

The output is NOT bounds checked. The caller must have reserved enough room
beforehand; a single input byte can expand to six output bytes.

obj is not used. Always returns TRUE. */
int Buffer_EscapeStringUnvalidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
{
    char *of = (char *) enc->offset;

    for (; io < end; io++)
    {
        const unsigned char chr = (unsigned char) *io;

        switch (chr)
        {
            case '\"': *(of++) = '\\'; *(of++) = '\"'; break;
            case '\\': *(of++) = '\\'; *(of++) = '\\'; break;
            case '\b': *(of++) = '\\'; *(of++) = 'b'; break;
            case '\f': *(of++) = '\\'; *(of++) = 'f'; break;
            case '\n': *(of++) = '\\'; *(of++) = 'n'; break;
            case '\r': *(of++) = '\\'; *(of++) = 'r'; break;
            case '\t': *(of++) = '\\'; *(of++) = 't'; break;

            default:
                if (chr < 0x20)
                {
                    /* Control character without a short escape: emit \u00XX */
                    *(of++) = '\\';
                    *(of++) = 'u';
                    *(of++) = '0';
                    *(of++) = '0';
                    *(of++) = g_hexChars[(chr & 0xf0) >> 4];
                    *(of++) = g_hexChars[chr & 0x0f];
                }
                else
                {
                    /* Printable ASCII ('/' is deliberately left unescaped) or a raw high byte */
                    *(of++) = (char) chr;
                }
                break;
        }
    }

    enc->offset = of;
    return TRUE;
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
FIXME:
|
FIXME:
|
||||||
This code only works with Little and Big Endian
|
This code only works with Little and Big Endian
|
||||||
@ -128,24 +197,17 @@ This code only works with Little and Big Endian
|
|||||||
FIXME: The JSON spec says escape "/" but none of the other encoders do and we don't
|
FIXME: The JSON spec says escape "/" but none of the other encoders do and we don't
|
||||||
want to be left alone doing it so we don't :)
|
want to be left alone doing it so we don't :)
|
||||||
|
|
||||||
FIXME: It should be faster to do SHIFT and then AND instead of AND and SHIFT.
|
|
||||||
Example:
|
|
||||||
(x & 0x3f00) >> 8) => Longer/more opcodes than below
|
|
||||||
(x >> 8) & 0x3f) => Probably faster/smaller
|
|
||||||
Seems that atleast MSVC9 does this optimization by itself from time to time. Not sure really
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
|
||||||
int Buffer_EscapeString (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
|
|
||||||
{
|
{
|
||||||
JSUTF32 ucs;
|
JSUTF32 ucs;
|
||||||
char *of = (char *) enc->offset;
|
char *of = (char *) enc->offset;
|
||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
|
|
||||||
//JSUINT8 chr = (unsigned char) *io;
|
//JSUINT8 chr = (unsigned char) *io;
|
||||||
JSUINT8 utflen = g_utf8LengthLookup[(unsigned char) *io];
|
JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io];
|
||||||
|
|
||||||
switch (utflen)
|
switch (utflen)
|
||||||
{
|
{
|
||||||
@ -277,6 +339,7 @@ int Buffer_EscapeString (JSOBJ obj, JSONObjectEncoder *enc, const char *io, cons
|
|||||||
case 18:
|
case 18:
|
||||||
case 20:
|
case 20:
|
||||||
case 22:
|
case 22:
|
||||||
|
//case 24: (enable for / escaping)
|
||||||
*(of++) = *( (char *) (g_escapeChars + utflen + 0));
|
*(of++) = *( (char *) (g_escapeChars + utflen + 0));
|
||||||
*(of++) = *( (char *) (g_escapeChars + utflen + 1));
|
*(of++) = *( (char *) (g_escapeChars + utflen + 1));
|
||||||
io ++;
|
io ++;
|
||||||
@ -539,7 +602,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
|
|||||||
if (name)
|
if (name)
|
||||||
{
|
{
|
||||||
Buffer_AppendCharUnchecked(enc, '\"');
|
Buffer_AppendCharUnchecked(enc, '\"');
|
||||||
if (!Buffer_EscapeString(obj, enc, name, name + cbName))
|
if (!enc->EscapeString(obj, enc, name, name + cbName))
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -680,7 +743,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
|
|||||||
Buffer_Reserve(enc, ((szlen / 4) + 1) * 12);
|
Buffer_Reserve(enc, ((szlen / 4) + 1) * 12);
|
||||||
Buffer_AppendCharUnchecked (enc, '\"');
|
Buffer_AppendCharUnchecked (enc, '\"');
|
||||||
|
|
||||||
if (!Buffer_EscapeString(obj, enc, value, value + szlen))
|
if (!enc->EscapeString(obj, enc, value, value + szlen))
|
||||||
{
|
{
|
||||||
enc->endTypeContext(obj, &tc);
|
enc->endTypeContext(obj, &tc);
|
||||||
enc->level --;
|
enc->level --;
|
||||||
@ -717,6 +780,16 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t
|
|||||||
enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS;
|
enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (enc->forceASCII)
|
||||||
|
{
|
||||||
|
enc->EscapeString = Buffer_EscapeStringValidated;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
enc->EscapeString = Buffer_EscapeStringUnvalidated;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (_buffer == NULL)
|
if (_buffer == NULL)
|
||||||
{
|
{
|
||||||
_cbBuffer = 32768;
|
_cbBuffer = 32768;
|
||||||
|
Loading…
Reference in New Issue
Block a user