mirror of
https://github.com/ultrajson/ultrajson.git
synced 2024-11-24 01:04:19 +01:00
- Added a forceASCII option to the encoder to control whether output is forced to be ASCII (all bytes < 128) or not.
- Added the ensure_ascii keyword argument to encode/dumps (True is the default). Use ensure_ascii=False to allow UTF-8 strings to be output unescaped, which should be faster and more space efficient.
- Bumped version.
This commit is contained in:
parent
6738e59af1
commit
e962ae68e2
@ -662,11 +662,15 @@ char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
|
||||
}
|
||||
|
||||
|
||||
PyObject* objToJSON(PyObject* self, PyObject *arg)
|
||||
PyObject* objToJSON(PyObject* self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
static char *kwlist[] = { "ensure_ascii", NULL};
|
||||
|
||||
char buffer[65536];
|
||||
char *ret;
|
||||
PyObject *newobj;
|
||||
PyObject *oinput = NULL;
|
||||
PyObject *oensureAscii = NULL;
|
||||
|
||||
JSONObjectEncoder encoder =
|
||||
{
|
||||
@ -687,9 +691,24 @@ PyObject* objToJSON(PyObject* self, PyObject *arg)
|
||||
PyObject_Free, //JSPFN_FREE free;
|
||||
-1, //recursionMax
|
||||
5, //default decimal precision
|
||||
1, //forceAscii
|
||||
};
|
||||
|
||||
PRINTMARK();
|
||||
|
||||
ret = JSON_EncodeObject (arg, &encoder, buffer, sizeof (buffer));
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|O", kwlist, &oinput, &oensureAscii))
|
||||
{
|
||||
return PyErr_Format(PyExc_TypeError, "Expected object, **kw ensure_ascii true/false");
|
||||
}
|
||||
|
||||
if (oensureAscii != NULL && !PyObject_IsTrue(oensureAscii))
|
||||
{
|
||||
encoder.forceASCII = 0;
|
||||
}
|
||||
|
||||
PRINTMARK();
|
||||
ret = JSON_EncodeObject (oinput, &encoder, buffer, sizeof (buffer));
|
||||
PRINTMARK();
|
||||
|
||||
|
||||
if (encoder.errorMsg)
|
||||
@ -710,10 +729,12 @@ PyObject* objToJSON(PyObject* self, PyObject *arg)
|
||||
encoder.free (ret);
|
||||
}
|
||||
|
||||
PRINTMARK();
|
||||
|
||||
return newobj;
|
||||
}
|
||||
|
||||
PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
||||
PyObject* objToJSONFile(PyObject* self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
PyObject *data;
|
||||
PyObject *file;
|
||||
@ -721,6 +742,8 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
||||
PyObject *write;
|
||||
PyObject *argtuple;
|
||||
|
||||
PRINTMARK();
|
||||
|
||||
if (!PyArg_ParseTuple (args, "OO", &data, &file)) {
|
||||
return NULL;
|
||||
}
|
||||
@ -739,11 +762,14 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
string = objToJSON (self, data);
|
||||
argtuple = PyTuple_Pack(1, data);
|
||||
|
||||
string = objToJSON (self, argtuple, kwargs);
|
||||
|
||||
if (string == NULL)
|
||||
{
|
||||
Py_XDECREF(write);
|
||||
Py_XDECREF(argtuple);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -763,6 +789,11 @@ PyObject* objToJSONFile(PyObject* self, PyObject *args)
|
||||
Py_XDECREF(write);
|
||||
Py_XDECREF(argtuple);
|
||||
Py_XDECREF(string);
|
||||
|
||||
PRINTMARK();
|
||||
|
||||
Py_RETURN_NONE;
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
@ -214,6 +214,14 @@ class UltraJSONTests(TestCase):
|
||||
self.assertEquals(int(expected), json.loads(output))
|
||||
self.assertEquals(int(expected), ujson.decode(output))
|
||||
pass
|
||||
|
||||
def test_encodeToUTF8(self):
    """Encoding with ensure_ascii=False must pass UTF-8 bytes through unescaped.

    The output is compared against the standard json module configured the
    same way (ensure_ascii=False), and the result must round-trip through
    both decoders identically.
    """
    # UTF-8 encoded bytes for two non-ASCII characters.
    input = "\xe6\x97\xa5\xd1\x88"
    # Python's boolean literal is False; lowercase `false` raises NameError.
    enc = ujson.encode(input, ensure_ascii=False)
    dec = ujson.decode(enc)
    # json.dumps defaults to ensure_ascii=True, which would emit \uXXXX escapes
    # and could never match the raw UTF-8 output produced above.
    self.assertEquals(enc, json.dumps(input, encoding="utf-8", ensure_ascii=False))
    self.assertEquals(dec, json.loads(enc))
|
||||
|
||||
|
||||
def test_encodeRecursionMax(self):
|
||||
# 8 is the max recursion depth
|
||||
|
@ -16,11 +16,11 @@ PyObject* JSONFileToObj(PyObject* self, PyObject *file);
|
||||
|
||||
|
||||
static PyMethodDef ujsonMethods[] = {
|
||||
{"encode", objToJSON, METH_O, "Converts arbitrary object recursivly into JSON"},
|
||||
{"encode", objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"},
|
||||
{"decode", JSONToObj, METH_O, "Converts JSON as string to dict object structure"},
|
||||
{"dumps", objToJSON, METH_O, "Converts arbitrary object recursivly into JSON"},
|
||||
{"dumps", objToJSON, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursivly into JSON. Use ensure_ascii=false to output UTF-8"},
|
||||
{"loads", JSONToObj, METH_O, "Converts JSON as string to dict object structure"},
|
||||
{"dump", objToJSONFile, METH_VARARGS, "Converts arbitrary object recursively into JSON file"},
|
||||
{"dump", objToJSONFile, METH_VARARGS | METH_KEYWORDS, "Converts arbitrary object recursively into JSON file. Use ensure_ascii=false to output UTF-8"},
|
||||
{"load", JSONFileToObj, METH_O, "Converts JSON as file to dict object structure"},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
@ -1 +1 @@
|
||||
#define UJSON_VERSION "1.6"
|
||||
#define UJSON_VERSION "1.7"
|
||||
|
10
ultrajson.h
10
ultrajson.h
@ -165,6 +165,10 @@ typedef void *(*JSPFN_MALLOC)(size_t size);
|
||||
typedef void (*JSPFN_FREE)(void *pptr);
|
||||
typedef void *(*JSPFN_REALLOC)(void *base, size_t size);
|
||||
|
||||
|
||||
|
||||
typedef int (*JSPFN_ESCAPESTRING)(JSOBJ obj, void *enc, const char *io, const char *end);
|
||||
|
||||
typedef struct __JSONObjectEncoder
|
||||
{
|
||||
void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
|
||||
@ -224,6 +228,10 @@ typedef struct __JSONObjectEncoder
|
||||
Configuration for max decimals of double floating point numbers to encode (0-9) */
|
||||
int doublePrecision;
|
||||
|
||||
/*
|
||||
If true, output will be ASCII with all characters above 127 encoded as \uXXXX. If false, output will be UTF-8 or whatever charset the strings are supplied in */
|
||||
int forceASCII;
|
||||
|
||||
|
||||
/*
|
||||
Set to an error message if an error occurred */
|
||||
@ -237,6 +245,8 @@ typedef struct __JSONObjectEncoder
|
||||
int heap;
|
||||
int level;
|
||||
|
||||
JSPFN_ESCAPESTRING EscapeString;
|
||||
|
||||
} JSONObjectEncoder;
|
||||
|
||||
|
||||
|
103
ultrajsonenc.c
103
ultrajsonenc.c
@ -52,17 +52,20 @@ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights rese
|
||||
|
||||
static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
|
||||
static const char g_hexChars[] = "0123456789abcdef";
|
||||
static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\";
|
||||
static const char g_escapeChars[] = "0123456789\\b\\t\\n\\f\\r\\\"\\\\\\/";
|
||||
|
||||
|
||||
/*
|
||||
FIXME: While this is fine and dandy and working, it's a magic value mess which probably only the author understands.
|
||||
Needs a cleanup and more documentation */
|
||||
static const JSUINT8 g_utf8LengthLookup[256] =
|
||||
|
||||
/*
|
||||
Table for pure ASCII output, escaping all characters above 127 to \uXXXX */
|
||||
static const JSUINT8 g_asciiOutputTable[256] =
|
||||
{
|
||||
/* 0x00 */ 0, 30, 30, 30, 30, 30, 30, 30, 10, 12, 14, 30, 16, 18, 30, 30,
|
||||
/* 0x10 */ 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30,
|
||||
/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
/* 0x20 */ 1, 1, 20, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 24,
|
||||
/* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
/* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
/* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 22, 1, 1, 1,
|
||||
@ -78,6 +81,7 @@ static const JSUINT8 g_utf8LengthLookup[256] =
|
||||
/* 0xf0 */ 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
|
||||
};
|
||||
|
||||
|
||||
static void SetError (JSOBJ obj, JSONObjectEncoder *enc, const char *message)
|
||||
{
|
||||
enc->errorMsg = message;
|
||||
@ -121,6 +125,71 @@ FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendShortHexUnchecked (c
|
||||
*(outputOffset++) = g_hexChars[(value & 0x000f) >> 0];
|
||||
}
|
||||
|
||||
/*
Appends the bytes in [io, end) to the output buffer at enc->offset, escaped as
the contents of a JSON string. Bytes >= 0x80 are copied through unchanged (no
UTF-8 validation or \uXXXX transcoding is performed).

obj  - unused here; kept so the signature matches JSPFN_ESCAPESTRING.
enc  - encoder whose offset is advanced past the bytes written.
io   - first input byte.
end  - one past the last input byte. The byte at *end must be a NUL
       terminator: the loop only stops on a NUL found at or after end.

Always returns TRUE.

Writes are unchecked: the caller must have reserved output space beforehand.
The worst case is 6 output bytes per input byte (\u00XX for control characters).
*/
int Buffer_EscapeStringUnvalidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
{
  char *of = (char *) enc->offset;

  for (;;)
  {
    switch (*io)
    {
      case '\"': (*of++) = '\\'; (*of++) = '\"'; break;
      case '\\': (*of++) = '\\'; (*of++) = '\\'; break;
      /* '/' is deliberately not escaped */
      case '\b': (*of++) = '\\'; (*of++) = 'b'; break;
      case '\f': (*of++) = '\\'; (*of++) = 'f'; break;
      case '\n': (*of++) = '\\'; (*of++) = 'n'; break;
      case '\r': (*of++) = '\\'; (*of++) = 'r'; break;
      case '\t': (*of++) = '\\'; (*of++) = 't'; break;

      case 0x00:
        if (io >= end)
        {
          /* Reached the terminator: commit what was written and finish */
          enc->offset += (of - enc->offset);
          return TRUE;
        }
        /* A NUL before end is string data, not the terminator: escape it as
        \u0000 like any other control character instead of truncating */
        /* fallthrough */
      case 0x01:
      case 0x02:
      case 0x03:
      case 0x04:
      case 0x05:
      case 0x06:
      case 0x07:
      case 0x0b:
      case 0x0e:
      case 0x0f:
      case 0x10:
      case 0x11:
      case 0x12:
      case 0x13:
      case 0x14:
      case 0x15:
      case 0x16:
      case 0x17:
      case 0x18:
      case 0x19:
      case 0x1a:
      case 0x1b:
      case 0x1c:
      case 0x1d:
      case 0x1e:
      case 0x1f:
        /* Control characters without a short escape become \u00XX */
        *(of++) = '\\';
        *(of++) = 'u';
        *(of++) = '0';
        *(of++) = '0';
        *(of++) = g_hexChars[ (unsigned char) (((*io) & 0xf0) >> 4)];
        *(of++) = g_hexChars[ (unsigned char) ((*io) & 0x0f)];
        break;

      default: (*of++) = (*io); break;
    }

    io++;
  }
}
|
||||
|
||||
|
||||
/*
|
||||
FIXME:
|
||||
This code only works with Little and Big Endian
|
||||
@ -128,24 +197,17 @@ This code only works with Little and Big Endian
|
||||
FIXME: The JSON spec says escape "/" but none of the other encoders do, and we don't
|
||||
want to be the only ones doing it, so we don't :)
|
||||
|
||||
FIXME: It should be faster to do SHIFT and then AND instead of AND and SHIFT.
|
||||
Example:
|
||||
(x & 0x3f00) >> 8) => Longer/more opcodes than below
|
||||
(x >> 8) & 0x3f) => Probably faster/smaller
|
||||
Seems that atleast MSVC9 does this optimization by itself from time to time. Not sure really
|
||||
|
||||
*/
|
||||
|
||||
int Buffer_EscapeString (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
|
||||
int Buffer_EscapeStringValidated (JSOBJ obj, JSONObjectEncoder *enc, const char *io, const char *end)
|
||||
{
|
||||
JSUTF32 ucs;
|
||||
char *of = (char *) enc->offset;
|
||||
|
||||
|
||||
while (1)
|
||||
{
|
||||
|
||||
//JSUINT8 chr = (unsigned char) *io;
|
||||
JSUINT8 utflen = g_utf8LengthLookup[(unsigned char) *io];
|
||||
JSUINT8 utflen = g_asciiOutputTable[(unsigned char) *io];
|
||||
|
||||
switch (utflen)
|
||||
{
|
||||
@ -277,6 +339,7 @@ int Buffer_EscapeString (JSOBJ obj, JSONObjectEncoder *enc, const char *io, cons
|
||||
case 18:
|
||||
case 20:
|
||||
case 22:
|
||||
//case 24: (enable for / escaping)
|
||||
*(of++) = *( (char *) (g_escapeChars + utflen + 0));
|
||||
*(of++) = *( (char *) (g_escapeChars + utflen + 1));
|
||||
io ++;
|
||||
@ -539,7 +602,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
|
||||
if (name)
|
||||
{
|
||||
Buffer_AppendCharUnchecked(enc, '\"');
|
||||
if (!Buffer_EscapeString(obj, enc, name, name + cbName))
|
||||
if (!enc->EscapeString(obj, enc, name, name + cbName))
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -680,7 +743,7 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
|
||||
Buffer_Reserve(enc, ((szlen / 4) + 1) * 12);
|
||||
Buffer_AppendCharUnchecked (enc, '\"');
|
||||
|
||||
if (!Buffer_EscapeString(obj, enc, value, value + szlen))
|
||||
if (!enc->EscapeString(obj, enc, value, value + szlen))
|
||||
{
|
||||
enc->endTypeContext(obj, &tc);
|
||||
enc->level --;
|
||||
@ -717,6 +780,16 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t
|
||||
enc->doublePrecision = JSON_DOUBLE_MAX_DECIMALS;
|
||||
}
|
||||
|
||||
if (enc->forceASCII)
|
||||
{
|
||||
enc->EscapeString = Buffer_EscapeStringValidated;
|
||||
}
|
||||
else
|
||||
{
|
||||
enc->EscapeString = Buffer_EscapeStringUnvalidated;
|
||||
}
|
||||
|
||||
|
||||
if (_buffer == NULL)
|
||||
{
|
||||
_cbBuffer = 32768;
|
||||
|
Loading…
Reference in New Issue
Block a user