1
0
mirror of https://github.com/ultrajson/ultrajson.git synced 2024-11-24 05:12:02 +01:00

- Speed optimizations for Python decoder

- Changed inline optimization options
- Fixed empty array and object decoder bug
- Removed some buffer macros in encoder
This commit is contained in:
Jonas Tärnström 2011-03-03 16:55:07 +01:00
parent f7fa7bc062
commit 1ed0bc18ec
9 changed files with 215 additions and 86 deletions

@ -338,10 +338,6 @@
RelativePath=".\main.cpp"
>
</File>
<File
RelativePath="..\python\objToJSON.c"
>
</File>
</Filter>
<Filter
Name="Header Files"

File diff suppressed because one or more lines are too long

@ -46,7 +46,7 @@ if __name__ == "__main__":
import timeit
print "Ready? Configure affinity and priority, starting in 20..."
time.sleep(20)
#time.sleep(20)
COUNT = 10000
print "ujson encode : %.05f calls/sec" % (COUNT / min(timeit.repeat("ujsonEnc()", "from __main__ import ujsonEnc", time.clock,10, COUNT)), )

Binary file not shown.

@ -409,7 +409,25 @@ static void Object_beginTypeContext (PyObject *obj, JSONTypeContext *tc)
{
TypeContext *pc = (TypeContext *) tc->prv;
PyObject *toDictFunc;
memset (pc, 0, sizeof (TypeContext));
tc->prv[0] = 0;
tc->prv[1] = 0;
tc->prv[2] = 0;
tc->prv[3] = 0;
tc->prv[4] = 0;
tc->prv[5] = 0;
tc->prv[6] = 0;
tc->prv[7] = 0;
tc->prv[8] = 0;
tc->prv[9] = 0;
tc->prv[10] = 0;
tc->prv[11] = 0;
tc->prv[12] = 0;
tc->prv[13] = 0;
tc->prv[14] = 0;
tc->prv[15] = 0;
//memset (pc, 0, sizeof (TypeContext));
if (PyIter_Check(obj))
{
@ -626,16 +644,12 @@ char *Object_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen)
}
PyObject* objToJSON(PyObject* self, PyObject *arg)
{
char buffer[65536];
char *ret;
PyObject *newobj;
JSONObjectEncoder encoder =
{
Object_beginTypeContext, //void (*beginTypeContext)(JSOBJ obj, JSONTypeContext *tc);
@ -653,7 +667,7 @@ PyObject* objToJSON(PyObject* self, PyObject *arg)
PyObject_Realloc, //JSPFN_REALLOC realloc;
PyObject_Free, //JSPFN_FREE free;
-1, //recursionMax
-1, //default decimal precision
5, //default decimal precision
};
ret = JSON_EncodeObject (arg, &encoder, buffer, sizeof (buffer));

@ -23,6 +23,21 @@ class UltraJSONTests(TestCase):
self.assertEquals(round(input, 5), round(ujson.decode(output), 5))
pass
def test_encodeArrayOfNestedArrays(self):
    """Round-trip a list of triple-nested empty lists through ujson.

    The text produced by ujson.encode must parse back to the original
    value with the standard json module and with ujson.decode.
    """
    nested = [[[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]], [[[]]] ]
    encoded = ujson.encode(nested)
    self.assertEquals(nested, json.loads(encoded))
    # Exact text comparison against json.dumps is currently disabled:
    #self.assertEquals(encoded, json.dumps(nested))
    self.assertEquals(nested, ujson.decode(encoded))
def test_encodeArrayOfDoubles(self):
input = [ 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337, 31337.31337 ]
output = ujson.encode(input)
self.assertEquals(input, json.loads(output))
#self.assertEquals(output, json.dumps(input))
self.assertEquals(input, ujson.decode(output))
def test_encodeStringConversion(self):
input = "A string \\ \/ \b \f \n \r \t"
@ -33,8 +48,9 @@ class UltraJSONTests(TestCase):
pass
def test_encodeArrayInArray(self):
input = "[[[[]]]]"
input = [[[[]]]]
output = ujson.encode(input)
self.assertEquals(input, json.loads(output))
self.assertEquals(output, json.dumps(input))
self.assertEquals(input, ujson.decode(output))
@ -367,7 +383,23 @@ class UltraJSONTests(TestCase):
def test_decodeNumericFloatNan(self):
pass
"""
# Should fail!
def test_decodeDictWithNoKey(self):
input = "{{{{31337}}}}"
raise NotImplementedError("Implement this test!")
# Should fail!
def test_decodeDictWithNoColonOrValue(self):
input = "{{{{"key"}}}}"
raise NotImplementedError("Implement this test!")
# Should fail!
def test_decodeDictWithNoValue(self):
input = "{{{{"key":}}}}"
raise NotImplementedError("Implement this test!")
"""
if __name__ == "__main__":

@ -135,11 +135,16 @@ typedef __int64 JSLONG;
typedef unsigned __int64 JSULONG;
#define EXPORTFUNCTION __declspec(dllexport)
#define FASTCALL_MSVC __fastcall
#define FASTCALL_ATTR
#define INLINE_PREFIX __inline
/*
#define FASTCALL_MSVC
#define FASTCALL_ATTR
#define INLINE_PREFIX
*/
typedef unsigned __int32 uint32_t;
#else
@ -149,7 +154,7 @@ typedef u_int64_t JSULONG;
#define FASTCALL_MSVC
#define FASTCALL_ATTR __attribute__((fastcall))
#define INLINE_PREFIX inline
typedef u_int32_t uint32_t;
#define EXPORTFUNCTION
@ -174,7 +179,7 @@ typedef void * JSITER;
typedef struct __JSONTypeContext
{
int type;
char prv[sizeof(void *) * 15];
void *prv[15];
} JSONTypeContext;
/*

@ -472,6 +472,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_array( struct DecoderState *ds)
if ((*ds->start) == ']')
{
*ds->start ++;
return newObj;
}
@ -521,6 +522,7 @@ FASTCALL_ATTR JSOBJ FASTCALL_MSVC decode_object( struct DecoderState *ds)
if ((*ds->start) == '}')
{
ds->start ++;
return newObj;
}

@ -41,10 +41,6 @@ Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights rese
#include <malloc.h>
#include <math.h>
void FASTCALL_MSVC Buffer_Realloc (JSONObjectEncoder *enc) FASTCALL_ATTR;
void FASTCALL_MSVC Buffer_Escape (JSONObjectEncoder *enc, char *inputOffset) FASTCALL_ATTR;
#ifndef TRUE
#define TRUE 1
#endif
@ -53,7 +49,92 @@ void FASTCALL_MSVC Buffer_Escape (JSONObjectEncoder *enc, char *inputOffset) FAS
#endif
FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendEscape(JSONObjectEncoder *enc, const char *_pstr, size_t _len)
//#define __LINE_PROFILER__
#ifdef __LINE_PROFILER__
unsigned int g_profLines[1000] = { 0 };
#define PROFILE_MARK() g_profLines[__LINE__] ++;
#else
#define PROFILE_MARK()
#endif
/*
FIXME: Keep track of how big these get across several encoder calls and try to make an estimate.
That way we won't run our head into the wall on each call */
/*
Grows the encoder's output buffer so that at least cbNeeded more bytes fit.

The capacity (enc->end - enc->start) is doubled until it reaches the current
capacity plus cbNeeded. If enc->heap is already set the block is resized with
enc->realloc; otherwise a new block is taken from enc->malloc, the bytes
written so far are copied over and enc->heap is set. enc->start, enc->offset
and enc->end are then rebased onto the new block.

NOTE(review): the results of enc->realloc / enc->malloc are not checked here,
and a failed realloc overwrites the only pointer to the old block. Fixing that
needs an error path that is not visible from this function - confirm how
allocation failures should be reported before changing it.
*/
void Buffer_Realloc (JSONObjectEncoder *enc, size_t cbNeeded)
{
  size_t curSize = enc->end - enc->start;
  size_t offset = enc->offset - enc->start;
  size_t wanted = curSize + cbNeeded;
  /* Seed with 1 for an empty buffer: doubling 0 stays 0, which used to keep
     the sizing loop below spinning forever */
  size_t newSize = (curSize > 0) ? curSize : 1;

  PROFILE_MARK();

  /* Always at least doubles once, then keeps doubling until the request fits */
  do
  {
    newSize *= 2;
  } while (newSize < wanted);

  if (enc->heap)
  {
    enc->start = (char *) enc->realloc (enc->start, newSize);
  }
  else
  {
    /* First growth: the current buffer is not heap-owned, so it is copied
       into a fresh allocation instead of being passed to realloc */
    char *oldStart = enc->start;
    enc->heap = 1;
    enc->start = (char *) enc->malloc (newSize);
    memcpy (enc->start, oldStart, offset);
  }

  enc->offset = enc->start + offset;
  enc->end = enc->start + newSize;
}
/*
Copies the NUL-terminated string at inputOffset to the encoder's write cursor
(enc->offset), turning '"', '\\', '\b', '\f', '\n', '\r' and '\t' into their
two-character backslash escapes. Every other byte is copied through as is.
The terminating NUL is not written.

No bounds check is done against enc->end: the caller has to reserve room
first (at most two output bytes are produced per input byte).

NOTE: '/' is deliberately left unescaped. The RFC says escape solidus but none
of the reference encoders does so.
*/
void Buffer_Escape (JSONObjectEncoder *enc, char *inputOffset)
{
  const char *src;

  PROFILE_MARK();

  //FIXME: Encode '\uXXXX' here
  for (src = inputOffset; *src != '\0'; src ++)
  {
    char escaped;

    switch (*src)
    {
      case '\"': escaped = '\"'; break;
      case '\\': escaped = '\\'; break;
      case '\b': escaped = 'b'; break;
      case '\f': escaped = 'f'; break;
      case '\n': escaped = 'n'; break;
      case '\r': escaped = 'r'; break;
      case '\t': escaped = 't'; break;

      default:
        // Ordinary byte: copy it and move on to the next input character
        *(enc->offset++) = *src;
        continue;
    }

    *(enc->offset++) = '\\';
    *(enc->offset++) = escaped;
  }
}
/*
Makes sure at least __len more bytes can be written at (__enc)->offset,
calling Buffer_Realloc when they would run past (__enc)->end.

Wrapped in do { } while (0) so the macro behaves as one statement, also as the
body of an unbraced if/else. __enc is evaluated more than once - do not pass
an expression with side effects.
*/
#define Buffer_Reserve(__enc, __len) \
  do \
  { \
    if ((__enc)->offset + (__len) > (__enc)->end) \
    { \
      Buffer_Realloc((__enc), (__len)); \
    } \
  } while (0)

/*
Stores __chr at the write cursor and advances the cursor by one byte.
"Unchecked": no test against (__enc)->end is made, reserve room beforehand.
Expands to a single expression; the caller supplies the terminating ';'.
*/
#define Buffer_AppendCharUnchecked(__enc, __chr) \
  (*((__enc)->offset++) = (__chr))
/*
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendEscape(JSONObjectEncoder *enc, const char *_pstr, size_t _len)
{
while (enc->offset + ((_len * 2) + 2) > enc->end)
{
@ -65,14 +146,15 @@ FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendEscape(JSONObjectEncoder *enc, con
*(enc->offset++) = '\"';
}
FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendEscapeUnchecked(JSONObjectEncoder *enc, const char *_pstr)
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_AppendEscapeUnchecked(JSONObjectEncoder *enc, const char *_pstr)
{
*(enc->offset++) = '\"';
Buffer_Escape(enc, (char *)_pstr);
*(enc->offset++) = '\"';
}
FASTCALL_ATTR void FASTCALL_MSVC Buffer_Append(JSONObjectEncoder *enc, const char *_pstr, size_t _len)
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_Append(JSONObjectEncoder *enc, const char *_pstr, size_t _len)
{
while (enc->offset + _len > enc->end)
{
@ -82,16 +164,14 @@ FASTCALL_ATTR void FASTCALL_MSVC Buffer_Append(JSONObjectEncoder *enc, const cha
enc->offset += _len;
}
FASTCALL_ATTR void FASTCALL_MSVC Buffer_Reserve(JSONObjectEncoder *enc, size_t _len)
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC Buffer_Reserve(JSONObjectEncoder *enc, size_t _len)
{
while (enc->offset + _len > enc->end)
{
Buffer_Realloc(enc);
}
}
#define Buffer_AppendCharUnchecked(__enc, __chr) \
*((__enc)->offset++) = __chr; \
*/
@ -102,18 +182,22 @@ FASTCALL_ATTR void FASTCALL_MSVC Buffer_Reserve(JSONObjectEncoder *enc, size_t _
*/
static const double g_pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000};
FASTCALL_ATTR void FASTCALL_MSVC strreverse(char* begin, char* end)
/*
Reverses, in place, the characters in the range begin..end.
end points AT the last character of the range (inclusive), not one past it.
*/
FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char* begin, char* end)
{
  char *lo = begin;
  char *hi = end;

  PROFILE_MARK();

  /* Swap the outermost pair and walk both cursors inwards until they meet */
  for (; hi > lo; lo ++, hi --)
  {
    char held = *hi;
    *hi = *lo;
    *lo = held;
  }
}
FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSLONG value)
void Buffer_AppendIntUnchecked(JSONObjectEncoder *enc, JSLONG value)
{
char* wstr;
JSULONG uvalue = (value < 0) ? -value : value;
wstr = enc->offset;
JSULONG uvalue = (value < 0) ? -value : value;
PROFILE_MARK();
wstr = enc->offset;
// Conversion. Number is reversed.
do *wstr++ = (char)(48 + (uvalue % 10)); while(uvalue /= 10);
if (value < 0) *wstr++ = '-';
@ -123,7 +207,7 @@ FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendIntUnchecked(JSONObjectEncoder *en
enc->offset += (wstr - (enc->offset));
}
FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendDoubleUnchecked(JSONObjectEncoder *enc, double value)
void Buffer_AppendDoubleUnchecked(JSONObjectEncoder *enc, double value)
{
/* if input is larger than thres_max, revert to exponential */
const double thres_max = (double)(0x7FFFFFFF);
@ -136,6 +220,8 @@ FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendDoubleUnchecked(JSONObjectEncoder
uint32_t frac;
int neg;
PROFILE_MARK();
/* Hacky test for NaN
* under -fast-math this won't work, but then you also won't
* have correct nan values anyways. The alternative is
@ -256,58 +342,9 @@ FASTCALL_ATTR void FASTCALL_MSVC Buffer_AppendDoubleUnchecked(JSONObjectEncoder
}
/*
FIXME: Keep track of how big these get across several encoder calls and try to make an estimate.
That way we won't run our head into the wall on each call */
/*
Doubles the capacity of the encoder's output buffer.

If enc->heap is already set the block is resized with enc->realloc; otherwise
a new block is taken from enc->malloc, the bytes written so far are copied
over and enc->heap is set. enc->start, enc->offset and enc->end are then
rebased onto the new block.

NOTE(review): the results of enc->realloc / enc->malloc are not checked here,
and a failed realloc overwrites the only pointer to the old block - confirm
how allocation failures should be reported before changing it.
*/
FASTCALL_ATTR void FASTCALL_MSVC Buffer_Realloc (JSONObjectEncoder *enc)
{
  size_t curSize = enc->end - enc->start;
  size_t offset = enc->offset - enc->start;
  /* An empty buffer grows to 1 byte: doubling 0 would stay 0 and a caller
     looping until its request fits could never finish */
  size_t newSize = (curSize > 0) ? curSize * 2 : 1;

  if (enc->heap)
  {
    enc->start = (char *) enc->realloc (enc->start, newSize);
  }
  else
  {
    /* First growth: the current buffer is not heap-owned, so it is copied
       into a fresh allocation instead of being passed to realloc */
    char *oldStart = enc->start;
    enc->heap = 1;
    enc->start = (char *) enc->malloc (newSize);
    memcpy (enc->start, oldStart, offset);
  }

  enc->offset = enc->start + offset;
  enc->end = enc->start + newSize;
}
/*
Copies the NUL-terminated string at inputOffset to the encoder's write cursor
(enc->offset), turning '"', '\\', '\b', '\f', '\n', '\r' and '\t' into their
two-character backslash escapes. Every other byte is copied through as is.
The terminating NUL is not written.

No bounds check is done against enc->end: the caller has to reserve room
first (at most two output bytes are produced per input byte).

NOTE: '/' is deliberately left unescaped. The RFC says escape solidus but none
of the reference encoders does so.
*/
FASTCALL_ATTR void FASTCALL_MSVC Buffer_Escape (JSONObjectEncoder *enc, char *inputOffset)
{
  char ch;

  //FIXME: Encode '\uXXXX' here
  for (ch = *inputOffset; ch != '\0'; ch = *(++inputOffset))
  {
    /* Second character of the escape pair, or NUL when ch needs no escape */
    char letter = '\0';

    switch (ch)
    {
      case '\"':
      case '\\': letter = ch; break;
      case '\b': letter = 'b'; break;
      case '\f': letter = 'f'; break;
      case '\n': letter = 'n'; break;
      case '\r': letter = 'r'; break;
      case '\t': letter = 't'; break;
      default: break;
    }

    if (letter != '\0')
    {
      *(enc->offset++) = '\\';
      *(enc->offset++) = letter;
    }
    else
    {
      *(enc->offset++) = ch;
    }
  }
}
/*
FIXME:
@ -329,6 +366,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
JSONTypeContext tc;
size_t szlen;
PROFILE_MARK();
if (enc->level > enc->recursionMax)
{
SetError (obj, enc, "Maximum recursion level reached");
@ -346,7 +385,10 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
if (name)
{
Buffer_AppendEscapeUnchecked (enc, name);
Buffer_AppendCharUnchecked(enc, '\"');
Buffer_Escape(enc, name);
Buffer_AppendCharUnchecked(enc, '\"');
Buffer_AppendCharUnchecked (enc, ':');
#ifndef JSON_NO_EXTRA_WHITESPACE
Buffer_AppendCharUnchecked (enc, ' ');
@ -365,6 +407,9 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
{
int count = 0;
JSOBJ iterObj;
PROFILE_MARK();
enc->iterBegin(obj, &tc);
Buffer_AppendCharUnchecked (enc, '[');
@ -396,6 +441,9 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
int count = 0;
JSOBJ iterObj;
char *objName;
PROFILE_MARK();
enc->iterBegin(obj, &tc);
Buffer_AppendCharUnchecked (enc, '{');
@ -425,12 +473,16 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
case JT_INTEGER:
{
PROFILE_MARK();
Buffer_AppendIntUnchecked (enc, enc->getLongValue(obj, &tc));
break;
}
case JT_TRUE:
{
PROFILE_MARK();
Buffer_AppendCharUnchecked (enc, 't');
Buffer_AppendCharUnchecked (enc, 'r');
Buffer_AppendCharUnchecked (enc, 'u');
@ -441,6 +493,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
case JT_FALSE:
{
//Buffer_AppendUnchecked (buffer, "false", 5);
PROFILE_MARK();
Buffer_AppendCharUnchecked (enc, 'f');
Buffer_AppendCharUnchecked (enc, 'a');
Buffer_AppendCharUnchecked (enc, 'l');
@ -453,6 +507,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
case JT_NULL:
{
//Buffer_AppendUnchecked(buffer, "null", 4);
PROFILE_MARK();
Buffer_AppendCharUnchecked (enc, 'n');
Buffer_AppendCharUnchecked (enc, 'u');
Buffer_AppendCharUnchecked (enc, 'l');
@ -462,6 +518,8 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
case JT_DOUBLE:
{
PROFILE_MARK();
Buffer_AppendDoubleUnchecked (enc, enc->getDoubleValue(obj, &tc));
break;
}
@ -469,7 +527,12 @@ void encode(JSOBJ obj, JSONObjectEncoder *enc, const char *name, size_t cbName)
case JT_UTF8:
{
const char *value = enc->getStringValue(obj, &tc, &szlen);
Buffer_AppendEscape(enc, value, szlen);
PROFILE_MARK();
Buffer_Reserve(enc, (szlen * 2) + 2);
Buffer_AppendCharUnchecked (enc, '\"');
Buffer_Escape(enc, value);
Buffer_AppendCharUnchecked (enc, '\"');
break;
}
}
@ -519,6 +582,21 @@ char *JSON_EncodeObject(JSOBJ obj, JSONObjectEncoder *enc, char *_buffer, size_t
encode (obj, enc, NULL, 0);
Buffer_Append(enc, "\0", 1);
Buffer_Reserve(enc, 1);
Buffer_AppendCharUnchecked(enc, '\0');
#ifdef __LINE_PROFILER__
{
int index;
for (index = 0; index < 1000; index ++)
{
if (g_profLines[index] > 0)
fprintf (stderr, "%d %u\n", index, g_profLines[index]);
}
getchar();
}
#endif
return enc->start;
}