"""
Implementation of JSONDecoder
"""
import binascii
import re
import struct
import sys

from django.utils.simplejson.scanner import Scanner, pattern
try:
    from django.utils.simplejson._speedups import scanstring as c_scanstring
except ImportError:
    pass
# Flags passed to the regular expressions this module compiles directly.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
def _floatconstants(): |
|
import struct |
|
import sys |
|
_BYTES = '7FF80000000000007FF0000000000000'.decode('hex') |
|
if sys.byteorder != 'big': |
|
_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] |
|
nan, inf = struct.unpack('dd', _BYTES) |
|
return nan, inf, -inf |
|
|
|
NaN, PosInf, NegInf = _floatconstants() |
|
|
|
|
|
def linecol(doc, pos):
    """Translate a character offset into a ``(line, column)`` pair.

    ``doc`` is the full document text and ``pos`` an index into it.  Both
    the returned line number and column number are 1-based.

    The column is the distance from the newline preceding ``pos``.  On the
    first line there is no such newline, ``rfind`` returns -1, and the
    column comes out as ``pos + 1``, so columns are counted the same way on
    every line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
def errmsg(msg, doc, pos, end=None):
    """Build an error message that locates ``pos`` (and ``end``) in ``doc``.

    ``msg`` is used as the message prefix.  The position is reported as the
    line and column computed by ``linecol`` plus the raw character offset.
    When ``end`` is given, the message describes the range ``pos`` .. ``end``
    instead of a single position.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
# Literal tokens recognised by JSONConstant and their default Python values.
_CONSTANTS = {
    'null': None,
    'true': True,
    'false': False,
    'NaN': NaN,
    'Infinity': PosInf,
    '-Infinity': NegInf,
}


def JSONConstant(match, context, c=_CONSTANTS):
    """Scanner callback for ``null``, ``true``, ``false`` and the float
    constants ``NaN``, ``Infinity`` and ``-Infinity``.

    If ``context`` has a non-None ``parse_constant`` attribute, it is called
    with the matched text and its result is used; otherwise the value is
    looked up in ``c``.

    Returns ``(value, None)``.
    NOTE(review): the None presumably tells the scanner to continue from the
    end of the match -- confirm against the Scanner implementation.
    """
    token = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(token), None
    return c[token], None


# Register the regular expression that triggers this callback.
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
def JSONNumber(match, context):
    """Scanner callback that decodes a JSON number.

    A number with a fraction and/or exponent is converted with
    ``context.parse_float`` (falling back to ``float``); a plain integer is
    converted with ``context.parse_int`` (falling back to ``int``).

    Returns ``(value, None)``.
    """
    # Re-match the same span with this callback's own regex to obtain the
    # integer / fraction / exponent groups.
    # NOTE(review): ``JSONNumber.regex`` is presumably attached by the
    # ``pattern(...)`` registration below -- confirm against the scanner.
    parsed = JSONNumber.regex.match(match.string, *match.span())
    int_part, frac_part, exp_part = parsed.groups()
    if not (frac_part or exp_part):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(int_part), None
    to_float = getattr(context, 'parse_float', None) or float
    literal = int_part + (frac_part or '') + (exp_part or '')
    return to_float(literal), None


# Register the regular expression that triggers this callback.
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
# Lazily matches string content up to the next character that needs special
# handling: a closing quote, a backslash, or a raw control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes (the character following a backslash).
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

DEFAULT_ENCODING = "utf-8"


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan a JSON string literal in ``s`` and return its decoded value.

    ``end`` is the index just after the opening quote (``end - 1`` is
    reported as the start of the string in error messages).

    ``encoding`` is used to decode content chunks that are not already
    ``unicode``; it defaults to ``DEFAULT_ENCODING``.

    ``strict`` controls raw control characters inside the string: when true
    they raise ``ValueError``, when false they are kept verbatim.

    ``_b`` and ``_m`` are default-argument bindings of the escape table and
    the chunk matcher, not intended to be passed by callers.

    Returns ``(unicode_string, index)`` where ``index`` is the position just
    after the closing quote.

    Raises ``ValueError`` for an unterminated string, an invalid control
    character (strict mode), an unknown backslash escape, a malformed
    ``\\uXXXX`` escape, or an invalid surrogate pair.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            # A raw control character (\x00-\x1f) inside the string.
            if strict:
                # Interpolate the offending character; previously the
                # literal "%r" leaked into the error message.
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        # The terminator was a backslash: decode the escape that follows.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                # On wide-unicode builds, combine a UTF-16 surrogate pair
                # into the single code point it encodes.
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # The second escape must be a low surrogate; otherwise
                    # the arithmetic below yields an unrelated code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
# Use speedup
# ``c_scanstring`` is only bound when the optional _speedups import at the
# top of the module succeeded; otherwise use the pure-Python scanner.
try:
    c_scanstring
except NameError:
    scanstring = py_scanstring
else:
    scanstring = c_scanstring
def JSONString(match, context):
    """Scanner callback that decodes a JSON string.

    ``match`` is the match of the opening quote; scanning continues from its
    end.  ``encoding`` and ``strict`` are read from ``context`` (defaulting
    to None and True) and passed to ``scanstring``, whose result is returned
    unchanged.
    """
    return scanstring(
        match.string,
        match.end(),
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True))


# Register the regular expression that triggers this callback.
pattern(r'"')(JSONString)
WHITESPACE = re.compile(r'\s*', FLAGS)


def JSONObject(match, context, _w=WHITESPACE.match):
    """Scanner callback that decodes a JSON object into a ``dict``.

    ``match`` is the match of the opening ``{``.  Keys are scanned with
    ``scanstring`` (using ``context.encoding`` / ``context.strict``) and
    values with ``JSONScanner``.  If ``context.object_hook`` is not None it
    is called with the resulting dict -- including for an empty object --
    and its return value is used instead.

    ``_w`` is a default-argument binding of the whitespace matcher, not
    intended to be passed by callers.

    Returns ``(value, index)`` where ``index`` is the position just after
    the closing ``}``.

    Raises ``ValueError`` when a property name, a ``:`` or ``,`` delimiter,
    or a value is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object
    if nextchar == '}':
        # The hook is documented to see every decoded object, so the
        # shortcut for "{}" must not bypass it.
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    # JSONScanner is defined later in the module; it is resolved at call time.
    iterscan = JSONScanner.iterscan
    while True:
        # ``end`` points just past the key's opening quote here.
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end


# Register the regular expression that triggers this callback.
pattern(r'{')(JSONObject)
def JSONArray(match, context, _w=WHITESPACE.match):
    """Scanner callback that decodes a JSON array into a ``list``.

    ``match`` is the match of the opening ``[``.  Each element is decoded
    with ``JSONScanner`` using the same ``context``.

    ``_w`` is a default-argument binding of the whitespace matcher, not
    intended to be passed by callers.

    Returns ``(values, index)`` where ``index`` is the position just after
    the closing ``]``.

    Raises ``ValueError`` when an element or a ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    # JSONScanner is defined later in the module; it is resolved at call time.
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # report ``end - 1`` (the same convention JSONObject uses).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end


# Register the regular expression that triggers this callback.
pattern(r'\[')(JSONArray)
# Every scanner callback defined above, in the order handed to Scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner used by JSONObject and JSONArray to decode nested
# values.
JSONScanner = Scanner(ANYTHING)
class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    By default, decoding maps JSON values to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded to
    the corresponding ``float`` values, although they are outside the JSON
    spec.
    """

    # Scanner over all JSON value callbacks; shared by every instance.
    _scanner = Scanner(ANYTHING)
    # NOTE(review): ``__all__`` only has meaning at module level; as a class
    # attribute it is inert.  Kept so the class namespace is unchanged.
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """
        Store the decoding options on the instance; the scanner callbacks
        read them back from the decoder, which is passed as their context.

        ``encoding`` is the encoding used to interpret ``str`` input
        (utf-8 when None).  It has no effect on ``unicode`` input.  Only
        encodings that are a superset of ASCII currently work; text in any
        other encoding should be passed in as ``unicode``.

        ``object_hook``, if given, is called with the ``dict`` produced for
        each decoded JSON object, and its return value replaces that dict.
        Useful for custom deserialization (e.g. JSON-RPC class hinting).

        ``parse_float``, if given, is called with the string of every JSON
        float instead of ``float(num_str)`` (e.g. ``decimal.Decimal``).

        ``parse_int``, if given, is called with the string of every JSON
        integer instead of ``int(num_str)`` (e.g. ``float``).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN, null, true, false.  It can be used to
        raise an exception when invalid JSON numbers are encountered.

        ``strict`` is passed to the string scanner.  In ``py_scanstring``,
        a true value (the default) makes raw control characters inside
        strings an error, while a false value keeps them verbatim.
        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` instance holding one JSON
        document) and return the resulting Python object.

        Leading and trailing whitespace is skipped.  Raises ``ValueError``
        if anything other than whitespace follows the document.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document at the start of ``s`` (a ``str`` or
        ``unicode``) and return a 2-tuple of the Python object and the
        index in ``s`` at which the document ended.

        Unlike ``decode``, extra data after the document is not an error.
        Keyword arguments are forwarded to the scanner's ``iterscan``;
        ``context`` defaults to this decoder.

        Raises ``ValueError`` if no JSON value can be decoded.
        """
        if 'context' not in kw:
            kw['context'] = self
        try:
            obj, end = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        return obj, end


__all__ = ['JSONDecoder']