1 """
2 Implementation of JSONDecoder
3 """
4 import re
5 import sys
6
7 from scanner import Scanner, pattern
8 try:
9 from simplejson import _speedups
10 except:
11 _speedups = None
12
# Regex flags shared by the patterns compiled in this module
# (STRINGCHUNK and WHITESPACE). DOTALL lets '.' match newlines too.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
14
16 import struct
17 import sys
18 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
19 if sys.byteorder != 'big':
20 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
21 nan, inf = struct.unpack('dd', _BYTES)
22 return nan, inf, -inf
23
24 NaN, PosInf, NegInf = _floatconstants()
25
def linecol(doc, pos):
    """
    Translate the character offset ``pos`` in ``doc`` into a
    ``(lineno, colno)`` pair. Both numbers are 1-based.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() yields -1 when no newline precedes pos, so the first line
    # gets column pos + 1 -- the same 1-based convention as later lines.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
33
def errmsg(msg, doc, pos, end=None):
    """
    Build an error message that locates ``pos`` inside ``doc``.

    With only ``pos`` the message names a single line/column/offset; when
    ``end`` is given as well it describes the range from ``pos`` to ``end``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
41
# Literal spellings recognised as constants and the Python values they
# decode to. The keys are exactly the alternatives of the JSONConstant
# pattern. NaN / Infinity / -Infinity are accepted although they are
# outside the JSON spec.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
    'true': True,
    'false': False,
    'null': None,
}
50
def JSONConstant(match, context, c=_CONSTANTS):
    """
    Scanner callback for the literals matched by the pattern registered
    below (``true``, ``false``, ``null``, ``NaN``, ``Infinity``,
    ``-Infinity``).

    If ``context`` provides a ``parse_constant`` callable it receives the
    matched text and its result is used; otherwise the text is looked up
    in ``c``. Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    if hook is not None:
        return hook(literal), None
    return c[literal], None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
60
def JSONNumber(match, context):
    """
    Scanner callback for JSON numbers.

    A number with a fraction or exponent goes through
    ``context.parse_float`` (default ``float``); a plain integer goes
    through ``context.parse_int`` (default ``int``).
    Returns ``(value, None)``.
    """
    # Re-apply this callback's own regex to the matched span to get the
    # integer / fraction / exponent groups.
    # NOTE(review): ``regex`` is presumably attached by ``pattern`` --
    # confirm against the scanner module.
    number = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = number.groups()
    if not (frac or exp):
        to_int = getattr(context, 'parse_int', None) or int
        return to_int(integer), None
    to_float = getattr(context, 'parse_float', None) or float
    return to_float(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber)
72
# Lazily matches a run of string content (group 1) up to the next
# double quote or backslash (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS)
# Single-character backslash escapes and the characters they stand for.
# (\uXXXX escapes are handled separately in scanstring.)
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding assumed for byte strings when scanstring gets encoding=None.
DEFAULT_ENCODING = "utf-8"
80
def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """
    Decode the JSON string literal in ``s`` whose opening quote sits at
    index ``end - 1`` (``end`` is the first character after that quote).

    ``encoding`` is used to turn non-unicode chunks into ``unicode``; it
    defaults to ``DEFAULT_ENCODING``. ``_b`` and ``_m`` are the escape
    table and chunk matcher, bound as defaults for fast local lookup.

    Returns ``(text, end)`` where ``text`` is the decoded ``unicode``
    string and ``end`` is the index just past the closing quote.

    Raises ``ValueError`` for an unterminated string, an unknown
    backslash escape, a malformed ``\\uXXXX`` escape or (on wide unicode
    builds) a high surrogate that is not followed by a valid low
    surrogate escape.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, reported in "unterminated" errors.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        # content: literal text before the terminator;
        # terminator: either the closing quote or a backslash.
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        if terminator == '"':
            break
        # terminator was a backslash: decode the escape that follows it.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                raise ValueError(
                    errmsg("Invalid \\escape: %r" % (esc,), s, end))
            end += 1
        else:
            esc = s[end + 1:end + 5]
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                if len(esc) != 4:
                    raise ValueError
                uni = int(esc, 16)
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate on a wide build: it must be followed
                    # by a \uXXXX low surrogate to form one code point.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if not s[end + 5:end + 7] == '\\u':
                        raise ValueError
                    esc2 = s[end + 7:end + 11]
                    if len(esc2) != 4:
                        raise ValueError
                    uni2 = int(esc2, 16)
                    # Reject anything that is not a low surrogate;
                    # otherwise the arithmetic below would silently
                    # produce an unrelated code point.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = unichr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return u''.join(chunks), end
136
137
138
139
140
def JSONString(match, context):
    """
    Scanner callback fired on an opening double quote: hand the rest of
    the string literal to ``scanstring`` using ``context.encoding`` (or
    ``None`` when the context has none) and return its
    ``(text, end)`` result.
    """
    text_encoding = getattr(context, 'encoding', None)
    source, start = match.string, match.end()
    return scanstring(source, start, text_encoding)
pattern(r'"')(JSONString)
145
# Matches a (possibly empty) run of whitespace. Because it can match the
# empty string, WHITESPACE.match(s, i) never returns None, so callers can
# take .end() directly to skip whitespace.
WHITESPACE = re.compile(r'\s*', FLAGS)
147
def JSONObject(match, context, _w=WHITESPACE.match):
    """
    Scanner callback fired on ``{``: parse a JSON object into a ``dict``.

    Keys are read with ``scanstring`` (using ``context.encoding``) and
    values with ``JSONScanner``. If ``context`` has a non-None
    ``object_hook`` it is applied to the finished dict -- including an
    empty one -- and its return value is used instead.

    Returns ``(obj, end)`` with ``end`` just past the closing ``}``.
    Raises ``ValueError`` on a missing property name, ``:`` delimiter,
    value or ``,`` delimiter.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)

    # Trivial empty object: still goes through object_hook so that the
    # hook really sees every decoded object.
    if nextchar == '}':
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    iterscan = JSONScanner.iterscan
    while True:
        # Here ``end`` points just past the key's opening quote.
        key, end = scanstring(s, end, encoding)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # end was already advanced past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
189
def JSONArray(match, context, _w=WHITESPACE.match):
    """
    Scanner callback fired on ``[``: parse a JSON array into a ``list``.

    Elements are read with ``JSONScanner``. Returns ``(values, end)``
    with ``end`` just past the closing ``]``. Raises ``ValueError`` when
    an element or a ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for the trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = iterscan(s, idx=end, context=context).next()
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # end was already advanced past the offending character, so
            # step back to report its real position (as JSONObject does).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
215
# Every scanner callback able to start a JSON value.
# NOTE(review): the order may matter to Scanner -- confirm before
# reordering.
ANYTHING = [
    JSONObject,
    JSONArray,
    JSONString,
    JSONConstant,
    JSONNumber,
]

# Module-level scanner used by JSONObject and JSONArray to parse nested
# values.
JSONScanner = Scanner(ANYTHING)
225
class JSONDecoder(object):
    """
    Simple JSON <http://json.org> decoder

    By default JSON values are converted to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are accepted as well and
    decode to the matching ``float`` values, although the JSON spec does
    not define them.
    """

    _scanner = Scanner(ANYTHING)
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None):
        """
        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when left as None). It is ignored for ``unicode`` input.
        Only ASCII-superset encodings currently work; text in any other
        encoding should be passed in as ``unicode``.

        ``object_hook``, when given, is called with each decoded JSON
        object and its return value replaces the ``dict``. Useful for
        custom deserialization (e.g. JSON-RPC class hinting).

        ``parse_float``, when given, is called with the text of each
        JSON float; the default behaves like ``float(num_str)``. Allows
        another type or parser (e.g. ``decimal.Decimal``).

        ``parse_int``, when given, is called with the text of each JSON
        integer; the default behaves like ``int(num_str)``. Allows
        another type or parser (e.g. ``float``).

        ``parse_constant``, when given, is called with one of the
        strings -Infinity, Infinity, NaN, null, true, false. It can be
        used to raise an exception on invalid JSON numbers.
        """
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """
        Decode ``s`` (a ``str`` or ``unicode`` holding exactly one JSON
        document, optionally surrounded by whitespace) and return the
        resulting Python object. Raises ``ValueError`` if anything other
        than whitespace follows the document.
        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, **kw):
        """
        Decode the JSON document that ``s`` (a ``str`` or ``unicode``)
        starts with and return ``(obj, end)``: the Python object and the
        index in ``s`` at which the document stopped.

        Unlike ``decode`` this tolerates trailing data after the
        document. Keyword arguments are forwarded to the scanner, with
        ``context`` defaulting to this decoder.
        """
        kw.setdefault('context', self)
        try:
            result = self._scanner.iterscan(s, **kw).next()
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        obj, end = result
        return obj, end
321
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder']
323