1 # Copyright (c) 2010, 2011, 2012 Nicira, Inc.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at:
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
20 from six.moves import range
22 __pychecker__ = 'no-stringiter'
# Table mapping a character's code point to the text that replaces it inside
# a JSON string literal.  _Serializer looks characters up with
# escapes.get(ord(c), c), so code points without an entry are written as-is.
24 escapes = {ord('"'): u"\\\"",
# Code points that have no dedicated entry yet get the generic six-character
# \uXXXX form (four lowercase hex digits).
# NOTE(review): the loop that produces 'esc' is not visible in this excerpt;
# confirm which code points it covers.
32 if esc not in escapes:
33 escapes[esc] = u"\\u%04x" % esc
38 class _Serializer(object):
# Set up a serializer that writes to 'stream'.  to_stream() passes its own
# 'pretty' and 'sort_keys' arguments straight through to this constructor.
# NOTE(review): only the sort_keys assignment is visible in this excerpt; the
# other attributes used by the class (self.stream, self.depth) are presumably
# initialized on the lines not shown — confirm.
39 def __init__(self, stream, pretty, sort_keys):
42 self.sort_keys = sort_keys
def __serialize_string(self, s):
    """Writes 's' to the output stream as a double-quoted JSON string.

    Each character is looked up by code point in the module-level 'escapes'
    table; characters without an entry are copied through unchanged."""
    pieces = [escapes.get(ord(ch), ch) for ch in s]
    quoted = u'"%s"' % ''.join(pieces)
    self.stream.write(quoted)
# Start a new output line and indent it by SPACES_PER_LEVEL spaces for each
# level of nesting recorded in self.depth.
# NOTE(review): any condition guarding these writes (presumably the 'pretty'
# option) is not visible in this excerpt — confirm.
48 def __indent_line(self):
50 self.stream.write('\n')
51 self.stream.write(' ' * (SPACES_PER_LEVEL * self.depth))
# Write 'obj' to self.stream as JSON text.
#
# Dispatch is on the exact type of 'obj' (type(obj) == ..., not isinstance),
# so instances of subclasses of int, float, dict, list, ... do not match the
# branches below.  Raises Exception for an object that cannot be represented.
# NOTE(review): the conditions selecting the null/false/true writes and the
# pretty-printing steps are on lines not visible in this excerpt.
53 def serialize(self, obj):
55 self.stream.write(u"null")
57 self.stream.write(u"false")
59 self.stream.write(u"true")
60 elif type(obj) in (int, long):
61 self.stream.write(u"%d" % obj)
62 elif type(obj) == float:
# %.15g keeps at most 15 significant digits of the float.
63 self.stream.write("%.15g" % obj)
64 elif type(obj) == unicode:
65 self.__serialize_string(obj)
66 elif type(obj) == str:
# Byte strings are converted to unicode before being escaped and quoted.
67 self.__serialize_string(unicode(obj))
68 elif type(obj) == dict:
69 self.stream.write(u"{")
# Two ways of walking the members: sorted by item, or in the dict's own
# iteration order (presumably selected by self.sort_keys — confirm).
75 items = sorted(obj.items())
77 items = six.iteritems(obj)
78 for i, (key, value) in enumerate(items):
80 self.stream.write(u",")
# Keys are passed through unicode(), so non-string keys are written as the
# quoted text of their unicode() conversion.
82 self.__serialize_string(unicode(key))
83 self.stream.write(u":")
85 self.stream.write(u' ')
88 self.stream.write(u"}")
90 elif type(obj) in (list, tuple):
# Lists and tuples are both written as JSON arrays.
91 self.stream.write(u"[")
97 for i, value in enumerate(obj):
99 self.stream.write(u",")
# Elements are written by recursing into serialize().
101 self.serialize(value)
104 self.stream.write(u"]")
106 raise Exception("can't serialize %s as JSON" % obj)
def to_stream(obj, stream, pretty=False, sort_keys=True):
    """Serializes 'obj' as JSON and writes the text to 'stream'.

    'pretty' and 'sort_keys' are handed unchanged to the _Serializer that
    does the actual work.  Returns None."""
    serializer = _Serializer(stream, pretty, sort_keys)
    serializer.serialize(obj)
# Serialize 'obj' as JSON into the file called 'name'.  The file is opened in
# "w" mode, so existing content is truncated; 'pretty' and 'sort_keys' are
# forwarded to to_stream().
# NOTE(review): the code that closes 'stream' is not visible in this excerpt;
# confirm the file is closed on every path, including when to_stream() raises.
113 def to_file(obj, name, pretty=False, sort_keys=True):
114 stream = open(name, "w")
116 to_stream(obj, stream, pretty, sort_keys)
# Serialize 'obj' as JSON into an in-memory StringIO buffer and fetch the
# accumulated text into 's'.
# NOTE(review): the return statement is not visible in this excerpt;
# presumably 's' is returned — confirm.
121 def to_string(obj, pretty=False, sort_keys=True):
122 output = StringIO.StringIO()
123 to_stream(obj, output, pretty, sort_keys)
124 s = output.getvalue()
# Parse JSON read from 'stream'.  A Parser is created with
# check_trailer=True and the input is read in chunks of up to 4096.
# NOTE(review): the enclosing loop and the return are not visible in this
# excerpt.
129 def from_stream(stream):
130 p = Parser(check_trailer=True)
132 buf = stream.read(4096)
# Reading is over at end of input (empty read) or as soon as p.feed() reports
# a count different from the chunk length.
133 if buf == "" or p.feed(buf) != len(buf):
139 stream = open(name, "r")
141 return from_stream(stream)
148 s = unicode(s, 'utf-8')
149 except UnicodeDecodeError as e:
150 seq = ' '.join(["0x%2x" % ord(c)
151 for c in e.object[e.start:e.end] if ord(c) >= 0x80])
152 return ("not a valid UTF-8 string: invalid UTF-8 sequence %s" % seq)
153 p = Parser(check_trailer=True)
158 class Parser(object):
159 # Maximum height of parsing stack. #
# Create a parser in its initial state.
#
# check_trailer: stored as-is; __parser_pop() consults it once the outermost
# value has been completed.
# NOTE(review): further attributes (e.g. stack, error, buffer, line/byte
# counters) are presumably initialized on lines not visible here.
162 def __init__(self, check_trailer=False):
163 self.check_trailer = check_trailer
# Current lexer state: a function looked up on the class and invoked as
# self.lex_state(self, c).
166 self.lex_state = Parser.__lex_start
# Column position, reported in error messages.
169 self.column_number = 0
# Current parser state: a function looked up on the class and invoked as
# self.parse_state(self, token, string).
173 self.parse_state = Parser.__parse_start
# Name of the object member whose value is being awaited; set by
# __parse_object_name() and used as the key in __put_value().
175 self.member_name = None
# Lexer start-state action stored in __lex_start_actions.
# NOTE(review): the body and the characters mapped to this action are not
# visible in this excerpt; presumably whitespace is skipped — confirm.
181 def __lex_start_space(self, c):
# Lexer start-state action for the lowercase letters a-z: a letter begins a
# keyword, so lexing continues in the keyword state.
# NOTE(review): the line between the header and the state change is not
# visible; presumably it stores 'c' as the start of self.buffer — confirm.
184 def __lex_start_alpha(self, c):
186 self.lex_state = Parser.__lex_keyword
def __lex_start_token(self, c):
    """Lexer start-state action, stored in __lex_start_actions, for a
    character that is a complete token by itself: 'c' is handed to the
    parser unchanged."""
    token = c
    self.__parser_input(token)
# Lexer start-state action for '-' and the digits 0-9: such a character
# begins a number, so lexing continues in the number state.
# NOTE(review): the line between the header and the state change is not
# visible; presumably it stores 'c' as the start of self.buffer — confirm.
191 def __lex_start_number(self, c):
193 self.lex_state = Parser.__lex_number
def __lex_start_string(self, _):
    """Lexer start-state action for a double quote.

    The quote character itself is ignored; the lexer switches to the string
    state for the characters that follow."""
    string_state = Parser.__lex_string
    self.lex_state = string_state
# Fallback start-state action, used by __lex_start() for any character that
# has no entry in __lex_start_actions: report the character as invalid.
198 def __lex_start_error(self, c):
# Code points 32..127 are quoted literally in the message (note that this
# range includes DEL, 127); the other message names the code point as U+xxxx.
199 if ord(c) >= 32 and ord(c) < 128:
200 self.__error("invalid character '%s'" % c)
202 self.__error("invalid character U+%04x" % ord(c))
# Dispatch table for the lexer's start state: maps an input character to the
# action that handles it.  __lex_start() looks characters up here and falls
# back to __lex_start_error for anything missing.
204 __lex_start_actions = {}
# NOTE(review): the 'for' header feeding this assignment is not visible in
# this excerpt.
206 __lex_start_actions[c] = __lex_start_space
# Lowercase letters start a keyword.
207 for c in "abcdefghijklmnopqrstuvwxyz":
208 __lex_start_actions[c] = __lex_start_alpha
# NOTE(review): the 'for' header feeding this assignment is not visible in
# this excerpt.
210 __lex_start_actions[c] = __lex_start_token
# A minus sign or a digit starts a number.
211 for c in "-0123456789":
212 __lex_start_actions[c] = __lex_start_number
# A double quote starts a string.
213 __lex_start_actions['"'] = __lex_start_string
# Lexer state between tokens: look up the action for 'c' in
# __lex_start_actions and run it; characters without an entry go to
# __lex_start_error.
# NOTE(review): no return statement is visible here, yet __lex_input()
# asserts that every lexer state returns True or False — confirm the return
# value against the full file.
215 def __lex_start(self, c):
216 Parser.__lex_start_actions.get(
217 c, Parser.__lex_start_error)(self, c)
# Record every ASCII letter, lower and upper case, as a key of __lex_alpha.
# The dict is used as a set: __lex_keyword() only tests 'c in __lex_alpha'.
# NOTE(review): the statement creating __lex_alpha is not visible in this
# excerpt.
221 for c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
222 __lex_alpha[c] = True
# Turn the keyword text accumulated in self.buffer into a token for the
# parser: "false", "true" and "null" become False, True and None.  The
# remaining visible branch reports the text as an invalid keyword.
224 def __lex_finish_keyword(self):
225 if self.buffer == "false":
226 self.__parser_input(False)
227 elif self.buffer == "true":
228 self.__parser_input(True)
229 elif self.buffer == "null":
230 self.__parser_input(None)
232 self.__error("invalid keyword '%s'" % self.buffer)
# Lexer state while inside a keyword.  ASCII letters (members of
# __lex_alpha) are treated separately from all other characters.
# NOTE(review): the body of the letter branch, the 'else' and the return
# values are not visible in this excerpt; presumably letters are appended to
# self.buffer and any other character ends the keyword — confirm.
234 def __lex_keyword(self, c):
235 if c in Parser.__lex_alpha:
239 self.__lex_finish_keyword()
# Grammar of a complete JSON number.  The four capture groups are, in order:
# the optional minus sign, the integer digits (a lone 0, or digits without a
# leading zero), the optional fraction digits and the optional exponent with
# its sign.  Raw string literals are used so that the regex escape '\.' is
# not read as a (invalid) Python string escape; the compiled pattern is
# unchanged.
__number_re = re.compile(r"(-)?(0|[1-9][0-9]*)"
                         r"(?:\.([0-9]+))?(?:[eE]([-+]?[0-9]+))?$")
# Convert the text of a complete number into a token for the parser, or
# report why it is not a valid JSON number.
# NOTE(review): several lines of this method (among them the assignments of
# 's', 'sig_string', 'pow10' and 'value') are not visible in this excerpt.
245 def __lex_finish_number(self):
247 m = Parser.__number_re.match(s)
# Groups: minus sign, integer digits, fraction digits, exponent.
249 sign, integer, fraction, exp = m.groups()
# Reject exponents that do not fit between -sys.maxint - 1 and sys.maxint.
250 if (exp is not None and
251 (long(exp) > sys.maxint or long(exp) < -sys.maxint - 1)):
252 self.__error("exponent outside valid range")
# A fraction made up only of zeros gets special treatment (the body of this
# branch is not visible; presumably the fraction is discarded — confirm).
255 if fraction is not None and len(fraction.lstrip('0')) == 0:
# The significand is the fraction digits appended to sig_string, read as one
# integer; pow10 is lowered by the number of fraction digits.
259 if fraction is not None:
260 sig_string += fraction
261 significand = int(sig_string)
264 if fraction is not None:
265 pow10 -= len(fraction)
270 self.__parser_input(0)
# Significands up to 2**63 are candidates for an exact integer result.
272 elif significand <= 2 ** 63:
273 while pow10 > 0 and significand <= 2 ** 63:
276 while pow10 < 0 and significand % 10 == 0:
# Integer results must fit a signed 64-bit range: below 2**63 when positive,
# magnitude at most 2**63 when negative.
280 ((not sign and significand < 2 ** 63) or
281 (sign and significand <= 2 ** 63))):
283 self.__parser_input(-significand)
285 self.__parser_input(significand)
# Values that are infinite as floats are rejected.
289 if value == float("inf") or value == float("-inf"):
290 self.__error("number outside valid range")
293 # Suppress negative zero.
295 self.__parser_input(value)
# From here on the text did not match __number_re; the checks below pick the
# most specific diagnostic.
296 elif re.match("-?0[0-9]", s):
297 self.__error("leading zeros not allowed")
298 elif re.match("-([^0-9]|$)", s):
299 self.__error("'-' must be followed by digit")
# NOTE(review): this pattern is a non-raw string containing '\.', which newer
# Python versions flag as an invalid escape sequence; a raw string would
# express the same pattern.
300 elif re.match("-?(0|[1-9][0-9]*)\.([^0-9]|$)", s):
301 self.__error("decimal point must be followed by digit")
# NOTE(review): only a lowercase 'e' is searched for, although __lex_number()
# also accepts 'E'; an input such as "1E" therefore ends up with the generic
# message below — confirm whether that is intended.
302 elif re.search("e[-+]?([^0-9]|$)", s):
303 self.__error("exponent must contain at least one digit")
305 self.__error("syntax error in number")
# Lexer state while inside a number.  Characters that can occur in a number
# (digits, '.', 'e', 'E', '-', '+') are treated separately from all others;
# no validation happens here, that is left to __lex_finish_number().
# NOTE(review): the body of the first branch, the 'else' and the return
# values are not visible in this excerpt; presumably matching characters are
# appended to self.buffer and any other character ends the number — confirm.
307 def __lex_number(self, c):
308 if c in ".0123456789eE-+":
312 self.__lex_finish_number()
# Exactly four hexadecimal digits, as required after '\u' in a JSON string.
# It is applied with match(), which anchors only at the start of its
# argument.
315 __4hex_re = re.compile("[0-9a-fA-F]{4}")
# Validate the four characters 's' that follow a '\u' escape.  Three
# failures are reported: the string ends inside the escape, the characters
# are not four hex digits, and the escape denotes a null byte.
# NOTE(review): the conditions of the first and last check and the return
# statements are not visible in this excerpt; callers compare the result
# against code point ranges, so it presumably returns the value as an int —
# confirm.
317 def __lex_4hex(self, s):
319 self.__error("quoted string ends within \\u escape")
320 elif not Parser.__4hex_re.match(s):
321 self.__error("malformed \\u escape")
323 self.__error("null bytes not supported in quoted strings")
# Range test for the first half of a UTF-16 surrogate pair: true for code
# points from 0xd800 to 0xdbff inclusive.
# NOTE(review): the function takes no 'self' and is called as
# Parser.__is_leading_surrogate(c0), so it is presumably declared as a
# @staticmethod on a line not visible here; the end of the docstring is not
# visible either.
328 def __is_leading_surrogate(c):
329 """Returns true if 'c' is a Unicode code point for a leading
331 return c >= 0xd800 and c <= 0xdbff
# Range test for the second half of a UTF-16 surrogate pair: true for code
# points from 0xdc00 to 0xdfff inclusive.
# NOTE(review): the function takes no 'self' and is called as
# Parser.__is_trailing_surrogate(c1), so it is presumably declared as a
# @staticmethod on a line not visible here; the end of the docstring is not
# visible either.
334 def __is_trailing_surrogate(c):
335 """Returns true if 'c' is a Unicode code point for a trailing
337 return c >= 0xdc00 and c <= 0xdfff
# Combine the two halves of a UTF-16 surrogate pair into one code point.
# Neither argument is validated; callers are expected to have checked both
# with __is_leading_surrogate() / __is_trailing_surrogate() first.
340 def __utf16_decode_surrogate_pair(leading, trailing):
341 """Returns the unicode code point corresponding to leading surrogate
342 'leading' and trailing surrogate 'trailing'. The return value will not
343 make any sense if 'leading' or 'trailing' are not in the correct ranges
344 for leading or trailing surrogates."""
345 # Leading surrogate: 110110wwwwxxxxxx
346 # Trailing surrogate: 110111xxxxxxxxxx
347 # Code point: 000uuuuuxxxxxxxxxxxxxxxx
# w: the four 'w' bits of the leading surrogate (bits 6-9).
348 w = (leading >> 6) & 0xf
# NOTE(review): the definitions of 'u' and 'x0' are not visible in this
# excerpt; presumably u = w + 1 and x0 is the low six bits of 'leading', as
# in the standard UTF-16 decoding — confirm.
# x1: the ten payload bits of the trailing surrogate.
351 x1 = trailing & 0x3ff
# Assemble the code point from its three bit fields.
352 return (u << 16) | (x0 << 10) | x1
# Table of single-character escapes: maps the character that follows a
# backslash in a JSON string to the character it stands for.  It is queried
# with .get() in __lex_finish_string().
# NOTE(review): only the first entry is visible in this excerpt.
353 __unescape = {'"': u'"',
# Decode the backslash escapes in the raw text of a quoted string and hand
# the result to the parser as a 'string' token.  'inp' holds the text still
# to be processed and 'out' the decoded text so far.
# NOTE(review): the loop structure and several statements (including the
# initialization of 'inp' and 'out') are not visible in this excerpt.
362 def __lex_finish_string(self):
# Copy everything before the next backslash verbatim, then drop the
# backslash itself.
366 backslash = inp.find('\\')
370 out += inp[:backslash]
371 inp = inp[backslash + 1:]
373 self.__error("quoted string may not end with backslash")
# Single-character escapes are resolved through the __unescape table.
376 replacement = Parser.__unescape.get(inp[0])
377 if replacement is not None:
382 self.__error("bad escape \\%s" % inp[0])
# \uXXXX escape: the four characters after the 'u' are the hex digits.
385 c0 = self.__lex_4hex(inp[1:5])
# A leading surrogate must be followed immediately by a second \uXXXX escape
# holding the trailing surrogate; the two are combined into one code point.
390 if Parser.__is_leading_surrogate(c0):
391 if inp[:2] != u'\\u':
392 self.__error("malformed escaped surrogate pair")
394 c1 = self.__lex_4hex(inp[2:6])
397 if not Parser.__is_trailing_surrogate(c1):
398 self.__error("second half of escaped surrogate pair is "
399 "not trailing surrogate")
401 code_point = Parser.__utf16_decode_surrogate_pair(c0, c1)
405 out += unichr(code_point)
# The token type is the literal 'string'; the decoded text travels as the
# second argument.
406 self.__parser_input('string', out)
# Lexer state for the character right after a backslash inside a quoted
# string (__lex_string() switches to it); the following character is lexed
# in the ordinary string state again.
# NOTE(review): the statements around the state change (presumably storing
# 'c' in self.buffer and returning True) are not visible in this excerpt.
408 def __lex_string_escape(self, c):
410 self.lex_state = Parser.__lex_string
# Lexer state inside a quoted string.  The visible actions are: switch to
# __lex_string_escape, finish the string with __lex_finish_string(), and
# reject a character that is only allowed in escaped form.
# NOTE(review): the conditions selecting each action are not visible in this
# excerpt; presumably a backslash, the closing quote and a control character
# respectively — confirm.
413 def __lex_string(self, c):
416 self.lex_state = Parser.__lex_string_escape
418 self.__lex_finish_string()
422 self.__error("U+%04X must be escaped in quoted string" % ord(c))
# Run the current lexer state function on the character 'c'.  Every state
# function must return exactly True or False; the assertion catches a state
# that forgets to return a value.
# NOTE(review): the return statement is not visible in this excerpt;
# presumably 'eat' is returned to feed(), which tests the result — confirm.
425 def __lex_input(self, c):
426 eat = self.lex_state(self, c)
427 assert eat is True or eat is False
# Initial parser state (installed by __init__).  The visible branch reports
# a token that is not acceptable at the beginning of the input.
# NOTE(review): the branches for acceptable tokens are not visible in this
# excerpt.
430 def __parse_start(self, token, unused_string):
436 self.__error("syntax error at beginning of input")
def __parse_end(self, unused_token, unused_string):
    """Parser state that __parser_pop() installs once the outermost value
    has been completed.

    Nothing may follow that value, so any token arriving in this state is
    reported as an error; both arguments are ignored."""
    message = "trailing garbage at end of input"
    self.__error(message)
# Parser state right after a new object has been pushed (installed by
# __push_object()).  The visible path treats the token as a member name.
# NOTE(review): the condition and the alternative branch are not visible in
# this excerpt; presumably a closing brace ends an empty object — confirm.
441 def __parse_object_init(self, token, string):
445 self.__parse_object_name(token, string)
# Parser state expecting the name of an object member.  A 'string' token is
# remembered in self.member_name (the key later used by __put_value()) and
# the parser goes on to expect the ':'.  The other visible branch reports a
# syntax error.
447 def __parse_object_name(self, token, string):
448 if token == 'string':
449 self.member_name = string
450 self.parse_state = Parser.__parse_object_colon
452 self.__error("syntax error parsing object expecting string")
# Parser state between a member name and its value: either continue with the
# member's value or report a missing ':'.
# NOTE(review): the test on 'token' is not visible in this excerpt.
454 def __parse_object_colon(self, token, unused_string):
456 self.parse_state = Parser.__parse_object_value
458 self.__error("syntax error parsing object expecting ':'")
def __parse_object_value(self, token, string):
    """Parser state for the value of an object member, entered from
    __parse_object_colon().

    The work is delegated to __parse_value(), with __parse_object_next
    passed as the state to continue with."""
    following_state = Parser.__parse_object_next
    self.__parse_value(token, string, following_state)
# Parser state after a member's value.  One visible branch goes back to
# expecting a member name; the last one reports that neither '}' nor ','
# was found.
# NOTE(review): the tests on 'token' and the branch that closes the object
# are not visible in this excerpt.
463 def __parse_object_next(self, token, unused_string):
465 self.parse_state = Parser.__parse_object_name
469 self.__error("syntax error expecting '}' or ','")
# Parser state right after a new array has been pushed (installed by
# __push_array()).  The visible path treats the token as the first element.
# NOTE(review): the condition and the alternative branch are not visible in
# this excerpt; presumably a closing bracket ends an empty array — confirm.
471 def __parse_array_init(self, token, string):
475 self.__parse_array_value(token, string)
def __parse_array_value(self, token, string):
    """Parser state for an array element.

    The work is delegated to __parse_value(), with __parse_array_next
    passed as the state to continue with."""
    following_state = Parser.__parse_array_next
    self.__parse_value(token, string, following_state)
# Parser state after an array element.  One visible branch goes back to
# expecting an element; the last one reports that neither ']' nor ',' was
# found.
# NOTE(review): the tests on 'token' and the branch that closes the array
# are not visible in this excerpt.
480 def __parse_array_next(self, token, unused_string):
482 self.parse_state = Parser.__parse_array_value
486 self.__error("syntax error expecting ']' or ','")
# Deliver a finished token from the lexer to the parser.  'token' is the
# token itself (a punctuation character, a number, False/True/None, or the
# literal 'string'); 'string' carries the decoded text and is only supplied
# together with the 'string' token.
488 def __parser_input(self, token, string=None):
# The lexer always restarts in its start state after a token.
489 self.lex_state = Parser.__lex_start
# Dispatch to whatever state the parser is currently in.
491 self.parse_state(self, token, string)
# Store 'value' in the container 'top'.  In a dict it is stored under the
# member name remembered by __parse_object_name().
# NOTE(review): the assignment of 'top' and the non-dict branch are not
# visible in this excerpt; presumably 'top' is the last element of
# self.stack and lists get the value appended — confirm.
493 def __put_value(self, value):
495 if type(top) == dict:
496 top[self.member_name] = value
# Begin a nested container.  'new_json' is the new, empty container and
# 'next_state' the parser state to continue with.  Nesting is limited to
# Parser.MAX_HEIGHT levels; the remaining visible branch reports an error
# instead of pushing.
500 def __parser_push(self, new_json, next_state):
501 if len(self.stack) < Parser.MAX_HEIGHT:
# Unless this is the outermost value, link the new container into its parent
# before descending into it.
502 if len(self.stack) > 0:
503 self.__put_value(new_json)
504 self.stack.append(new_json)
505 self.parse_state = next_state
507 self.__error("input exceeds maximum nesting depth %d" %
def __push_object(self):
    """Begins a new, empty JSON object via __parser_push(), with
    __parse_object_init as the state to continue in."""
    empty_object = {}
    self.__parser_push(empty_object, Parser.__parse_object_init)
def __push_array(self):
    """Begins a new, empty JSON array via __parser_push(), with
    __parse_array_init as the state to continue in."""
    empty_array = []
    self.__parser_push(empty_array, Parser.__parse_array_init)
# Finish the container that is currently being parsed.
# NOTE(review): several statements (the body of the check_trailer test, the
# actual stack pop and the assignment of 'top') are not visible in this
# excerpt.
516 def __parser_pop(self):
# Only the outermost value is left: the document is complete and any further
# token is an error (see __parse_end).
517 if len(self.stack) == 1:
518 self.parse_state = Parser.__parse_end
519 if not self.check_trailer:
# Otherwise continue in the "next" state that matches the type of 'top'.
524 if type(top) == list:
525 self.parse_state = Parser.__parse_array_next
527 self.parse_state = Parser.__parse_object_next
# Handle a token in a position where a JSON value is expected.
# 'next_state' is the parser state to continue with (see the last visible
# line).
# NOTE(review): the branches between the string case and the error (at least
# the ones starting nested containers) are not visible in this excerpt.
529 def __parse_value(self, token, string, next_state):
# Scalars arrive as the token itself: False/None/True or a number.
530 if token in [False, None, True] or type(token) in [int, long, float]:
531 self.__put_value(token)
# Strings arrive as the marker token 'string' plus the decoded text.
532 elif token == 'string':
533 self.__put_value(string)
540 self.__error("syntax error expecting value")
542 self.parse_state = next_state
# Record a parse error.  Only the first error is kept: once self.error is
# set, later calls leave it untouched.  The stored text prefixes 'message'
# with the current line, column and byte position.
# NOTE(review): anything following the assignment (for instance marking the
# parser as done) is not visible in this excerpt.
544 def __error(self, message):
545 if self.error is None:
546 self.error = ("line %d, column %d, byte %d: %s"
547 % (self.line_number, self.column_number,
548 self.byte_number, message))
554 if self.done or i >= len(s):
558 if self.__lex_input(c):
559 self.byte_number += 1
561 self.column_number = 0
562 self.line_number += 1
564 self.column_number += 1
572 if self.lex_state == Parser.__lex_start:
574 elif self.lex_state in (Parser.__lex_string,
575 Parser.__lex_string_escape):
576 self.__error("unexpected end of input in quoted string")
578 self.__lex_input(" ")
580 if self.parse_state == Parser.__parse_start:
581 self.__error("empty input stream")
582 elif self.parse_state != Parser.__parse_end:
583 self.__error("unexpected end of input")
585 if self.error is None:
586 assert len(self.stack) == 1
587 return self.stack.pop()