blob: f7eb0a61bf3431ae6309d81701200d02422d4358 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// JSON (JavaScript Object Notation) parser.
// See http://www.json.org/
// The json package implements a simple parser and
// representation for JSON (JavaScript Object Notation),
// as defined at http://www.json.org/.
package json
import (
"bytes"
"strconv"
"utf8"
)
// Strings
//
// Double quoted with escapes: \" \\ \/ \b \f \n \r \t \uXXXX.
// No literal control characters, supposedly.
// Have also seen \' and embedded newlines.
// _UnHex interprets the bytes p[r:l] as hexadecimal digits (either case)
// and returns their value in v.  ok is false, and v is 0, if the range
// extends past the end of p or contains a byte that is not a hex digit.
// An empty range (r >= l) yields 0, true.
func _UnHex(p string, r, l int) (v int, ok bool) {
	for i := r; i < l; i++ {
		if i >= len(p) {
			return 0, false
		}
		c := p[i]
		var digit byte
		switch {
		case c >= '0' && c <= '9':
			digit = c - '0'
		case c >= 'a' && c <= 'f':
			digit = c - 'a' + 10
		case c >= 'A' && c <= 'F':
			digit = c - 'A' + 10
		default:
			return 0, false
		}
		v = v*16 + int(digit)
	}
	return v, true
}
// _ToHex stores the low 16 bits of rune in b[0:4] as four lower-case
// hexadecimal digits, most significant digit first.  b must have room
// for at least four bytes.
func _ToHex(b []byte, rune int) {
	const hexDigits = "0123456789abcdef"
	for i := 0; i < 4; i++ {
		// Nibble i sits 12, 8, 4, 0 bits up from the bottom.
		shift := uint(4 * (3 - i))
		b[i] = hexDigits[(rune>>shift)&0xf]
	}
}
// Unquote unquotes the JSON-quoted string s,
// returning a raw string t.  If s is not a valid
// JSON-quoted string, Unquote returns with ok set to false.
//
// In addition to the standard JSON escapes, \' is accepted, and a raw
// newline is tolerated inside the string; any other raw control
// character is an error.  A \uXXXX high surrogate that is immediately
// followed by a \uXXXX low surrogate is decoded as a single code point.
// An unpaired surrogate escape, and any input byte that is not part of
// valid UTF-8, is replaced by U+FFFD.
func Unquote(s string) (t string, ok bool) {
	if len(s) < 2 || s[0] != '"' || s[len(s)-1] != '"' {
		return "", false
	}
	end := len(s) - 1 // index of the closing quote

	// The output is never longer than the input, with one exception:
	// a byte >= utf8.RuneSelf that is not part of valid UTF-8 is
	// rewritten as the three-byte encoding of U+FFFD.  Reserve two
	// extra bytes for every such candidate so that the writes below
	// cannot run past the end of b.
	n := len(s)
	for i := 1; i < end; i++ {
		if s[i] >= utf8.RuneSelf {
			n += 2
		}
	}
	b := make([]byte, n)
	w := 0

	for r := 1; r < end; {
		c := s[r]
		switch {
		case c == '\\':
			r++
			if r >= end {
				// Backslash with nothing after it but the closing quote.
				return "", false
			}
			switch s[r] {
			default:
				return "", false
			case '"', '\\', '/', '\'':
				b[w] = s[r]
			case 'b':
				b[w] = '\b'
			case 'f':
				b[w] = '\f'
			case 'n':
				b[w] = '\n'
			case 'r':
				b[w] = '\r'
			case 't':
				b[w] = '\t'
			case 'u':
				r++
				cp, valid := _UnHex(s, r, r+4)
				if !valid {
					return "", false
				}
				r += 4
				if 0xD800 <= cp && cp < 0xE000 {
					// Surrogate half.  Only a high half directly followed
					// by an escaped low half forms a code point.
					hi := cp
					cp = 0xFFFD
					if hi < 0xDC00 && r+1 < end && s[r] == '\\' && s[r+1] == 'u' {
						lo, paired := _UnHex(s, r+2, r+6)
						if paired && 0xDC00 <= lo && lo < 0xE000 {
							cp = 0x10000 + (hi-0xD800)<<10 + (lo - 0xDC00)
							r += 6
						}
					}
				}
				w += utf8.EncodeRune(cp, b[w:])
				continue
			}
			// Single-character escape: one byte consumed, one produced.
			r++
			w++

		// Control characters are invalid, but we've seen raw \n,
		// which is let through by the ASCII case below.
		case c < ' ' && c != '\n':
			return "", false

		// ASCII
		case c < utf8.RuneSelf:
			b[w] = c
			r++
			w++

		// Coerce to well-formed UTF-8.
		default:
			decoded, size := utf8.DecodeRuneInString(s[r:])
			r += size
			w += utf8.EncodeRune(decoded, b[w:])
		}
	}
	return string(b[0:w]), true
}
// Quote quotes the raw string s using JSON syntax,
// so that Unquote(Quote(s)) = s, true for any s that is valid UTF-8.
//
// The characters " and \ and the control characters \b \f \n \r \t use
// their two-character escapes.  Other printable ASCII is copied as is.
// Remaining code points up to U+FFFF are written as \uXXXX.  Code points
// above U+FFFF do not fit in four hex digits, so their UTF-8 bytes are
// copied through unchanged, which JSON permits.  Bytes of s that are
// not valid UTF-8 come out as \ufffd.
func Quote(s string) string {
	chr := make([]byte, 6)
	chr0 := chr[0:1]
	b := new(bytes.Buffer)
	chr[0] = '"'
	b.Write(chr0)
	for i, c := range s {
		// esc is the letter of a two-character escape, or 0 if c has none.
		var esc byte
		switch c {
		case '"', '\\':
			esc = byte(c)
		case '\b':
			esc = 'b'
		case '\f':
			esc = 'f'
		case '\n':
			esc = 'n'
		case '\r':
			esc = 'r'
		case '\t':
			esc = 't'
		}

		switch {
		case esc != 0:
			chr[0] = '\\'
			chr[1] = esc
			b.Write(chr[0:2])
		case 0x20 <= c && c < utf8.RuneSelf:
			chr[0] = byte(c)
			b.Write(chr0)
		case c > 0xFFFF:
			// range only yields a value this large from a complete
			// four-byte sequence starting at s[i], so s[i:i+4] exists.
			for k := 0; k < 4; k++ {
				chr[k] = s[i+k]
			}
			b.Write(chr[0:4])
		default:
			chr[0] = '\\'
			chr[1] = 'u'
			_ToHex(chr[2:6], int(c))
			b.Write(chr)
		}
	}
	chr[0] = '"'
	b.Write(chr0)
	return b.String()
}
// _Lexer
//
// A _Lexer walks the string s one token at a time.  Each call to Next
// records the token it found in kind and token and moves i forward.
type _Lexer struct {
// s is the complete input text.
s string
// i is the byte offset in s at which the next call to Next starts
// scanning.  After Next it is just past the current token, except
// for an unrecognized byte (kind '?'), where it stays on that byte.
i int
// kind classifies the current token: 0 at end of input, '1' for a
// token starting with '-' or a digit, 'a' for a token starting with
// a letter, '"' for a quoted string, the character itself for one of
// [ ] : { } , and '?' for any other byte.
kind int
// token is the text of the current token, a substring of s
// (empty at end of input).
token string
}
// punct reports whether c is one of the bytes that ends a bare token:
// a double quote or one of [ ] : { } ,
func punct(c byte) bool {
	switch c {
	case '"', '[', ']', ':', '{', '}', ',':
		return true
	}
	return false
}
// white reports whether c is treated as white space by the lexer:
// space, tab, carriage return, newline or vertical tab.
func white(c byte) bool {
	switch c {
	case ' ', '\t', '\r', '\n', '\v':
		return true
	}
	return false
}
// skipwhite returns the index of the first byte at or after p[i]
// that is not white space, or i unchanged if i is already at or
// past the end of p.
func skipwhite(p string, i int) int {
	for ; i < len(p); i++ {
		if !white(p[i]) {
			break
		}
	}
	return i
}
// skiptoken returns the index of the first byte at or after p[i]
// that is punctuation or white space, or len(p) if there is none;
// that is, the index just past the bare token starting at i.
func skiptoken(p string, i int) int {
	for ; i < len(p); i++ {
		if c := p[i]; punct(c) || white(c) {
			break
		}
	}
	return i
}
// skipstring returns the index just past the quoted string that opens
// at p[i].  A backslash hides the byte after it, so an escaped quote
// does not end the string.  If the string is not terminated the result
// is len(p); it is never larger, even when p ends in a lone backslash,
// so the caller can always slice p[i:result].
func skipstring(p string, i int) int {
	i++ // step over the opening quote
	for i < len(p) {
		switch p[i] {
		case '"':
			return i + 1
		case '\\':
			i += 2 // the backslash and the byte it escapes
		default:
			i++
		}
	}
	return len(p)
}
// Next scans the next token from t.s, starting at offset t.i and
// skipping leading white space, and stores the result in t.kind and
// t.token.  At end of input it sets kind 0 and an empty token.  A byte
// that cannot start a token gets kind '?' and is not consumed, so t.i
// is left pointing at it.
func (t *_Lexer) Next() {
	s := t.s
	start := skipwhite(s, t.i)
	if start >= len(s) {
		t.kind, t.token, t.i = 0, "", len(s)
		return
	}

	var kind, end int
	c := s[start]
	switch {
	case c == '-' || ('0' <= c && c <= '9'):
		// Number-like token.
		kind, end = '1', skiptoken(s, start)
	case ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'):
		// Bare word such as true, false or null.
		kind, end = 'a', skiptoken(s, start)
	case c == '"':
		kind, end = '"', skipstring(s, start)
	case c == '[', c == ']', c == ':', c == '{', c == '}', c == ',':
		// Punctuation is its own kind.
		kind, end = int(c), start+1
	default:
		t.kind = '?'
		t.token = s[start : start+1]
		t.i = start
		return
	}
	t.kind = kind
	t.token = s[start:end]
	t.i = end
}
// Parser
//
// Implements parsing but not the actions. Those are
// carried out by the implementation of the Builder interface.
// A Builder represents the object being created.
// Calling a method like Int64(i) sets that object to i.
// Calling a method like Elem(i) or Key(s) creates a
// new builder for a subpiece of the object (logically,
// an array element or a map key).
//
// There are two Builders, in other files.
// The JsonBuilder builds a generic Json structure
// in which maps are maps.
// The StructBuilder copies data into a possibly
// nested data structure, using the "map keys"
// as struct field names.
// _Value is the empty interface, so a value of any type can be stored in it.
// NOTE(review): nothing in the code visible here refers to _Value;
// presumably the Builder implementations in other files use it. Confirm.
type _Value interface{}
// BUG(rsc): The json Builder interface needs to be
// reconciled with the xml Builder interface.
// A Builder is an interface implemented by clients and passed
// to the JSON parser. It gives clients full control over the
// eventual representation returned by the parser.
type Builder interface {
// Set value
//
// The parser calls exactly one of these for each value it recognizes.
// A numeric token is reported through Int64 if it converts to a signed
// 64-bit integer, otherwise through Uint64 if it converts to an
// unsigned one, otherwise through Float64.
Int64(i int64)
Uint64(i uint64)
Float64(f float64)
// String receives the already unquoted contents of a JSON string.
String(s string)
// Bool receives true or false for the corresponding JSON literal.
Bool(b bool)
// Null is called for the JSON literal null.
Null()
// Array is called when a '[' is seen, before any call to Elem.
Array()
// Map is called when a '{' is seen, before any call to Key.
Map()
// Create sub-Builders
//
// Elem is called once per array element, with i counting up from 0;
// the returned Builder receives that element's value.
Elem(i int) Builder
// Key is called once per object member with the unquoted key s;
// the returned Builder receives that member's value.
Key(s string) Builder
// Flush changes to parent Builder if necessary.
// The parser calls Flush on a Builder when it has finished with the
// value handed to that Builder, whether or not parsing succeeded.
Flush()
}
// parse consumes one JSON value whose first token is the lexer's
// current token and reports it to build.  On success it advances the
// lexer past the value and returns true.  On failure it returns false
// and leaves the lexer on the token where parsing stopped.  build.Flush
// is called in both cases.
func parse(lex *_Lexer, build Builder) bool {
	parsed := false
Done:
	switch lex.kind {
	case 0:
		// End of input: there is no value to parse.

	case '1':
		tok := lex.token
		// If the number is exactly an integer, use that.
		if v, err := strconv.Atoi64(tok); err == nil {
			build.Int64(v)
			parsed = true
			break Done
		}
		if v, err := strconv.Atoui64(tok); err == nil {
			build.Uint64(v)
			parsed = true
			break Done
		}
		// Fall back to floating point.
		if v, err := strconv.Atof64(tok); err == nil {
			build.Float64(v)
			parsed = true
		}

	case 'a':
		switch lex.token {
		case "true", "false":
			build.Bool(lex.token == "true")
			parsed = true
		case "null":
			build.Null()
			parsed = true
		}

	case '"':
		str, valid := Unquote(lex.token)
		if !valid {
			break Done
		}
		build.String(str)
		parsed = true

	case '[':
		// array
		build.Array()
		lex.Next()
		for n := 0; lex.kind != ']'; n++ {
			// Every element after the first must follow a comma.
			if n > 0 {
				if lex.kind != ',' {
					break Done
				}
				lex.Next()
			}
			if !parse(lex, build.Elem(n)) {
				break Done
			}
		}
		parsed = true

	case '{':
		// map
		lex.Next()
		build.Map()
		for first := true; lex.kind != '}'; first = false {
			// Every member after the first must follow a comma.
			if !first {
				if lex.kind != ',' {
					break Done
				}
				lex.Next()
			}
			// A member is a quoted key, a colon, then a value.
			if lex.kind != '"' {
				break Done
			}
			key, valid := Unquote(lex.token)
			if !valid {
				break Done
			}
			lex.Next()
			if lex.kind != ':' {
				break Done
			}
			lex.Next()
			if !parse(lex, build.Key(key)) {
				break Done
			}
		}
		parsed = true
	}

	if parsed {
		// Step past the last token of the value (the scalar itself,
		// or the closing bracket or brace).
		lex.Next()
	}
	build.Flush()
	return parsed
}
// Parse parses the JSON syntax string s and makes calls to
// the builder to construct a parsed representation.
// On success, it returns with ok set to true; s must hold exactly
// one value, with nothing but white space after it.
// On error, it returns with ok set to false, errindx set
// to the lexer's byte offset in s when parsing stopped,
// and errtok set to the token at which it stopped.
func Parse(s string, builder Builder) (ok bool, errindx int, errtok string) {
	var lex _Lexer
	lex.s = s
	lex.Next()
	// A successful parse must also have consumed all the input (kind 0 is EOF).
	if parse(&lex, builder) && lex.kind == 0 {
		return true, 0, ""
	}
	return false, lex.i, lex.token
}