| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package json |
| |
| import ( |
| "bytes" |
| "fmt" |
| "io" |
| "regexp" |
| "strconv" |
| "unicode/utf8" |
| |
| "google.golang.org/protobuf/internal/errors" |
| ) |
| |
// call records which Decoder method, Read or Peek, was invoked most recently.
type call uint8

// readCall is the zero value of call, so a Decoder whose lastCall field has
// not been set starts out in the readCall state.
const (
	readCall = call(iota)
	peekCall
)
| |
| // Decoder is a token-based JSON decoder. |
| type Decoder struct { |
| // lastCall is last method called, either readCall or peekCall. |
| // Initial value is readCall. |
| lastCall call |
| |
| // value contains the last read value. |
| value Value |
| |
| // err contains the last read error. |
| err error |
| |
| // startStack is a stack containing StartObject and StartArray types. The |
| // top of stack represents the object or the array the current value is |
| // directly located in. |
| startStack []Type |
| |
| // orig is used in reporting line and column. |
| orig []byte |
| // in contains the unconsumed input. |
| in []byte |
| } |
| |
| // NewDecoder returns a Decoder to read the given []byte. |
| func NewDecoder(b []byte) *Decoder { |
| return &Decoder{orig: b, in: b} |
| } |
| |
| // Peek looks ahead and returns the next JSON type without advancing a read. |
| func (d *Decoder) Peek() Type { |
| defer func() { d.lastCall = peekCall }() |
| if d.lastCall == readCall { |
| d.value, d.err = d.Read() |
| } |
| return d.value.typ |
| } |
| |
| // Read returns the next JSON value. It will return an error if there is no |
| // valid value. For String types containing invalid UTF8 characters, a non-fatal |
| // error is returned and caller can call Read for the next value. |
| func (d *Decoder) Read() (Value, error) { |
| defer func() { d.lastCall = readCall }() |
| if d.lastCall == peekCall { |
| return d.value, d.err |
| } |
| |
| value, err := d.parseNext() |
| if err != nil { |
| return Value{}, err |
| } |
| n := value.size |
| |
| switch value.typ { |
| case EOF: |
| if len(d.startStack) != 0 || |
| d.value.typ&Null|Bool|Number|String|EndObject|EndArray == 0 { |
| return Value{}, io.ErrUnexpectedEOF |
| } |
| |
| case Null: |
| if !d.isValueNext() { |
| return Value{}, d.newSyntaxError("unexpected value null") |
| } |
| |
| case Bool, Number: |
| if !d.isValueNext() { |
| return Value{}, d.newSyntaxError("unexpected value %v", value.Raw()) |
| } |
| |
| case String: |
| if d.isValueNext() { |
| break |
| } |
| // Check if this is for an object name. |
| if d.value.typ&(StartObject|comma) == 0 { |
| return Value{}, d.newSyntaxError("unexpected value %v", value.Raw()) |
| } |
| d.in = d.in[n:] |
| d.consume(0) |
| if len(d.in) == 0 { |
| return Value{}, d.newSyntaxError(`unexpected EOF, missing ":" after object name`) |
| } |
| if c := d.in[0]; c != ':' { |
| return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c)) |
| } |
| n = 1 |
| value.typ = Name |
| |
| case StartObject, StartArray: |
| if !d.isValueNext() { |
| return Value{}, d.newSyntaxError("unexpected character %v", value.Raw()) |
| } |
| d.startStack = append(d.startStack, value.typ) |
| |
| case EndObject: |
| if len(d.startStack) == 0 || |
| d.value.typ == comma || |
| d.startStack[len(d.startStack)-1] != StartObject { |
| return Value{}, d.newSyntaxError("unexpected character }") |
| } |
| d.startStack = d.startStack[:len(d.startStack)-1] |
| |
| case EndArray: |
| if len(d.startStack) == 0 || |
| d.value.typ == comma || |
| d.startStack[len(d.startStack)-1] != StartArray { |
| return Value{}, d.newSyntaxError("unexpected character ]") |
| } |
| d.startStack = d.startStack[:len(d.startStack)-1] |
| |
| case comma: |
| if len(d.startStack) == 0 || |
| d.value.typ&(Null|Bool|Number|String|EndObject|EndArray) == 0 { |
| return Value{}, d.newSyntaxError("unexpected character ,") |
| } |
| } |
| |
| // Update d.value only after validating value to be in the right sequence. |
| d.value = value |
| d.in = d.in[n:] |
| |
| if d.value.typ == comma { |
| return d.Read() |
| } |
| return value, nil |
| } |
| |
// errRegexp extracts a short snippet from the start of the input for use in
// error messages: a run of up to 32 non-delimiter characters (letters, digits,
// '-', '+', '.', '_') or, failing that, whatever single character `.` matches.
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
| |
| // parseNext parses for the next JSON value. It returns a Value object for |
| // different types, except for Name. It does not handle whether the next value |
| // is in a valid sequence or not. |
| func (d *Decoder) parseNext() (value Value, err error) { |
| // Trim leading spaces. |
| d.consume(0) |
| |
| in := d.in |
| if len(in) == 0 { |
| return d.newValue(EOF, nil, 0), nil |
| } |
| |
| switch in[0] { |
| case 'n': |
| n := matchWithDelim("null", in) |
| if n == 0 { |
| return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in)) |
| } |
| return d.newValue(Null, in, n), nil |
| |
| case 't': |
| n := matchWithDelim("true", in) |
| if n == 0 { |
| return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in)) |
| } |
| return d.newBoolValue(in, n, true), nil |
| |
| case 'f': |
| n := matchWithDelim("false", in) |
| if n == 0 { |
| return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in)) |
| } |
| return d.newBoolValue(in, n, false), nil |
| |
| case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': |
| n, ok := consumeNumber(in) |
| if !ok { |
| return Value{}, d.newSyntaxError("invalid number %s", errRegexp.Find(in)) |
| } |
| return d.newValue(Number, in, n), nil |
| |
| case '"': |
| s, n, err := d.parseString(in) |
| if err != nil { |
| return Value{}, err |
| } |
| return d.newStringValue(in, n, s), nil |
| |
| case '{': |
| return d.newValue(StartObject, in, 1), nil |
| |
| case '}': |
| return d.newValue(EndObject, in, 1), nil |
| |
| case '[': |
| return d.newValue(StartArray, in, 1), nil |
| |
| case ']': |
| return d.newValue(EndArray, in, 1), nil |
| |
| case ',': |
| return d.newValue(comma, in, 1), nil |
| } |
| return Value{}, d.newSyntaxError("invalid value %s", errRegexp.Find(in)) |
| } |
| |
// position translates the byte offset idx within orig into a 1-based line
// number and a 1-based column number. The column counts runes since the last
// newline, so a multi-byte character advances it by one.
func position(orig []byte, idx int) (int, int) {
	prefix := orig[:idx]
	line := 1 + bytes.Count(prefix, []byte{'\n'})
	// LastIndexByte yields -1 when there is no newline, which makes the
	// current line start at offset 0.
	lineStart := bytes.LastIndexByte(prefix, '\n') + 1
	column := 1 + utf8.RuneCount(prefix[lineStart:])
	return line, column
}
| |
| // newSyntaxError returns an error with line and column information useful for |
| // syntax errors. |
| func (d *Decoder) newSyntaxError(f string, x ...interface{}) error { |
| e := errors.New(f, x...) |
| line, column := position(d.orig, len(d.orig)-len(d.in)) |
| return errors.New("syntax error (line %d:%d): %v", line, column, e) |
| } |
| |
| // matchWithDelim matches s with the input b and verifies that the match |
| // terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]"). |
| // As a special case, EOF is considered a delimiter. It returns the length of s |
| // if there is a match, else 0. |
| func matchWithDelim(s string, b []byte) int { |
| if !bytes.HasPrefix(b, []byte(s)) { |
| return 0 |
| } |
| |
| n := len(s) |
| if n < len(b) && isNotDelim(b[n]) { |
| return 0 |
| } |
| return n |
| } |
| |
// isNotDelim reports whether c is not a delimiter, i.e. whether it is an
// ASCII letter, an ASCII digit, or one of '-', '+', '.', '_'.
func isNotDelim(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z':
	case 'A' <= c && c <= 'Z':
	case '0' <= c && c <= '9':
	case c == '-', c == '+', c == '.', c == '_':
	default:
		return false
	}
	return true
}
| |
| // consume consumes n bytes of input and any subsequent whitespace. |
| func (d *Decoder) consume(n int) { |
| d.in = d.in[n:] |
| for len(d.in) > 0 { |
| switch d.in[0] { |
| case ' ', '\n', '\r', '\t': |
| d.in = d.in[1:] |
| default: |
| return |
| } |
| } |
| } |
| |
| // isValueNext returns true if next type should be a JSON value: Null, |
| // Number, String or Bool. |
| func (d *Decoder) isValueNext() bool { |
| if len(d.startStack) == 0 { |
| return d.value.typ == 0 |
| } |
| |
| start := d.startStack[len(d.startStack)-1] |
| switch start { |
| case StartObject: |
| return d.value.typ&Name != 0 |
| case StartArray: |
| return d.value.typ&(StartArray|comma) != 0 |
| } |
| panic(fmt.Sprintf( |
| "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v", |
| d.value.typ, start)) |
| } |
| |
| // newValue constructs a Value for given Type. |
| func (d *Decoder) newValue(typ Type, input []byte, size int) Value { |
| return Value{ |
| typ: typ, |
| input: d.orig, |
| start: len(d.orig) - len(input), |
| size: size, |
| } |
| } |
| |
| // newBoolValue constructs a Value for a JSON boolean. |
| func (d *Decoder) newBoolValue(input []byte, size int, b bool) Value { |
| return Value{ |
| typ: Bool, |
| input: d.orig, |
| start: len(d.orig) - len(input), |
| size: size, |
| boo: b, |
| } |
| } |
| |
| // newStringValue constructs a Value for a JSON string. |
| func (d *Decoder) newStringValue(input []byte, size int, s string) Value { |
| return Value{ |
| typ: String, |
| input: d.orig, |
| start: len(d.orig) - len(input), |
| size: size, |
| str: s, |
| } |
| } |
| |
| // Clone returns a copy of the Decoder for use in reading ahead the next JSON |
| // object, array or other values without affecting current Decoder. |
| func (d *Decoder) Clone() *Decoder { |
| ret := *d |
| ret.startStack = append([]Type(nil), ret.startStack...) |
| return &ret |
| } |
| |
| // Value provides a parsed JSON type and value. |
| // |
| // The original input slice is stored in this struct in order to compute for |
| // position as needed. The raw JSON value is derived from the original input |
| // slice given start and size. |
| // |
| // For JSON boolean and string, it holds the converted value in boo and str |
| // fields respectively. For JSON number, the raw JSON value holds a valid number |
| // which is converted only in Int or Float. Other JSON types do not require any |
| // additional data. |
| type Value struct { |
| typ Type |
| input []byte |
| start int |
| size int |
| boo bool |
| str string |
| } |
| |
| func (v Value) newError(f string, x ...interface{}) error { |
| e := errors.New(f, x...) |
| line, col := v.Position() |
| return errors.New("error (line %d:%d): %v", line, col, e) |
| } |
| |
// Type returns the JSON type of the value. The zero Value reports the zero
// Type.
func (v Value) Type() Type {
	return v.typ
}
| |
// Position returns the 1-based line and column at which the value starts in
// the original input. The column is counted in runes.
func (v Value) Position() (int, int) {
	return position(v.input, v.start)
}
| |
| // Bool returns the bool value if token is Bool, else it will return an error. |
| func (v Value) Bool() (bool, error) { |
| if v.typ != Bool { |
| return false, v.newError("%s is not a bool", v.Raw()) |
| } |
| return v.boo, nil |
| } |
| |
| // String returns the string value for a JSON string token or the read value in |
| // string if token is not a string. |
| func (v Value) String() string { |
| if v.typ != String { |
| return v.Raw() |
| } |
| return v.str |
| } |
| |
| // Name returns the object name if token is Name, else it will return an error. |
| func (v Value) Name() (string, error) { |
| if v.typ != Name { |
| return "", v.newError("%s is not an object name", v.Raw()) |
| } |
| return v.str, nil |
| } |
| |
| // Raw returns the read value in string. |
| func (v Value) Raw() string { |
| return string(v.input[v.start : v.start+v.size]) |
| } |
| |
| // Float returns the floating-point number if token is Number, else it will |
| // return an error. |
| // |
| // The floating-point precision is specified by the bitSize parameter: 32 for |
| // float32 or 64 for float64. If bitSize=32, the result still has type float64, |
| // but it will be convertible to float32 without changing its value. It will |
| // return an error if the number exceeds the floating point limits for given |
| // bitSize. |
| func (v Value) Float(bitSize int) (float64, error) { |
| if v.typ != Number { |
| return 0, v.newError("%s is not a number", v.Raw()) |
| } |
| f, err := strconv.ParseFloat(v.Raw(), bitSize) |
| if err != nil { |
| return 0, v.newError("%v", err) |
| } |
| return f, nil |
| } |
| |
| // Int returns the signed integer number if token is Number, else it will |
| // return an error. |
| // |
| // The given bitSize specifies the integer type that the result must fit into. |
| // It returns an error if the number is not an integer value or if the result |
| // exceeds the limits for given bitSize. |
| func (v Value) Int(bitSize int) (int64, error) { |
| s, err := v.getIntStr() |
| if err != nil { |
| return 0, err |
| } |
| n, err := strconv.ParseInt(s, 10, bitSize) |
| if err != nil { |
| return 0, v.newError("%v", err) |
| } |
| return n, nil |
| } |
| |
| // Uint returns the signed integer number if token is Number, else it will |
| // return an error. |
| // |
| // The given bitSize specifies the unsigned integer type that the result must |
| // fit into. It returns an error if the number is not an unsigned integer value |
| // or if the result exceeds the limits for given bitSize. |
| func (v Value) Uint(bitSize int) (uint64, error) { |
| s, err := v.getIntStr() |
| if err != nil { |
| return 0, err |
| } |
| n, err := strconv.ParseUint(s, 10, bitSize) |
| if err != nil { |
| return 0, v.newError("%v", err) |
| } |
| return n, nil |
| } |
| |
| func (v Value) getIntStr() (string, error) { |
| if v.typ != Number { |
| return "", v.newError("%s is not a number", v.input) |
| } |
| parts, ok := parseNumber(v.input[v.start : v.start+v.size]) |
| if !ok { |
| return "", v.newError("%s is not a number", v.input) |
| } |
| num, ok := normalizeToIntString(parts) |
| if !ok { |
| return "", v.newError("cannot convert %s to integer", v.input) |
| } |
| return num, nil |
| } |