internal/encoding/json: rewrite to a token-based encoder and decoder
The previous decoder decoded a JSON number into a float64, which lacks
64-bit integer precision.
I attempted to retrofit it by storing the raw bytes and the parsed-out
number parts; see golang.org/cl/164377. While that is possible, the
encoding logic for Value is not symmetrical with the decoding logic, which
can be confusing since both utilize the same Value struct.
Joe and I decided that it would be better to rewrite the JSON encoder
and decoder to be token-based instead, removing the need for sharing a
model type plus making it more efficient.
Change-Id: Ic0601428a824be4e20141623409ab4d92b6167c7
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/165677
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/internal/encoding/json/decode.go b/internal/encoding/json/decode.go
index 769619b..543abbc 100644
--- a/internal/encoding/json/decode.go
+++ b/internal/encoding/json/decode.go
@@ -6,189 +6,389 @@
import (
"bytes"
+ "fmt"
"io"
"regexp"
+ "strconv"
"unicode/utf8"
"github.com/golang/protobuf/v2/internal/errors"
)
-type syntaxError struct{ error }
+// Decoder is a token-based JSON decoder.
+type Decoder struct {
+ lastType Type
-func newSyntaxError(f string, x ...interface{}) error {
- return syntaxError{errors.New(f, x...)}
+ // startStack is a stack containing StartObject and StartArray types. The
+ // top of stack represents the object or the array the current value is
+ // directly located in.
+ startStack []Type
+
+ // orig is used in reporting line and column.
+ orig []byte
+ // in contains the unconsumed input.
+ in []byte
}
-// Unmarshal parses b as the JSON format.
-// It returns a Value, which represents the input as an AST.
-func Unmarshal(b []byte) (Value, error) {
- p := decoder{in: b}
- p.consume(0) // trim leading spaces
- v, err := p.unmarshalValue()
- if !p.nerr.Merge(err) {
- if e, ok := err.(syntaxError); ok {
- b = b[:len(b)-len(p.in)] // consumed input
- line := bytes.Count(b, []byte("\n")) + 1
- if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
- b = b[i+1:]
- }
- column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
- err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
- }
+// NewDecoder returns a Decoder to read the given []byte.
+func NewDecoder(b []byte) *Decoder {
+ return &Decoder{orig: b, in: b}
+}
+
+// ReadNext returns the next JSON value. It will return an error if there is no
+// valid JSON value. For String types containing invalid UTF8 characters, a
+// non-fatal error is returned and caller can call ReadNext for the next value.
+//
+// Commas are consumed internally and are never returned. A String that
+// directly follows a StartObject or a comma inside an object is returned as a
+// Name after its trailing ':' has been consumed. If the input ends right after
+// such a name, io.ErrUnexpectedEOF is returned.
+func (d *Decoder) ReadNext() (Value, error) {
+ var nerr errors.NonFatal
+ value, n, err := d.parseNext()
+ if !nerr.Merge(err) {
return Value{}, err
}
- if len(p.in) > 0 {
- return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
+
+ switch value.typ {
+ case EOF:
+ // NOTE(review): in Go, & binds tighter than |, so the second operand
+ // below parses as ((d.lastType&Null)|Bool|...|EndArray) == 0. If the
+ // Type constants are non-zero bit flags (as their use as masks
+ // elsewhere suggests), it is always false and only the stack check can
+ // trigger. Parenthesizing the mask would make empty input fail with
+ // ErrUnexpectedEOF -- confirm the intended behavior before changing.
+ if len(d.startStack) != 0 ||
+ d.lastType&Null|Bool|Number|String|EndObject|EndArray == 0 {
+ return Value{}, io.ErrUnexpectedEOF
+ }
+
+ case Null:
+ if !d.isValueNext() {
+ return Value{}, d.newSyntaxError("unexpected value null")
+ }
+
+ case Bool, Number:
+ if !d.isValueNext() {
+ return Value{}, d.newSyntaxError("unexpected value %v", value)
+ }
+
+ case String:
+ if d.isValueNext() {
+ break
+ }
+ // Check if this is for an object name.
+ if d.lastType&(StartObject|comma) == 0 {
+ return Value{}, d.newSyntaxError("unexpected value %q", value)
+ }
+ d.in = d.in[n:]
+ d.consume(0)
+ // The input may end right after the name, e.g. `{"name"`. Indexing
+ // d.in without this check would panic.
+ if len(d.in) == 0 {
+ return Value{}, io.ErrUnexpectedEOF
+ }
+ if c := d.in[0]; c != ':' {
+ return Value{}, d.newSyntaxError(`unexpected character %v, missing ":" after object name`, string(c))
+ }
+ // The string itself is already consumed; n now covers only the ':'.
+ n = 1
+ value.typ = Name
+
+ case StartObject, StartArray:
+ if !d.isValueNext() {
+ return Value{}, d.newSyntaxError("unexpected character %v", value)
+ }
+ d.startStack = append(d.startStack, value.typ)
+
+ case EndObject:
+ if len(d.startStack) == 0 ||
+ d.lastType == comma ||
+ d.startStack[len(d.startStack)-1] != StartObject {
+ return Value{}, d.newSyntaxError("unexpected character }")
+ }
+ d.startStack = d.startStack[:len(d.startStack)-1]
+
+ case EndArray:
+ if len(d.startStack) == 0 ||
+ d.lastType == comma ||
+ d.startStack[len(d.startStack)-1] != StartArray {
+ return Value{}, d.newSyntaxError("unexpected character ]")
+ }
+ d.startStack = d.startStack[:len(d.startStack)-1]
+
+ case comma:
+ if len(d.startStack) == 0 ||
+ d.lastType&(Null|Bool|Number|String|EndObject|EndArray) == 0 {
+ return Value{}, d.newSyntaxError("unexpected character ,")
+ }
}
- return v, p.nerr.E
+
+ // Update lastType only after validating value to be in the right
+ // sequence.
+ d.lastType = value.typ
+ d.in = d.in[n:]
+
+ // Commas only separate values; skip them and return what follows.
+ if d.lastType == comma {
+ return d.ReadNext()
+ }
+ return value, nerr.E
}
-type decoder struct {
- nerr errors.NonFatal
- in []byte
-}
+var (
+ literalRegexp = regexp.MustCompile(`^(null|true|false)`)
+ // Any sequence that looks like a non-delimiter (for error reporting).
+ errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)
+)
-var literalRegexp = regexp.MustCompile("^(null|true|false)")
+// parseNext parses for the next JSON value. It returns a Value object for
+// different types, except for Name. It also returns the size that was parsed.
+// It does not handle whether the next value is in a valid sequence or not, it
+// only ensures that the value is a valid one.
+func (d *Decoder) parseNext() (value Value, n int, err error) {
+ // Trim leading spaces.
+ d.consume(0)
-func (p *decoder) unmarshalValue() (Value, error) {
- if len(p.in) == 0 {
- return Value{}, io.ErrUnexpectedEOF
+ in := d.in
+ if len(in) == 0 {
+ return d.newValue(EOF, nil, nil), 0, nil
}
- switch p.in[0] {
+
+ switch in[0] {
case 'n', 't', 'f':
- if n := matchWithDelim(literalRegexp, p.in); n > 0 {
- var v Value
- switch p.in[0] {
- case 'n':
- v = rawValueOf(nil, p.in[:n:n])
- case 't':
- v = rawValueOf(true, p.in[:n:n])
- case 'f':
- v = rawValueOf(false, p.in[:n:n])
- }
- p.consume(n)
- return v, nil
+ n := matchWithDelim(literalRegexp, in)
+ if n == 0 {
+ return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
}
- return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
+ switch in[0] {
+ case 'n':
+ return d.newValue(Null, in[:n], nil), n, nil
+ case 't':
+ return d.newValue(Bool, in[:n], true), n, nil
+ case 'f':
+ return d.newValue(Bool, in[:n], false), n, nil
+ }
+
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
- return p.unmarshalNumber()
+ num, n := parseNumber(in)
+ if num == nil {
+ return Value{}, 0, d.newSyntaxError("invalid number %s", errRegexp.Find(in))
+ }
+ return d.newValue(Number, in[:n], num), n, nil
+
case '"':
- return p.unmarshalString()
- case '[':
- return p.unmarshalArray()
+ var nerr errors.NonFatal
+ s, n, err := d.parseString(in)
+ if !nerr.Merge(err) {
+ return Value{}, 0, err
+ }
+ return d.newValue(String, in[:n], s), n, nerr.E
+
case '{':
- return p.unmarshalObject()
- default:
- return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
+ return d.newValue(StartObject, in[:1], nil), 1, nil
+
+ case '}':
+ return d.newValue(EndObject, in[:1], nil), 1, nil
+
+ case '[':
+ return d.newValue(StartArray, in[:1], nil), 1, nil
+
+ case ']':
+ return d.newValue(EndArray, in[:1], nil), 1, nil
+
+ case ',':
+ return d.newValue(comma, in[:1], nil), 1, nil
}
+ return Value{}, 0, d.newSyntaxError("invalid value %s", errRegexp.Find(in))
}
-func (p *decoder) unmarshalArray() (Value, error) {
- b := p.in
- var elems []Value
- if err := p.consumeChar('[', "at start of array"); err != nil {
- return Value{}, err
+// position returns line and column number of parsed bytes.
+func (d *Decoder) position() (int, int) {
+ // Calculate line and column of consumed input.
+ b := d.orig[:len(d.orig)-len(d.in)]
+ line := bytes.Count(b, []byte("\n")) + 1
+ if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
+ b = b[i+1:]
}
- if len(p.in) > 0 && p.in[0] != ']' {
- for len(p.in) > 0 {
- v, err := p.unmarshalValue()
- if !p.nerr.Merge(err) {
- return Value{}, err
- }
- elems = append(elems, v)
- if !p.tryConsumeChar(',') {
- break
- }
- }
- }
- if err := p.consumeChar(']', "at end of array"); err != nil {
- return Value{}, err
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(elems, b[:len(b):len(b)]), nil
+ column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
+ return line, column
}
-func (p *decoder) unmarshalObject() (Value, error) {
- b := p.in
- var items [][2]Value
- if err := p.consumeChar('{', "at start of object"); err != nil {
- return Value{}, err
- }
- if len(p.in) > 0 && p.in[0] != '}' {
- for len(p.in) > 0 {
- k, err := p.unmarshalString()
- if !p.nerr.Merge(err) {
- return Value{}, err
- }
- if err := p.consumeChar(':', "in object"); err != nil {
- return Value{}, err
- }
- v, err := p.unmarshalValue()
- if !p.nerr.Merge(err) {
- return Value{}, err
- }
- items = append(items, [2]Value{k, v})
- if !p.tryConsumeChar(',') {
- break
- }
- }
- }
- if err := p.consumeChar('}', "at end of object"); err != nil {
- return Value{}, err
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(items, b[:len(b):len(b)]), nil
+// newSyntaxError returns an error with line and column information useful for
+// syntax errors.
+func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
+ e := errors.New(f, x...)
+ line, column := d.position()
+ return errors.New("syntax error (line %d:%d): %v", line, column, e)
}
-func (p *decoder) consumeChar(c byte, msg string) error {
- if p.tryConsumeChar(c) {
- return nil
- }
- if len(p.in) == 0 {
- return io.ErrUnexpectedEOF
- }
- return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
-}
-
-func (p *decoder) tryConsumeChar(c byte) bool {
- if len(p.in) > 0 && p.in[0] == c {
- p.consume(1)
- return true
- }
- return false
-}
-
-// consume consumes n bytes of input and any subsequent whitespace.
-func (p *decoder) consume(n int) {
- p.in = p.in[n:]
- for len(p.in) > 0 {
- switch p.in[0] {
- case ' ', '\n', '\r', '\t':
- p.in = p.in[1:]
- default:
- return
- }
- }
-}
-
-// Any sequence that looks like a non-delimiter (for error reporting).
-var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")
-
// matchWithDelim matches r with the input b and verifies that the match
// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
// As a special case, EOF is considered a delimiter.
func matchWithDelim(r *regexp.Regexp, b []byte) int {
n := len(r.Find(b))
if n < len(b) {
- // Check that that the next character is a delimiter.
- c := b[n]
- notDelim := (c == '-' || c == '+' || c == '.' || c == '_' ||
- ('a' <= c && c <= 'z') ||
- ('A' <= c && c <= 'Z') ||
- ('0' <= c && c <= '9'))
- if notDelim {
+ // Check that the next character is a delimiter.
+ if isNotDelim(b[n]) {
return 0
}
}
return n
}
+
+// isNotDelim returns true if the given byte is not a delimiter character.
+func isNotDelim(c byte) bool {
+ return (c == '-' || c == '+' || c == '.' || c == '_' ||
+ ('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z') ||
+ ('0' <= c && c <= '9'))
+}
+
+// consume consumes n bytes of input and any subsequent whitespace.
+func (d *Decoder) consume(n int) {
+ d.in = d.in[n:]
+ for len(d.in) > 0 {
+ switch d.in[0] {
+ case ' ', '\n', '\r', '\t':
+ d.in = d.in[1:]
+ default:
+ return
+ }
+ }
+}
+
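+// isValueNext returns true if the next token is expected to be a JSON value
+// (Null, Bool, Number, String, StartObject or StartArray), based on the last
+// token read and the enclosing object or array.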
+func (d *Decoder) isValueNext() bool {
+ if len(d.startStack) == 0 {
+ return d.lastType == 0
+ }
+
+ start := d.startStack[len(d.startStack)-1]
+ switch start {
+ case StartObject:
+ return d.lastType&Name != 0
+ case StartArray:
+ return d.lastType&(StartArray|comma) != 0
+ }
+ panic(fmt.Sprintf(
+ "unreachable logic in Decoder.isValueNext, lastType: %v, startStack: %v",
+ d.lastType, start))
+}
+
+// newValue constructs a Value.
+func (d *Decoder) newValue(typ Type, input []byte, value interface{}) Value {
+ line, column := d.position()
+ return Value{
+ input: input,
+ line: line,
+ column: column,
+ typ: typ,
+ value: value,
+ }
+}
+
+// Value contains a JSON type and value parsed from calling Decoder.ReadNext.
+type Value struct {
+ input []byte
+ line int
+ column int
+ typ Type
+ // value will be set to the following Go type based on the type field:
+ // Bool => bool
+ // Number => *numberParts
+ // String => string
+ // Name => string
+ // It will be nil if none of the above.
+ value interface{}
+}
+
+// newError returns an error whose message is prefixed with the line and column
+// recorded in v.
+func (v Value) newError(f string, x ...interface{}) error {
+ e := errors.New(f, x...)
+ return errors.New("error (line %d:%d): %v", v.line, v.column, e)
+}
+
+// Type returns the JSON type.
+func (v Value) Type() Type {
+ return v.typ
+}
+
+// Position returns the line and column of the value.
+func (v Value) Position() (int, int) {
+ return v.line, v.column
+}
+
+// Bool returns the bool value if token is Bool, else it will return an error.
+func (v Value) Bool() (bool, error) {
+ if v.typ != Bool {
+ return false, v.newError("%s is not a bool", v.input)
+ }
+ return v.value.(bool), nil
+}
+
+// String returns the string value for a JSON string token or the read value in
+// string if token is not a string.
+func (v Value) String() string {
+ if v.typ != String {
+ return string(v.input)
+ }
+ return v.value.(string)
+}
+
+// Name returns the object name if token is Name, else it will return an error.
+func (v Value) Name() (string, error) {
+ if v.typ != Name {
+ return "", v.newError("%s is not an object name", v.input)
+ }
+ return v.value.(string), nil
+}
+
+// Float returns the floating-point number if token is Number, else it will
+// return an error.
+//
+// The floating-point precision is specified by the bitSize parameter: 32 for
+// float32 or 64 for float64. If bitSize=32, the result still has type float64,
+// but it will be convertible to float32 without changing its value. It will
+// return an error if the number exceeds the floating point limits for given
+// bitSize.
+func (v Value) Float(bitSize int) (float64, error) {
+ if v.typ != Number {
+ return 0, v.newError("%s is not a number", v.input)
+ }
+ f, err := strconv.ParseFloat(string(v.input), bitSize)
+ if err != nil {
+ return 0, v.newError("%v", err)
+ }
+ return f, nil
+}
+
+// Int returns the signed integer number if token is Number, else it will
+// return an error.
+//
+// The given bitSize specifies the integer type that the result must fit into.
+// It returns an error if the number is not an integer value or if the result
+// exceeds the limits for given bitSize.
+func (v Value) Int(bitSize int) (int64, error) {
+ s, err := v.getIntStr()
+ if err != nil {
+ return 0, err
+ }
+ n, err := strconv.ParseInt(s, 10, bitSize)
+ if err != nil {
+ return 0, v.newError("%v", err)
+ }
+ return n, nil
+}
+
+// Uint returns the unsigned integer number if token is Number, else it will
+// return an error.
+//
+// The given bitSize specifies the unsigned integer type that the result must
+// fit into. It returns an error if the number is not an unsigned integer value
+// or if the result exceeds the limits for given bitSize.
+func (v Value) Uint(bitSize int) (uint64, error) {
+ s, err := v.getIntStr()
+ if err != nil {
+ return 0, err
+ }
+ n, err := strconv.ParseUint(s, 10, bitSize)
+ if err != nil {
+ return 0, v.newError("%v", err)
+ }
+ return n, nil
+}
+
+// getIntStr returns the number held by v as an integer string produced by
+// normalizeToIntString. It returns an error if v is not a Number or if
+// normalizeToIntString reports that the number cannot be converted.
+func (v Value) getIntStr() (string, error) {
+ if v.typ != Number {
+ return "", v.newError("%s is not a number", v.input)
+ }
+ pnum := v.value.(*numberParts)
+ num, ok := normalizeToIntString(pnum)
+ if !ok {
+ return "", v.newError("cannot convert %s to integer", v.input)
+ }
+ return num, nil
+}
diff --git a/internal/encoding/json/decode_test.go b/internal/encoding/json/decode_test.go
new file mode 100644
index 0000000..4917bc2
--- /dev/null
+++ b/internal/encoding/json/decode_test.go
@@ -0,0 +1,1085 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json_test
+
+import (
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "github.com/golang/protobuf/v2/internal/encoding/json"
+)
+
+type R struct {
+ // T is expected Type returned from calling Decoder.ReadNext.
+ T json.Type
+ // E is expected error substring from calling Decoder.ReadNext if set.
+ E string
+ // V is expected value from calling
+ // Value.{Bool()|Float()|Int()|Uint()|String()} depending on type.
+ V interface{}
+ // VE is expected error substring from calling
+ // Value.{Bool()|Float()|Int()|Uint()|String()} depending on type if set.
+ VE string
+}
+
+func TestDecoder(t *testing.T) {
+ const space = " \n\r\t"
+
+ tests := []struct {
+ input string
+ // want is a list of expected values returned from calling
+ // Decoder.ReadNext. An item makes the test code invoke
+ // Decoder.ReadNext and compare against R.T and R.E. For Bool,
+ // Number and String tokens, it invokes the corresponding getter method
+ // and compares the returned value against R.V or R.VE if it returned an
+ // error.
+ want []R
+ }{
+ {
+ input: ``,
+ want: []R{{T: json.EOF}},
+ },
+ {
+ input: space,
+ want: []R{{T: json.EOF}},
+ },
+ {
+ // Calling ReadNext after EOF will keep returning EOF for
+ // succeeding ReadNext calls.
+ input: space,
+ want: []R{
+ {T: json.EOF},
+ {T: json.EOF},
+ {T: json.EOF},
+ },
+ },
+
+ // JSON literals.
+ {
+ input: space + `null` + space,
+ want: []R{
+ {T: json.Null},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `true` + space,
+ want: []R{
+ {T: json.Bool, V: true},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `false` + space,
+ want: []R{
+ {T: json.Bool, V: false},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Error returned will produce the same error again.
+ input: space + `foo` + space,
+ want: []R{
+ {E: `invalid value foo`},
+ {E: `invalid value foo`},
+ },
+ },
+
+ // JSON strings.
+ {
+ input: space + `""` + space,
+ want: []R{
+ {T: json.String, V: ""},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `"hello"` + space,
+ want: []R{
+ {T: json.String, V: "hello"},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `"hello`,
+ want: []R{{E: `unexpected EOF`}},
+ },
+ {
+ input: "\"\x00\"",
+ want: []R{{E: `invalid character '\x00' in string`}},
+ },
+ {
+ input: "\"\u0031\u0032\"",
+ want: []R{
+ {T: json.String, V: "12"},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Invalid UTF-8 is reported as a non-fatal error from ReadNext; the
+ // String value is still returned.
+ input: "\"\xff\"",
+ want: []R{
+ {T: json.String, E: `invalid UTF-8 detected`, V: string("\xff")},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `"` + string(utf8.RuneError) + `"`,
+ want: []R{
+ {T: json.String, V: string(utf8.RuneError)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `"\uFFFD"`,
+ want: []R{
+ {T: json.String, V: string(utf8.RuneError)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `"\x"`,
+ want: []R{{E: `invalid escape code "\\x" in string`}},
+ },
+ {
+ input: `"\uXXXX"`,
+ want: []R{{E: `invalid escape code "\\uXXXX" in string`}},
+ },
+ {
+ input: `"\uDEAD"`, // unmatched surrogate pair
+ want: []R{{E: `unexpected EOF`}},
+ },
+ {
+ input: `"\uDEAD\uBEEF"`, // invalid surrogate half
+ want: []R{{E: `invalid escape code "\\uBEEF" in string`}},
+ },
+ {
+ input: `"\uD800\udead"`, // valid surrogate pair
+ want: []R{
+ {T: json.String, V: `𐊭`},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `"\u0000\"\\\/\b\f\n\r\t"`,
+ want: []R{
+ {T: json.String, V: "\u0000\"\\/\b\f\n\r\t"},
+ {T: json.EOF},
+ },
+ },
+
+ // Invalid JSON numbers.
+ {
+ input: `-`,
+ want: []R{{E: `invalid number -`}},
+ },
+ {
+ input: `+0`,
+ want: []R{{E: `invalid value +0`}},
+ },
+ {
+ input: `-+`,
+ want: []R{{E: `invalid number -+`}},
+ },
+ {
+ input: `0.`,
+ want: []R{{E: `invalid number 0.`}},
+ },
+ {
+ input: `.1`,
+ want: []R{{E: `invalid value .1`}},
+ },
+ {
+ input: `1.0.1`,
+ want: []R{{E: `invalid number 1.0.1`}},
+ },
+ {
+ input: `1..1`,
+ want: []R{{E: `invalid number 1..1`}},
+ },
+ {
+ input: `-1-2`,
+ want: []R{{E: `invalid number -1-2`}},
+ },
+ {
+ input: `01`,
+ want: []R{{E: `invalid number 01`}},
+ },
+ {
+ input: `1e`,
+ want: []R{{E: `invalid number 1e`}},
+ },
+ {
+ input: `1e1.2`,
+ want: []R{{E: `invalid number 1e1.2`}},
+ },
+ {
+ input: `1Ee`,
+ want: []R{{E: `invalid number 1Ee`}},
+ },
+ {
+ input: `1.e1`,
+ want: []R{{E: `invalid number 1.e1`}},
+ },
+ {
+ input: `1.e+`,
+ want: []R{{E: `invalid number 1.e+`}},
+ },
+ {
+ input: `1e+-2`,
+ want: []R{{E: `invalid number 1e+-2`}},
+ },
+ {
+ input: `1e--2`,
+ want: []R{{E: `invalid number 1e--2`}},
+ },
+ {
+ input: `1.0true`,
+ want: []R{{E: `invalid number 1.0true`}},
+ },
+
+ // JSON numbers as floating point.
+ {
+ input: space + `0.0` + space,
+ want: []R{
+ {T: json.Number, V: float32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `0` + space,
+ want: []R{
+ {T: json.Number, V: float32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `-0` + space,
+ want: []R{
+ {T: json.Number, V: float32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1.02`,
+ want: []R{
+ {T: json.Number, V: float32(-1.02)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1.020000`,
+ want: []R{
+ {T: json.Number, V: float32(1.02)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1.0e0`,
+ want: []R{
+ {T: json.Number, V: float32(-1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1.0e-000`,
+ want: []R{
+ {T: json.Number, V: float32(1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1e+00`,
+ want: []R{
+ {T: json.Number, V: float32(1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1.02e3`,
+ want: []R{
+ {T: json.Number, V: float32(1.02e3)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1.02E03`,
+ want: []R{
+ {T: json.Number, V: float32(-1.02e3)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1.0200e+3`,
+ want: []R{
+ {T: json.Number, V: float32(1.02e3)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1.0200E+03`,
+ want: []R{
+ {T: json.Number, V: float32(-1.02e3)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1.0200e-3`,
+ want: []R{
+ {T: json.Number, V: float32(1.02e-3)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1.0200E-03`,
+ want: []R{
+ {T: json.Number, V: float32(-1.02e-3)},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds max float32 limit, but should be ok for float64.
+ input: `3.4e39`,
+ want: []R{
+ {T: json.Number, V: float64(3.4e39)},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds max float32 limit.
+ input: `3.4e39`,
+ want: []R{
+ {T: json.Number, V: float32(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Less than negative max float32 limit.
+ input: `-3.4e39`,
+ want: []R{
+ {T: json.Number, V: float32(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds max float64 limit.
+ input: `1.79e+309`,
+ want: []R{
+ {T: json.Number, V: float64(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Less than negative max float64 limit.
+ input: `-1.79e+309`,
+ want: []R{
+ {T: json.Number, V: float64(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+
+ // JSON numbers as signed integers.
+ {
+ input: space + `0` + space,
+ want: []R{
+ {T: json.Number, V: int32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `-0` + space,
+ want: []R{
+ {T: json.Number, V: int32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Fractional part equals 0 is ok.
+ input: `1.00000`,
+ want: []R{
+ {T: json.Number, V: int32(1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Fractional part not equals 0 returns error.
+ input: `1.0000000001`,
+ want: []R{
+ {T: json.Number, V: int32(0), VE: `cannot convert 1.0000000001 to integer`},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `0e0`,
+ want: []R{
+ {T: json.Number, V: int32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `0.0E0`,
+ want: []R{
+ {T: json.Number, V: int32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `0.0E10`,
+ want: []R{
+ {T: json.Number, V: int32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1`,
+ want: []R{
+ {T: json.Number, V: int32(-1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1.0e+0`,
+ want: []R{
+ {T: json.Number, V: int32(1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1E-0`,
+ want: []R{
+ {T: json.Number, V: int32(-1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `1E1`,
+ want: []R{
+ {T: json.Number, V: int32(10)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-100.00e-02`,
+ want: []R{
+ {T: json.Number, V: int32(-1)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `0.1200E+02`,
+ want: []R{
+ {T: json.Number, V: int64(12)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `0.012e2`,
+ want: []R{
+ {T: json.Number, V: int32(0), VE: `cannot convert 0.012e2 to integer`},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `12e-2`,
+ want: []R{
+ {T: json.Number, V: int32(0), VE: `cannot convert 12e-2 to integer`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MaxInt32.
+ input: `2147483648`,
+ want: []R{
+ {T: json.Number, V: int32(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MinInt32.
+ input: `-2147483649`,
+ want: []R{
+ {T: json.Number, V: int32(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MaxInt32, but ok for int64.
+ input: `2147483648`,
+ want: []R{
+ {T: json.Number, V: int64(2147483648)},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MinInt32, but ok for int64.
+ input: `-2147483649`,
+ want: []R{
+ {T: json.Number, V: int64(-2147483649)},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MaxInt64.
+ input: `9223372036854775808`,
+ want: []R{
+ {T: json.Number, V: int64(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MinInt64.
+ input: `-9223372036854775809`,
+ want: []R{
+ {T: json.Number, V: int64(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+
+ // JSON numbers as unsigned integers.
+ {
+ input: space + `0` + space,
+ want: []R{
+ {T: json.Number, V: uint32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `-0` + space,
+ want: []R{
+ {T: json.Number, V: uint32(0)},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `-1`,
+ want: []R{
+ {T: json.Number, V: uint32(0), VE: `invalid syntax`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MaxUint32.
+ input: `4294967296`,
+ want: []R{
+ {T: json.Number, V: uint32(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+ {
+ // Exceeds math.MaxUint64.
+ input: `18446744073709551616`,
+ want: []R{
+ {T: json.Number, V: uint64(0), VE: `value out of range`},
+ {T: json.EOF},
+ },
+ },
+
+ // JSON sequence of values.
+ {
+ input: `true null`,
+ want: []R{
+ {T: json.Bool, V: true},
+ {E: `unexpected value null`},
+ },
+ },
+ {
+ input: "null false",
+ want: []R{
+ {T: json.Null},
+ {E: `unexpected value false`},
+ },
+ },
+ {
+ input: `true,false`,
+ want: []R{
+ {T: json.Bool, V: true},
+ {E: `unexpected character ,`},
+ },
+ },
+ {
+ input: `47"hello"`,
+ want: []R{
+ {T: json.Number, V: int32(47)},
+ {E: `unexpected value "hello"`},
+ },
+ },
+ {
+ input: `47 "hello"`,
+ want: []R{
+ {T: json.Number, V: int32(47)},
+ {E: `unexpected value "hello"`},
+ },
+ },
+ {
+ input: `true 42`,
+ want: []R{
+ {T: json.Bool, V: true},
+ {E: `unexpected value 42`},
+ },
+ },
+
+ // JSON arrays.
+ {
+ input: space + `[]` + space,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `[` + space + `]` + space,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `[` + space,
+ want: []R{
+ {T: json.StartArray},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: space + `]` + space,
+ want: []R{{E: `unexpected character ]`}},
+ },
+ {
+ input: `[null,true,false, 1e1, "hello" ]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Null},
+ {T: json.Bool, V: true},
+ {T: json.Bool, V: false},
+ {T: json.Number, V: int32(10)},
+ {T: json.String, V: "hello"},
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `[` + space + `true` + space + `,` + space + `"hello"` + space + `]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Bool, V: true},
+ {T: json.String, V: "hello"},
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `[` + space + `true` + space + `,` + space + `]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Bool, V: true},
+ {E: `unexpected character ]`},
+ },
+ },
+ {
+ input: `[` + space + `false` + space + `]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Bool, V: false},
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `[` + space + `1` + space + `0` + space + `]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Number, V: int64(1)},
+ {E: `unexpected value 0`},
+ },
+ },
+ {
+ input: `[null`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Null},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: `[foo]`,
+ want: []R{
+ {T: json.StartArray},
+ {E: `invalid value foo`},
+ },
+ },
+ {
+ input: `[{}, "hello", [true, false], null]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.StartObject},
+ {T: json.EndObject},
+ {T: json.String, V: "hello"},
+ {T: json.StartArray},
+ {T: json.Bool, V: true},
+ {T: json.Bool, V: false},
+ {T: json.EndArray},
+ {T: json.Null},
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `[{ ]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.StartObject},
+ {E: `unexpected character ]`},
+ },
+ },
+ {
+ input: `[[ ]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.StartArray},
+ {T: json.EndArray},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: `[,]`,
+ want: []R{
+ {T: json.StartArray},
+ {E: `unexpected character ,`},
+ },
+ },
+ {
+ input: `[true "hello"]`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.Bool, V: true},
+ {E: `unexpected value "hello"`},
+ },
+ },
+ {
+ input: `[] null`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.EndArray},
+ {E: `unexpected value null`},
+ },
+ },
+ {
+ input: `true []`,
+ want: []R{
+ {T: json.Bool, V: true},
+ {E: `unexpected character [`},
+ },
+ },
+
+ // JSON objects.
+ {
+ input: space + `{}` + space,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.EndObject},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `{` + space + `}` + space,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.EndObject},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: space + `{` + space,
+ want: []R{
+ {T: json.StartObject},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: space + `}` + space,
+ want: []R{{E: `unexpected character }`}},
+ },
+ {
+ input: `{` + space + `null` + space + `}`,
+ want: []R{
+ {T: json.StartObject},
+ {E: `unexpected value null`},
+ },
+ },
+ {
+ input: `{[]}`,
+ want: []R{
+ {T: json.StartObject},
+ {E: `unexpected character [`},
+ },
+ },
+ {
+ input: `{,}`,
+ want: []R{
+ {T: json.StartObject},
+ {E: `unexpected character ,`},
+ },
+ },
+ {
+ input: `{"345678"}`,
+ want: []R{
+ {T: json.StartObject},
+ {E: `unexpected character }, missing ":" after object name`},
+ },
+ },
+ {
+ input: `{` + space + `"hello"` + space + `:` + space + `"world"` + space + `}`,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.Name, V: "hello"},
+ {T: json.String, V: "world"},
+ {T: json.EndObject},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `{"hello" "world"}`,
+ want: []R{
+ {T: json.StartObject},
+ {E: `unexpected character ", missing ":" after object name`},
+ },
+ },
+ {
+ input: `{"hello":`,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.Name, V: "hello"},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: `{"hello":"world"`,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.Name, V: "hello"},
+ {T: json.String, V: "world"},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: `{"hello":"world",`,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.Name, V: "hello"},
+ {T: json.String, V: "world"},
+ {E: `unexpected EOF`},
+ },
+ },
+ {
+ input: `{"34":"89",}`,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.Name, V: "34"},
+ {T: json.String, V: "89"},
+ {E: `syntax error (line 1:12): unexpected character }`},
+ },
+ },
+ {
+ input: `{
+ "number": 123e2,
+ "bool" : false,
+ "object": {"string": "world"},
+ "null" : null,
+ "array" : [1.01, "hello", true],
+ "string": "hello"
+}`,
+ want: []R{
+ {T: json.StartObject},
+
+ {T: json.Name, V: "number"},
+ {T: json.Number, V: int32(12300)},
+
+ {T: json.Name, V: "bool"},
+ {T: json.Bool, V: false},
+
+ {T: json.Name, V: "object"},
+ {T: json.StartObject},
+ {T: json.Name, V: "string"},
+ {T: json.String, V: "world"},
+ {T: json.EndObject},
+
+ {T: json.Name, V: "null"},
+ {T: json.Null},
+
+ {T: json.Name, V: "array"},
+ {T: json.StartArray},
+ {T: json.Number, V: float32(1.01)},
+ {T: json.String, V: "hello"},
+ {T: json.Bool, V: true},
+ {T: json.EndArray},
+
+ {T: json.Name, V: "string"},
+ {T: json.String, V: "hello"},
+
+ {T: json.EndObject},
+ {T: json.EOF},
+ },
+ },
+ {
+ input: `[
+ {"object": {"number": 47}},
+ ["list"],
+ null
+]`,
+ want: []R{
+ {T: json.StartArray},
+
+ {T: json.StartObject},
+ {T: json.Name, V: "object"},
+ {T: json.StartObject},
+ {T: json.Name, V: "number"},
+ {T: json.Number, V: uint32(47)},
+ {T: json.EndObject},
+ {T: json.EndObject},
+
+ {T: json.StartArray},
+ {T: json.String, V: "list"},
+ {T: json.EndArray},
+
+ {T: json.Null},
+
+ {T: json.EndArray},
+ {T: json.EOF},
+ },
+ },
+
+ // Tests for line and column info.
+ {
+ input: `12345678 x`,
+ want: []R{
+ {T: json.Number, V: int64(12345678)},
+ {E: `syntax error (line 1:10): invalid value x`},
+ },
+ },
+ {
+ input: "\ntrue\n x",
+ want: []R{
+ {T: json.Bool, V: true},
+ {E: `syntax error (line 3:4): invalid value x`},
+ },
+ },
+ {
+ input: `"💩"x`,
+ want: []R{
+ {T: json.String, V: "💩"},
+ {E: `syntax error (line 1:4): invalid value x`},
+ },
+ },
+ {
+ input: "\n\n[\"🔥🔥🔥\"x",
+ want: []R{
+ {T: json.StartArray},
+ {T: json.String, V: "🔥🔥🔥"},
+ {E: `syntax error (line 3:7): invalid value x`},
+ },
+ },
+ {
+ // Multi-rune emojis.
+ input: `["👍🏻👍🏿"x`,
+ want: []R{
+ {T: json.StartArray},
+ {T: json.String, V: "👍🏻👍🏿"},
+ {E: `syntax error (line 1:8): invalid value x`},
+ },
+ },
+ {
+ input: `{
+ "45678":-1
+}`,
+ want: []R{
+ {T: json.StartObject},
+ {T: json.Name, V: "45678"},
+ {T: json.Number, V: uint64(1), VE: "error (line 2:11)"},
+ },
+ },
+ }
+
+ for _, tc := range tests {
+ tc := tc
+ t.Run("", func(t *testing.T) {
+ dec := json.NewDecoder([]byte(tc.input))
+ for i, want := range tc.want {
+ value, err := dec.ReadNext()
+ if err != nil {
+ if want.E == "" {
+ t.Errorf("input: %v\nReadNext() got unexpected error: %v", tc.input, err)
+
+ } else if !strings.Contains(err.Error(), want.E) {
+ t.Errorf("input: %v\nReadNext() got %q, want %q", tc.input, err, want.E)
+ }
+ } else {
+ if want.E != "" {
+ t.Errorf("input: %v\nReadNext() got nil error, want %q", tc.input, want.E)
+ }
+ }
+ token := value.Type()
+ if token != want.T {
+ t.Errorf("input: %v\nReadNext() got %v, want %v", tc.input, token, want.T)
+ break
+ }
+ checkValue(t, value, i, want)
+ }
+ })
+ }
+}
+
+// checkValue calls the getter on value that corresponds to its type (and, for
+// Number, to the Go type of want.V) and compares the result against want.V, or
+// the returned error against want.VE. Types without a getter are not checked.
+// wantIdx is the index of want in the test case and is used only in messages.
+func checkValue(t *testing.T, value json.Value, wantIdx int, want R) {
+ var got interface{}
+ var err error
+ switch value.Type() {
+ case json.Bool:
+ got, err = value.Bool()
+ case json.String:
+ got = value.String()
+ case json.Name:
+ got, err = value.Name()
+ case json.Number:
+ // The Go type of the expected value selects the getter and bit size.
+ switch want.V.(type) {
+ case float32:
+ got, err = value.Float(32)
+ got = float32(got.(float64))
+ case float64:
+ got, err = value.Float(64)
+ case int32:
+ got, err = value.Int(32)
+ got = int32(got.(int64))
+ case int64:
+ got, err = value.Int(64)
+ case uint32:
+ got, err = value.Uint(32)
+ got = uint32(got.(uint64))
+ case uint64:
+ got, err = value.Uint(64)
+ }
+ default:
+ return
+ }
+
+ if err != nil {
+ if want.VE == "" {
+ t.Errorf("want#%d: %v got unexpected error: %v", wantIdx, value, err)
+ } else if !strings.Contains(err.Error(), want.VE) {
+ t.Errorf("want#%d: %v got %q, want %q", wantIdx, value, err, want.VE)
+ }
+ return
+ }
+ if want.VE != "" {
+ t.Errorf("want#%d: %v got nil error, want %q", wantIdx, value, want.VE)
+ return
+ }
+
+ if got != want.V {
+ t.Errorf("want#%d: %v got %v, want %v", wantIdx, value, got, want.V)
+ }
+}
diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go
index 93b0928..f73e224 100644
--- a/internal/encoding/json/encode.go
+++ b/internal/encoding/json/encode.go
@@ -5,124 +5,160 @@
package json
import (
+ "strconv"
"strings"
"github.com/golang/protobuf/v2/internal/errors"
)
-// Marshal serializes v as the JSON format.
+// Encoder provides methods to write out JSON constructs and values. The user is
+// responsible for producing valid sequences of JSON constructs and values.
+type Encoder struct {
+	indent   string // indentation unit per nesting level; empty means compact output
+	lastType Type   // type of the most recently written token
+	indents  []byte // accumulated indentation for the current nesting depth
+	out      []byte // encoded output so far, returned by Bytes
+}
+
+// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry for an Array or Object
// to be preceded by the indent and trailed by a newline.
-func Marshal(v Value, indent string) ([]byte, error) {
- p := encoder{}
+func NewEncoder(indent string) (*Encoder, error) {
+ e := &Encoder{}
if len(indent) > 0 {
if strings.Trim(indent, " \t") != "" {
- return nil, errors.New("indent may only be composed of space and tab characters")
+ return nil, errors.New("indent may only be composed of space or tab characters")
}
- p.indent = indent
- p.newline = "\n"
+ e.indent = indent
}
- err := p.marshalValue(v)
- if !p.nerr.Merge(err) {
- return nil, err
- }
- return p.out, p.nerr.E
+ return e, nil
}
-type encoder struct {
- nerr errors.NonFatal
- out []byte
-
- indent string
- indents []byte
- newline string // set to "\n" if len(indent) > 0
+// Bytes returns the content of the written bytes.
+func (e *Encoder) Bytes() []byte {
+ return e.out
}
-func (p *encoder) marshalValue(v Value) error {
- switch v.Type() {
- case Null:
- p.out = append(p.out, "null"...)
- return nil
- case Bool:
- if v.Bool() {
- p.out = append(p.out, "true"...)
- } else {
- p.out = append(p.out, "false"...)
- }
- return nil
- case Number:
- return p.marshalNumber(v)
- case String:
- return p.marshalString(v)
- case Array:
- return p.marshalArray(v)
- case Object:
- return p.marshalObject(v)
- default:
- return errors.New("invalid type %v to encode value", v.Type())
+// WriteNull writes out the null value.
+func (e *Encoder) WriteNull() {
+ e.prepareNext(Null)
+ e.out = append(e.out, "null"...)
+}
+
+// WriteBool writes out the given boolean value.
+func (e *Encoder) WriteBool(b bool) {
+ e.prepareNext(Bool)
+ if b {
+ e.out = append(e.out, "true"...)
+ } else {
+ e.out = append(e.out, "false"...)
}
}
-func (p *encoder) marshalArray(v Value) error {
- if v.Type() != Array {
- return errors.New("invalid type %v, expected array", v.Type())
+// WriteString writes out the given string in JSON string value.
+func (e *Encoder) WriteString(s string) error {
+ e.prepareNext(String)
+ var err error
+ if e.out, err = appendString(e.out, s); err != nil {
+ return err
}
- elems := v.Array()
- p.out = append(p.out, '[')
- p.indents = append(p.indents, p.indent...)
- if len(elems) > 0 {
- p.out = append(p.out, p.newline...)
- }
- for i, elem := range elems {
- p.out = append(p.out, p.indents...)
- if err := p.marshalValue(elem); !p.nerr.Merge(err) {
- return err
- }
- if i < len(elems)-1 {
- p.out = append(p.out, ',')
- }
- p.out = append(p.out, p.newline...)
- }
- p.indents = p.indents[:len(p.indents)-len(p.indent)]
- if len(elems) > 0 {
- p.out = append(p.out, p.indents...)
- }
- p.out = append(p.out, ']')
return nil
}
-func (p *encoder) marshalObject(v Value) error {
- if v.Type() != Object {
- return errors.New("invalid type %v, expected object", v.Type())
- }
- items := v.Object()
- p.out = append(p.out, '{')
- p.indents = append(p.indents, p.indent...)
- if len(items) > 0 {
- p.out = append(p.out, p.newline...)
- }
- for i, item := range items {
- p.out = append(p.out, p.indents...)
- if err := p.marshalString(item[0]); !p.nerr.Merge(err) {
- return err
+// WriteFloat writes out the given float and bitSize in JSON number value.
+func (e *Encoder) WriteFloat(n float64, bitSize int) {
+ e.prepareNext(Number)
+ e.out = appendFloat(e.out, n, bitSize)
+}
+
+// WriteInt writes n to the output buffer as a base-10 JSON number.
+func (e *Encoder) WriteInt(n int64) {
+	e.prepareNext(Number)
+	e.out = strconv.AppendInt(e.out, n, 10)
+}
+
+// WriteUint writes n to the output buffer as a base-10 JSON number.
+func (e *Encoder) WriteUint(n uint64) {
+	e.prepareNext(Number)
+	e.out = strconv.AppendUint(e.out, n, 10)
+}
+
+// StartObject writes out the '{' symbol.
+func (e *Encoder) StartObject() {
+ e.prepareNext(StartObject)
+ e.out = append(e.out, '{')
+}
+
+// EndObject writes out the '}' symbol.
+func (e *Encoder) EndObject() {
+ e.prepareNext(EndObject)
+ e.out = append(e.out, '}')
+}
+
+// WriteName writes out the given string in JSON string value and the name
+// separator ':'.
+func (e *Encoder) WriteName(s string) error {
+ e.prepareNext(Name)
+ // Errors returned by appendString() are non-fatal.
+ var err error
+ e.out, err = appendString(e.out, s)
+ e.out = append(e.out, ':')
+ return err
+}
+
+// StartArray writes out the '[' symbol.
+func (e *Encoder) StartArray() {
+ e.prepareNext(StartArray)
+ e.out = append(e.out, '[')
+}
+
+// EndArray writes out the ']' symbol.
+func (e *Encoder) EndArray() {
+ e.prepareNext(EndArray)
+ e.out = append(e.out, ']')
+}
+
+// prepareNext adds possible comma and indentation for the next value based
+// on last type and indent option. It also updates lastType to next.
+func (e *Encoder) prepareNext(next Type) {
+ defer func() {
+ // Set lastType to next.
+ e.lastType = next
+ }()
+
+ if len(e.indent) == 0 {
+ // Need to add comma on the following condition.
+ if e.lastType&(Null|Bool|Number|String|EndObject|EndArray) != 0 &&
+ next&(Name|Null|Bool|Number|String|StartObject|StartArray) != 0 {
+ e.out = append(e.out, ',')
}
- p.out = append(p.out, ':')
- if len(p.indent) > 0 {
- p.out = append(p.out, ' ')
- }
- if err := p.marshalValue(item[1]); !p.nerr.Merge(err) {
- return err
- }
- if i < len(items)-1 {
- p.out = append(p.out, ',')
- }
- p.out = append(p.out, p.newline...)
+ return
}
- p.indents = p.indents[:len(p.indents)-len(p.indent)]
- if len(items) > 0 {
- p.out = append(p.out, p.indents...)
+
+ switch {
+ case e.lastType&(StartObject|StartArray) != 0:
+ // If next type is NOT closing, add indent and newline.
+ if next&(EndObject|EndArray) == 0 {
+ e.indents = append(e.indents, e.indent...)
+ e.out = append(e.out, '\n')
+ e.out = append(e.out, e.indents...)
+ }
+
+ case e.lastType&(Null|Bool|Number|String|EndObject|EndArray) != 0:
+ switch {
+ // If next type is either a value or name, add comma and newline.
+ case next&(Name|Null|Bool|Number|String|StartObject|StartArray) != 0:
+ e.out = append(e.out, ',', '\n')
+
+ // If next type is a closing object or array, adjust indentation.
+ case next&(EndObject|EndArray) != 0:
+ e.indents = e.indents[:len(e.indents)-len(e.indent)]
+ e.out = append(e.out, '\n')
+ }
+ e.out = append(e.out, e.indents...)
+
+ case e.lastType&Name != 0:
+ e.out = append(e.out, ' ')
}
- p.out = append(p.out, '}')
- return nil
}
diff --git a/internal/encoding/json/encode_test.go b/internal/encoding/json/encode_test.go
new file mode 100644
index 0000000..d3f0afd
--- /dev/null
+++ b/internal/encoding/json/encode_test.go
@@ -0,0 +1,410 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json_test
+
+import (
+ "math"
+ "strings"
+ "testing"
+
+ "github.com/golang/protobuf/v2/internal/encoding/json"
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+)
+
+// splitLines is a cmpopts.Option for comparing strings with line breaks.
+var splitLines = cmpopts.AcyclicTransformer("SplitLines", func(s string) []string {
+ return strings.Split(s, "\n")
+})
+
+func TestEncoder(t *testing.T) {
+ tests := []struct {
+ desc string
+ write func(*json.Encoder)
+ wantOut string
+ wantOutIndent string
+ }{
+ {
+ desc: "null",
+ write: func(e *json.Encoder) {
+ e.WriteNull()
+ },
+ wantOut: `null`,
+ wantOutIndent: `null`,
+ },
+ {
+ desc: "true",
+ write: func(e *json.Encoder) {
+ e.WriteBool(true)
+ },
+ wantOut: `true`,
+ wantOutIndent: `true`,
+ },
+ {
+ desc: "false",
+ write: func(e *json.Encoder) {
+ e.WriteBool(false)
+ },
+ wantOut: `false`,
+ wantOutIndent: `false`,
+ },
+ {
+ desc: "string",
+ write: func(e *json.Encoder) {
+ e.WriteString("hello world")
+ },
+ wantOut: `"hello world"`,
+ wantOutIndent: `"hello world"`,
+ },
+ {
+ desc: "string contains escaped characters",
+ write: func(e *json.Encoder) {
+ e.WriteString("\u0000\"\\/\b\f\n\r\t")
+ },
+ wantOut: `"\u0000\"\\/\b\f\n\r\t"`,
+ },
+ {
+ desc: "float64",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(1.0199999809265137, 64)
+ },
+ wantOut: `1.0199999809265137`,
+ wantOutIndent: `1.0199999809265137`,
+ },
+ {
+ desc: "float64 max value",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(math.MaxFloat64, 64)
+ },
+ wantOut: `1.7976931348623157e+308`,
+ wantOutIndent: `1.7976931348623157e+308`,
+ },
+ {
+ desc: "float64 min value",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(-math.MaxFloat64, 64)
+ },
+ wantOut: `-1.7976931348623157e+308`,
+ wantOutIndent: `-1.7976931348623157e+308`,
+ },
+ {
+ desc: "float64 NaN",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(math.NaN(), 64)
+ },
+ wantOut: `"NaN"`,
+ wantOutIndent: `"NaN"`,
+ },
+ {
+ desc: "float64 Infinity",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(math.Inf(+1), 64)
+ },
+ wantOut: `"Infinity"`,
+ wantOutIndent: `"Infinity"`,
+ },
+ {
+ desc: "float64 -Infinity",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(math.Inf(-1), 64)
+ },
+ wantOut: `"-Infinity"`,
+ wantOutIndent: `"-Infinity"`,
+ },
+ {
+ desc: "float32",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(1.02, 32)
+ },
+ wantOut: `1.02`,
+ wantOutIndent: `1.02`,
+ },
+ {
+ desc: "float32 max value",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(math.MaxFloat32, 32)
+ },
+ wantOut: `3.4028235e+38`,
+ wantOutIndent: `3.4028235e+38`,
+ },
+ {
+ desc: "float32 min value",
+ write: func(e *json.Encoder) {
+ e.WriteFloat(-math.MaxFloat32, 32)
+ },
+ wantOut: `-3.4028235e+38`,
+ wantOutIndent: `-3.4028235e+38`,
+ },
+ {
+ desc: "int",
+ write: func(e *json.Encoder) {
+ e.WriteInt(-math.MaxInt64)
+ },
+ wantOut: `-9223372036854775807`,
+ wantOutIndent: `-9223372036854775807`,
+ },
+ {
+ desc: "uint",
+ write: func(e *json.Encoder) {
+ e.WriteUint(math.MaxUint64)
+ },
+ wantOut: `18446744073709551615`,
+ wantOutIndent: `18446744073709551615`,
+ },
+ {
+ desc: "empty object",
+ write: func(e *json.Encoder) {
+ e.StartObject()
+ e.EndObject()
+ },
+ wantOut: `{}`,
+ wantOutIndent: `{}`,
+ },
+ {
+ desc: "empty array",
+ write: func(e *json.Encoder) {
+ e.StartArray()
+ e.EndArray()
+ },
+ wantOut: `[]`,
+ wantOutIndent: `[]`,
+ },
+ {
+ desc: "object with one member",
+ write: func(e *json.Encoder) {
+ e.StartObject()
+ e.WriteName("hello")
+ e.WriteString("world")
+ e.EndObject()
+ },
+ wantOut: `{"hello":"world"}`,
+ wantOutIndent: `{
+ "hello": "world"
+}`,
+ },
+ {
+ desc: "array with one member",
+ write: func(e *json.Encoder) {
+ e.StartArray()
+ e.WriteNull()
+ e.EndArray()
+ },
+ wantOut: `[null]`,
+ wantOutIndent: `[
+ null
+]`,
+ },
+ {
+ desc: "simple object",
+ write: func(e *json.Encoder) {
+ e.StartObject()
+ {
+ e.WriteName("null")
+ e.WriteNull()
+ }
+ {
+ e.WriteName("bool")
+ e.WriteBool(true)
+ }
+ {
+ e.WriteName("string")
+ e.WriteString("hello")
+ }
+ {
+ e.WriteName("float")
+ e.WriteFloat(6.28318, 64)
+ }
+ {
+ e.WriteName("int")
+ e.WriteInt(42)
+ }
+ {
+ e.WriteName("uint")
+ e.WriteUint(47)
+ }
+ e.EndObject()
+ },
+ wantOut: `{"null":null,"bool":true,"string":"hello","float":6.28318,"int":42,"uint":47}`,
+ wantOutIndent: `{
+ "null": null,
+ "bool": true,
+ "string": "hello",
+ "float": 6.28318,
+ "int": 42,
+ "uint": 47
+}`,
+ },
+ {
+ desc: "simple array",
+ write: func(e *json.Encoder) {
+ e.StartArray()
+ {
+ e.WriteString("hello")
+ e.WriteFloat(6.28318, 32)
+ e.WriteInt(42)
+ e.WriteUint(47)
+ e.WriteBool(true)
+ e.WriteNull()
+ }
+ e.EndArray()
+ },
+ wantOut: `["hello",6.28318,42,47,true,null]`,
+ wantOutIndent: `[
+ "hello",
+ 6.28318,
+ 42,
+ 47,
+ true,
+ null
+]`,
+ },
+ {
+ desc: "fancy object",
+ write: func(e *json.Encoder) {
+ e.StartObject()
+ {
+ e.WriteName("object0")
+ e.StartObject()
+ e.EndObject()
+ }
+ {
+ e.WriteName("array0")
+ e.StartArray()
+ e.EndArray()
+ }
+ {
+ e.WriteName("object1")
+ e.StartObject()
+ {
+ e.WriteName("null")
+ e.WriteNull()
+ }
+ {
+ e.WriteName("object1-1")
+ e.StartObject()
+ {
+ e.WriteName("bool")
+ e.WriteBool(false)
+ }
+ {
+ e.WriteName("float")
+ e.WriteFloat(3.14159, 32)
+ }
+ e.EndObject()
+ }
+ e.EndObject()
+ }
+ {
+ e.WriteName("array1")
+ e.StartArray()
+ {
+ e.WriteNull()
+ e.StartObject()
+ e.EndObject()
+ e.StartObject()
+ {
+ e.WriteName("hello")
+ e.WriteString("world")
+ }
+ {
+ e.WriteName("hola")
+ e.WriteString("mundo")
+ }
+ e.EndObject()
+ e.StartArray()
+ {
+ e.WriteUint(1)
+ e.WriteUint(0)
+ e.WriteUint(1)
+ }
+ e.EndArray()
+ }
+ e.EndArray()
+ }
+ e.EndObject()
+ },
+ wantOutIndent: `{
+ "object0": {},
+ "array0": [],
+ "object1": {
+ "null": null,
+ "object1-1": {
+ "bool": false,
+ "float": 3.14159
+ }
+ },
+ "array1": [
+ null,
+ {},
+ {
+ "hello": "world",
+ "hola": "mundo"
+ },
+ [
+ 1,
+ 0,
+ 1
+ ]
+ ]
+}`,
+ },
+ {
+ desc: "string contains rune error",
+ write: func(e *json.Encoder) {
+ // WriteString returns non-fatal error for invalid UTF sequence, but
+ // should still output the written value. See TestWriteStringError
+ // below that checks for this.
+ e.StartObject()
+ e.WriteName("invalid rune")
+ e.WriteString("abc\xff")
+ e.EndObject()
+ },
+ wantOut: "{\"invalid rune\":\"abc\xff\"}",
+ }}
+
+ for _, tc := range tests {
+ t.Run(tc.desc, func(t *testing.T) {
+ if tc.wantOut != "" {
+ enc, err := json.NewEncoder("")
+ if err != nil {
+ t.Fatalf("NewEncoder() returned error: %v", err)
+ }
+ tc.write(enc)
+ got := string(enc.Bytes())
+ if got != tc.wantOut {
+ t.Errorf("%s:\n<got>:\n%v\n<want>\n%v\n", tc.desc, got, tc.wantOut)
+ }
+ }
+ if tc.wantOutIndent != "" {
+ enc, err := json.NewEncoder("\t")
+ if err != nil {
+ t.Fatalf("NewEncoder() returned error: %v", err)
+ }
+ tc.write(enc)
+ got, want := string(enc.Bytes()), tc.wantOutIndent
+ if got != want {
+ t.Errorf("%s(indent):\n<got>:\n%v\n<want>\n%v\n<diff -want +got>\n%v\n",
+ tc.desc, got, want, cmp.Diff(want, got, splitLines))
+ }
+ }
+ })
+ }
+}
+
+// TestWriteStringError checks that WriteString reports an error for invalid UTF-8.
+func TestWriteStringError(t *testing.T) {
+	tests := []string{"abc\xff"}
+	for _, in := range tests {
+		t.Run(in, func(t *testing.T) {
+			enc, err := json.NewEncoder("")
+			if err != nil {
+				t.Fatalf("NewEncoder() returned error: %v", err)
+			}
+			if err := enc.WriteString(in); err == nil {
+				t.Errorf("WriteString(%q): got nil error, want error", in)
+			}
+		})
+	}
+}
diff --git a/internal/encoding/json/json_test.go b/internal/encoding/json/json_test.go
deleted file mode 100644
index 3e96fa6..0000000
--- a/internal/encoding/json/json_test.go
+++ /dev/null
@@ -1,416 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package json
-
-import (
- "math"
- "strings"
- "testing"
- "unicode/utf8"
-
- "github.com/google/go-cmp/cmp"
- "github.com/google/go-cmp/cmp/cmpopts"
-)
-
-func Test(t *testing.T) {
- const space = " \n\r\t"
- var V = ValueOf
- type Arr = []Value
- type Obj = [][2]Value
-
- tests := []struct {
- in string
- wantVal Value
- wantOut string
- wantOutIndent string
- wantErr string
- }{{
- in: ``,
- wantErr: `unexpected EOF`,
- }, {
- in: space,
- wantErr: `unexpected EOF`,
- }, {
- in: space + `null` + space,
- wantVal: V(nil),
- wantOut: `null`,
- wantOutIndent: `null`,
- }, {
- in: space + `true` + space,
- wantVal: V(true),
- wantOut: `true`,
- wantOutIndent: `true`,
- }, {
- in: space + `false` + space,
- wantVal: V(false),
- wantOut: `false`,
- wantOutIndent: `false`,
- }, {
- in: space + `0` + space,
- wantVal: V(0.0),
- wantOut: `0`,
- wantOutIndent: `0`,
- }, {
- in: space + `"hello"` + space,
- wantVal: V("hello"),
- wantOut: `"hello"`,
- wantOutIndent: `"hello"`,
- }, {
- in: space + `[]` + space,
- wantVal: V(Arr{}),
- wantOut: `[]`,
- wantOutIndent: `[]`,
- }, {
- in: space + `{}` + space,
- wantVal: V(Obj{}),
- wantOut: `{}`,
- wantOutIndent: `{}`,
- }, {
- in: `null#invalid`,
- wantErr: `8 bytes of unconsumed input`,
- }, {
- in: `0#invalid`,
- wantErr: `8 bytes of unconsumed input`,
- }, {
- in: `"hello"#invalid`,
- wantErr: `8 bytes of unconsumed input`,
- }, {
- in: `[]#invalid`,
- wantErr: `8 bytes of unconsumed input`,
- }, {
- in: `{}#invalid`,
- wantErr: `8 bytes of unconsumed input`,
- }, {
- in: `[truee,true]`,
- wantErr: `invalid "truee" as literal`,
- }, {
- in: `[falsee,false]`,
- wantErr: `invalid "falsee" as literal`,
- }, {
- in: `[`,
- wantErr: `unexpected EOF`,
- }, {
- in: `[{}]`,
- wantVal: V(Arr{V(Obj{})}),
- wantOut: "[{}]",
- wantOutIndent: "[\n\t{}\n]",
- }, {
- in: `[{]}`,
- wantErr: `invalid character ']' at start of string`,
- }, {
- in: `[,]`,
- wantErr: `invalid "," as value`,
- }, {
- in: `{,}`,
- wantErr: `invalid character ',' at start of string`,
- }, {
- in: `{"key""val"}`,
- wantErr: `invalid character '"', expected ':' in object`,
- }, {
- in: `["elem0""elem1"]`,
- wantErr: `invalid character '"', expected ']' at end of array`,
- }, {
- in: `{"hello"`,
- wantErr: `unexpected EOF`,
- }, {
- in: `{"hello"}`,
- wantErr: `invalid character '}', expected ':' in object`,
- }, {
- in: `{"hello":`,
- wantErr: `unexpected EOF`,
- }, {
- in: `{"hello":}`,
- wantErr: `invalid "}" as value`,
- }, {
- in: `{"hello":"goodbye"`,
- wantErr: `unexpected EOF`,
- }, {
- in: `{"hello":"goodbye"]`,
- wantErr: `invalid character ']', expected '}' at end of object`,
- }, {
- in: `{"hello":"goodbye"}`,
- wantVal: V(Obj{{V("hello"), V("goodbye")}}),
- wantOut: `{"hello":"goodbye"}`,
- wantOutIndent: "{\n\t\"hello\": \"goodbye\"\n}",
- }, {
- in: `{"hello":"goodbye",}`,
- wantErr: `invalid character '}' at start of string`,
- }, {
- in: `{"k":"v1","k":"v2"}`,
- wantVal: V(Obj{
- {V("k"), V("v1")}, {V("k"), V("v2")},
- }),
- wantOut: `{"k":"v1","k":"v2"}`,
- wantOutIndent: "{\n\t\"k\": \"v1\",\n\t\"k\": \"v2\"\n}",
- }, {
- in: `{"k":{"k":{"k":"v"}}}`,
- wantVal: V(Obj{
- {V("k"), V(Obj{
- {V("k"), V(Obj{
- {V("k"), V("v")},
- })},
- })},
- }),
- wantOut: `{"k":{"k":{"k":"v"}}}`,
- wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v\"\n\t\t}\n\t}\n}",
- }, {
- in: `{"k":{"k":{"k":"v1","k":"v2"}}}`,
- wantVal: V(Obj{
- {V("k"), V(Obj{
- {V("k"), V(Obj{
- {V("k"), V("v1")},
- {V("k"), V("v2")},
- })},
- })},
- }),
- wantOut: `{"k":{"k":{"k":"v1","k":"v2"}}}`,
- wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v1\",\n\t\t\t\"k\": \"v2\"\n\t\t}\n\t}\n}",
- }, {
- in: " x",
- wantErr: `syntax error (line 1:3)`,
- }, {
- in: `["💩"x`,
- wantErr: `syntax error (line 1:5)`,
- }, {
- in: "\n\n[\"🔥🔥🔥\"x",
- wantErr: `syntax error (line 3:7)`,
- }, {
- in: `["👍🏻👍🏿"x`,
- wantErr: `syntax error (line 1:8)`, // multi-rune emojis; could be column:6
- }, {
- in: "\"\x00\"",
- wantErr: `invalid character '\x00' in string`,
- }, {
- in: "\"\xff\"",
- wantErr: `invalid UTF-8 detected`,
- wantVal: V(string("\xff")),
- }, {
- in: `"` + string(utf8.RuneError) + `"`,
- wantVal: V(string(utf8.RuneError)),
- wantOut: `"` + string(utf8.RuneError) + `"`,
- }, {
- in: `"\uFFFD"`,
- wantVal: V(string(utf8.RuneError)),
- wantOut: `"` + string(utf8.RuneError) + `"`,
- }, {
- in: `"\x"`,
- wantErr: `invalid escape code "\\x" in string`,
- }, {
- in: `"\uXXXX"`,
- wantErr: `invalid escape code "\\uXXXX" in string`,
- }, {
- in: `"\uDEAD"`, // unmatched surrogate pair
- wantErr: `unexpected EOF`,
- }, {
- in: `"\uDEAD\uBEEF"`, // invalid surrogate half
- wantErr: `invalid escape code "\\uBEEF" in string`,
- }, {
- in: `"\uD800\udead"`, // valid surrogate pair
- wantVal: V("𐊭"),
- wantOut: `"𐊭"`,
- }, {
- in: `"\u0000\"\\\/\b\f\n\r\t"`,
- wantVal: V("\u0000\"\\/\b\f\n\r\t"),
- wantOut: `"\u0000\"\\/\b\f\n\r\t"`,
- }, {
- in: `-`,
- wantErr: `invalid "-" as number`,
- }, {
- in: `-0`,
- wantVal: V(math.Copysign(0, -1)),
- wantOut: `-0`,
- }, {
- in: `+0`,
- wantErr: `invalid "+0" as value`,
- }, {
- in: `-+`,
- wantErr: `invalid "-+" as number`,
- }, {
- in: `0.`,
- wantErr: `invalid "0." as number`,
- }, {
- in: `.1`,
- wantErr: `invalid ".1" as value`,
- }, {
- in: `0.e1`,
- wantErr: `invalid "0.e1" as number`,
- }, {
- in: `0.0`,
- wantVal: V(0.0),
- wantOut: "0",
- }, {
- in: `01`,
- wantErr: `invalid "01" as number`,
- }, {
- in: `0e`,
- wantErr: `invalid "0e" as number`,
- }, {
- in: `0e0`,
- wantVal: V(0.0),
- wantOut: "0",
- }, {
- in: `0E0`,
- wantVal: V(0.0),
- wantOut: "0",
- }, {
- in: `0Ee`,
- wantErr: `invalid "0Ee" as number`,
- }, {
- in: `-1.0E+1`,
- wantVal: V(-10.0),
- wantOut: "-10",
- }, {
- in: `
- {
- "firstName" : "John",
- "lastName" : "Smith" ,
- "isAlive" : true,
- "age" : 27,
- "address" : {
- "streetAddress" : "21 2nd Street" ,
- "city" : "New York" ,
- "state" : "NY" ,
- "postalCode" : "10021-3100"
- },
- "phoneNumbers" : [
- {
- "type" : "home" ,
- "number" : "212 555-1234"
- } ,
- {
- "type" : "office" ,
- "number" : "646 555-4567"
- } ,
- {
- "type" : "mobile" ,
- "number" : "123 456-7890"
- }
- ],
- "children" : [] ,
- "spouse" : null
- }
- `,
- wantVal: V(Obj{
- {V("firstName"), V("John")},
- {V("lastName"), V("Smith")},
- {V("isAlive"), V(true)},
- {V("age"), V(27.0)},
- {V("address"), V(Obj{
- {V("streetAddress"), V("21 2nd Street")},
- {V("city"), V("New York")},
- {V("state"), V("NY")},
- {V("postalCode"), V("10021-3100")},
- })},
- {V("phoneNumbers"), V(Arr{
- V(Obj{
- {V("type"), V("home")},
- {V("number"), V("212 555-1234")},
- }),
- V(Obj{
- {V("type"), V("office")},
- {V("number"), V("646 555-4567")},
- }),
- V(Obj{
- {V("type"), V("mobile")},
- {V("number"), V("123 456-7890")},
- }),
- })},
- {V("children"), V(Arr{})},
- {V("spouse"), V(nil)},
- }),
- wantOut: `{"firstName":"John","lastName":"Smith","isAlive":true,"age":27,"address":{"streetAddress":"21 2nd Street","city":"New York","state":"NY","postalCode":"10021-3100"},"phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"},{"type":"mobile","number":"123 456-7890"}],"children":[],"spouse":null}`,
- wantOutIndent: `{
- "firstName": "John",
- "lastName": "Smith",
- "isAlive": true,
- "age": 27,
- "address": {
- "streetAddress": "21 2nd Street",
- "city": "New York",
- "state": "NY",
- "postalCode": "10021-3100"
- },
- "phoneNumbers": [
- {
- "type": "home",
- "number": "212 555-1234"
- },
- {
- "type": "office",
- "number": "646 555-4567"
- },
- {
- "type": "mobile",
- "number": "123 456-7890"
- }
- ],
- "children": [],
- "spouse": null
-}`,
- }}
-
- opts := cmp.Options{
- cmpopts.EquateEmpty(),
- cmp.Transformer("", func(v Value) interface{} {
- switch v.typ {
- case 0:
- return nil // special case so Value{} == Value{}
- case Null:
- return nil
- case Bool:
- return v.Bool()
- case Number:
- return v.Number()
- case String:
- return v.String()
- case Array:
- return v.Array()
- case Object:
- return v.Object()
- default:
- panic("invalid type")
- }
- }),
- }
- for _, tt := range tests {
- t.Run("", func(t *testing.T) {
- if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
- gotVal, err := Unmarshal([]byte(tt.in))
- if err == nil {
- if tt.wantErr != "" {
- t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
- }
- } else {
- if tt.wantErr == "" {
- t.Errorf("Unmarshal(): got %v, want nil error", err)
- } else if !strings.Contains(err.Error(), tt.wantErr) {
- t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
- }
- }
- if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
- t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
- }
- }
- if tt.wantOut != "" {
- gotOut, err := Marshal(tt.wantVal, "")
- if err != nil {
- t.Errorf("Marshal(): got %v, want nil error", err)
- }
- if string(gotOut) != tt.wantOut {
- t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
- }
- }
- if tt.wantOutIndent != "" {
- gotOut, err := Marshal(tt.wantVal, "\t")
- if err != nil {
- t.Errorf("Marshal(Indent): got %v, want nil error", err)
- }
- if string(gotOut) != tt.wantOutIndent {
- t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
- }
- }
- })
- }
-}
diff --git a/internal/encoding/json/number.go b/internal/encoding/json/number.go
index ec99cbd..6b3ed86 100644
--- a/internal/encoding/json/number.go
+++ b/internal/encoding/json/number.go
@@ -5,45 +5,32 @@
package json
import (
- "io"
+ "bytes"
"math"
- "regexp"
"strconv"
-
- "github.com/golang/protobuf/v2/internal/errors"
)
-// marshalNumber encodes v as a Number.
-func (p *encoder) marshalNumber(v Value) error {
- var err error
- p.out, err = appendNumber(p.out, v)
- return err
-}
-func appendNumber(out []byte, v Value) ([]byte, error) {
- if v.Type() != Number {
- return nil, errors.New("invalid type %v, expected number", v.Type())
- }
- if len(v.raw) > 0 {
- return append(out, v.raw...), nil
- }
- n := v.Number()
- if math.IsInf(n, 0) || math.IsNaN(n) {
- return nil, errors.New("invalid number value: %v", n)
+// appendFloat formats given float in bitSize, and appends to the given []byte.
+func appendFloat(out []byte, n float64, bitSize int) []byte {
+ switch {
+ case math.IsNaN(n):
+ return append(out, `"NaN"`...)
+ case math.IsInf(n, +1):
+ return append(out, `"Infinity"`...)
+ case math.IsInf(n, -1):
+ return append(out, `"-Infinity"`...)
}
// JSON number formatting logic based on encoding/json.
// See floatEncoder.encode for reference.
- bits := 64
- if float64(float32(n)) == n {
- bits = 32
- }
fmt := byte('f')
if abs := math.Abs(n); abs != 0 {
- if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
+ if bitSize == 64 && (abs < 1e-6 || abs >= 1e21) ||
+ bitSize == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
fmt = 'e'
}
}
- out = strconv.AppendFloat(out, n, fmt, -1, bits)
+ out = strconv.AppendFloat(out, n, fmt, -1, bitSize)
if fmt == 'e' {
n := len(out)
if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' {
@@ -51,29 +38,172 @@
out = out[:n-1]
}
}
- return out, nil
+ return out
}
-// Exact expression to match a JSON floating-point number.
-// JSON's grammar for floats is more restrictive than Go's grammar.
-var floatRegexp = regexp.MustCompile("^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?")
-
-// unmarshalNumber decodes a Number from the input.
-func (p *decoder) unmarshalNumber() (Value, error) {
- v, n, err := consumeNumber(p.in)
- p.consume(n)
- return v, err
+// numberParts is the result of parsing out a valid JSON number. It contains
+// the parts of a number. The parts are used for integer conversion.
+type numberParts struct {
+	neg  bool   // true when the number has a leading '-'
+	intp []byte // integer digits; left empty when the integer part is a single 0
+	frac []byte // fraction digits with trailing zeros removed
+	exp  []byte // exponent digits, preceded by the sign when one was written
+}
-func consumeNumber(in []byte) (Value, int, error) {
- if len(in) == 0 {
- return Value{}, 0, io.ErrUnexpectedEOF
+
+// parseNumber returns a numberParts instance if it is able to read a JSON
+// number from the given []byte. It also returns the number of bytes read.
+// Parsing logic follows the definition in
+// https://tools.ietf.org/html/rfc7159#section-6, and is based off
+// encoding/json.isValidNumber function.
+func parseNumber(input []byte) (*numberParts, int) {
+ var n int
+ var neg bool
+ var intp []byte
+ var frac []byte
+ var exp []byte
+
+ s := input
+ if len(s) == 0 {
+ return nil, 0
}
- if n := matchWithDelim(floatRegexp, in); n > 0 {
- v, err := strconv.ParseFloat(string(in[:n]), 64)
- if err != nil {
- return Value{}, 0, err
+
+ // Optional -
+ if s[0] == '-' {
+ neg = true
+ s = s[1:]
+ n++
+ if len(s) == 0 {
+ return nil, 0
}
- return rawValueOf(v, in[:n:n]), n, nil
}
- return Value{}, 0, newSyntaxError("invalid %q as number", errRegexp.Find(in))
+
+ // Digits
+ switch {
+ case s[0] == '0':
+ // Skip first 0 and no need to store.
+ s = s[1:]
+ n++
+
+ case '1' <= s[0] && s[0] <= '9':
+ intp = append(intp, s[0])
+ s = s[1:]
+ n++
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+ intp = append(intp, s[0])
+ s = s[1:]
+ n++
+ }
+
+ default:
+ return nil, 0
+ }
+
+ // . followed by 1 or more digits.
+ if len(s) >= 2 && s[0] == '.' && '0' <= s[1] && s[1] <= '9' {
+ frac = append(frac, s[1])
+ s = s[2:]
+ n += 2
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+ frac = append(frac, s[0])
+ s = s[1:]
+ n++
+ }
+ }
+
+ // e or E followed by an optional - or + and
+ // 1 or more digits.
+ if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
+ s = s[1:]
+ n++
+ if s[0] == '+' || s[0] == '-' {
+ exp = append(exp, s[0])
+ s = s[1:]
+ n++
+ if len(s) == 0 {
+ return nil, 0
+ }
+ }
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+ exp = append(exp, s[0])
+ s = s[1:]
+ n++
+ }
+ }
+
+ // Check that next byte is a delimiter or it is at the end.
+ if n < len(input) && isNotDelim(input[n]) {
+ return nil, 0
+ }
+
+ return &numberParts{
+ neg: neg,
+ intp: intp,
+ frac: bytes.TrimRight(frac, "0"), // Remove unnecessary 0s to the right.
+ exp: exp,
+ }, n
+}
+
+// normalizeToIntString returns an integer string in normal form without the
+// E-notation for given numberParts. It returns false if the number is not an
+// integer, or if its exponent is out of int32 range or would append more
+// zeros than any 64-bit integer can hold.
+func normalizeToIntString(n *numberParts) (string, bool) {
+	// Clip the capacity so the appends below copy rather than write into the
+	// array backing n.intp.
+	num := n.intp[:len(n.intp):len(n.intp)]
+	intpSize := len(num)
+	fracSize := len(n.frac)
+	if intpSize == 0 && fracSize == 0 {
+		return "0", true
+	}
+	var exp int
+	if len(n.exp) > 0 {
+		i, err := strconv.ParseInt(string(n.exp), 10, 32)
+		if err != nil {
+			return "", false
+		}
+		exp = int(i)
+	}
+	if exp >= 0 {
+		// For positive E, shift fraction digits into integer part and also pad
+		// with zeroes as needed.
+		// If there are more digits in fraction than the E value, then number is
+		// not an integer.
+		if fracSize > exp {
+			return "", false
+		}
+		// maxDigits is the digit count of the largest uint64. The value here is
+		// non-zero (parseNumber never yields an all-zero intp or frac), so that
+		// many padding zeros cannot fit: fail before building the huge slice.
+		const maxDigits = 20
+		if exp-fracSize >= maxDigits {
+			return "", false
+		}
+		num = append(num, n.frac...)
+		for i := 0; i < exp-fracSize; i++ {
+			num = append(num, '0')
+		}
+	} else {
+		// For negative E, shift digits in integer part out.
+		// If there are any fractions to begin with, then number is not an
+		// integer.
+		if fracSize > 0 {
+			return "", false
+		}
+		index := intpSize + exp
+		if index < 0 {
+			return "", false
+		}
+		// If any of the digits being shifted out is non-zero, then number is
+		// not an integer.
+		for i := index; i < intpSize; i++ {
+			if num[i] != '0' {
+				return "", false
+			}
+		}
+		num = num[:index]
+	}
+	if n.neg {
+		return "-" + string(num), true
+	}
+	return string(num), true
}
diff --git a/internal/encoding/json/string.go b/internal/encoding/json/string.go
index 813d8fa..fbfb09f 100644
--- a/internal/encoding/json/string.go
+++ b/internal/encoding/json/string.go
@@ -15,20 +15,7 @@
"github.com/golang/protobuf/v2/internal/errors"
)
-func (p *encoder) marshalString(v Value) error {
- var err error
- p.out, err = appendString(p.out, v)
- return err
-}
-func appendString(out []byte, v Value) ([]byte, error) {
- if v.Type() != String {
- return nil, errors.New("invalid type %v, expected string", v.Type())
- }
- if len(v.raw) > 0 {
- return append(out, v.raw...), nil
- }
- in := v.String()
-
+func appendString(out []byte, in string) ([]byte, error) {
var nerr errors.NonFatal
out = append(out, '"')
i := indexNeedEscape(in)
@@ -68,19 +55,14 @@
return out, nerr.E
}
-func (p *decoder) unmarshalString() (Value, error) {
- v, n, err := consumeString(p.in)
- p.consume(n)
- return v, err
-}
-func consumeString(in []byte) (Value, int, error) {
+func (d *Decoder) parseString(in []byte) (string, int, error) {
var nerr errors.NonFatal
in0 := in
if len(in) == 0 {
- return Value{}, 0, io.ErrUnexpectedEOF
+ return "", 0, io.ErrUnexpectedEOF
}
if in[0] != '"' {
- return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
+ return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
}
in = in[1:]
i := indexNeedEscape(string(in))
@@ -91,15 +73,14 @@
nerr.AppendInvalidUTF8("")
in, out = in[1:], append(out, in[0]) // preserve invalid byte
case r < ' ':
- return Value{}, 0, newSyntaxError("invalid character %q in string", r)
+ return "", 0, d.newSyntaxError("invalid character %q in string", r)
case r == '"':
in = in[1:]
n := len(in0) - len(in)
- v := rawValueOf(string(out), in0[:n:n])
- return v, n, nerr.E
+ return string(out), n, nerr.E
case r == '\\':
if len(in) < 2 {
- return Value{}, 0, io.ErrUnexpectedEOF
+ return "", 0, io.ErrUnexpectedEOF
}
switch r := in[1]; r {
case '"', '\\', '/':
@@ -116,36 +97,37 @@
in, out = in[2:], append(out, '\t')
case 'u':
if len(in) < 6 {
- return Value{}, 0, io.ErrUnexpectedEOF
+ return "", 0, io.ErrUnexpectedEOF
}
v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
if err != nil {
- return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
+ return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
}
in = in[6:]
r := rune(v)
if utf16.IsSurrogate(r) {
if len(in) < 6 {
- return Value{}, 0, io.ErrUnexpectedEOF
+ return "", 0, io.ErrUnexpectedEOF
}
v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
r = utf16.DecodeRune(r, rune(v))
- if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
- return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
+ if in[0] != '\\' || in[1] != 'u' ||
+ r == unicode.ReplacementChar || err != nil {
+ return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:6])
}
in = in[6:]
}
out = append(out, string(r)...)
default:
- return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
+ return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
i := indexNeedEscape(string(in[n:]))
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
- return Value{}, 0, io.ErrUnexpectedEOF
+ return "", 0, io.ErrUnexpectedEOF
}
// indexNeedEscape returns the index of the next character that needs escaping.
diff --git a/internal/encoding/json/types.go b/internal/encoding/json/types.go
new file mode 100644
index 0000000..28901e8
--- /dev/null
+++ b/internal/encoding/json/types.go
@@ -0,0 +1,77 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+// Type represents a type expressible in the JSON format.
+//
+// Every declared Type is a distinct power of two, so several of them can be
+// OR-ed into one mask and membership tested with a single AND.
+type Type uint
+
+const (
+	// The blank identifier absorbs iota == 0, for which (1 << 0) / 2 is 0.
+	// That keeps the zero Type unassigned and gives the constants below the
+	// values 1, 2, 4, and so on.
+	_ Type = (1 << iota) / 2
+	// EOF is presumably reported once the input is exhausted (the decoder
+	// is not part of this file).
+	EOF
+	// Null is the null literal (i.e., "null").
+	Null
+	// Bool is a boolean (i.e., "true" or "false").
+	Bool
+	// Number is a number (e.g., "1.234" or "1e100").
+	Number
+	// String is an escaped string (e.g., `"the quick brown fox"`).
+	String
+	// StartObject is the opening brace of an object ("{").
+	StartObject
+	// EndObject is the closing brace of an object ("}").
+	EndObject
+	// Name is presumably the name of an object member; confirm against
+	// the decoder.
+	Name
+	// StartArray is the opening bracket of an array ("[").
+	StartArray
+	// EndArray is the closing bracket of an array ("]").
+	EndArray
+
+	// comma is only for parsing in between values and should not be exposed.
+	comma
+)
+
+// String returns a short label for t. Structural types are rendered as the
+// character they stand for. Any value that is not exactly one of the
+// declared constants, including the zero Type and OR-ed combinations,
+// yields "<invalid>".
+func (t Type) String() string {
+	switch t {
+	case EOF:
+		return "eof"
+	case Null:
+		return "null"
+	case Bool:
+		return "bool"
+	case Number:
+		return "number"
+	case String:
+		return "string"
+	case StartObject:
+		return "{"
+	case EndObject:
+		return "}"
+	case Name:
+		return "name"
+	case StartArray:
+		return "["
+	case EndArray:
+		return "]"
+	case comma:
+		// comma is unexported, but it is a declared Type and may still be
+		// formatted inside this package; label it rather than "<invalid>".
+		return ","
+	}
+	return "<invalid>"
+}
diff --git a/internal/encoding/json/value.go b/internal/encoding/json/value.go
deleted file mode 100644
index aeff222..0000000
--- a/internal/encoding/json/value.go
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package json implements the JSON format.
-// This package has no semantic understanding for protocol buffers and is only
-// a parser and composer for the format.
-//
-// This follows RFC 7159, with some notable implementation specifics:
-// * numbers that are out of range result in a decoding error
-// * duplicate keys in objects are not rejected
-//
-// Reasons why the standard encoding/json package is not suitable:
-// * information about duplicate keys is lost
-// * invalid UTF-8 is silently coerced into utf8.RuneError
-package json
-
-import (
- "fmt"
- "strings"
-)
-
-// Type represents a type expressible in the JSON format.
-type Type uint8
-
-const (
- _ Type = iota
- // Null is the null literal (i.e., "null").
- Null
- // Bool is a boolean (i.e., "true" or "false").
- Bool
- // Number is a floating-point number (e.g., "1.234" or "1e100").
- Number
- // String is an escaped string (e.g., `"the quick brown fox"`).
- String
- // Array is an ordered list of values (e.g., `[0, "one", true]`).
- Array
- // Object is an ordered map of values (e.g., `{"key": null}`).
- Object
-)
-
-func (t Type) String() string {
- switch t {
- case Null:
- return "null"
- case Bool:
- return "bool"
- case Number:
- return "number"
- case String:
- return "string"
- case Array:
- return "array"
- case Object:
- return "object"
- default:
- return "<invalid>"
- }
-}
-
-// Value contains a value of a given Type.
-type Value struct {
- typ Type
- raw []byte // raw bytes of the serialized data
- str string // only for String
- num float64 // only for Bool or Number
- arr []Value // only for Array
- obj [][2]Value // only for Object
-}
-
-// ValueOf returns a Value for a given Go value:
-// nil => Null
-// bool => Bool
-// int32, int64 => Number
-// uint32, uint64 => Number
-// float32, float64 => Number
-// string, []byte => String
-// []Value => Array
-// [][2]Value => Object
-//
-// ValueOf panics if the Go type is not one of the above.
-func ValueOf(v interface{}) Value {
- switch v := v.(type) {
- case nil:
- return Value{typ: Null}
- case bool:
- if v {
- return Value{typ: Bool, num: 1}
- } else {
- return Value{typ: Bool, num: 0}
- }
- case int32:
- return Value{typ: Number, num: float64(v)}
- case int64:
- return Value{typ: Number, num: float64(v)} // possible loss of precision
- case uint32:
- return Value{typ: Number, num: float64(v)}
- case uint64:
- return Value{typ: Number, num: float64(v)} // possible loss of precision
- case float32:
- return Value{typ: Number, num: float64(v)}
- case float64:
- return Value{typ: Number, num: float64(v)}
- case string:
- return Value{typ: String, str: string(v)}
- case []byte:
- return Value{typ: String, str: string(v)}
- case []Value:
- return Value{typ: Array, arr: v}
- case [][2]Value:
- return Value{typ: Object, obj: v}
- default:
- panic(fmt.Sprintf("invalid type %T", v))
- }
-}
-func rawValueOf(v interface{}, raw []byte) Value {
- v2 := ValueOf(v)
- v2.raw = raw
- return v2
-}
-
-// Type is the type of the value.
-func (v Value) Type() Type {
- return v.typ
-}
-
-// Bool returns v as a bool and panics if it is not a Bool.
-func (v Value) Bool() bool {
- if v.typ != Bool {
- panic("value is not a boolean")
- }
- return v.num != 0
-}
-
-// Number returns v as a float64 and panics if it is not a Number.
-func (v Value) Number() float64 {
- if v.typ != Number {
- panic("value is not a number")
- }
- return v.num
-}
-
-// String returns v as a string if the Type is String.
-// Otherwise, this returns a formatted string of v for debugging purposes.
-//
-// Since JSON strings must be UTF-8, the marshaler and unmarshaler will verify
-// for UTF-8 correctness.
-func (v Value) String() string {
- if v.typ != String {
- return v.stringValue()
- }
- return v.str
-}
-func (v Value) stringValue() string {
- switch v.typ {
- case Null, Bool, Number:
- return string(v.Raw())
- case Array:
- var ss []string
- for _, v := range v.Array() {
- ss = append(ss, v.String())
- }
- return "[" + strings.Join(ss, ",") + "]"
- case Object:
- var ss []string
- for _, v := range v.Object() {
- ss = append(ss, v[0].String()+":"+v[1].String())
- }
- return "{" + strings.Join(ss, ",") + "}"
- default:
- return "<invalid>"
- }
-}
-
-// Array returns the elements of v and panics if the Type is not Array.
-// Mutations on the return value may not be observable from the Raw method.
-func (v Value) Array() []Value {
- if v.typ != Array {
- panic("value is not an array")
- }
- return v.arr
-}
-
-// Object returns the items of v and panics if the Type is not Object.
-// The [2]Value represents a key (of type String) and value pair.
-//
-// Mutations on the return value may not be observable from the Raw method.
-func (v Value) Object() [][2]Value {
- if v.typ != Object {
- panic("value is not an object")
- }
- return v.obj
-}
-
-// Raw returns the raw representation of the value.
-// The returned value may alias the input given to Unmarshal.
-func (v Value) Raw() []byte {
- if len(v.raw) > 0 {
- return v.raw
- }
- p := encoder{}
- if err := p.marshalValue(v); !p.nerr.Merge(err) {
- return []byte("<invalid>")
- }
- return p.out
-}