internal/encoding/json: initial commit of JSON parser/serializer
Package json provides a parser and serializer for the JSON format.
This focuses on the grammar of the format and is agnostic towards specific
semantics of protobuf types.
High-level API:
func Marshal(v Value, indent string) ([]byte, error)
func Unmarshal(b []byte) (Value, error)
type Type uint8
const Null Type ...
type Value struct{ ... }
func ValueOf(v interface{}) Value
func (v Value) Type() Type
func (v Value) Bool() bool
func (v Value) Number() float64
func (v Value) String() string
func (v Value) Array() []Value
func (v Value) Object() [][2]Value
func (v Value) Raw() []byte
Change-Id: I26422f6b3881ef1a11b8aa95160645b1384b27b8
Reviewed-on: https://go-review.googlesource.com/127824
Reviewed-by: Herbie Ong <herbie@google.com>
diff --git a/go.mod b/go.mod
index 9aa5cf4..d94d903 100644
--- a/go.mod
+++ b/go.mod
@@ -1 +1,3 @@
module google.golang.org/proto
+
+require github.com/google/go-cmp v0.2.0
diff --git a/go.sum b/go.sum
index e69de29..5f4f636 100644
--- a/go.sum
+++ b/go.sum
@@ -0,0 +1,2 @@
+github.com/google/go-cmp v0.2.0 h1:+dTQ8DZQJz0Mb/HjFlkptS1FeQ4cWSnN941F8aEG4SQ=
+github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
diff --git a/internal/encoding/json/decode.go b/internal/encoding/json/decode.go
new file mode 100644
index 0000000..4f7ddb6
--- /dev/null
+++ b/internal/encoding/json/decode.go
@@ -0,0 +1,194 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "bytes"
+ "io"
+ "regexp"
+ "unicode/utf8"
+
+ "google.golang.org/proto/internal/errors"
+)
+
+type syntaxError struct{ error }
+
+func newSyntaxError(f string, x ...interface{}) error {
+ return syntaxError{errors.New(f, x...)}
+}
+
+// Unmarshal parses b as the JSON format.
+// It returns a Value, which represents the input as an AST.
+func Unmarshal(b []byte) (Value, error) {
+ p := decoder{in: b}
+ p.consume(0) // trim leading spaces
+ v, err := p.unmarshalValue()
+ if !p.nerr.Merge(err) {
+ if e, ok := err.(syntaxError); ok {
+ b = b[:len(b)-len(p.in)] // consumed input
+ line := bytes.Count(b, []byte("\n")) + 1
+ if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
+ b = b[i+1:]
+ }
+ column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
+ err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
+ }
+ return Value{}, err
+ }
+ if len(p.in) > 0 {
+ return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
+ }
+ return v, p.nerr.E
+}
+
+type decoder struct {
+ nerr errors.NonFatal
+ in []byte
+}
+
+var literalRegexp = regexp.MustCompile("^(null|true|false)")
+
+func (p *decoder) unmarshalValue() (Value, error) {
+ if len(p.in) == 0 {
+ return Value{}, io.ErrUnexpectedEOF
+ }
+ switch p.in[0] {
+ case 'n', 't', 'f':
+ if n := matchWithDelim(literalRegexp, p.in); n > 0 {
+ var v Value
+ switch p.in[0] {
+ case 'n':
+ v = rawValueOf(nil, p.in[:n:n])
+ case 't':
+ v = rawValueOf(true, p.in[:n:n])
+ case 'f':
+ v = rawValueOf(false, p.in[:n:n])
+ }
+ p.consume(n)
+ return v, nil
+ }
+ return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
+ case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+ return p.unmarshalNumber()
+ case '"':
+ return p.unmarshalString()
+ case '[':
+ return p.unmarshalArray()
+ case '{':
+ return p.unmarshalObject()
+ default:
+ return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
+ }
+}
+
+func (p *decoder) unmarshalArray() (Value, error) {
+ b := p.in
+ var elems []Value
+ if err := p.consumeChar('[', "at start of array"); err != nil {
+ return Value{}, err
+ }
+ if len(p.in) > 0 && p.in[0] != ']' {
+ for len(p.in) > 0 {
+ v, err := p.unmarshalValue()
+ if !p.nerr.Merge(err) {
+ return Value{}, err
+ }
+ elems = append(elems, v)
+ if !p.tryConsumeChar(',') {
+ break
+ }
+ }
+ }
+ if err := p.consumeChar(']', "at end of array"); err != nil {
+ return Value{}, err
+ }
+ b = b[:len(b)-len(p.in)]
+ return rawValueOf(elems, b[:len(b):len(b)]), nil
+}
+
+func (p *decoder) unmarshalObject() (Value, error) {
+ b := p.in
+ var items [][2]Value
+ if err := p.consumeChar('{', "at start of object"); err != nil {
+ return Value{}, err
+ }
+ if len(p.in) > 0 && p.in[0] != '}' {
+ for len(p.in) > 0 {
+ k, err := p.unmarshalString()
+ if !p.nerr.Merge(err) {
+ return Value{}, err
+ }
+ if err := p.consumeChar(':', "in object"); err != nil {
+ return Value{}, err
+ }
+ v, err := p.unmarshalValue()
+ if !p.nerr.Merge(err) {
+ return Value{}, err
+ }
+ items = append(items, [2]Value{k, v})
+ if !p.tryConsumeChar(',') {
+ break
+ }
+ }
+ }
+ if err := p.consumeChar('}', "at end of object"); err != nil {
+ return Value{}, err
+ }
+ b = b[:len(b)-len(p.in)]
+ return rawValueOf(items, b[:len(b):len(b)]), nil
+}
+
+func (p *decoder) consumeChar(c byte, msg string) error {
+ if p.tryConsumeChar(c) {
+ return nil
+ }
+ if len(p.in) == 0 {
+ return io.ErrUnexpectedEOF
+ }
+ return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
+}
+
+func (p *decoder) tryConsumeChar(c byte) bool {
+ if len(p.in) > 0 && p.in[0] == c {
+ p.consume(1)
+ return true
+ }
+ return false
+}
+
+// consume consumes n bytes of input and any subsequent whitespace.
+func (p *decoder) consume(n int) {
+ p.in = p.in[n:]
+ for len(p.in) > 0 {
+ switch p.in[0] {
+ case ' ', '\n', '\r', '\t':
+ p.in = p.in[1:]
+ default:
+ return
+ }
+ }
+}
+
+// Any sequence that looks like a non-delimiter (for error reporting).
+var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")
+
+// matchWithDelim matches r with the input b and verifies that the match
+// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
+// As a special case, EOF is considered a delimiter.
+func matchWithDelim(r *regexp.Regexp, b []byte) int {
+ n := len(r.Find(b))
+ if n < len(b) {
+ // Check that the next character is a delimiter.
+ c := b[n]
+ notDelim := (c == '-' || c == '+' || c == '.' || c == '_' ||
+ ('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z') ||
+ ('0' <= c && c <= '9'))
+ if notDelim {
+ return 0
+ }
+ }
+ return n
+}
diff --git a/internal/encoding/json/encode.go b/internal/encoding/json/encode.go
new file mode 100644
index 0000000..aea7753
--- /dev/null
+++ b/internal/encoding/json/encode.go
@@ -0,0 +1,128 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "strings"
+
+ "google.golang.org/proto/internal/errors"
+)
+
+// Marshal serializes v as the JSON format.
+//
+// If indent is a non-empty string, it causes every entry for an Array or Object
+// to be preceded by the indent and followed by a newline.
+func Marshal(v Value, indent string) ([]byte, error) {
+ p := encoder{}
+ if len(indent) > 0 {
+ if strings.Trim(indent, " \t") != "" {
+ return nil, errors.New("indent may only be composed of space and tab characters")
+ }
+ p.indent = indent
+ p.newline = "\n"
+ }
+ err := p.marshalValue(v)
+ if !p.nerr.Merge(err) {
+ return nil, err
+ }
+ return p.out, p.nerr.E
+}
+
+type encoder struct {
+ nerr errors.NonFatal
+ out []byte
+
+ indent string
+ indents []byte
+ newline string // set to "\n" if len(indent) > 0
+}
+
+func (p *encoder) marshalValue(v Value) error {
+ switch v.Type() {
+ case Null:
+ p.out = append(p.out, "null"...)
+ return nil
+ case Bool:
+ if v.Bool() {
+ p.out = append(p.out, "true"...)
+ } else {
+ p.out = append(p.out, "false"...)
+ }
+ return nil
+ case Number:
+ return p.marshalNumber(v)
+ case String:
+ return p.marshalString(v)
+ case Array:
+ return p.marshalArray(v)
+ case Object:
+ return p.marshalObject(v)
+ default:
+ return errors.New("invalid type %v to encode value", v.Type())
+ }
+}
+
+func (p *encoder) marshalArray(v Value) error {
+ if v.Type() != Array {
+ return errors.New("invalid type %v, expected array", v.Type())
+ }
+ elems := v.Array()
+ p.out = append(p.out, '[')
+ p.indents = append(p.indents, p.indent...)
+ if len(elems) > 0 {
+ p.out = append(p.out, p.newline...)
+ }
+ for i, elem := range elems {
+ p.out = append(p.out, p.indents...)
+ if err := p.marshalValue(elem); !p.nerr.Merge(err) {
+ return err
+ }
+ if i < len(elems)-1 {
+ p.out = append(p.out, ',')
+ }
+ p.out = append(p.out, p.newline...)
+ }
+ p.indents = p.indents[:len(p.indents)-len(p.indent)]
+ if len(elems) > 0 {
+ p.out = append(p.out, p.indents...)
+ }
+ p.out = append(p.out, ']')
+ return nil
+}
+
+func (p *encoder) marshalObject(v Value) error {
+ if v.Type() != Object {
+ return errors.New("invalid type %v, expected object", v.Type())
+ }
+ items := v.Object()
+ p.out = append(p.out, '{')
+ p.indents = append(p.indents, p.indent...)
+ if len(items) > 0 {
+ p.out = append(p.out, p.newline...)
+ }
+ for i, item := range items {
+ p.out = append(p.out, p.indents...)
+ if err := p.marshalString(item[0]); !p.nerr.Merge(err) {
+ return err
+ }
+ p.out = append(p.out, ':')
+ if len(p.indent) > 0 {
+ p.out = append(p.out, ' ')
+ }
+ if err := p.marshalValue(item[1]); !p.nerr.Merge(err) {
+ return err
+ }
+ if i < len(items)-1 {
+ p.out = append(p.out, ',')
+ }
+ p.out = append(p.out, p.newline...)
+ }
+ p.indents = p.indents[:len(p.indents)-len(p.indent)]
+ if len(items) > 0 {
+ p.out = append(p.out, p.indents...)
+ }
+ p.out = append(p.out, '}')
+ return nil
+}
diff --git a/internal/encoding/json/json_test.go b/internal/encoding/json/json_test.go
new file mode 100644
index 0000000..3e96fa6
--- /dev/null
+++ b/internal/encoding/json/json_test.go
@@ -0,0 +1,416 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "math"
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "github.com/google/go-cmp/cmp"
+ "github.com/google/go-cmp/cmp/cmpopts"
+)
+
+func Test(t *testing.T) {
+ const space = " \n\r\t"
+ var V = ValueOf
+ type Arr = []Value
+ type Obj = [][2]Value
+
+ tests := []struct {
+ in string
+ wantVal Value
+ wantOut string
+ wantOutIndent string
+ wantErr string
+ }{{
+ in: ``,
+ wantErr: `unexpected EOF`,
+ }, {
+ in: space,
+ wantErr: `unexpected EOF`,
+ }, {
+ in: space + `null` + space,
+ wantVal: V(nil),
+ wantOut: `null`,
+ wantOutIndent: `null`,
+ }, {
+ in: space + `true` + space,
+ wantVal: V(true),
+ wantOut: `true`,
+ wantOutIndent: `true`,
+ }, {
+ in: space + `false` + space,
+ wantVal: V(false),
+ wantOut: `false`,
+ wantOutIndent: `false`,
+ }, {
+ in: space + `0` + space,
+ wantVal: V(0.0),
+ wantOut: `0`,
+ wantOutIndent: `0`,
+ }, {
+ in: space + `"hello"` + space,
+ wantVal: V("hello"),
+ wantOut: `"hello"`,
+ wantOutIndent: `"hello"`,
+ }, {
+ in: space + `[]` + space,
+ wantVal: V(Arr{}),
+ wantOut: `[]`,
+ wantOutIndent: `[]`,
+ }, {
+ in: space + `{}` + space,
+ wantVal: V(Obj{}),
+ wantOut: `{}`,
+ wantOutIndent: `{}`,
+ }, {
+ in: `null#invalid`,
+ wantErr: `8 bytes of unconsumed input`,
+ }, {
+ in: `0#invalid`,
+ wantErr: `8 bytes of unconsumed input`,
+ }, {
+ in: `"hello"#invalid`,
+ wantErr: `8 bytes of unconsumed input`,
+ }, {
+ in: `[]#invalid`,
+ wantErr: `8 bytes of unconsumed input`,
+ }, {
+ in: `{}#invalid`,
+ wantErr: `8 bytes of unconsumed input`,
+ }, {
+ in: `[truee,true]`,
+ wantErr: `invalid "truee" as literal`,
+ }, {
+ in: `[falsee,false]`,
+ wantErr: `invalid "falsee" as literal`,
+ }, {
+ in: `[`,
+ wantErr: `unexpected EOF`,
+ }, {
+ in: `[{}]`,
+ wantVal: V(Arr{V(Obj{})}),
+ wantOut: "[{}]",
+ wantOutIndent: "[\n\t{}\n]",
+ }, {
+ in: `[{]}`,
+ wantErr: `invalid character ']' at start of string`,
+ }, {
+ in: `[,]`,
+ wantErr: `invalid "," as value`,
+ }, {
+ in: `{,}`,
+ wantErr: `invalid character ',' at start of string`,
+ }, {
+ in: `{"key""val"}`,
+ wantErr: `invalid character '"', expected ':' in object`,
+ }, {
+ in: `["elem0""elem1"]`,
+ wantErr: `invalid character '"', expected ']' at end of array`,
+ }, {
+ in: `{"hello"`,
+ wantErr: `unexpected EOF`,
+ }, {
+ in: `{"hello"}`,
+ wantErr: `invalid character '}', expected ':' in object`,
+ }, {
+ in: `{"hello":`,
+ wantErr: `unexpected EOF`,
+ }, {
+ in: `{"hello":}`,
+ wantErr: `invalid "}" as value`,
+ }, {
+ in: `{"hello":"goodbye"`,
+ wantErr: `unexpected EOF`,
+ }, {
+ in: `{"hello":"goodbye"]`,
+ wantErr: `invalid character ']', expected '}' at end of object`,
+ }, {
+ in: `{"hello":"goodbye"}`,
+ wantVal: V(Obj{{V("hello"), V("goodbye")}}),
+ wantOut: `{"hello":"goodbye"}`,
+ wantOutIndent: "{\n\t\"hello\": \"goodbye\"\n}",
+ }, {
+ in: `{"hello":"goodbye",}`,
+ wantErr: `invalid character '}' at start of string`,
+ }, {
+ in: `{"k":"v1","k":"v2"}`,
+ wantVal: V(Obj{
+ {V("k"), V("v1")}, {V("k"), V("v2")},
+ }),
+ wantOut: `{"k":"v1","k":"v2"}`,
+ wantOutIndent: "{\n\t\"k\": \"v1\",\n\t\"k\": \"v2\"\n}",
+ }, {
+ in: `{"k":{"k":{"k":"v"}}}`,
+ wantVal: V(Obj{
+ {V("k"), V(Obj{
+ {V("k"), V(Obj{
+ {V("k"), V("v")},
+ })},
+ })},
+ }),
+ wantOut: `{"k":{"k":{"k":"v"}}}`,
+ wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v\"\n\t\t}\n\t}\n}",
+ }, {
+ in: `{"k":{"k":{"k":"v1","k":"v2"}}}`,
+ wantVal: V(Obj{
+ {V("k"), V(Obj{
+ {V("k"), V(Obj{
+ {V("k"), V("v1")},
+ {V("k"), V("v2")},
+ })},
+ })},
+ }),
+ wantOut: `{"k":{"k":{"k":"v1","k":"v2"}}}`,
+ wantOutIndent: "{\n\t\"k\": {\n\t\t\"k\": {\n\t\t\t\"k\": \"v1\",\n\t\t\t\"k\": \"v2\"\n\t\t}\n\t}\n}",
+ }, {
+ in: " x",
+ wantErr: `syntax error (line 1:3)`,
+ }, {
+ in: `["💩"x`,
+ wantErr: `syntax error (line 1:5)`,
+ }, {
+ in: "\n\n[\"🔥🔥🔥\"x",
+ wantErr: `syntax error (line 3:7)`,
+ }, {
+ in: `["👍🏻👍🏿"x`,
+ wantErr: `syntax error (line 1:8)`, // multi-rune emojis; could be column:6
+ }, {
+ in: "\"\x00\"",
+ wantErr: `invalid character '\x00' in string`,
+ }, {
+ in: "\"\xff\"",
+ wantErr: `invalid UTF-8 detected`,
+ wantVal: V(string("\xff")),
+ }, {
+ in: `"` + string(utf8.RuneError) + `"`,
+ wantVal: V(string(utf8.RuneError)),
+ wantOut: `"` + string(utf8.RuneError) + `"`,
+ }, {
+ in: `"\uFFFD"`,
+ wantVal: V(string(utf8.RuneError)),
+ wantOut: `"` + string(utf8.RuneError) + `"`,
+ }, {
+ in: `"\x"`,
+ wantErr: `invalid escape code "\\x" in string`,
+ }, {
+ in: `"\uXXXX"`,
+ wantErr: `invalid escape code "\\uXXXX" in string`,
+ }, {
+ in: `"\uDEAD"`, // unmatched surrogate pair
+ wantErr: `unexpected EOF`,
+ }, {
+ in: `"\uDEAD\uBEEF"`, // invalid surrogate half
+ wantErr: `invalid escape code "\\uBEEF" in string`,
+ }, {
+ in: `"\uD800\udead"`, // valid surrogate pair
+ wantVal: V("𐊭"),
+ wantOut: `"𐊭"`,
+ }, {
+ in: `"\u0000\"\\\/\b\f\n\r\t"`,
+ wantVal: V("\u0000\"\\/\b\f\n\r\t"),
+ wantOut: `"\u0000\"\\/\b\f\n\r\t"`,
+ }, {
+ in: `-`,
+ wantErr: `invalid "-" as number`,
+ }, {
+ in: `-0`,
+ wantVal: V(math.Copysign(0, -1)),
+ wantOut: `-0`,
+ }, {
+ in: `+0`,
+ wantErr: `invalid "+0" as value`,
+ }, {
+ in: `-+`,
+ wantErr: `invalid "-+" as number`,
+ }, {
+ in: `0.`,
+ wantErr: `invalid "0." as number`,
+ }, {
+ in: `.1`,
+ wantErr: `invalid ".1" as value`,
+ }, {
+ in: `0.e1`,
+ wantErr: `invalid "0.e1" as number`,
+ }, {
+ in: `0.0`,
+ wantVal: V(0.0),
+ wantOut: "0",
+ }, {
+ in: `01`,
+ wantErr: `invalid "01" as number`,
+ }, {
+ in: `0e`,
+ wantErr: `invalid "0e" as number`,
+ }, {
+ in: `0e0`,
+ wantVal: V(0.0),
+ wantOut: "0",
+ }, {
+ in: `0E0`,
+ wantVal: V(0.0),
+ wantOut: "0",
+ }, {
+ in: `0Ee`,
+ wantErr: `invalid "0Ee" as number`,
+ }, {
+ in: `-1.0E+1`,
+ wantVal: V(-10.0),
+ wantOut: "-10",
+ }, {
+ in: `
+ {
+ "firstName" : "John",
+ "lastName" : "Smith" ,
+ "isAlive" : true,
+ "age" : 27,
+ "address" : {
+ "streetAddress" : "21 2nd Street" ,
+ "city" : "New York" ,
+ "state" : "NY" ,
+ "postalCode" : "10021-3100"
+ },
+ "phoneNumbers" : [
+ {
+ "type" : "home" ,
+ "number" : "212 555-1234"
+ } ,
+ {
+ "type" : "office" ,
+ "number" : "646 555-4567"
+ } ,
+ {
+ "type" : "mobile" ,
+ "number" : "123 456-7890"
+ }
+ ],
+ "children" : [] ,
+ "spouse" : null
+ }
+ `,
+ wantVal: V(Obj{
+ {V("firstName"), V("John")},
+ {V("lastName"), V("Smith")},
+ {V("isAlive"), V(true)},
+ {V("age"), V(27.0)},
+ {V("address"), V(Obj{
+ {V("streetAddress"), V("21 2nd Street")},
+ {V("city"), V("New York")},
+ {V("state"), V("NY")},
+ {V("postalCode"), V("10021-3100")},
+ })},
+ {V("phoneNumbers"), V(Arr{
+ V(Obj{
+ {V("type"), V("home")},
+ {V("number"), V("212 555-1234")},
+ }),
+ V(Obj{
+ {V("type"), V("office")},
+ {V("number"), V("646 555-4567")},
+ }),
+ V(Obj{
+ {V("type"), V("mobile")},
+ {V("number"), V("123 456-7890")},
+ }),
+ })},
+ {V("children"), V(Arr{})},
+ {V("spouse"), V(nil)},
+ }),
+ wantOut: `{"firstName":"John","lastName":"Smith","isAlive":true,"age":27,"address":{"streetAddress":"21 2nd Street","city":"New York","state":"NY","postalCode":"10021-3100"},"phoneNumbers":[{"type":"home","number":"212 555-1234"},{"type":"office","number":"646 555-4567"},{"type":"mobile","number":"123 456-7890"}],"children":[],"spouse":null}`,
+ wantOutIndent: `{
+ "firstName": "John",
+ "lastName": "Smith",
+ "isAlive": true,
+ "age": 27,
+ "address": {
+ "streetAddress": "21 2nd Street",
+ "city": "New York",
+ "state": "NY",
+ "postalCode": "10021-3100"
+ },
+ "phoneNumbers": [
+ {
+ "type": "home",
+ "number": "212 555-1234"
+ },
+ {
+ "type": "office",
+ "number": "646 555-4567"
+ },
+ {
+ "type": "mobile",
+ "number": "123 456-7890"
+ }
+ ],
+ "children": [],
+ "spouse": null
+}`,
+ }}
+
+ opts := cmp.Options{
+ cmpopts.EquateEmpty(),
+ cmp.Transformer("", func(v Value) interface{} {
+ switch v.typ {
+ case 0:
+ return nil // special case so Value{} == Value{}
+ case Null:
+ return nil
+ case Bool:
+ return v.Bool()
+ case Number:
+ return v.Number()
+ case String:
+ return v.String()
+ case Array:
+ return v.Array()
+ case Object:
+ return v.Object()
+ default:
+ panic("invalid type")
+ }
+ }),
+ }
+ for _, tt := range tests {
+ t.Run("", func(t *testing.T) {
+ if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
+ gotVal, err := Unmarshal([]byte(tt.in))
+ if err == nil {
+ if tt.wantErr != "" {
+ t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
+ }
+ } else {
+ if tt.wantErr == "" {
+ t.Errorf("Unmarshal(): got %v, want nil error", err)
+ } else if !strings.Contains(err.Error(), tt.wantErr) {
+ t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
+ }
+ }
+ if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
+ t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
+ }
+ }
+ if tt.wantOut != "" {
+ gotOut, err := Marshal(tt.wantVal, "")
+ if err != nil {
+ t.Errorf("Marshal(): got %v, want nil error", err)
+ }
+ if string(gotOut) != tt.wantOut {
+ t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
+ }
+ }
+ if tt.wantOutIndent != "" {
+ gotOut, err := Marshal(tt.wantVal, "\t")
+ if err != nil {
+ t.Errorf("Marshal(Indent): got %v, want nil error", err)
+ }
+ if string(gotOut) != tt.wantOutIndent {
+ t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
+ }
+ }
+ })
+ }
+}
diff --git a/internal/encoding/json/number.go b/internal/encoding/json/number.go
new file mode 100644
index 0000000..7d27553
--- /dev/null
+++ b/internal/encoding/json/number.go
@@ -0,0 +1,79 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "io"
+ "math"
+ "regexp"
+ "strconv"
+
+ "google.golang.org/proto/internal/errors"
+)
+
+// marshalNumber encodes v as a Number.
+func (p *encoder) marshalNumber(v Value) error {
+ var err error
+ p.out, err = appendNumber(p.out, v)
+ return err
+}
+func appendNumber(out []byte, v Value) ([]byte, error) {
+ if v.Type() != Number {
+ return nil, errors.New("invalid type %v, expected number", v.Type())
+ }
+ if len(v.raw) > 0 {
+ return append(out, v.raw...), nil
+ }
+ n := v.Number()
+ if math.IsInf(n, 0) || math.IsNaN(n) {
+ return nil, errors.New("invalid number value: %v", n)
+ }
+
+ // JSON number formatting logic based on encoding/json.
+ // See floatEncoder.encode for reference.
+ bits := 64
+ if float64(float32(n)) == n {
+ bits = 32
+ }
+ fmt := byte('f')
+ if abs := math.Abs(n); abs != 0 {
+ if bits == 64 && (abs < 1e-6 || abs >= 1e21) || bits == 32 && (float32(abs) < 1e-6 || float32(abs) >= 1e21) {
+ fmt = 'e'
+ }
+ }
+ out = strconv.AppendFloat(out, n, fmt, -1, bits)
+ if fmt == 'e' {
+ n := len(out)
+ if n >= 4 && out[n-4] == 'e' && out[n-3] == '-' && out[n-2] == '0' {
+ out[n-2] = out[n-1]
+ out = out[:n-1]
+ }
+ }
+ return out, nil
+}
+
+// Exact expression to match a JSON floating-point number.
+// JSON's grammar for floats is more restrictive than Go's grammar.
+var floatRegexp = regexp.MustCompile("^-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?")
+
+// unmarshalNumber decodes a Number from the input.
+func (p *decoder) unmarshalNumber() (Value, error) {
+ v, n, err := consumeNumber(p.in)
+ p.consume(n)
+ return v, err
+}
+func consumeNumber(in []byte) (Value, int, error) {
+ if len(in) == 0 {
+ return Value{}, 0, io.ErrUnexpectedEOF
+ }
+ if n := matchWithDelim(floatRegexp, in); n > 0 {
+ v, err := strconv.ParseFloat(string(in[:n]), 64)
+ if err != nil {
+ return Value{}, 0, err
+ }
+ return rawValueOf(v, in[:n:n]), n, nil
+ }
+ return Value{}, 0, newSyntaxError("invalid %q as number", errRegexp.Find(in))
+}
diff --git a/internal/encoding/json/string.go b/internal/encoding/json/string.go
new file mode 100644
index 0000000..ad80e85
--- /dev/null
+++ b/internal/encoding/json/string.go
@@ -0,0 +1,160 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package json
+
+import (
+ "io"
+ "math/bits"
+ "strconv"
+ "unicode"
+ "unicode/utf16"
+ "unicode/utf8"
+
+ "google.golang.org/proto/internal/errors"
+)
+
+func (p *encoder) marshalString(v Value) error {
+ var err error
+ p.out, err = appendString(p.out, v)
+ return err
+}
+func appendString(out []byte, v Value) ([]byte, error) {
+ if v.Type() != String {
+ return nil, errors.New("invalid type %v, expected string", v.Type())
+ }
+ if len(v.raw) > 0 {
+ return append(out, v.raw...), nil
+ }
+ in := v.String()
+
+ var nerr errors.NonFatal
+ out = append(out, '"')
+ i := indexNeedEscape(in)
+ in, out = in[i:], append(out, in[:i]...)
+ for len(in) > 0 {
+ switch r, n := utf8.DecodeRuneInString(in); {
+ case r == utf8.RuneError && n == 1:
+ nerr.AppendInvalidUTF8("")
+ in, out = in[1:], append(out, in[0]) // preserve invalid byte
+ case r < ' ' || r == '"' || r == '\\':
+ out = append(out, '\\')
+ switch r {
+ case '"', '\\':
+ out = append(out, byte(r))
+ case '\b':
+ out = append(out, 'b')
+ case '\f':
+ out = append(out, 'f')
+ case '\n':
+ out = append(out, 'n')
+ case '\r':
+ out = append(out, 'r')
+ case '\t':
+ out = append(out, 't')
+ default:
+ out = append(out, 'u')
+ out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
+ out = strconv.AppendUint(out, uint64(r), 16)
+ }
+ in = in[n:]
+ default:
+ i := indexNeedEscape(in[n:])
+ in, out = in[n+i:], append(out, in[:n+i]...)
+ }
+ }
+ out = append(out, '"')
+ return out, nerr.E
+}
+
+func (p *decoder) unmarshalString() (Value, error) {
+ v, n, err := consumeString(p.in)
+ p.consume(n)
+ return v, err
+}
+func consumeString(in []byte) (Value, int, error) {
+ var nerr errors.NonFatal
+ in0 := in
+ if len(in) == 0 {
+ return Value{}, 0, io.ErrUnexpectedEOF
+ }
+ if in[0] != '"' {
+ return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
+ }
+ in = in[1:]
+ i := indexNeedEscape(string(in))
+ in, out := in[i:], in[:i:i] // set cap to prevent mutations
+ for len(in) > 0 {
+ switch r, n := utf8.DecodeRune(in); {
+ case r == utf8.RuneError && n == 1:
+ nerr.AppendInvalidUTF8("")
+ in, out = in[1:], append(out, in[0]) // preserve invalid byte
+ case r < ' ':
+ return Value{}, 0, newSyntaxError("invalid character %q in string", r)
+ case r == '"':
+ in = in[1:]
+ n := len(in0) - len(in)
+ v := rawValueOf(string(out), in0[:n:n])
+ return v, n, nerr.E
+ case r == '\\':
+ if len(in) < 2 {
+ return Value{}, 0, io.ErrUnexpectedEOF
+ }
+ switch r := in[1]; r {
+ case '"', '\\', '/':
+ in, out = in[2:], append(out, r)
+ case 'b':
+ in, out = in[2:], append(out, '\b')
+ case 'f':
+ in, out = in[2:], append(out, '\f')
+ case 'n':
+ in, out = in[2:], append(out, '\n')
+ case 'r':
+ in, out = in[2:], append(out, '\r')
+ case 't':
+ in, out = in[2:], append(out, '\t')
+ case 'u':
+ if len(in) < 6 {
+ return Value{}, 0, io.ErrUnexpectedEOF
+ }
+ v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
+ if err != nil {
+ return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
+ }
+ in = in[6:]
+
+ r := rune(v)
+ if utf16.IsSurrogate(r) {
+ if len(in) < 6 {
+ return Value{}, 0, io.ErrUnexpectedEOF
+ }
+ v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
+ r = utf16.DecodeRune(r, rune(v))
+ if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
+ return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:6])
+ }
+ in = in[6:]
+ }
+ out = append(out, string(r)...)
+ default:
+ return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
+ }
+ default:
+ i := indexNeedEscape(string(in[n:]))
+ in, out = in[n+i:], append(out, in[:n+i]...)
+ }
+ }
+ return Value{}, 0, io.ErrUnexpectedEOF
+}
+
+// indexNeedEscape returns the index of the next character that needs escaping.
+// If no characters need escaping, this returns the input length.
+func indexNeedEscape(s string) int {
+ for i, r := range s {
+ if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
+ return i
+ }
+ }
+ return len(s)
+}
diff --git a/internal/encoding/json/value.go b/internal/encoding/json/value.go
new file mode 100644
index 0000000..aeff222
--- /dev/null
+++ b/internal/encoding/json/value.go
@@ -0,0 +1,206 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package json implements the JSON format.
+// This package has no semantic understanding for protocol buffers and is only
+// a parser and composer for the format.
+//
+// This follows RFC 7159, with some notable implementation specifics:
+// * numbers that are out of range result in a decoding error
+// * duplicate keys in objects are not rejected
+//
+// Reasons why the standard encoding/json package is not suitable:
+// * information about duplicate keys is lost
+// * invalid UTF-8 is silently coerced into utf8.RuneError
+package json
+
+import (
+ "fmt"
+ "strings"
+)
+
+// Type represents a type expressible in the JSON format.
+type Type uint8
+
+const (
+ _ Type = iota
+ // Null is the null literal (i.e., "null").
+ Null
+ // Bool is a boolean (i.e., "true" or "false").
+ Bool
+ // Number is a floating-point number (e.g., "1.234" or "1e100").
+ Number
+ // String is an escaped string (e.g., `"the quick brown fox"`).
+ String
+ // Array is an ordered list of values (e.g., `[0, "one", true]`).
+ Array
+ // Object is an ordered map of values (e.g., `{"key": null}`).
+ Object
+)
+
+func (t Type) String() string {
+ switch t {
+ case Null:
+ return "null"
+ case Bool:
+ return "bool"
+ case Number:
+ return "number"
+ case String:
+ return "string"
+ case Array:
+ return "array"
+ case Object:
+ return "object"
+ default:
+ return "<invalid>"
+ }
+}
+
+// Value contains a value of a given Type.
+type Value struct {
+ typ Type
+ raw []byte // raw bytes of the serialized data
+ str string // only for String
+ num float64 // only for Bool or Number
+ arr []Value // only for Array
+ obj [][2]Value // only for Object
+}
+
+// ValueOf returns a Value for a given Go value:
+// nil => Null
+// bool => Bool
+// int32, int64 => Number
+// uint32, uint64 => Number
+// float32, float64 => Number
+// string, []byte => String
+// []Value => Array
+// [][2]Value => Object
+//
+// ValueOf panics if the Go type is not one of the above.
+func ValueOf(v interface{}) Value {
+ switch v := v.(type) {
+ case nil:
+ return Value{typ: Null}
+ case bool:
+ if v {
+ return Value{typ: Bool, num: 1}
+ } else {
+ return Value{typ: Bool, num: 0}
+ }
+ case int32:
+ return Value{typ: Number, num: float64(v)}
+ case int64:
+ return Value{typ: Number, num: float64(v)} // possible loss of precision
+ case uint32:
+ return Value{typ: Number, num: float64(v)}
+ case uint64:
+ return Value{typ: Number, num: float64(v)} // possible loss of precision
+ case float32:
+ return Value{typ: Number, num: float64(v)}
+ case float64:
+ return Value{typ: Number, num: float64(v)}
+ case string:
+ return Value{typ: String, str: string(v)}
+ case []byte:
+ return Value{typ: String, str: string(v)}
+ case []Value:
+ return Value{typ: Array, arr: v}
+ case [][2]Value:
+ return Value{typ: Object, obj: v}
+ default:
+ panic(fmt.Sprintf("invalid type %T", v))
+ }
+}
+func rawValueOf(v interface{}, raw []byte) Value {
+ v2 := ValueOf(v)
+ v2.raw = raw
+ return v2
+}
+
+// Type is the type of the value.
+func (v Value) Type() Type {
+ return v.typ
+}
+
+// Bool returns v as a bool and panics if it is not a Bool.
+func (v Value) Bool() bool {
+ if v.typ != Bool {
+ panic("value is not a boolean")
+ }
+ return v.num != 0
+}
+
+// Number returns v as a float64 and panics if it is not a Number.
+func (v Value) Number() float64 {
+ if v.typ != Number {
+ panic("value is not a number")
+ }
+ return v.num
+}
+
+// String returns v as a string if the Type is String.
+// Otherwise, this returns a formatted string of v for debugging purposes.
+//
+// Since JSON strings must be UTF-8, the marshaler and unmarshaler verify
+// that string contents are valid UTF-8.
+func (v Value) String() string {
+ if v.typ != String {
+ return v.stringValue()
+ }
+ return v.str
+}
+func (v Value) stringValue() string {
+ switch v.typ {
+ case Null, Bool, Number:
+ return string(v.Raw())
+ case Array:
+ var ss []string
+ for _, v := range v.Array() {
+ ss = append(ss, v.String())
+ }
+ return "[" + strings.Join(ss, ",") + "]"
+ case Object:
+ var ss []string
+ for _, v := range v.Object() {
+ ss = append(ss, v[0].String()+":"+v[1].String())
+ }
+ return "{" + strings.Join(ss, ",") + "}"
+ default:
+ return "<invalid>"
+ }
+}
+
+// Array returns the elements of v and panics if the Type is not Array.
+// Mutations on the return value may not be observable from the Raw method.
+func (v Value) Array() []Value {
+ if v.typ != Array {
+ panic("value is not an array")
+ }
+ return v.arr
+}
+
+// Object returns the items of v and panics if the Type is not Object.
+// The [2]Value represents a key (of type String) and value pair.
+//
+// Mutations on the return value may not be observable from the Raw method.
+func (v Value) Object() [][2]Value {
+ if v.typ != Object {
+ panic("value is not an object")
+ }
+ return v.obj
+}
+
+// Raw returns the raw representation of the value.
+// The returned value may alias the input given to Unmarshal.
+func (v Value) Raw() []byte {
+ if len(v.raw) > 0 {
+ return v.raw
+ }
+ p := encoder{}
+ if err := p.marshalValue(v); !p.nerr.Merge(err) {
+ return []byte("<invalid>")
+ }
+ return p.out
+}