internal/encoding/json/decode.go - protobuf - Git at Google

 // Copyright 2018 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package json

 import (
 	"bytes"
 	"io"
 	"regexp"
 	"unicode/utf8"

 	"github.com/golang/protobuf/v2/internal/errors"
 )

 // syntaxError wraps an error created by newSyntaxError. Unmarshal detects it
 // with a type assertion and re-reports the embedded error prefixed with the
 // line and column at which parsing stopped.
 type syntaxError struct{ error }

 func newSyntaxError(f string, x ...interface{}) error {
 	return syntaxError{errors.New(f, x...)}
 }

 // Unmarshal parses b as the JSON format.
 // It returns a Value, which represents the input as an AST.
 //
 // Leading whitespace is skipped before the value is parsed. When the error
 // from parsing is rejected by the decoder's Merge and is a syntaxError, it is
 // re-reported with the 1-based line and column (counted in runes) of the
 // position reached in b; any other rejected error is returned unchanged.
 // Input left over after the value is reported as an error. On success the
 // value is returned together with whatever the decoder accumulated in nerr.E.
 func Unmarshal(b []byte) (Value, error) {
 	dec := decoder{in: b}
 	dec.consume(0) // trim leading spaces
 	val, err := dec.unmarshalValue()
 	if accepted := dec.nerr.Merge(err); accepted {
 		if rest := len(dec.in); rest > 0 {
 			return Value{}, errors.New("%d bytes of unconsumed input", rest)
 		}
 		return val, dec.nerr.E
 	}
 	synErr, isSyntax := err.(syntaxError)
 	if !isSyntax {
 		return Value{}, err
 	}
 	// Everything the decoder has already consumed locates the failure.
 	consumed := b[:len(b)-len(dec.in)]
 	line := 1 + bytes.Count(consumed, []byte("\n"))
 	lastLine := consumed
 	if nl := bytes.LastIndexByte(consumed, '\n'); nl >= 0 {
 		lastLine = consumed[nl+1:]
 	}
 	// Columns count runes, so a multi-byte character advances by one.
 	column := 1 + utf8.RuneCount(lastLine)
 	return Value{}, errors.New("syntax error (line %d:%d): %v", line, column, synErr.error)
 }

 // decoder holds the parsing state for a single Unmarshal call.
 type decoder struct {
 	// nerr receives every error produced while parsing via its Merge method;
 	// Unmarshal returns nerr.E alongside a successfully parsed value.
 	// NOTE(review): presumably only non-fatal errors are retained, per the
 	// type name — confirm against the errors package.
 	nerr errors.NonFatal
 	// in is the input that has not been consumed yet; consume advances it.
 	in   []byte
 }

 // literalRegexp matches one of the JSON literals null, true, or false at the
 // very start of the input.
 var literalRegexp = regexp.MustCompile(`^(null|true|false)`)

 // unmarshalValue parses the single JSON value at the front of p.in, choosing
 // the parser from its first byte: '"' for strings, '[' for arrays, '{' for
 // objects, '-' or a digit for numbers, and 'n', 't' or 'f' for the literals
 // null, true and false. Empty input yields io.ErrUnexpectedEOF; any other
 // leading byte, or a malformed literal, yields a syntax error.
 func (p *decoder) unmarshalValue() (Value, error) {
 	if len(p.in) == 0 {
 		return Value{}, io.ErrUnexpectedEOF
 	}
 	first := p.in[0]
 	switch {
 	case first == '"':
 		return p.unmarshalString()
 	case first == '[':
 		return p.unmarshalArray()
 	case first == '{':
 		return p.unmarshalObject()
 	case first == '-' || ('0' <= first && first <= '9'):
 		return p.unmarshalNumber()
 	case first == 'n' || first == 't' || first == 'f':
 		// The literal must be followed by a delimiter (or EOF) to be valid.
 		n := matchWithDelim(literalRegexp, p.in)
 		if n <= 0 {
 			return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
 		}
 		// Cap the raw slice so later appends cannot write into the input.
 		raw := p.in[:n:n]
 		var lit Value
 		switch first {
 		case 'n':
 			lit = rawValueOf(nil, raw)
 		case 't':
 			lit = rawValueOf(true, raw)
 		case 'f':
 			lit = rawValueOf(false, raw)
 		}
 		p.consume(n)
 		return lit, nil
 	}
 	return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
 }

 // unmarshalArray parses a JSON array: '[', zero or more values separated by
 // ',', then ']'. The returned Value carries the parsed elements together with
 // the bytes consumed for the array, including any whitespace skipped after
 // the closing bracket.
 func (p *decoder) unmarshalArray() (Value, error) {
 	start := p.in
 	if err := p.consumeChar('[', "at start of array"); err != nil {
 		return Value{}, err
 	}
 	var elems []Value
 	// An immediately closed array (or input that ends here) has no elements.
 	more := len(p.in) > 0 && p.in[0] != ']'
 	for more && len(p.in) > 0 {
 		elem, err := p.unmarshalValue()
 		if !p.nerr.Merge(err) {
 			return Value{}, err
 		}
 		elems = append(elems, elem)
 		// Another element is expected only after a comma.
 		more = p.tryConsumeChar(',')
 	}
 	if err := p.consumeChar(']', "at end of array"); err != nil {
 		return Value{}, err
 	}
 	n := len(start) - len(p.in)
 	return rawValueOf(elems, start[:n:n]), nil
 }

 // unmarshalObject parses a JSON object: '{', zero or more "key": value
 // members separated by ',', then '}'. Members are kept as {key, value} pairs
 // in input order. The returned Value also carries the bytes consumed for the
 // object, including any whitespace skipped after the closing brace.
 func (p *decoder) unmarshalObject() (Value, error) {
 	start := p.in
 	if err := p.consumeChar('{', "at start of object"); err != nil {
 		return Value{}, err
 	}
 	var items [][2]Value
 	// An immediately closed object (or input that ends here) has no members.
 	more := len(p.in) > 0 && p.in[0] != '}'
 	for more && len(p.in) > 0 {
 		key, err := p.unmarshalString()
 		if !p.nerr.Merge(err) {
 			return Value{}, err
 		}
 		if err := p.consumeChar(':', "in object"); err != nil {
 			return Value{}, err
 		}
 		val, err := p.unmarshalValue()
 		if !p.nerr.Merge(err) {
 			return Value{}, err
 		}
 		items = append(items, [2]Value{key, val})
 		// Another member is expected only after a comma.
 		more = p.tryConsumeChar(',')
 	}
 	if err := p.consumeChar('}', "at end of object"); err != nil {
 		return Value{}, err
 	}
 	n := len(start) - len(p.in)
 	return rawValueOf(items, start[:n:n]), nil
 }

 // consumeChar consumes the byte c, plus any whitespace after it, from the
 // front of the input. If the input is exhausted it returns
 // io.ErrUnexpectedEOF; if a different byte is found it returns a syntax error
 // whose text ends with msg.
 func (p *decoder) consumeChar(c byte, msg string) error {
 	switch {
 	case p.tryConsumeChar(c):
 		return nil
 	case len(p.in) == 0:
 		return io.ErrUnexpectedEOF
 	default:
 		return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
 	}
 }

 // tryConsumeChar reports whether the next input byte is c. When it is, the
 // byte and any whitespace following it are consumed; otherwise the input is
 // left untouched.
 func (p *decoder) tryConsumeChar(c byte) bool {
 	if len(p.in) == 0 || p.in[0] != c {
 		return false
 	}
 	p.consume(1)
 	return true
 }

 // consume drops the first n bytes of the input and then skips every space,
 // newline, carriage return and tab that follows.
 func (p *decoder) consume(n int) {
 	rest := p.in[n:]
 	skip := 0
 	for skip < len(rest) {
 		c := rest[skip]
 		if c != ' ' && c != '\n' && c != '\r' && c != '\t' {
 			break
 		}
 		skip++
 	}
 	p.in = rest[skip:]
 }

 // errRegexp extracts the offending token for error messages: a run of up to
 // 32 non-delimiter characters (letters, digits, '-', '+', '.', '_'), or
 // failing that a single character.
 var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9]{1,32}|.)`)

 // matchWithDelim returns the length of the match of r at the start of b, but
 // only when that match is followed by a delimiter, i.e. any byte outside
 // [-+_.a-zA-Z0-9]. Reaching the end of b also counts as a delimiter.
 // It returns 0 when r does not match or the match is not delimited.
 func matchWithDelim(r *regexp.Regexp, b []byte) int {
 	n := len(r.Find(b))
 	if n == len(b) {
 		// The match runs to EOF, which counts as a delimiter.
 		return n
 	}
 	switch next := b[n]; {
 	case next == '-', next == '+', next == '.', next == '_':
 		return 0
 	case 'a' <= next && next <= 'z',
 		'A' <= next && next <= 'Z',
 		'0' <= next && next <= '9':
 		return 0
 	}
 	return n
 }
	// Copyright 2018 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package json

	import (
	"bytes"
	"io"
	"regexp"
	"unicode/utf8"

	"github.com/golang/protobuf/v2/internal/errors"
	)

	// syntaxError wraps an error created by newSyntaxError. Unmarshal detects it
	// with a type assertion and re-reports the embedded error prefixed with the
	// line and column at which parsing stopped.
	type syntaxError struct{ error }

	func newSyntaxError(f string, x ...interface{}) error {
	return syntaxError{errors.New(f, x...)}
	}

	// Unmarshal parses b as the JSON format.
	// It returns a Value, which represents the input as an AST.
	//
	// Leading whitespace is skipped before the value is parsed. When the error
	// from parsing is rejected by the decoder's Merge and is a syntaxError, it is
	// re-reported with the 1-based line and column (counted in runes) of the
	// position reached in b; any other rejected error is returned unchanged.
	// Input left over after the value is reported as an error. On success the
	// value is returned together with whatever the decoder accumulated in nerr.E.
	func Unmarshal(b []byte) (Value, error) {
		dec := decoder{in: b}
		dec.consume(0) // trim leading spaces
		val, err := dec.unmarshalValue()
		if accepted := dec.nerr.Merge(err); accepted {
			if rest := len(dec.in); rest > 0 {
				return Value{}, errors.New("%d bytes of unconsumed input", rest)
			}
			return val, dec.nerr.E
		}
		synErr, isSyntax := err.(syntaxError)
		if !isSyntax {
			return Value{}, err
		}
		// Everything the decoder has already consumed locates the failure.
		consumed := b[:len(b)-len(dec.in)]
		line := 1 + bytes.Count(consumed, []byte("\n"))
		lastLine := consumed
		if nl := bytes.LastIndexByte(consumed, '\n'); nl >= 0 {
			lastLine = consumed[nl+1:]
		}
		// Columns count runes, so a multi-byte character advances by one.
		column := 1 + utf8.RuneCount(lastLine)
		return Value{}, errors.New("syntax error (line %d:%d): %v", line, column, synErr.error)
	}

	// decoder holds the parsing state for a single Unmarshal call.
	type decoder struct {
	// nerr receives every error produced while parsing via its Merge method;
	// Unmarshal returns nerr.E alongside a successfully parsed value.
	// NOTE(review): presumably only non-fatal errors are retained, per the
	// type name — confirm against the errors package.
	nerr errors.NonFatal
	// in is the input that has not been consumed yet; consume advances it.
	in []byte
	}

	// literalRegexp matches one of the JSON literals null, true, or false at the
	// very start of the input. The alternation uses plain '|': a backslash
	// before '|' is not a valid escape in an interpreted Go string literal.
	var literalRegexp = regexp.MustCompile("^(null|true|false)")

	// unmarshalValue parses the single JSON value at the front of p.in, choosing
	// the parser from its first byte: '"' for strings, '[' for arrays, '{' for
	// objects, '-' or a digit for numbers, and 'n', 't' or 'f' for the literals
	// null, true and false. Empty input yields io.ErrUnexpectedEOF; any other
	// leading byte, or a malformed literal, yields a syntax error.
	func (p *decoder) unmarshalValue() (Value, error) {
		if len(p.in) == 0 {
			return Value{}, io.ErrUnexpectedEOF
		}
		first := p.in[0]
		switch {
		case first == '"':
			return p.unmarshalString()
		case first == '[':
			return p.unmarshalArray()
		case first == '{':
			return p.unmarshalObject()
		case first == '-' || ('0' <= first && first <= '9'):
			return p.unmarshalNumber()
		case first == 'n' || first == 't' || first == 'f':
			// The literal must be followed by a delimiter (or EOF) to be valid.
			n := matchWithDelim(literalRegexp, p.in)
			if n <= 0 {
				return Value{}, newSyntaxError("invalid %q as literal", errRegexp.Find(p.in))
			}
			// Cap the raw slice so later appends cannot write into the input.
			raw := p.in[:n:n]
			var lit Value
			switch first {
			case 'n':
				lit = rawValueOf(nil, raw)
			case 't':
				lit = rawValueOf(true, raw)
			case 'f':
				lit = rawValueOf(false, raw)
			}
			p.consume(n)
			return lit, nil
		}
		return Value{}, newSyntaxError("invalid %q as value", errRegexp.Find(p.in))
	}

	// unmarshalArray parses a JSON array: '[', zero or more values separated by
	// ',', then ']'. The returned Value carries the parsed elements together with
	// the bytes consumed for the array, including any whitespace skipped after
	// the closing bracket.
	func (p *decoder) unmarshalArray() (Value, error) {
		start := p.in
		if err := p.consumeChar('[', "at start of array"); err != nil {
			return Value{}, err
		}
		var elems []Value
		// An immediately closed array (or input that ends here) has no elements.
		more := len(p.in) > 0 && p.in[0] != ']'
		for more && len(p.in) > 0 {
			elem, err := p.unmarshalValue()
			if !p.nerr.Merge(err) {
				return Value{}, err
			}
			elems = append(elems, elem)
			// Another element is expected only after a comma.
			more = p.tryConsumeChar(',')
		}
		if err := p.consumeChar(']', "at end of array"); err != nil {
			return Value{}, err
		}
		n := len(start) - len(p.in)
		return rawValueOf(elems, start[:n:n]), nil
	}

	// unmarshalObject parses a JSON object: '{', zero or more "key": value
	// members separated by ',', then '}'. Members are kept as {key, value} pairs
	// in input order. The returned Value also carries the bytes consumed for the
	// object, including any whitespace skipped after the closing brace.
	func (p *decoder) unmarshalObject() (Value, error) {
		start := p.in
		if err := p.consumeChar('{', "at start of object"); err != nil {
			return Value{}, err
		}
		var items [][2]Value
		// An immediately closed object (or input that ends here) has no members.
		more := len(p.in) > 0 && p.in[0] != '}'
		for more && len(p.in) > 0 {
			key, err := p.unmarshalString()
			if !p.nerr.Merge(err) {
				return Value{}, err
			}
			if err := p.consumeChar(':', "in object"); err != nil {
				return Value{}, err
			}
			val, err := p.unmarshalValue()
			if !p.nerr.Merge(err) {
				return Value{}, err
			}
			items = append(items, [2]Value{key, val})
			// Another member is expected only after a comma.
			more = p.tryConsumeChar(',')
		}
		if err := p.consumeChar('}', "at end of object"); err != nil {
			return Value{}, err
		}
		n := len(start) - len(p.in)
		return rawValueOf(items, start[:n:n]), nil
	}

	// consumeChar consumes the byte c, plus any whitespace after it, from the
	// front of the input. If the input is exhausted it returns
	// io.ErrUnexpectedEOF; if a different byte is found it returns a syntax error
	// whose text ends with msg.
	func (p *decoder) consumeChar(c byte, msg string) error {
		switch {
		case p.tryConsumeChar(c):
			return nil
		case len(p.in) == 0:
			return io.ErrUnexpectedEOF
		default:
			return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
		}
	}

	// tryConsumeChar reports whether the next input byte is c. When it is, the
	// byte and any whitespace following it are consumed; otherwise the input is
	// left untouched.
	func (p *decoder) tryConsumeChar(c byte) bool {
		if len(p.in) == 0 || p.in[0] != c {
			return false
		}
		p.consume(1)
		return true
	}

	// consume drops the first n bytes of the input and then skips every space,
	// newline, carriage return and tab that follows.
	func (p *decoder) consume(n int) {
		rest := p.in[n:]
		skip := 0
		for skip < len(rest) {
			c := rest[skip]
			if c != ' ' && c != '\n' && c != '\r' && c != '\t' {
				break
			}
			skip++
		}
		p.in = rest[skip:]
	}

	// errRegexp extracts the offending token for error messages: a run of up to
	// 32 non-delimiter characters (letters, digits, '-', '+', '.', '_'), or
	// failing that a single character. The alternation uses plain '|': a
	// backslash before '|' is not a valid escape in an interpreted Go string.
	var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")

	// matchWithDelim returns the length of the match of r at the start of b, but
	// only when that match is followed by a delimiter, i.e. any byte outside
	// [-+_.a-zA-Z0-9]. Reaching the end of b also counts as a delimiter.
	// It returns 0 when r does not match or the match is not delimited.
	func matchWithDelim(r *regexp.Regexp, b []byte) int {
		n := len(r.Find(b))
		if n < len(b) {
			// Check that the next character is a delimiter.
			c := b[n]
			notDelim := (c == '-' || c == '+' || c == '.' || c == '_' ||
				('a' <= c && c <= 'z') ||
				('A' <= c && c <= 'Z') ||
				('0' <= c && c <= '9'))
			if notDelim {
				return 0
			}
		}
		return n
	}