internal/encoding/text: initial commit of proto text format parser/serializer Package text provides a parser and serializer for the proto text format. This focuses on the grammar of the format and is agnostic towards specific semantics of protobuf types. High-level API: func Marshal(v Value, indent string, delims [2]byte, outputASCII bool) ([]byte, error) func Unmarshal(b []byte) (Value, error) type Type uint8 const Bool Type ... type Value struct{ ... } func ValueOf(v interface{}) Value func (v Value) Type() Type func (v Value) Bool() (x bool, ok bool) func (v Value) Int(b64 bool) (x int64, ok bool) func (v Value) Uint(b64 bool) (x uint64, ok bool) func (v Value) Float(b64 bool) (x float64, ok bool) func (v Value) Name() (protoreflect.Name, bool) func (v Value) String() string func (v Value) List() []Value func (v Value) Message() [][2]Value func (v Value) Raw() []byte Change-Id: I4a78ec4474c160d0de4d32120651edd931ea2c1e Reviewed-on: https://go-review.googlesource.com/127455 Reviewed-by: Herbie Ong <herbie@google.com>

commit: 27c2a76c850826490ae8ae43d46a4d1276b57ceb [log] [tgz]
author: Joe Tsai <joetsai@digital-static.net> Wed Aug 01 16:48:18 2018 -0700
committer: Joe Tsai <thebrokentoaster@gmail.com> Tue Aug 07 22:44:06 2018 +0000
tree: db80a5fbf04677876d57df858a96ef4974cc387b
parent: 879b18d902097ec222c23b5124c9cccf9c10af9b [diff]
diff --git a/internal/encoding/text/decode.go b/internal/encoding/text/decode.go
new file mode 100644
index 0000000..90faedc
--- /dev/null
+++ b/internal/encoding/text/decode.go

@@ -0,0 +1,247 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+import (
+	"bytes"
+	"io"
+	"regexp"
+	"unicode/utf8"
+
+	"google.golang.org/proto/internal/errors"
+	"google.golang.org/proto/reflect/protoreflect"
+)
+
+type syntaxError struct{ error }
+
+func newSyntaxError(f string, x ...interface{}) error {
+	return syntaxError{errors.New(f, x...)}
+}
+
+// Unmarshal parses b as the proto text format.
+// It returns a Value, which is always of the Message type.
+func Unmarshal(b []byte) (Value, error) {
+	p := decoder{in: b}
+	p.consume(0) // trim leading spaces or comments
+	v, err := p.unmarshalMessage(false)
+	if !p.nerr.Merge(err) {
+		if e, ok := err.(syntaxError); ok {
+			b = b[:len(b)-len(p.in)] // consumed input
+			line := bytes.Count(b, []byte("\n")) + 1
+			if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
+				b = b[i+1:]
+			}
+			column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
+			err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
+		}
+		return Value{}, err
+	}
+	if len(p.in) > 0 {
+		return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
+	}
+	return v, p.nerr.E
+}
+
+type decoder struct {
+	nerr errors.NonFatal
+	in   []byte
+}
+
+func (p *decoder) unmarshalList() (Value, error) {
+	b := p.in
+	var elems []Value
+	if err := p.consumeChar('[', "at start of list"); err != nil {
+		return Value{}, err
+	}
+	if len(p.in) > 0 && p.in[0] != ']' {
+		for len(p.in) > 0 {
+			v, err := p.unmarshalValue()
+			if !p.nerr.Merge(err) {
+				return Value{}, err
+			}
+			elems = append(elems, v)
+			if !p.tryConsumeChar(',') {
+				break
+			}
+		}
+	}
+	if err := p.consumeChar(']', "at end of list"); err != nil {
+		return Value{}, err
+	}
+	b = b[:len(b)-len(p.in)]
+	return rawValueOf(elems, b[:len(b):len(b)]), nil
+}
+
+func (p *decoder) unmarshalMessage(checkDelims bool) (Value, error) {
+	b := p.in
+	var items [][2]Value
+	delims := [2]byte{'{', '}'}
+	if len(p.in) > 0 && p.in[0] == '<' {
+		delims = [2]byte{'<', '>'}
+	}
+	if checkDelims {
+		if err := p.consumeChar(delims[0], "at start of message"); err != nil {
+			return Value{}, err
+		}
+	}
+	for len(p.in) > 0 {
+		if p.in[0] == '}' || p.in[0] == '>' {
+			break
+		}
+		k, err := p.unmarshalKey()
+		if !p.nerr.Merge(err) {
+			return Value{}, err
+		}
+		if !p.tryConsumeChar(':') && len(p.in) > 0 && p.in[0] != '{' && p.in[0] != '<' {
+			return Value{}, newSyntaxError("expected ':' after message key")
+		}
+		v, err := p.unmarshalValue()
+		if !p.nerr.Merge(err) {
+			return Value{}, err
+		}
+		if p.tryConsumeChar(';') || p.tryConsumeChar(',') {
+			// always optional
+		}
+		items = append(items, [2]Value{k, v})
+	}
+	if checkDelims {
+		if err := p.consumeChar(delims[1], "at end of message"); err != nil {
+			return Value{}, err
+		}
+	}
+	b = b[:len(b)-len(p.in)]
+	return rawValueOf(items, b[:len(b):len(b)]), nil
+}
+
+// This expression is more liberal than ConsumeAnyTypeUrl in C++.
+// However, the C++ parser does not handle many legal URL strings.
+// The Go implementation is more liberal to be backwards compatible with
+// the historical Go implementation which was overly liberal (and buggy).
+var urlRegexp = regexp.MustCompile(`^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`)
+
+// unmarshalKey parses the key, which may be a Name, String, or Uint.
+func (p *decoder) unmarshalKey() (v Value, err error) {
+	if p.tryConsumeChar('[') {
+		if len(p.in) == 0 {
+			return Value{}, io.ErrUnexpectedEOF
+		}
+		if p.in[0] == '\'' || p.in[0] == '"' {
+			// Historically, Go's parser allowed a string for the Any type URL.
+			// This is specific to Go and contrary to the C++ implementation,
+			// which does not support strings for the Any type URL.
+			v, err = p.unmarshalString()
+			if !p.nerr.Merge(err) {
+				return Value{}, err
+			}
+		} else if n := matchWithDelim(urlRegexp, p.in); n > 0 {
+			v = rawValueOf(string(p.in[:n]), p.in[:n:n])
+			p.consume(n)
+		} else {
+			return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
+		}
+		if err := p.consumeChar(']', "at end of extension name"); err != nil {
+			return Value{}, err
+		}
+		return v, nil
+	}
+	if matchWithDelim(intRegexp, p.in) > 0 && p.in[0] != '-' {
+		return p.unmarshalNumber()
+	}
+	return p.unmarshalName()
+}
+
+func (p *decoder) unmarshalValue() (Value, error) {
+	if len(p.in) == 0 {
+		return Value{}, io.ErrUnexpectedEOF
+	}
+	switch p.in[0] {
+	case '"', '\'':
+		return p.unmarshalStrings()
+	case '[':
+		return p.unmarshalList()
+	case '{', '<':
+		return p.unmarshalMessage(true)
+	default:
+		n := matchWithDelim(nameRegexp, p.in) // zero if no match
+		if n > 0 && literals[string(p.in[:n])] == nil {
+			return p.unmarshalName()
+		}
+		return p.unmarshalNumber()
+	}
+}
+
+// This expression matches all valid proto identifiers.
+var nameRegexp = regexp.MustCompile(`^[_a-zA-Z][_a-zA-Z0-9]*`)
+
+// unmarshalName unmarshals an unquoted identifier.
+//
+// E.g., `field_name` => ValueOf(protoreflect.Name("field_name"))
+func (p *decoder) unmarshalName() (Value, error) {
+	if n := matchWithDelim(nameRegexp, p.in); n > 0 {
+		v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
+		p.consume(n)
+		return v, nil
+	}
+	return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
+}
+
+func (p *decoder) consumeChar(c byte, msg string) error {
+	if p.tryConsumeChar(c) {
+		return nil
+	}
+	if len(p.in) == 0 {
+		return io.ErrUnexpectedEOF
+	}
+	return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
+}
+
+func (p *decoder) tryConsumeChar(c byte) bool {
+	if len(p.in) > 0 && p.in[0] == c {
+		p.consume(1)
+		return true
+	}
+	return false
+}
+
+// consume consumes n bytes of input and any subsequent whitespace or comments.
+func (p *decoder) consume(n int) {
+	p.in = p.in[n:]
+	for len(p.in) > 0 {
+		switch p.in[0] {
+		case ' ', '\n', '\r', '\t':
+			p.in = p.in[1:]
+		case '#':
+			if i := bytes.IndexByte(p.in, '\n'); i >= 0 {
+				p.in = p.in[i+len("\n"):]
+			} else {
+				p.in = nil
+			}
+		default:
+			return
+		}
+	}
+}
+
+// Any sequence that looks like a non-delimiter (for error reporting).
+var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")
+
+// matchWithDelim matches r with the input b and verifies that the match
+// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
+// As a special case, EOF is considered a delimiter.
+func matchWithDelim(r *regexp.Regexp, b []byte) int {
+	n := len(r.Find(b))
+	if n < len(b) {
+		// Check that the next character is a delimiter.
+		c := b[n]
+		notDelim := (c == '-' || c == '+' || c == '.' || c == '_' ||
+			('a' <= c && c <= 'z') ||
+			('A' <= c && c <= 'Z') ||
+			('0' <= c && c <= '9'))
+		if notDelim {
+			return 0
+		}
+	}
+	return n
+}

diff --git a/internal/encoding/text/encode.go b/internal/encoding/text/encode.go
new file mode 100644
index 0000000..ea0fb5a
--- /dev/null
+++ b/internal/encoding/text/encode.go

@@ -0,0 +1,174 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+import (
+	"bytes"
+	"strings"
+
+	"google.golang.org/proto/internal/errors"
+)
+
+// Marshal serializes v as the proto text format, where v must be a Message.
+// In the proto text format, the top-level value is always a message where the
+// delimiters are elided.
+//
+// If indent is a non-empty string, it causes every entry in a List or Message
+// to be preceded by the indent and trailed by a newline.
+//
+// If delims is not the zero value, it controls the delimiter characters used
+// for messages (e.g., "{}" vs "<>").
+//
+// If outputASCII is true, strings will be serialized in such a way that
+// multi-byte UTF-8 sequences are escaped. This property ensures that the
+// overall output is ASCII (as opposed to UTF-8).
+func Marshal(v Value, indent string, delims [2]byte, outputASCII bool) ([]byte, error) {
+	p := encoder{}
+	if len(indent) > 0 {
+		if strings.Trim(indent, " \t") != "" {
+			return nil, errors.New("indent may only be composed of space and tab characters")
+		}
+		p.indent = indent
+		p.newline = "\n"
+	}
+	switch delims {
+	case [2]byte{0, 0}:
+		p.delims = [2]byte{'{', '}'}
+	case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
+		p.delims = delims
+	default:
+		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
+	}
+	p.outputASCII = outputASCII
+
+	err := p.marshalMessage(v, false)
+	if !p.nerr.Merge(err) {
+		return nil, err
+	}
+	if len(indent) > 0 {
+		return append(bytes.TrimRight(p.out, "\n"), '\n'), p.nerr.E
+	}
+	return p.out, p.nerr.E
+}
+
+type encoder struct {
+	nerr errors.NonFatal
+	out  []byte
+
+	indent      string
+	indents     []byte
+	newline     string // set to "\n" if len(indent) > 0
+	delims      [2]byte
+	outputASCII bool
+}
+
+func (p *encoder) marshalList(v Value) error {
+	if v.Type() != List {
+		return errors.New("invalid type %v, expected list", v.Type())
+	}
+	elems := v.List()
+	p.out = append(p.out, '[')
+	p.indents = append(p.indents, p.indent...)
+	if len(elems) > 0 {
+		p.out = append(p.out, p.newline...)
+	}
+	for i, elem := range elems {
+		p.out = append(p.out, p.indents...)
+		if err := p.marshalValue(elem); !p.nerr.Merge(err) {
+			return err
+		}
+		if i < len(elems)-1 {
+			p.out = append(p.out, ',')
+		}
+		p.out = append(p.out, p.newline...)
+	}
+	p.indents = p.indents[:len(p.indents)-len(p.indent)]
+	if len(elems) > 0 {
+		p.out = append(p.out, p.indents...)
+	}
+	p.out = append(p.out, ']')
+	return nil
+}
+
+func (p *encoder) marshalMessage(v Value, emitDelims bool) error {
+	if v.Type() != Message {
+		return errors.New("invalid type %v, expected message", v.Type())
+	}
+	items := v.Message()
+	if emitDelims {
+		p.out = append(p.out, p.delims[0])
+		p.indents = append(p.indents, p.indent...)
+		if len(items) > 0 {
+			p.out = append(p.out, p.newline...)
+		}
+	}
+	for i, item := range items {
+		p.out = append(p.out, p.indents...)
+		if err := p.marshalKey(item[0]); !p.nerr.Merge(err) {
+			return err
+		}
+		p.out = append(p.out, ':')
+		if len(p.indent) > 0 {
+			p.out = append(p.out, ' ')
+		}
+		if err := p.marshalValue(item[1]); !p.nerr.Merge(err) {
+			return err
+		}
+		if i < len(items)-1 && len(p.indent) == 0 {
+			p.out = append(p.out, ' ')
+		}
+		p.out = append(p.out, p.newline...)
+	}
+	if emitDelims {
+		p.indents = p.indents[:len(p.indents)-len(p.indent)]
+		if len(items) > 0 {
+			p.out = append(p.out, p.indents...)
+		}
+		p.out = append(p.out, p.delims[1])
+	}
+	return nil
+}
+
+func (p *encoder) marshalKey(v Value) error {
+	switch v.Type() {
+	case String:
+		var err error
+		p.out = append(p.out, '[')
+		if len(urlRegexp.FindString(v.str)) == len(v.str) {
+			p.out = append(p.out, v.str...)
+		} else {
+			err = p.marshalString(v)
+		}
+		p.out = append(p.out, ']')
+		return err
+	case Uint:
+		return p.marshalNumber(v)
+	case Name:
+		s, _ := v.Name()
+		p.out = append(p.out, s...)
+		return nil
+	default:
+		return errors.New("invalid type %v to encode key", v.Type())
+	}
+}
+
+func (p *encoder) marshalValue(v Value) error {
+	switch v.Type() {
+	case Bool, Int, Uint, Float:
+		return p.marshalNumber(v)
+	case String:
+		return p.marshalString(v)
+	case List:
+		return p.marshalList(v)
+	case Message:
+		return p.marshalMessage(v, true)
+	case Name:
+		s, _ := v.Name()
+		p.out = append(p.out, s...)
+		return nil
+	default:
+		return errors.New("invalid type %v to encode value", v.Type())
+	}
+}

diff --git a/internal/encoding/text/number.go b/internal/encoding/text/number.go
new file mode 100644
index 0000000..7a1be93
--- /dev/null
+++ b/internal/encoding/text/number.go

@@ -0,0 +1,124 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+import (
+	"bytes"
+	"io"
+	"math"
+	"regexp"
+	"strconv"
+	"strings"
+
+	"google.golang.org/proto/internal/errors"
+)
+
+// marshalNumber encodes v as either a Bool, Int, Uint, or Float.
+func (p *encoder) marshalNumber(v Value) error {
+	var err error
+	p.out, err = appendNumber(p.out, v)
+	return err
+}
+func appendNumber(out []byte, v Value) ([]byte, error) {
+	if len(v.raw) > 0 {
+		switch v.Type() {
+		case Bool, Int, Uint, Float:
+			return append(out, v.raw...), nil
+		}
+	}
+	switch v.Type() {
+	case Bool:
+		if b, _ := v.Bool(); b {
+			return append(out, "true"...), nil
+		} else {
+			return append(out, "false"...), nil
+		}
+	case Int:
+		return strconv.AppendInt(out, int64(v.num), 10), nil
+	case Uint:
+		return strconv.AppendUint(out, uint64(v.num), 10), nil
+	case Float:
+		switch n := math.Float64frombits(v.num); {
+		case math.IsNaN(n):
+			return append(out, "nan"...), nil
+		case math.IsInf(n, +1):
+			return append(out, "inf"...), nil
+		case math.IsInf(n, -1):
+			return append(out, "-inf"...), nil
+		default:
+			return strconv.AppendFloat(out, n, 'g', -1, 64), nil
+		}
+	default:
+		return nil, errors.New("invalid type %v, expected bool or number", v.Type())
+	}
+}
+
+// These regular expressions were derived by reverse engineering the C++ code
+// in tokenizer.cc and text_format.cc.
+var (
+	literals = map[string]interface{}{
+		// These exact literals are the ones supported in C++.
+		// In C++, a 1-bit unsigned integer is also allowed to represent
+		// a boolean. This is handled in Value.Bool.
+		"t":     true,
+		"true":  true,
+		"True":  true,
+		"f":     false,
+		"false": false,
+		"False": false,
+
+		// C++ permits "-nan" and the case-insensitive variants of these.
+		// However, Go continues to be case-sensitive.
+		"nan":  math.NaN(),
+		"inf":  math.Inf(+1),
+		"-inf": math.Inf(-1),
+	}
+	literalRegexp = regexp.MustCompile("^-?[a-zA-Z]+")
+	intRegexp     = regexp.MustCompile("^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)")
+	floatRegexp   = regexp.MustCompile("^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)")
+)
+
+// unmarshalNumber decodes a Bool, Int, Uint, or Float from the input.
+func (p *decoder) unmarshalNumber() (Value, error) {
+	v, n, err := consumeNumber(p.in)
+	p.consume(n)
+	return v, err
+}
+func consumeNumber(in []byte) (Value, int, error) {
+	if len(in) == 0 {
+		return Value{}, 0, io.ErrUnexpectedEOF
+	}
+	if n := matchWithDelim(literalRegexp, in); n > 0 {
+		if v, ok := literals[string(in[:n])]; ok {
+			return rawValueOf(v, in[:n:n]), n, nil
+		}
+	}
+	if n := matchWithDelim(floatRegexp, in); n > 0 {
+		if bytes.ContainsAny(in[:n], ".eEfF") {
+			s := strings.TrimRight(string(in[:n]), "fF")
+			f, err := strconv.ParseFloat(s, 64)
+			if err != nil {
+				return Value{}, 0, err
+			}
+			return rawValueOf(f, in[:n:n]), n, nil
+		}
+	}
+	if n := matchWithDelim(intRegexp, in); n > 0 {
+		if in[0] == '-' {
+			v, err := strconv.ParseInt(string(in[:n]), 0, 64)
+			if err != nil {
+				return Value{}, 0, err
+			}
+			return rawValueOf(v, in[:n:n]), n, nil
+		} else {
+			v, err := strconv.ParseUint(string(in[:n]), 0, 64)
+			if err != nil {
+				return Value{}, 0, err
+			}
+			return rawValueOf(v, in[:n:n]), n, nil
+		}
+	}
+	return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
+}

diff --git a/internal/encoding/text/string.go b/internal/encoding/text/string.go
new file mode 100644
index 0000000..cc83e85
--- /dev/null
+++ b/internal/encoding/text/string.go

@@ -0,0 +1,229 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+import (
+	"bytes"
+	"io"
+	"math"
+	"math/bits"
+	"strconv"
+	"strings"
+	"unicode"
+	"unicode/utf16"
+	"unicode/utf8"
+
+	"google.golang.org/proto/internal/errors"
+)
+
+func (p *encoder) marshalString(v Value) error {
+	var err error
+	p.out, err = appendString(p.out, v, p.outputASCII)
+	return err
+}
+func appendString(out []byte, v Value, outputASCII bool) ([]byte, error) {
+	if v.Type() != String {
+		return nil, errors.New("invalid type %v, expected string", v.Type())
+	}
+	if len(v.raw) > 0 {
+		return append(out, v.raw...), nil
+	}
+	in := v.String()
+
+	out = append(out, '"')
+	i := indexNeedEscape(in)
+	in, out = in[i:], append(out, in[:i]...)
+	for len(in) > 0 {
+		switch r, n := utf8.DecodeRuneInString(in); {
+		case r == utf8.RuneError && n == 1:
+			// We do not report invalid UTF-8 because strings in the text format
+			// are used to represent both the proto string and bytes type.
+			r = rune(in[0])
+			fallthrough
+		case r < ' ' || r == '"' || r == '\\':
+			out = append(out, '\\')
+			switch r {
+			case '"', '\\':
+				out = append(out, byte(r))
+			case '\n':
+				out = append(out, 'n')
+			case '\r':
+				out = append(out, 'r')
+			case '\t':
+				out = append(out, 't')
+			default:
+				out = append(out, 'x')
+				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
+				out = strconv.AppendUint(out, uint64(r), 16)
+			}
+			in = in[n:]
+		case outputASCII && r >= utf8.RuneSelf:
+			out = append(out, '\\')
+			if r <= math.MaxUint16 {
+				out = append(out, 'u')
+				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
+				out = strconv.AppendUint(out, uint64(r), 16)
+			} else {
+				out = append(out, 'U')
+				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
+				out = strconv.AppendUint(out, uint64(r), 16)
+			}
+			in = in[n:]
+		default:
+			i := indexNeedEscape(in[n:])
+			in, out = in[n+i:], append(out, in[:n+i]...)
+		}
+	}
+	out = append(out, '"')
+	return out, nil
+}
+
+func (p *decoder) unmarshalString() (Value, error) {
+	v, n, err := consumeString(p.in)
+	p.consume(n)
+	return v, err
+}
+func consumeString(in []byte) (Value, int, error) {
+	var nerr errors.NonFatal
+	in0 := in
+	if len(in) == 0 {
+		return Value{}, 0, io.ErrUnexpectedEOF
+	}
+	quote := in[0]
+	if in[0] != '"' && in[0] != '\'' {
+		return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
+	}
+	in = in[1:]
+	i := indexNeedEscape(string(in))
+	in, out := in[i:], in[:i:i] // set cap to prevent mutations
+	for len(in) > 0 {
+		switch r, n := utf8.DecodeRune(in); {
+		case r == utf8.RuneError && n == 1:
+			nerr.AppendInvalidUTF8("")
+			in, out = in[1:], append(out, in[0]) // preserve invalid byte
+		case r == 0 || r == '\n':
+			return Value{}, 0, newSyntaxError("invalid character %q in string", r)
+		case r == rune(quote):
+			in = in[1:]
+			n := len(in0) - len(in)
+			v := rawValueOf(string(out), in0[:n:n])
+			return v, n, nerr.E
+		case r == '\\':
+			if len(in) < 2 {
+				return Value{}, 0, io.ErrUnexpectedEOF
+			}
+			switch r := in[1]; r {
+			case '"', '\'', '\\', '?':
+				in, out = in[2:], append(out, r)
+			case 'a':
+				in, out = in[2:], append(out, '\a')
+			case 'b':
+				in, out = in[2:], append(out, '\b')
+			case 'n':
+				in, out = in[2:], append(out, '\n')
+			case 'r':
+				in, out = in[2:], append(out, '\r')
+			case 't':
+				in, out = in[2:], append(out, '\t')
+			case 'v':
+				in, out = in[2:], append(out, '\v')
+			case 'f':
+				in, out = in[2:], append(out, '\f')
+			case '0', '1', '2', '3', '4', '5', '6', '7':
+				// One, two, or three octal characters.
+				n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567"))
+				if n > 3 {
+					n = 3
+				}
+				v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8)
+				if err != nil {
+					return Value{}, 0, newSyntaxError("invalid octal escape code %q in string", in[:1+n])
+				}
+				in, out = in[1+n:], append(out, byte(v))
+			case 'x':
+				// One or two hexadecimal characters.
+				n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF"))
+				if n > 2 {
+					n = 2
+				}
+				v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8)
+				if err != nil {
+					return Value{}, 0, newSyntaxError("invalid hex escape code %q in string", in[:2+n])
+				}
+				in, out = in[2+n:], append(out, byte(v))
+			case 'u', 'U':
+				// Four or eight hexadecimal characters.
+				n := 6
+				if r == 'U' {
+					n = 10
+				}
+				if len(in) < n {
+					return Value{}, 0, io.ErrUnexpectedEOF
+				}
+				v, err := strconv.ParseUint(string(in[2:n]), 16, 32)
+				if utf8.MaxRune < v || err != nil {
+					return Value{}, 0, newSyntaxError("invalid Unicode escape code %q in string", in[:n])
+				}
+				in = in[n:]
+
+				r := rune(v)
+				if utf16.IsSurrogate(r) {
+					if len(in) < 6 {
+						return Value{}, 0, io.ErrUnexpectedEOF
+					}
+					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
+					r = utf16.DecodeRune(r, rune(v))
+					if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
+						return Value{}, 0, newSyntaxError("invalid Unicode escape code %q in string", in[:6])
+					}
+					in = in[6:]
+				}
+				out = append(out, string(r)...)
+			default:
+				return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
+			}
+		default:
+			i := indexNeedEscape(string(in[n:]))
+			in, out = in[n+i:], append(out, in[:n+i]...)
+		}
+	}
+	return Value{}, 0, io.ErrUnexpectedEOF
+}
+
+// unmarshalStrings unmarshals multiple strings.
+// This differs from unmarshalString since the text format allows
+// multiple back-to-back string literals where they are semantically treated
+// as a single large string with all values concatenated.
+//
+// E.g., `"foo" "bar" "baz"` => ValueOf("foobarbaz")
+func (p *decoder) unmarshalStrings() (Value, error) {
+	// Note that the ending quote is sufficient to unambiguously mark the end
+	// of a string. Thus, the text grammar does not require intervening
+	// whitespace or control characters in-between strings.
+	// Thus, the following is valid:
+	//	`"foo"'bar'"baz"` => ValueOf("foobarbaz")
+	b := p.in
+	var ss []string
+	for len(p.in) > 0 && (p.in[0] == '"' || p.in[0] == '\'') {
+		v, err := p.unmarshalString()
+		if !p.nerr.Merge(err) {
+			return Value{}, err
+		}
+		ss = append(ss, v.String())
+	}
+	b = b[:len(b)-len(p.in)]
+	return rawValueOf(strings.Join(ss, ""), b[:len(b):len(b)]), nil
+}
+
+// indexNeedEscape returns the index of the next character that needs escaping.
+// If no characters need escaping, this returns the input length.
+func indexNeedEscape(s string) int {
+	for i := 0; i < len(s); i++ {
+		if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
+			return i
+		}
+	}
+	return len(s)
+}

diff --git a/internal/encoding/text/text_test.go b/internal/encoding/text/text_test.go
new file mode 100644
index 0000000..9b3bff6
--- /dev/null
+++ b/internal/encoding/text/text_test.go

@@ -0,0 +1,860 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+import (
+	"fmt"
+	"math"
+	"strings"
+	"testing"
+	"unicode/utf8"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
+	"google.golang.org/proto/internal/flags"
+	"google.golang.org/proto/reflect/protoreflect"
+)
+
+func Test(t *testing.T) {
+	const space = " \n\r\t"
+	var S = fmt.Sprintf
+	var V = ValueOf
+	var ID = func(n protoreflect.Name) Value { return V(n) }
+	type Lst = []Value
+	type Msg = [][2]Value
+
+	tests := []struct {
+		in             string
+		wantVal        Value
+		wantOut        string
+		wantOutBracket string
+		wantOutASCII   string
+		wantOutIndent  string
+		wantErr        string
+	}{{
+		in:            "",
+		wantVal:       V(Msg{}),
+		wantOutIndent: "\n",
+	}, {
+		in:      S("%s# hello%s", space, space),
+		wantVal: V(Msg{}),
+	}, {
+		in:      S("%s# hello\rfoo:bar", space),
+		wantVal: V(Msg{}),
+	}, {
+		// Comments only extend until the newline.
+		in:            S("%s# hello\nfoo:bar", space),
+		wantVal:       V(Msg{{ID("foo"), ID("bar")}}),
+		wantOut:       "foo:bar",
+		wantOutIndent: "foo: bar\n",
+	}, {
+		// NUL is an invalid whitespace since C++ uses C-strings.
+		in:      "\x00",
+		wantErr: `invalid "\x00" as identifier`,
+	}, {
+		in:      "foo:0",
+		wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
+		wantOut: "foo:0",
+	}, {
+		in:      S("%sfoo%s:0", space, space),
+		wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
+	}, {
+		in:      "foo bar:0",
+		wantErr: `expected ':' after message key`,
+	}, {
+		in:            "[foo]:0",
+		wantVal:       V(Msg{{V("foo"), V(uint32(0))}}),
+		wantOut:       "[foo]:0",
+		wantOutIndent: "[foo]: 0\n",
+	}, {
+		in:      S("%s[%sfoo%s]%s:0", space, space, space, space),
+		wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
+	}, {
+		in:            "[proto.package.name]:0",
+		wantVal:       V(Msg{{V("proto.package.name"), V(uint32(0))}}),
+		wantOut:       "[proto.package.name]:0",
+		wantOutIndent: "[proto.package.name]: 0\n",
+	}, {
+		in:      S("%s[%sproto.package.name%s]%s:0", space, space, space, space),
+		wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
+	}, {
+		in:            "['sub.domain.com\x2fpath\x2fto\x2fproto.package.name']:0",
+		wantVal:       V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
+		wantOut:       "[sub.domain.com/path/to/proto.package.name]:0",
+		wantOutIndent: "[sub.domain.com/path/to/proto.package.name]: 0\n",
+	}, {
+		in:      "[\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"]:0",
+		wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
+	}, {
+		in:      S("%s[%s'sub.domain.com\x2fpath\x2fto\x2fproto.package.name'%s]%s:0", space, space, space, space),
+		wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
+	}, {
+		in:      S("%s[%s\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"%s]%s:0", space, space, space, space),
+		wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
+	}, {
+		in:            `['http://example.com/path/to/proto.package.name']:0`,
+		wantVal:       V(Msg{{V("http://example.com/path/to/proto.package.name"), V(uint32(0))}}),
+		wantOut:       `["http://example.com/path/to/proto.package.name"]:0`,
+		wantOutIndent: `["http://example.com/path/to/proto.package.name"]: 0` + "\n",
+	}, {
+		in:      "[proto.package.name:0",
+		wantErr: `invalid character ':', expected ']' at end of extension name`,
+	}, {
+		in:      "[proto.package name]:0",
+		wantErr: `invalid character 'n', expected ']' at end of extension name`,
+	}, {
+		in:      `["proto.package" "name"]:0`,
+		wantErr: `invalid character '"', expected ']' at end of extension name`,
+	}, {
+		in:      `["\z"]`,
+		wantErr: `invalid escape code "\\z" in string`,
+	}, {
+		in:      "[$]",
+		wantErr: `invalid "$" as identifier`,
+	}, {
+		// This parses fine, but should result in an error later since no
+		// type name in proto will ever be just a number.
+		in:      "[20]:0",
+		wantVal: V(Msg{{V("20"), V(uint32(0))}}),
+		wantOut: "[20]:0",
+	}, {
+		in:      "20:0",
+		wantVal: V(Msg{{V(uint32(20)), V(uint32(0))}}),
+		wantOut: "20:0",
+	}, {
+		in:      "0x20:0",
+		wantVal: V(Msg{{V(uint32(0x20)), V(uint32(0))}}),
+		wantOut: "32:0",
+	}, {
+		in:      "020:0",
+		wantVal: V(Msg{{V(uint32(020)), V(uint32(0))}}),
+		wantOut: "16:0",
+	}, {
+		in:      "-20:0",
+		wantErr: `invalid "-20" as identifier`,
+	}, {
+		in: `foo:true bar:"s" baz:{} qux:[] wib:id`,
+		wantVal: V(Msg{
+			{ID("foo"), V(true)},
+			{ID("bar"), V("s")},
+			{ID("baz"), V(Msg{})},
+			{ID("qux"), V(Lst{})},
+			{ID("wib"), ID("id")},
+		}),
+		wantOut:       `foo:true bar:"s" baz:{} qux:[] wib:id`,
+		wantOutIndent: "foo: true\nbar: \"s\"\nbaz: {}\nqux: []\nwib: id\n",
+	}, {
+		in: S(`%sfoo%s:%strue%s %sbar%s:%s"s"%s %sbaz%s:%s<>%s %squx%s:%s[]%s %swib%s:%sid%s`,
+			space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space),
+		wantVal: V(Msg{
+			{ID("foo"), V(true)},
+			{ID("bar"), V("s")},
+			{ID("baz"), V(Msg{})},
+			{ID("qux"), V(Lst{})},
+			{ID("wib"), ID("id")},
+		}),
+	}, {
+		in:            `foo:true;`,
+		wantVal:       V(Msg{{ID("foo"), V(true)}}),
+		wantOut:       "foo:true",
+		wantOutIndent: "foo: true\n",
+	}, {
+		in:      `foo:true,`,
+		wantVal: V(Msg{{ID("foo"), V(true)}}),
+	}, {
+		in:      `foo:bar;,`,
+		wantErr: `invalid "," as identifier`,
+	}, {
+		in:      `foo:bar,;`,
+		wantErr: `invalid ";" as identifier`,
+	}, {
+		in:      `footrue`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `foo true`,
+		wantErr: `expected ':' after message key`,
+	}, {
+		in:      `foo"s"`,
+		wantErr: `expected ':' after message key`,
+	}, {
+		in:      `foo "s"`,
+		wantErr: `expected ':' after message key`,
+	}, {
+		in:             `foo{}`,
+		wantVal:        V(Msg{{ID("foo"), V(Msg{})}}),
+		wantOut:        "foo:{}",
+		wantOutBracket: "foo:<>",
+		wantOutIndent:  "foo: {}\n",
+	}, {
+		in:      `foo {}`,
+		wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
+	}, {
+		in:      `foo<>`,
+		wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
+	}, {
+		in:      `foo <>`,
+		wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
+	}, {
+		in:      `foo[]`,
+		wantErr: `expected ':' after message key`,
+	}, {
+		in:      `foo []`,
+		wantErr: `expected ':' after message key`,
+	}, {
+		in:      `foo:truebar:true`,
+		wantErr: `invalid ":" as identifier`,
+	}, {
+		in:            `foo:"s"bar:true`,
+		wantVal:       V(Msg{{ID("foo"), V("s")}, {ID("bar"), V(true)}}),
+		wantOut:       `foo:"s" bar:true`,
+		wantOutIndent: "foo: \"s\"\nbar: true\n",
+	}, {
+		in:      `foo:0bar:true`,
+		wantErr: `invalid "0bar" as number or bool`,
+	}, {
+		in:             `foo:{}bar:true`,
+		wantVal:        V(Msg{{ID("foo"), V(Msg{})}, {ID("bar"), V(true)}}),
+		wantOut:        "foo:{} bar:true",
+		wantOutBracket: "foo:<> bar:true",
+		wantOutIndent:  "foo: {}\nbar: true\n",
+	}, {
+		in:      `foo:[]bar:true`,
+		wantVal: V(Msg{{ID("foo"), V(Lst{})}, {ID("bar"), V(true)}}),
+	}, {
+		in:             `foo{bar:true}`,
+		wantVal:        V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
+		wantOut:        "foo:{bar:true}",
+		wantOutBracket: "foo:<bar:true>",
+		wantOutIndent:  "foo: {\n\tbar: true\n}\n",
+	}, {
+		in:      `foo<bar:true>`,
+		wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
+	}, {
+		in:      `foo{bar:true,}`,
+		wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
+	}, {
+		in:      `foo{bar:true;}`,
+		wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
+	}, {
+		in:      `foo{`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `foo{ `,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `foo{[`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `foo{[ `,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `foo{bar:true,;}`,
+		wantErr: `invalid ";" as identifier`,
+	}, {
+		in:      `foo{bar:true;,}`,
+		wantErr: `invalid "," as identifier`,
+	}, {
+		in:             `foo<bar:{}>`,
+		wantVal:        V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(Msg{})}})}}),
+		wantOut:        "foo:{bar:{}}",
+		wantOutBracket: "foo:<bar:<>>",
+		wantOutIndent:  "foo: {\n\tbar: {}\n}\n",
+	}, {
+		in:      `foo<bar:{>`,
+		wantErr: `invalid character '>', expected '}' at end of message`,
+	}, {
+		in:      `foo<bar:{}`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:             `arr:[]`,
+		wantVal:        V(Msg{{ID("arr"), V(Lst{})}}),
+		wantOut:        "arr:[]",
+		wantOutBracket: "arr:[]",
+		wantOutIndent:  "arr: []\n",
+	}, {
+		in:      `arr:[,]`,
+		wantErr: `invalid "," as number or bool`,
+	}, {
+		in:      `arr:[0 0]`,
+		wantErr: `invalid character '0', expected ']' at end of list`,
+	}, {
+		in:             `arr:["foo" "bar"]`,
+		wantVal:        V(Msg{{ID("arr"), V(Lst{V("foobar")})}}),
+		wantOut:        `arr:["foobar"]`,
+		wantOutBracket: `arr:["foobar"]`,
+		wantOutIndent:  "arr: [\n\t\"foobar\"\n]\n",
+	}, {
+		in:      `arr:[0,]`,
+		wantErr: `invalid "]" as number or bool`,
+	}, {
+		in: `arr:[true,0,"",id,[],{}]`,
+		wantVal: V(Msg{{ID("arr"), V(Lst{
+			V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
+		})}}),
+		wantOut:        `arr:[true,0,"",id,[],{}]`,
+		wantOutBracket: `arr:[true,0,"",id,[],<>]`,
+		wantOutIndent:  "arr: [\n\ttrue,\n\t0,\n\t\"\",\n\tid,\n\t[],\n\t{}\n]\n",
+	}, {
+		in: S(`arr:[%strue%s,%s0%s,%s""%s,%sid%s,%s[]%s,%s{}%s]`,
+			space, space, space, space, space, space, space, space, space, space, space, space),
+		wantVal: V(Msg{{ID("arr"), V(Lst{
+			V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
+		})}}),
+	}, {
+		in:      `arr:[`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `{`,
+		wantErr: `invalid "{" as identifier`,
+	}, {
+		in:      `<`,
+		wantErr: `invalid "<" as identifier`,
+	}, {
+		in:      `[`,
+		wantErr: "unexpected EOF",
+	}, {
+		in:      `}`,
+		wantErr: "1 bytes of unconsumed input",
+	}, {
+		in:      `>`,
+		wantErr: "1 bytes of unconsumed input",
+	}, {
+		in:      `]`,
+		wantErr: `invalid "]" as identifier`,
+	}, {
+		in:      `str: "'"`,
+		wantVal: V(Msg{{ID("str"), V(`'`)}}),
+		wantOut: `str:"'"`,
+	}, {
+		in:      `str: '"'`,
+		wantVal: V(Msg{{ID("str"), V(`"`)}}),
+		wantOut: `str:"\""`,
+	}, {
+		// String that has as few escaped characters as possible.
+		in: `str: ` + func() string {
+			var b []byte
+			for i := 0; i < utf8.RuneSelf; i++ {
+				switch i {
+				case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
+				default:
+					b = append(b, byte(i))
+				}
+			}
+			return "'" + string(b) + "'"
+		}(),
+		wantVal:      V(Msg{{ID("str"), V("\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f")}}),
+		wantOut:      `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
+		wantOutASCII: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
+	}, {
+		in:           "str: '\xde\xad\xbe\xef'",
+		wantVal:      V(Msg{{ID("str"), V("\xde\xad\xbe\xef")}}),
+		wantOut:      "str:\"\u07ad\\xbe\\xef\"",
+		wantOutASCII: `str:"\u07ad\xbe\xef"`,
+		wantErr:      "invalid UTF-8 detected",
+	}, {
+		// Valid UTF-8 wire encoding, but sub-optimal encoding.
+		in:           "str: '\xc0\x80'",
+		wantVal:      V(Msg{{ID("str"), V("\xc0\x80")}}),
+		wantOut:      `str:"\xc0\x80"`,
+		wantOutASCII: `str:"\xc0\x80"`,
+		wantErr:      "invalid UTF-8 detected",
+	}, {
+		// Valid UTF-8 wire encoding, but invalid rune (surrogate pair).
+		in:           "str: '\xed\xa0\x80'",
+		wantVal:      V(Msg{{ID("str"), V("\xed\xa0\x80")}}),
+		wantOut:      `str:"\xed\xa0\x80"`,
+		wantOutASCII: `str:"\xed\xa0\x80"`,
+		wantErr:      "invalid UTF-8 detected",
+	}, {
+		// Valid UTF-8 wire encoding, but invalid rune (above max rune).
+		in:           "str: '\xf7\xbf\xbf\xbf'",
+		wantVal:      V(Msg{{ID("str"), V("\xf7\xbf\xbf\xbf")}}),
+		wantOut:      `str:"\xf7\xbf\xbf\xbf"`,
+		wantOutASCII: `str:"\xf7\xbf\xbf\xbf"`,
+		wantErr:      "invalid UTF-8 detected",
+	}, {
+		// Valid UTF-8 wire encoding of the RuneError rune.
+		in:           "str: '\xef\xbf\xbd'",
+		wantVal:      V(Msg{{ID("str"), V(string(utf8.RuneError))}}),
+		wantOut:      `str:"` + string(utf8.RuneError) + `"`,
+		wantOutASCII: `str:"\ufffd"`,
+	}, {
+		in:           "str: 'hello\u1234world'",
+		wantVal:      V(Msg{{ID("str"), V("hello\u1234world")}}),
+		wantOut:      "str:\"hello\u1234world\"",
+		wantOutASCII: `str:"hello\u1234world"`,
+	}, {
+		in:           `str: '\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`,
+		wantVal:      V(Msg{{ID("str"), V("\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff")}}),
+		wantOut:      `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
+		wantOutASCII: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
+	}, {
+		in:      `str: '`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `str: '\`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `str: '\'`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `str: '\8'`,
+		wantErr: `invalid escape code "\\8" in string`,
+	}, {
+		in:           `str: '\1x'`,
+		wantVal:      V(Msg{{ID("str"), V("\001x")}}),
+		wantOut:      `str:"\x01x"`,
+		wantOutASCII: `str:"\x01x"`,
+	}, {
+		in:           `str: '\12x'`,
+		wantVal:      V(Msg{{ID("str"), V("\012x")}}),
+		wantOut:      `str:"\nx"`,
+		wantOutASCII: `str:"\nx"`,
+	}, {
+		in:           `str: '\123x'`,
+		wantVal:      V(Msg{{ID("str"), V("\123x")}}),
+		wantOut:      `str:"Sx"`,
+		wantOutASCII: `str:"Sx"`,
+	}, {
+		in:           `str: '\1234x'`,
+		wantVal:      V(Msg{{ID("str"), V("\1234x")}}),
+		wantOut:      `str:"S4x"`,
+		wantOutASCII: `str:"S4x"`,
+	}, {
+		in:           `str: '\1'`,
+		wantVal:      V(Msg{{ID("str"), V("\001")}}),
+		wantOut:      `str:"\x01"`,
+		wantOutASCII: `str:"\x01"`,
+	}, {
+		in:           `str: '\12'`,
+		wantVal:      V(Msg{{ID("str"), V("\012")}}),
+		wantOut:      `str:"\n"`,
+		wantOutASCII: `str:"\n"`,
+	}, {
+		in:           `str: '\123'`,
+		wantVal:      V(Msg{{ID("str"), V("\123")}}),
+		wantOut:      `str:"S"`,
+		wantOutASCII: `str:"S"`,
+	}, {
+		in:           `str: '\1234'`,
+		wantVal:      V(Msg{{ID("str"), V("\1234")}}),
+		wantOut:      `str:"S4"`,
+		wantOutASCII: `str:"S4"`,
+	}, {
+		in:           `str: '\377'`,
+		wantVal:      V(Msg{{ID("str"), V("\377")}}),
+		wantOut:      `str:"\xff"`,
+		wantOutASCII: `str:"\xff"`,
+	}, {
+		// Overflow octal escape.
+		in:      `str: '\400'`,
+		wantErr: `invalid octal escape code "\\400" in string`,
+	}, {
+		in:           `str: '\xfx'`,
+		wantVal:      V(Msg{{ID("str"), V("\x0fx")}}),
+		wantOut:      `str:"\x0fx"`,
+		wantOutASCII: `str:"\x0fx"`,
+	}, {
+		in:           `str: '\xffx'`,
+		wantVal:      V(Msg{{ID("str"), V("\xffx")}}),
+		wantOut:      `str:"\xffx"`,
+		wantOutASCII: `str:"\xffx"`,
+	}, {
+		in:           `str: '\xfffx'`,
+		wantVal:      V(Msg{{ID("str"), V("\xfffx")}}),
+		wantOut:      `str:"\xfffx"`,
+		wantOutASCII: `str:"\xfffx"`,
+	}, {
+		in:           `str: '\xf'`,
+		wantVal:      V(Msg{{ID("str"), V("\x0f")}}),
+		wantOut:      `str:"\x0f"`,
+		wantOutASCII: `str:"\x0f"`,
+	}, {
+		in:           `str: '\xff'`,
+		wantVal:      V(Msg{{ID("str"), V("\xff")}}),
+		wantOut:      `str:"\xff"`,
+		wantOutASCII: `str:"\xff"`,
+	}, {
+		in:           `str: '\xfff'`,
+		wantVal:      V(Msg{{ID("str"), V("\xfff")}}),
+		wantOut:      `str:"\xfff"`,
+		wantOutASCII: `str:"\xfff"`,
+	}, {
+		in:      `str: '\xz'`,
+		wantErr: `invalid hex escape code "\\x" in string`,
+	}, {
+		in:      `str: '\uPo'`,
+		wantErr: `unexpected EOF`,
+	}, {
+		in:      `str: '\uPoo'`,
+		wantErr: `invalid Unicode escape code "\\uPoo'" in string`,
+	}, {
+		in:      `str: '\uPoop'`,
+		wantErr: `invalid Unicode escape code "\\uPoop" in string`,
+	}, {
+		// Unmatched surrogate pair.
+		in:      `str: '\uDEAD'`,
+		wantErr: `unexpected EOF`, // trying to reader other half
+	}, {
+		// Surrogate pair with invalid other half.
+		in:      `str: '\uDEAD\u0000'`,
+		wantErr: `invalid Unicode escape code "\\u0000" in string`,
+	}, {
+		// Properly matched surrogate pair.
+		in:           `str: '\uD800\uDEAD'`,
+		wantVal:      V(Msg{{ID("str"), V("𐊭")}}),
+		wantOut:      `str:"𐊭"`,
+		wantOutASCII: `str:"\U000102ad"`,
+	}, {
+		// Overflow on Unicode rune.
+		in:      `str: '\U00110000'`,
+		wantErr: `invalid Unicode escape code "\\U00110000" in string`,
+	}, {
+		in:      `str: '\z'`,
+		wantErr: `invalid escape code "\\z" in string`,
+	}, {
+		// Strings cannot have NUL literal since C-style strings forbid them.
+		in:      "str: '\x00'",
+		wantErr: `invalid character '\x00' in string`,
+	}, {
+		// Strings cannot have newline literal. The C++ permits them if an
+		// option is specified to allow them. In Go, we always forbid them.
+		in:      "str: '\n'",
+		wantErr: `invalid character '\n' in string`,
+	}, {
+		in:           "name: \"My name is \"\n\"elsewhere\"",
+		wantVal:      V(Msg{{ID("name"), V("My name is elsewhere")}}),
+		wantOut:      `name:"My name is elsewhere"`,
+		wantOutASCII: `name:"My name is elsewhere"`,
+	}, {
+		in:      "name: 'My name is '\n'elsewhere'",
+		wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
+	}, {
+		in:      "name: 'My name is '\n\"elsewhere\"",
+		wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
+	}, {
+		in:      "name: \"My name is \"\n'elsewhere'",
+		wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
+	}, {
+		in:      "name: \"My \"'name '\"is \"\n'elsewhere'",
+		wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
+	}, {
+		in:      `crazy:"x'"'\""\''"'z"`,
+		wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}),
+	}, {
+		in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`,
+		wantVal: V(Msg{{ID("nums"), V(Lst{
+			V(true),
+			ID("T"),
+			V(true),
+			V(true),
+			ID("TRUE"),
+			V(false),
+			ID("F"),
+			V(false),
+			V(false),
+			ID("FALSE"),
+		})}}),
+		wantOut:       "nums:[true,T,true,true,TRUE,false,F,false,false,FALSE]",
+		wantOutIndent: "nums: [\n\ttrue,\n\tT,\n\ttrue,\n\ttrue,\n\tTRUE,\n\tfalse,\n\tF,\n\tfalse,\n\tfalse,\n\tFALSE\n]\n",
+	}, {
+		in: `nums: [nan,inf,-inf,NaN,NAN,Inf,INF]`,
+		wantVal: V(Msg{{ID("nums"), V(Lst{
+			V(math.NaN()),
+			V(math.Inf(+1)),
+			V(math.Inf(-1)),
+			ID("NaN"),
+			ID("NAN"),
+			ID("Inf"),
+			ID("INF"),
+		})}}),
+		wantOut:       "nums:[nan,inf,-inf,NaN,NAN,Inf,INF]",
+		wantOutIndent: "nums: [\n\tnan,\n\tinf,\n\t-inf,\n\tNaN,\n\tNAN,\n\tInf,\n\tINF\n]\n",
+	}, {
+		// C++ permits this, but we currently reject this.
+		in:      `num: -nan`,
+		wantErr: `invalid "-nan" as number or bool`,
+	}, {
+		in: `nums: [0,-0,-9876543210,9876543210,0x0,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`,
+		wantVal: V(Msg{{ID("nums"), V(Lst{
+			V(uint32(0)),
+			V(int32(-0)),
+			V(int64(-9876543210)),
+			V(uint64(9876543210)),
+			V(uint32(0x0)),
+			V(uint64(0x0123456789abcdef)),
+			V(int64(-0x0123456789abcdef)),
+			V(uint64(01234567)),
+			V(int64(-01234567)),
+		})}}),
+		wantOut:       "nums:[0,0,-9876543210,9876543210,0,81985529216486895,-81985529216486895,342391,-342391]",
+		wantOutIndent: "nums: [\n\t0,\n\t0,\n\t-9876543210,\n\t9876543210,\n\t0,\n\t81985529216486895,\n\t-81985529216486895,\n\t342391,\n\t-342391\n]\n",
+	}, {
+		in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
+		wantVal: V(Msg{{ID("nums"), V(Lst{
+			V(0.0),
+			V(0.0),
+			V(1.0),
+			V(10.0),
+			V(-0.0),
+			V(-1.0),
+			V(-10.0),
+			V(1.0),
+			V(0.1e-3),
+			V(1.5e+5),
+			V(1.0e+10),
+			V(0.0),
+		})}}),
+		wantOut:       "nums:[0,0,1,10,0,-1,-10,1,0.0001,150000,1e+10,0]",
+		wantOutIndent: "nums: [\n\t0,\n\t0,\n\t1,\n\t10,\n\t0,\n\t-1,\n\t-10,\n\t1,\n\t0.0001,\n\t150000,\n\t1e+10,\n\t0\n]\n",
+	}, {
+		in: `nums: [0xbeefbeef,0xbeefbeefbeefbeef]`,
+		wantVal: V(Msg{{ID("nums"), func() Value {
+			if flags.Proto1Legacy {
+				return V(Lst{V(int32(-1091584273)), V(int64(-4688318750159552785))})
+			} else {
+				return V(Lst{V(uint32(0xbeefbeef)), V(uint64(0xbeefbeefbeefbeef))})
+			}
+		}()}}),
+	}, {
+		in:      `num: +0`,
+		wantErr: `invalid "+0" as number or bool`,
+	}, {
+		in:      `num: 01.1234`,
+		wantErr: `invalid "01.1234" as number or bool`,
+	}, {
+		in:      `num: 0x`,
+		wantErr: `invalid "0x" as number or bool`,
+	}, {
+		in:      `num: 0xX`,
+		wantErr: `invalid "0xX" as number or bool`,
+	}, {
+		in:      `num: 0800`,
+		wantErr: `invalid "0800" as number or bool`,
+	}, {
+		in:      `num: true.`,
+		wantErr: `invalid "true." as number or bool`,
+	}, {
+		in:      `num: .`,
+		wantErr: `parsing ".": invalid syntax`,
+	}, {
+		in:      `num: -.`,
+		wantErr: `parsing "-.": invalid syntax`,
+	}, {
+		in:      `num: 1e10000`,
+		wantErr: `parsing "1e10000": value out of range`,
+	}, {
+		in:      `num: 99999999999999999999`,
+		wantErr: `parsing "99999999999999999999": value out of range`,
+	}, {
+		in:      `num: -99999999999999999999`,
+		wantErr: `parsing "-99999999999999999999": value out of range`,
+	}, {
+		in:      "x:  -",
+		wantErr: `syntax error (line 1:5)`,
+	}, {
+		in:      "x:[\"💩\"x",
+		wantErr: `syntax error (line 1:7)`,
+	}, {
+		in:      "x:\n\n[\"🔥🔥🔥\"x",
+		wantErr: `syntax error (line 3:7)`,
+	}, {
+		in:      "x:[\"👍🏻👍🏿\"x",
+		wantErr: `syntax error (line 1:10)`, // multi-rune emojis; could be column:8
+	}, {
+		in: `
+			firstName : "John",
+			lastName : "Smith" ,
+			isAlive : true,
+			age : 27,
+			address { # missing colon is okay for messages
+			    streetAddress : "21 2nd Street" ,
+			    city : "New York" ,
+			    state : "NY" ,
+			    postalCode : "10021-3100" ; # trailing semicolon is okay
+			},
+			phoneNumbers : [ {
+			    type : "home" ,
+			    number : "212 555-1234"
+			} , {
+			    type : "office" ,
+			    number : "646 555-4567"
+			} , {
+			    type : "mobile" ,
+			    number : "123 456-7890" , # trailing comma is okay
+			} ],
+			children : [] ,
+			spouse : null`,
+		wantVal: V(Msg{
+			{ID("firstName"), V("John")},
+			{ID("lastName"), V("Smith")},
+			{ID("isAlive"), V(true)},
+			{ID("age"), V(27.0)},
+			{ID("address"), V(Msg{
+				{ID("streetAddress"), V("21 2nd Street")},
+				{ID("city"), V("New York")},
+				{ID("state"), V("NY")},
+				{ID("postalCode"), V("10021-3100")},
+			})},
+			{ID("phoneNumbers"), V([]Value{
+				V(Msg{
+					{ID("type"), V("home")},
+					{ID("number"), V("212 555-1234")},
+				}),
+				V(Msg{
+					{ID("type"), V("office")},
+					{ID("number"), V("646 555-4567")},
+				}),
+				V(Msg{
+					{ID("type"), V("mobile")},
+					{ID("number"), V("123 456-7890")},
+				}),
+			})},
+			{ID("children"), V([]Value{})},
+			{ID("spouse"), V(protoreflect.Name("null"))},
+		}),
+		wantOut:        `firstName:"John" lastName:"Smith" isAlive:true age:27 address:{streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"} phoneNumbers:[{type:"home" number:"212 555-1234"},{type:"office" number:"646 555-4567"},{type:"mobile" number:"123 456-7890"}] children:[] spouse:null`,
+		wantOutBracket: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:<streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"> phoneNumbers:[<type:"home" number:"212 555-1234">,<type:"office" number:"646 555-4567">,<type:"mobile" number:"123 456-7890">] children:[] spouse:null`,
+		wantOutIndent: `firstName: "John"
+lastName: "Smith"
+isAlive: true
+age: 27
+address: {
+	streetAddress: "21 2nd Street"
+	city: "New York"
+	state: "NY"
+	postalCode: "10021-3100"
+}
+phoneNumbers: [
+	{
+		type: "home"
+		number: "212 555-1234"
+	},
+	{
+		type: "office"
+		number: "646 555-4567"
+	},
+	{
+		type: "mobile"
+		number: "123 456-7890"
+	}
+]
+children: []
+spouse: null
+`,
+	}}
+
+	opts := cmp.Options{
+		cmpopts.EquateEmpty(),
+
+		// Transform composites (List and Message).
+		cmp.FilterValues(func(x, y Value) bool {
+			return (x.Type() == List && y.Type() == List) || (x.Type() == Message && y.Type() == Message)
+		}, cmp.Transformer("", func(v Value) interface{} {
+			if v.Type() == List {
+				return v.List()
+			} else {
+				return v.Message()
+			}
+		})),
+
+		// Compare scalars (Bool, Int, Uint, Float, String, Name).
+		cmp.FilterValues(func(x, y Value) bool {
+			return !(x.Type() == List && y.Type() == List) && !(x.Type() == Message && y.Type() == Message)
+		}, cmp.Comparer(func(x, y Value) bool {
+			if x.Type() == List || x.Type() == Message || y.Type() == List || y.Type() == Message {
+				return false
+			}
+			// Ensure golden value is always in x variable.
+			if len(x.raw) > 0 {
+				x, y = y, x
+			}
+			switch x.Type() {
+			case Bool:
+				want, _ := x.Bool()
+				got, ok := y.Bool()
+				return got == want && ok
+			case Int:
+				want, _ := x.Int(true)
+				got, ok := y.Int(want < math.MinInt32 || math.MaxInt32 < want)
+				return got == want && ok
+			case Uint:
+				want, _ := x.Uint(true)
+				got, ok := y.Uint(math.MaxUint32 < want)
+				return got == want && ok
+			case Float:
+				want, _ := x.Float(true)
+				got, ok := y.Float(math.MaxFloat32 < math.Abs(want))
+				if math.IsNaN(got) || math.IsNaN(want) {
+					return math.IsNaN(got) == math.IsNaN(want)
+				}
+				return got == want && ok
+			case Name:
+				want, _ := x.Name()
+				got, ok := y.Name()
+				return got == want && ok
+			default:
+				return x.String() == y.String()
+			}
+		})),
+	}
+	for _, tt := range tests {
+		t.Run("", func(t *testing.T) {
+			if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
+				gotVal, err := Unmarshal([]byte(tt.in))
+				if err == nil {
+					if tt.wantErr != "" {
+						t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
+					}
+				} else {
+					if tt.wantErr == "" {
+						t.Errorf("Unmarshal(): got %v, want nil error", err)
+					} else if !strings.Contains(err.Error(), tt.wantErr) {
+						t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
+					}
+				}
+				if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
+					t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
+				}
+			}
+			if tt.wantOut != "" {
+				gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, false)
+				if err != nil {
+					t.Errorf("Marshal(): got %v, want nil error", err)
+				}
+				if string(gotOut) != tt.wantOut {
+					t.Errorf("Marshal():\ngot:  %s\nwant: %s", gotOut, tt.wantOut)
+				}
+			}
+			if tt.wantOutBracket != "" {
+				gotOut, err := Marshal(tt.wantVal, "", [2]byte{'<', '>'}, false)
+				if err != nil {
+					t.Errorf("Marshal(Bracket): got %v, want nil error", err)
+				}
+				if string(gotOut) != tt.wantOutBracket {
+					t.Errorf("Marshal(Bracket):\ngot:  %s\nwant: %s", gotOut, tt.wantOutBracket)
+				}
+			}
+			if tt.wantOutASCII != "" {
+				gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, true)
+				if err != nil {
+					t.Errorf("Marshal(ASCII): got %v, want nil error", err)
+				}
+				if string(gotOut) != tt.wantOutASCII {
+					t.Errorf("Marshal(ASCII):\ngot:  %s\nwant: %s", gotOut, tt.wantOutASCII)
+				}
+			}
+			if tt.wantOutIndent != "" {
+				gotOut, err := Marshal(tt.wantVal, "\t", [2]byte{0, 0}, false)
+				if err != nil {
+					t.Errorf("Marshal(Indent): got %v, want nil error", err)
+				}
+				if string(gotOut) != tt.wantOutIndent {
+					t.Errorf("Marshal(Indent):\ngot:  %s\nwant: %s", gotOut, tt.wantOutIndent)
+				}
+			}
+		})
+	}
+}

diff --git a/internal/encoding/text/value.go b/internal/encoding/text/value.go
new file mode 100644
index 0000000..8dddd78
--- /dev/null
+++ b/internal/encoding/text/value.go

@@ -0,0 +1,327 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package text implements the text format for protocol buffers.
+// This package has no semantic understanding for protocol buffers and is only
+// a parser and composer for the format.
+//
+// There is no formal specification for the protobuf text format; as such, the
+// C++ implementation (see google::protobuf::TextFormat) is the reference
+// implementation of the text format.
+//
+// This package is neither a superset nor a subset of the C++ implementation.
+// This implementation permits a more liberal grammar in some cases to be
+// backwards compatible with the historical Go implementation.
+// Future parsings unique to Go should not be added.
+// Some grammars allowed by the C++ implementation are deliberately
+// not implemented here because they are considered a bug by the protobuf team
+// and should not be replicated.
+//
+// The Go implementation should implement a sufficient amount of the C++
+// grammar such that the default text serialization by C++ can be parsed by Go.
+// However, just because the C++ parser accepts some input does not mean that
+// the Go implementation should as well.
+//
+// The text format is almost a superset of JSON except:
+//	* message keys are not quoted strings, but identifiers
+//	* the top-level value must be a message without the delimiters
+package text
+
+import (
+	"fmt"
+	"math"
+	"strings"
+
+	"google.golang.org/proto/internal/flags"
+	"google.golang.org/proto/reflect/protoreflect"
+)
+
+// Type represents a type expressible in the text format.
+// The zero Type is not a valid type; its String method reports "<invalid>".
+type Type uint8
+
+const (
+	// The zero value is deliberately left unnamed so that an unset Type
+	// never matches one of the kinds below.
+	_ Type = iota
+
+	// Bool is a boolean (e.g., "true" or "false").
+	Bool
+	// Int is a signed integer (e.g., "-1423").
+	Int
+	// Uint is an unsigned integer (e.g., "0xdeadbeef").
+	Uint
+	// Float is a floating-point number (e.g., "1.234" or "1e100").
+	Float
+	// String is a quoted string (e.g., `"the quick brown fox"`).
+	String
+	// Name is a protocol buffer identifier (e.g., `field_name`).
+	Name
+	// List is an ordered list of values (e.g., `[0, "one", true]`).
+	List
+	// Message is an ordered map of values (e.g., `{key: null}`).
+	// Keys are identifiers rather than quoted strings.
+	Message
+)
+
+// String returns the lowercase name of t (e.g., "bool" or "message").
+// Any value outside the declared kinds, including the zero Type,
+// yields "<invalid>".
+func (t Type) String() string {
+	// Index 0 is intentionally left empty: the zero Type is not a valid kind.
+	names := [...]string{
+		Bool:    "bool",
+		Int:     "int",
+		Uint:    "uint",
+		Float:   "float",
+		String:  "string",
+		Name:    "name",
+		List:    "list",
+		Message: "message",
+	}
+	if t == 0 || int(t) >= len(names) {
+		return "<invalid>"
+	}
+	return names[t]
+}
+
+// Value contains a value of a given Type.
+//
+// ValueOf populates only the field that matches typ: Bool is stored in num
+// as 0 or 1, Int and Uint are stored in num as their 64-bit pattern, and
+// Float is stored in num as math.Float64bits of the float64 value.
+// ValueOf leaves raw empty; rawValueOf records it alongside the value.
+type Value struct {
+	typ Type
+	raw []byte     // raw bytes of the serialized data
+	str string     // only for String or Name
+	num uint64     // only for Bool, Int, Uint, or Float
+	arr []Value    // only for List
+	obj [][2]Value // only for Message
+}
+
+// ValueOf wraps a Go value in a Value using the following mapping:
+//	bool               =>  Bool
+//	int32, int64       =>  Int
+//	uint32, uint64     =>  Uint
+//	float32, float64   =>  Float
+//	string, []byte     =>  String
+//	protoreflect.Name  =>  Name
+//	[]Value            =>  List
+//	[][2]Value         =>  Message
+//
+// ValueOf panics if the Go type is not one of the above.
+func ValueOf(v interface{}) Value {
+	switch x := v.(type) {
+	case bool:
+		// Booleans are stored numerically: 1 for true, 0 for false.
+		var bit uint64
+		if x {
+			bit = 1
+		}
+		return Value{typ: Bool, num: bit}
+	case int32:
+		return Value{typ: Int, num: uint64(x)}
+	case int64:
+		return Value{typ: Int, num: uint64(x)}
+	case uint32:
+		return Value{typ: Uint, num: uint64(x)}
+	case uint64:
+		return Value{typ: Uint, num: x}
+	case float32:
+		// Floats of either width are kept as the bits of a float64.
+		return Value{typ: Float, num: math.Float64bits(float64(x))}
+	case float64:
+		return Value{typ: Float, num: math.Float64bits(x)}
+	case string:
+		return Value{typ: String, str: x}
+	case []byte:
+		return Value{typ: String, str: string(x)}
+	case protoreflect.Name:
+		return Value{typ: Name, str: string(x)}
+	case []Value:
+		return Value{typ: List, arr: x}
+	case [][2]Value:
+		return Value{typ: Message, obj: x}
+	}
+	panic(fmt.Sprintf("invalid type %T", v))
+}
+
+// rawValueOf behaves like ValueOf but also records raw as the serialized
+// form of the resulting Value. It panics under the same conditions as ValueOf.
+func rawValueOf(v interface{}, raw []byte) Value {
+	val := ValueOf(v)
+	val.raw = raw
+	return val
+}
+
+// Type returns the type of the value. When parsing, this is a best-effort
+// guess at the resulting type. However, there are ambiguities as to the exact
+// type of the value (e.g., "false" is either a bool or a name).
+// Thus, some of the types are convertible with each other.
+// The Bool, Int, Uint, Float, and Name methods return a boolean to report
+// whether the conversion was successful.
+func (v Value) Type() Type {
+	return v.typ
+}
+
+// Bool interprets v as a boolean and reports whether that was possible.
+// Besides Bool values, an Int or Uint whose raw text has no leading minus
+// sign and whose value is 0 or 1 is accepted.
+func (v Value) Bool() (x bool, ok bool) {
+	truthy := v.num > 0
+	if v.typ == Bool {
+		return truthy, true
+	}
+	if v.typ != Uint && v.typ != Int {
+		return false, false
+	}
+	// C++ allows a 1-bit unsigned integer (e.g., "0", "1", or "0x1").
+	noMinus := len(v.raw) > 0 && v.raw[0] != '-'
+	if noMinus && v.num < 2 {
+		return truthy, true
+	}
+	return false, false
+}
+
+// Int converts v to an int64 and reports whether the conversion succeeded.
+// Unless b64 is set, the result must fit in the int32 range.
+func (v Value) Int(b64 bool) (x int64, ok bool) {
+	if v.typ == Int {
+		signed := int64(v.num)
+		if !b64 && (signed < math.MinInt32 || signed > math.MaxInt32) {
+			return 0, false
+		}
+		return signed, true
+	}
+	if v.typ != Uint {
+		return 0, false
+	}
+
+	// An unsigned value converts directly when it fits the signed range
+	// of the requested width.
+	limit := uint64(math.MaxInt32)
+	if b64 {
+		limit = math.MaxInt64
+	}
+	if v.num <= limit {
+		return int64(v.num), true
+	}
+
+	// C++ accepts large positive hex numbers as negative values.
+	// This feature is here for proto1 backwards compatibility purposes.
+	isHex := len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x'
+	if !flags.Proto1Legacy || !isHex {
+		return 0, false
+	}
+	if b64 {
+		return int64(v.num), true
+	}
+	// The low 32 bits are reinterpreted as an int32; the conversion only
+	// counts as successful when no higher bits were set.
+	return int64(int32(v.num)), v.num <= math.MaxUint32
+}
+
+// Uint converts v to a uint64 and reports whether the conversion succeeded.
+// Unless b64 is set, the result must not exceed math.MaxUint32.
+func (v Value) Uint(b64 bool) (x uint64, ok bool) {
+	if v.typ == Int {
+		// An Int is only accepted when its raw text is present and does
+		// not start with a minus sign.
+		signed := int64(v.num)
+		noMinus := len(v.raw) > 0 && v.raw[0] != '-'
+		if noMinus && (b64 || signed <= math.MaxUint32) {
+			return uint64(signed), true
+		}
+		return 0, false
+	}
+	if v.typ == Uint && (b64 || v.num <= math.MaxUint32) {
+		return v.num, true
+	}
+	return 0, false
+}
+
+// Float converts v to a float64 and reports whether the conversion succeeded.
+// Int and Uint values always convert. A Float value converts when b64 is set,
+// when it is NaN or infinite, or when its magnitude fits in a float32.
+func (v Value) Float(b64 bool) (x float64, ok bool) {
+	if v.typ == Int {
+		return float64(int64(v.num)), true // possibly lossy, but allowed
+	}
+	if v.typ == Uint {
+		return float64(v.num), true // possibly lossy, but allowed
+	}
+	if v.typ != Float {
+		return 0, false
+	}
+	f := math.Float64frombits(v.num)
+	special := math.IsNaN(f) || math.IsInf(f, 0)
+	if !special && !b64 && math.Abs(f) > math.MaxFloat32 {
+		return 0, false
+	}
+	return f, true
+}
+
+// String returns the string content of v if the Type is String.
+// For any other Type, it returns a formatted rendering of v intended for
+// debugging purposes.
+//
+// Since String is used to represent both text and binary, it is not validated
+// to contain valid UTF-8. When using this value with the string type in proto,
+// it is the user's responsibility to perform additional UTF-8 validation.
+func (v Value) String() string {
+	if v.typ == String {
+		return v.str
+	}
+	return v.stringValue()
+}
+
+// stringValue formats a non-String value for debugging output.
+// Scalars and names use their raw form, lists render as "[a,b]", and
+// messages render as "{k:v,k:v}" with String keys wrapped in brackets.
+func (v Value) stringValue() string {
+	switch v.typ {
+	case Bool, Int, Uint, Float, Name:
+		return string(v.Raw())
+	case List:
+		elems := make([]string, len(v.arr))
+		for i, elem := range v.arr {
+			elems[i] = elem.String()
+		}
+		return "[" + strings.Join(elems, ",") + "]"
+	case Message:
+		items := make([]string, 0, len(v.obj))
+		for _, kv := range v.obj {
+			key := kv[0].String()
+			if kv[0].typ == String {
+				key = "[" + key + "]"
+			}
+			items = append(items, key+":"+kv[1].String())
+		}
+		return "{" + strings.Join(items, ",") + "}"
+	}
+	return "<invalid>"
+}
+
+// Name returns v as a field name or enum value name and reports whether the
+// value can be treated as an identifier.
+func (v Value) Name() (protoreflect.Name, bool) {
+	if v.typ == Name {
+		return protoreflect.Name(v.str), true
+	}
+	if v.typ != Bool && v.typ != Float {
+		return "", false
+	}
+	// Ambiguity arises in unmarshalValue since "nan" may be interpreted as
+	// either a Name type (for enum values) or a Float type.
+	// Similarly, "true" may be interpreted as either a Name or Bool type.
+	// The raw text decides: it must itself be a valid identifier.
+	if ident := protoreflect.Name(v.raw); ident.IsValid() {
+		return ident, true
+	}
+	return "", false
+}
+
+// List returns the elements of v. It panics if the Type is not List.
+// Mutations on the return value may not be observable from the Raw method.
+func (v Value) List() []Value {
+	if v.typ == List {
+		return v.arr
+	}
+	panic("value is not a list")
+}
+
+// Message returns the items of v. It panics if the Type is not Message.
+// Each [2]Value is a key and value pair, where the key is either
+// a Name (representing a field name), a String (representing extension field
+// names or the Any type URL), or an Uint for unknown fields.
+//
+// Mutations on the return value may not be observable from the Raw method.
+func (v Value) Message() [][2]Value {
+	if v.typ == Message {
+		return v.obj
+	}
+	panic("value is not a message")
+}
+
+// Raw returns the raw representation of the value.
+// The returned value may alias the input given to Unmarshal.
+//
+// If no raw bytes were recorded for v, the value is serialized on demand;
+// "<invalid>" is returned when that serialization is rejected.
+func (v Value) Raw() []byte {
+	if len(v.raw) > 0 {
+		return v.raw
+	}
+	// No recorded raw bytes: serialize v with a zero-value encoder instead.
+	p := encoder{}
+	// NOTE(review): Merge presumably reports whether err is nil or
+	// non-fatal; confirm against the errors package.
+	if err := p.marshalValue(v); !p.nerr.Merge(err) {
+		return []byte("<invalid>")
+	}
+	return p.out
+}

diff --git a/reflect/protoreflect/proto.go b/reflect/protoreflect/proto.go
new file mode 100644
index 0000000..06be22e
--- /dev/null
+++ b/reflect/protoreflect/proto.go

@@ -0,0 +1,22 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package protoreflect
+
+import (
+	"regexp"
+)
+
+// TODO: This is a stub while the full implementation is under review.
+// See https://golang.org/cl/127823.
+
+// Name is an identifier such as a field name or enum value name.
+type Name string
+
+// regexName matches a non-empty identifier: a letter or underscore followed
+// by any number of letters, digits, or underscores (ASCII only).
+var regexName = regexp.MustCompile(`^[_a-zA-Z][_a-zA-Z0-9]*$`)
+
+// IsValid reports whether n is a syntactically valid identifier.
+func (n Name) IsValid() bool {
+	s := string(n)
+	return regexName.MatchString(s)
+}
commit	27c2a76c850826490ae8ae43d46a4d1276b57ceb	[log] [tgz]
author	Joe Tsai <joetsai@digital-static.net>	Wed Aug 01 16:48:18 2018 -0700
committer	Joe Tsai <thebrokentoaster@gmail.com>	Tue Aug 07 22:44:06 2018 +0000
tree	db80a5fbf04677876d57df858a96ef4974cc387b
parent	879b18d902097ec222c23b5124c9cccf9c10af9b [diff]