encoding/prototext: rewrite of internal/encoding/text
* Fixes golang/protobuf#842. Unmarshal can now parse singular or
repeated message fields without the field separator.
* Fixes golang/protobuf#1011. Handles negative 0 properly.
* For unknown fields with fixed 32-bit and 64-bit wire types, output is
now in hex format with a 0x prefix, similar to the C++ lib output. The previous
Go implementation simply output these as decimal numbers (%d).
* All parsing errors, except for unexpected EOF, should now contain line
and column number info.
* Fixed the following conformance-related features:
* Parse nan, inf, -inf, infinity, -infinity case-insensitively.
* Interpret float32 overflows as inf or -inf.
* Parse large int-like number as proto float.
* Discard unknown map field if DiscardUnknown=true.
* Allow whitespace/comments in Any type URL and extension field names per spec.
* Improves performance and memory usage. It is now as fast and efficient as
protojson, if not better on most benchmarks.
name old time/op new time/op delta
Text/Unmarshal/google_message1_proto2-4 14.1µs ±43% 8.7µs ±12% -38.27% (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4 11.6µs ±18% 7.7µs ± 9% -33.69% (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4 6.20ms ±27% 4.10ms ± 5% -33.95% (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4 12.8µs ± 6% 10.3µs ±23% -19.54% (p=0.000 n=9+10)
Text/Marshal/google_message1_proto3-4 11.9µs ±16% 8.6µs ±10% -27.45% (p=0.000 n=10+10)
Text/Marshal/google_message2-4 5.59ms ± 5% 5.30ms ±22% ~ (p=0.356 n=9+10)
JSON/Unmarshal/google_message1_proto2-4 12.3µs ±61% 13.9µs ±26% ~ (p=0.190 n=10+10)
JSON/Unmarshal/google_message1_proto3-4 7.51µs ± 6% 7.86µs ± 1% +4.66% (p=0.010 n=10+9)
JSON/Unmarshal/google_message2-4 3.74ms ± 2% 3.94ms ± 2% +5.32% (p=0.000 n=10+10)
JSON/Marshal/google_message1_proto2-4 9.90µs ±12% 9.95µs ± 4% ~ (p=0.315 n=9+10)
JSON/Marshal/google_message1_proto3-4 7.55µs ± 4% 7.93µs ± 3% +4.98% (p=0.000 n=10+10)
JSON/Marshal/google_message2-4 4.29ms ± 5% 4.49ms ± 2% +4.53% (p=0.001 n=10+10)
name old alloc/op new alloc/op delta
Text/Unmarshal/google_message1_proto2-4 12.5kB ± 0% 2.0kB ± 0% -83.87% (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4 12.2kB ± 0% 1.8kB ± 0% -85.33% (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4 5.35MB ± 0% 0.89MB ± 0% -83.28% (p=0.000 n=10+9)
Text/Marshal/google_message1_proto2-4 12.0kB ± 0% 1.4kB ± 0% -88.15% (p=0.000 n=10+10)
Text/Marshal/google_message1_proto3-4 12.4kB ± 0% 1.9kB ± 0% -84.91% (p=0.000 n=10+10)
Text/Marshal/google_message2-4 5.64MB ± 0% 1.02MB ± 0% -81.85% (p=0.000 n=10+9)
JSON/Unmarshal/google_message1_proto2-4 2.29kB ± 0% 2.29kB ± 0% ~ (all equal)
JSON/Unmarshal/google_message1_proto3-4 2.08kB ± 0% 2.08kB ± 0% ~ (all equal)
JSON/Unmarshal/google_message2-4 899kB ± 0% 899kB ± 0% ~ (p=1.000 n=10+10)
JSON/Marshal/google_message1_proto2-4 1.46kB ± 0% 1.46kB ± 0% ~ (all equal)
JSON/Marshal/google_message1_proto3-4 1.36kB ± 0% 1.36kB ± 0% ~ (all equal)
JSON/Marshal/google_message2-4 1.19MB ± 0% 1.19MB ± 0% ~ (p=0.197 n=10+10)
name old allocs/op new allocs/op delta
Text/Unmarshal/google_message1_proto2-4 133 ± 0% 89 ± 0% -33.08% (p=0.000 n=10+10)
Text/Unmarshal/google_message1_proto3-4 108 ± 0% 67 ± 0% -37.96% (p=0.000 n=10+10)
Text/Unmarshal/google_message2-4 60.0k ± 0% 38.7k ± 0% -35.52% (p=0.000 n=10+10)
Text/Marshal/google_message1_proto2-4 65.0 ± 0% 25.0 ± 0% -61.54% (p=0.000 n=10+10)
Text/Marshal/google_message1_proto3-4 59.0 ± 0% 22.0 ± 0% -62.71% (p=0.000 n=10+10)
Text/Marshal/google_message2-4 27.4k ± 0% 7.3k ± 0% -73.39% (p=0.000 n=10+10)
JSON/Unmarshal/google_message1_proto2-4 95.0 ± 0% 95.0 ± 0% ~ (all equal)
JSON/Unmarshal/google_message1_proto3-4 74.0 ± 0% 74.0 ± 0% ~ (all equal)
JSON/Unmarshal/google_message2-4 36.3k ± 0% 36.3k ± 0% ~ (all equal)
JSON/Marshal/google_message1_proto2-4 27.0 ± 0% 27.0 ± 0% ~ (all equal)
JSON/Marshal/google_message1_proto3-4 30.0 ± 0% 30.0 ± 0% ~ (all equal)
JSON/Marshal/google_message2-4 11.3k ± 0% 11.3k ± 0% ~ (p=1.000 n=10+10)
Change-Id: I377925facde5535f06333b6f25e9c9b358dc062f
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/204602
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/encoding/prototext/decode.go b/encoding/prototext/decode.go
index 4e4d0c9..5e9975d 100644
--- a/encoding/prototext/decode.go
+++ b/encoding/prototext/decode.go
@@ -6,6 +6,7 @@
import (
"fmt"
+ "io"
"strings"
"unicode/utf8"
@@ -55,53 +56,101 @@
func (o UnmarshalOptions) Unmarshal(b []byte, m proto.Message) error {
proto.Reset(m)
- // Parse into text.Value of message type.
- val, err := text.Unmarshal(b)
- if err != nil {
- return err
- }
-
if o.Resolver == nil {
o.Resolver = protoregistry.GlobalTypes
}
- err = o.unmarshalMessage(val.Message(), m.ProtoReflect())
- if err != nil {
+
+ dec := decoder{text.NewDecoder(b), o}
+ if err := dec.unmarshalMessage(m.ProtoReflect(), false); err != nil {
return err
}
-
if o.AllowPartial {
return nil
}
return proto.IsInitialized(m)
}
-// unmarshalMessage unmarshals a [][2]text.Value message into the given protoreflect.Message.
-func (o UnmarshalOptions) unmarshalMessage(tmsg [][2]text.Value, m pref.Message) error {
+type decoder struct {
+ *text.Decoder
+ opts UnmarshalOptions
+}
+
+// newError returns an error object with position info.
+func (d decoder) newError(pos int, f string, x ...interface{}) error {
+ line, column := d.Position(pos)
+ head := fmt.Sprintf("(line %d:%d): ", line, column)
+ return errors.New(head+f, x...)
+}
+
+// unexpectedTokenError returns a syntax error for the given unexpected token.
+func (d decoder) unexpectedTokenError(tok text.Token) error {
+ return d.syntaxError(tok.Pos(), "unexpected token: %s", tok.RawString())
+}
+
+// syntaxError returns a syntax error for given position.
+func (d decoder) syntaxError(pos int, f string, x ...interface{}) error {
+ line, column := d.Position(pos)
+ head := fmt.Sprintf("syntax error (line %d:%d): ", line, column)
+ return errors.New(head+f, x...)
+}
+
+// unmarshalMessage unmarshals into the given protoreflect.Message.
+func (d decoder) unmarshalMessage(m pref.Message, checkDelims bool) error {
messageDesc := m.Descriptor()
if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
return errors.New("no support for proto1 MessageSets")
}
- // Handle expanded Any message.
- if messageDesc.FullName() == "google.protobuf.Any" && isExpandedAny(tmsg) {
- return o.unmarshalAny(tmsg[0], m)
+ if messageDesc.FullName() == "google.protobuf.Any" {
+ return d.unmarshalAny(m, checkDelims)
+ }
+
+ if checkDelims {
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+
+ if tok.Kind() != text.MessageOpen {
+ return d.unexpectedTokenError(tok)
+ }
}
var seenNums set.Ints
var seenOneofs set.Ints
fieldDescs := messageDesc.Fields()
- for _, tfield := range tmsg {
- tkey := tfield[0]
- tval := tfield[1]
+
+ for {
+ // Read field name.
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ switch typ := tok.Kind(); typ {
+ case text.Name:
+ // Continue below.
+ case text.EOF:
+ if checkDelims {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+ default:
+ if checkDelims && typ == text.MessageClose {
+ return nil
+ }
+ return d.unexpectedTokenError(tok)
+ }
// Resolve the field descriptor.
var name pref.Name
var fd pref.FieldDescriptor
var xt pref.ExtensionType
var xtErr error
- switch tkey.Type() {
- case text.Name:
- name, _ = tkey.Name()
+ var isFieldNumberName bool
+
+ switch tok.NameKind() {
+ case text.IdentName:
+ name = pref.Name(tok.IdentName())
fd = fieldDescs.ByName(name)
if fd == nil {
// The proto name of a group field is in all lowercase,
@@ -113,30 +162,30 @@
} else if fd.Kind() == pref.GroupKind && fd.Message().Name() != name {
fd = nil // reset since field name is actually the message name
}
- case text.String:
+
+ case text.TypeName:
// Handle extensions only. This code path is not for Any.
- if messageDesc.FullName() == "google.protobuf.Any" {
- break
- }
- xt, xtErr = o.findExtension(pref.FullName(tkey.String()))
- case text.Uint:
- v, _ := tkey.Uint(false)
- num := pref.FieldNumber(v)
+ xt, xtErr = d.findExtension(pref.FullName(tok.TypeName()))
+
+ case text.FieldNumber:
+ isFieldNumberName = true
+ num := pref.FieldNumber(tok.FieldNumber())
if !num.IsValid() {
- return errors.New("invalid field number: %d", num)
+ return d.newError(tok.Pos(), "invalid field number: %d", num)
}
fd = fieldDescs.ByNumber(num)
if fd == nil {
- xt, xtErr = o.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
+ xt, xtErr = d.opts.Resolver.FindExtensionByNumber(messageDesc.FullName(), num)
}
}
+
if xt != nil {
fd = xt.TypeDescriptor()
if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
- return errors.New("message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
+ return d.newError(tok.Pos(), "message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
}
} else if xtErr != nil && xtErr != protoregistry.NotFound {
- return errors.New("unable to resolve: %v", xtErr)
+ return d.newError(tok.Pos(), "unable to resolve [%s]: %v", tok.RawString(), xtErr)
}
if flags.ProtoLegacy {
if fd != nil && fd.IsWeak() && fd.Message().IsPlaceholder() {
@@ -146,67 +195,64 @@
// Handle unknown fields.
if fd == nil {
- if o.DiscardUnknown || messageDesc.ReservedNames().Has(name) {
+ if d.opts.DiscardUnknown || messageDesc.ReservedNames().Has(name) {
+ d.skipValue()
continue
}
- return errors.New("%v contains unknown field: %v", messageDesc.FullName(), tkey)
+ return d.newError(tok.Pos(), "unknown field: %v", tok.RawString())
}
// Handle fields identified by field number.
- if tkey.Type() == text.Uint {
+ if isFieldNumberName {
// TODO: Add an option to permit parsing field numbers.
//
// This requires careful thought as the MarshalOptions.EmitUnknown
- // option allows formatting unknown fields as the field number
- // and the best-effort textual representation of the field value.
- // In that case, it may not be possible to unmarshal the value from
- // a parser that does have information about the unknown field.
- return errors.New("cannot specify field by number: %v", tkey)
+ // option allows formatting unknown fields as the field number and the
+ // best-effort textual representation of the field value. In that case,
+ // it may not be possible to unmarshal the value from a parser that does
+ // have information about the unknown field.
+ return d.newError(tok.Pos(), "cannot specify field by number: %v", tok.RawString())
}
switch {
case fd.IsList():
- // If input is not a list, turn it into a list.
- var items []text.Value
- if tval.Type() != text.List {
- items = []text.Value{tval}
- } else {
- items = tval.List()
+ kind := fd.Kind()
+ if kind != pref.MessageKind && kind != pref.GroupKind && !tok.HasSeparator() {
+ return d.syntaxError(tok.Pos(), "missing field separator :")
}
list := m.Mutable(fd).List()
- if err := o.unmarshalList(items, fd, list); err != nil {
+ if err := d.unmarshalList(fd, list); err != nil {
return err
}
- case fd.IsMap():
- // If input is not a list, turn it into a list.
- var items []text.Value
- if tval.Type() != text.List {
- items = []text.Value{tval}
- } else {
- items = tval.List()
- }
+ case fd.IsMap():
mmap := m.Mutable(fd).Map()
- if err := o.unmarshalMap(items, fd, mmap); err != nil {
+ if err := d.unmarshalMap(fd, mmap); err != nil {
return err
}
+
default:
+ kind := fd.Kind()
+ if kind != pref.MessageKind && kind != pref.GroupKind && !tok.HasSeparator() {
+ return d.syntaxError(tok.Pos(), "missing field separator :")
+ }
+
// If field is a oneof, check if it has already been set.
if od := fd.ContainingOneof(); od != nil {
idx := uint64(od.Index())
if seenOneofs.Has(idx) {
- return errors.New("oneof %v is already set", od.FullName())
+ return d.newError(tok.Pos(), "error parsing %q, oneof %v is already set", tok.RawString(), od.FullName())
}
seenOneofs.Set(idx)
}
- // Required or optional fields.
num := uint64(fd.Number())
if seenNums.Has(num) {
- return errors.New("non-repeated field %v is repeated", fd.FullName())
+ return d.newError(tok.Pos(), "non-repeated field %q is repeated", tok.RawString())
}
- if err := o.unmarshalSingular(tval, fd, m); err != nil {
+
+ if err := d.unmarshalSingular(fd, m); err != nil {
return err
}
seenNums.Set(num)
@@ -217,285 +263,527 @@
}
// findExtension returns protoreflect.ExtensionType from the Resolver if found.
-func (o UnmarshalOptions) findExtension(xtName pref.FullName) (pref.ExtensionType, error) {
- xt, err := o.Resolver.FindExtensionByName(xtName)
+func (d decoder) findExtension(xtName pref.FullName) (pref.ExtensionType, error) {
+ xt, err := d.opts.Resolver.FindExtensionByName(xtName)
if err == nil {
return xt, nil
}
- return messageset.FindMessageSetExtension(o.Resolver, xtName)
+ return messageset.FindMessageSetExtension(d.opts.Resolver, xtName)
}
-// unmarshalSingular unmarshals given text.Value into the non-repeated field.
-func (o UnmarshalOptions) unmarshalSingular(input text.Value, fd pref.FieldDescriptor, m pref.Message) error {
+// unmarshalSingular unmarshals a non-repeated field value specified by the
+// given FieldDescriptor.
+func (d decoder) unmarshalSingular(fd pref.FieldDescriptor, m pref.Message) error {
var val pref.Value
+ var err error
switch fd.Kind() {
case pref.MessageKind, pref.GroupKind:
- if input.Type() != text.Message {
- return errors.New("%v contains invalid message/group value: %v", fd.FullName(), input)
- }
val = m.NewField(fd)
- if err := o.unmarshalMessage(input.Message(), val.Message()); err != nil {
- return err
- }
+ err = d.unmarshalMessage(val.Message(), true)
default:
- var err error
- val, err = unmarshalScalar(input, fd)
- if err != nil {
- return err
- }
+ val, err = d.unmarshalScalar(fd)
}
- m.Set(fd, val)
-
- return nil
+ if err == nil {
+ m.Set(fd, val)
+ }
+ return err
}
-// unmarshalScalar converts the given text.Value to a scalar/enum protoreflect.Value specified in
-// the given FieldDescriptor. Caller should not pass in a FieldDescriptor for a message/group kind.
-func unmarshalScalar(input text.Value, fd pref.FieldDescriptor) (pref.Value, error) {
- const b32 = false
- const b64 = true
+// unmarshalScalar unmarshals a scalar/enum protoreflect.Value specified by the
+// given FieldDescriptor.
+func (d decoder) unmarshalScalar(fd pref.FieldDescriptor) (pref.Value, error) {
+ tok, err := d.Read()
+ if err != nil {
+ return pref.Value{}, err
+ }
- switch kind := fd.Kind(); kind {
+ if tok.Kind() != text.Scalar {
+ return pref.Value{}, d.unexpectedTokenError(tok)
+ }
+
+ kind := fd.Kind()
+ switch kind {
case pref.BoolKind:
- if b, ok := input.Bool(); ok {
- return pref.ValueOfBool(bool(b)), nil
+ if b, ok := tok.Bool(); ok {
+ return pref.ValueOfBool(b), nil
}
+
case pref.Int32Kind, pref.Sint32Kind, pref.Sfixed32Kind:
- if n, ok := input.Int(b32); ok {
- return pref.ValueOfInt32(int32(n)), nil
+ if n, ok := tok.Int32(); ok {
+ return pref.ValueOfInt32(n), nil
}
+
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
- if n, ok := input.Int(b64); ok {
- return pref.ValueOfInt64(int64(n)), nil
+ if n, ok := tok.Int64(); ok {
+ return pref.ValueOfInt64(n), nil
}
+
case pref.Uint32Kind, pref.Fixed32Kind:
- if n, ok := input.Uint(b32); ok {
- return pref.ValueOfUint32(uint32(n)), nil
+ if n, ok := tok.Uint32(); ok {
+ return pref.ValueOfUint32(n), nil
}
+
case pref.Uint64Kind, pref.Fixed64Kind:
- if n, ok := input.Uint(b64); ok {
- return pref.ValueOfUint64(uint64(n)), nil
+ if n, ok := tok.Uint64(); ok {
+ return pref.ValueOfUint64(n), nil
}
+
case pref.FloatKind:
- if n, ok := input.Float(b32); ok {
- return pref.ValueOfFloat32(float32(n)), nil
+ if n, ok := tok.Float32(); ok {
+ return pref.ValueOfFloat32(n), nil
}
+
case pref.DoubleKind:
- if n, ok := input.Float(b64); ok {
- return pref.ValueOfFloat64(float64(n)), nil
+ if n, ok := tok.Float64(); ok {
+ return pref.ValueOfFloat64(n), nil
}
+
case pref.StringKind:
- if input.Type() == text.String {
- s := input.String()
+ if s, ok := tok.String(); ok {
if utf8.ValidString(s) {
return pref.ValueOfString(s), nil
}
- return pref.Value{}, errors.InvalidUTF8(string(fd.FullName()))
+ return pref.Value{}, d.newError(tok.Pos(), "contains invalid UTF-8")
}
+
case pref.BytesKind:
- if input.Type() == text.String {
- return pref.ValueOfBytes([]byte(input.String())), nil
+ if b, ok := tok.String(); ok {
+ return pref.ValueOfBytes([]byte(b)), nil
}
+
case pref.EnumKind:
- // If input is int32, use directly.
- if n, ok := input.Int(b32); ok {
- return pref.ValueOfEnum(pref.EnumNumber(n)), nil
- }
- if name, ok := input.Name(); ok {
+ if lit, ok := tok.Enum(); ok {
// Lookup EnumNumber based on name.
- if enumVal := fd.Enum().Values().ByName(name); enumVal != nil {
+ if enumVal := fd.Enum().Values().ByName(pref.Name(lit)); enumVal != nil {
return pref.ValueOfEnum(enumVal.Number()), nil
}
}
+ if num, ok := tok.Int32(); ok {
+ return pref.ValueOfEnum(pref.EnumNumber(num)), nil
+ }
+
default:
panic(fmt.Sprintf("invalid scalar kind %v", kind))
}
- return pref.Value{}, errors.New("%v contains invalid scalar value: %v", fd.FullName(), input)
+ return pref.Value{}, d.newError(tok.Pos(), "invalid value for %v type: %v", kind, tok.RawString())
}
-// unmarshalList unmarshals given []text.Value into given protoreflect.List.
-func (o UnmarshalOptions) unmarshalList(inputList []text.Value, fd pref.FieldDescriptor, list pref.List) error {
+// unmarshalList unmarshals into given protoreflect.List. A list value can
+// either be in [] syntax or simply just a single scalar/message value.
+func (d decoder) unmarshalList(fd pref.FieldDescriptor, list pref.List) error {
+ tok, err := d.Peek()
+ if err != nil {
+ return err
+ }
+
switch fd.Kind() {
case pref.MessageKind, pref.GroupKind:
- for _, input := range inputList {
- if input.Type() != text.Message {
- return errors.New("%v contains invalid message/group value: %v", fd.FullName(), input)
+ switch tok.Kind() {
+ case text.ListOpen:
+ d.Read()
+ for {
+ tok, err := d.Peek()
+ if err != nil {
+ return err
+ }
+
+ switch tok.Kind() {
+ case text.ListClose:
+ d.Read()
+ return nil
+ case text.MessageOpen:
+ pval := list.NewElement()
+ if err := d.unmarshalMessage(pval.Message(), true); err != nil {
+ return err
+ }
+ list.Append(pval)
+ default:
+ return d.unexpectedTokenError(tok)
+ }
}
- val := list.NewElement()
- if err := o.unmarshalMessage(input.Message(), val.Message()); err != nil {
+
+ case text.MessageOpen:
+ pval := list.NewElement()
+ if err := d.unmarshalMessage(pval.Message(), true); err != nil {
return err
}
- list.Append(val)
+ list.Append(pval)
+ return nil
}
+
default:
- for _, input := range inputList {
- val, err := unmarshalScalar(input, fd)
+ switch tok.Kind() {
+ case text.ListOpen:
+ d.Read()
+ for {
+ tok, err := d.Peek()
+ if err != nil {
+ return err
+ }
+
+ switch tok.Kind() {
+ case text.ListClose:
+ d.Read()
+ return nil
+ case text.Scalar:
+ pval, err := d.unmarshalScalar(fd)
+ if err != nil {
+ return err
+ }
+ list.Append(pval)
+ default:
+ return d.unexpectedTokenError(tok)
+ }
+ }
+
+ case text.Scalar:
+ pval, err := d.unmarshalScalar(fd)
if err != nil {
return err
}
- list.Append(val)
+ list.Append(pval)
+ return nil
}
}
- return nil
+ return d.unexpectedTokenError(tok)
}
-// unmarshalMap unmarshals given []text.Value into given protoreflect.Map.
-func (o UnmarshalOptions) unmarshalMap(input []text.Value, fd pref.FieldDescriptor, mmap pref.Map) error {
- // Determine ahead whether map entry is a scalar type or a message type in order to call the
- // appropriate unmarshalMapValue func inside the for loop below.
- unmarshalMapValue := unmarshalMapScalarValue
+// unmarshalMap unmarshals into given protoreflect.Map. A map value is a
+// textproto message containing {key: <kvalue>, value: <mvalue>}.
+func (d decoder) unmarshalMap(fd pref.FieldDescriptor, mmap pref.Map) error {
+ // Determine ahead whether map entry is a scalar type or a message type in
+ // order to call the appropriate unmarshalMapValue func inside
+ // unmarshalMapEntry.
+ var unmarshalMapValue func() (pref.Value, error)
switch fd.MapValue().Kind() {
case pref.MessageKind, pref.GroupKind:
- unmarshalMapValue = o.unmarshalMapMessageValue
- }
-
- for _, entry := range input {
- if entry.Type() != text.Message {
- return errors.New("%v contains invalid map entry: %v", fd.FullName(), entry)
+ unmarshalMapValue = func() (pref.Value, error) {
+ pval := mmap.NewValue()
+ if err := d.unmarshalMessage(pval.Message(), true); err != nil {
+ return pref.Value{}, err
+ }
+ return pval, nil
}
- tkey, tval, err := parseMapEntry(entry.Message(), fd.FullName())
- if err != nil {
- return err
- }
- pkey, err := unmarshalMapKey(tkey, fd.MapKey())
- if err != nil {
- return err
- }
- err = unmarshalMapValue(tval, pkey, fd.MapValue(), mmap)
- if err != nil {
- return err
+ default:
+ unmarshalMapValue = func() (pref.Value, error) {
+ return d.unmarshalScalar(fd.MapValue())
}
}
- return nil
-}
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ switch tok.Kind() {
+ case text.MessageOpen:
+ return d.unmarshalMapEntry(fd, mmap, unmarshalMapValue)
-// parseMapEntry parses [][2]text.Value for field names key and value, and return corresponding
-// field values. If there are duplicate field names, the value for the last field is returned. If
-// the field name does not exist, it will return the zero value of text.Value. It will return an
-// error if there are unknown field names.
-func parseMapEntry(mapEntry [][2]text.Value, name pref.FullName) (key text.Value, value text.Value, err error) {
- for _, field := range mapEntry {
- keyStr, ok := field[0].Name()
- if ok {
- switch keyStr {
- case "key":
- if key.Type() != 0 {
- return key, value, errors.New("%v contains duplicate key field", name)
+ case text.ListOpen:
+ for {
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ switch tok.Kind() {
+ case text.ListClose:
+ return nil
+ case text.MessageOpen:
+ if err := d.unmarshalMapEntry(fd, mmap, unmarshalMapValue); err != nil {
+ return err
}
- key = field[1]
- case "value":
- if value.Type() != 0 {
- return key, value, errors.New("%v contains duplicate value field", name)
- }
- value = field[1]
default:
- ok = false
+ return d.unexpectedTokenError(tok)
}
}
- if !ok {
- // TODO: Do not return error if ignore unknown option is added and enabled.
- return key, value, errors.New("%v contains unknown map entry name: %v", name, field[0])
- }
- }
- return key, value, nil
-}
-// unmarshalMapKey converts given text.Value into a protoreflect.MapKey. A map key type is any
-// integral or string type.
-func unmarshalMapKey(input text.Value, fd pref.FieldDescriptor) (pref.MapKey, error) {
- // If input is not set, use the zero value.
- if input.Type() == 0 {
- return fd.Default().MapKey(), nil
- }
-
- val, err := unmarshalScalar(input, fd)
- if err != nil {
- return pref.MapKey{}, errors.New("%v contains invalid key: %v", fd.FullName(), input)
- }
- return val.MapKey(), nil
-}
-
-// unmarshalMapMessageValue unmarshals given message-type text.Value into a protoreflect.Map for
-// the given MapKey.
-func (o UnmarshalOptions) unmarshalMapMessageValue(input text.Value, pkey pref.MapKey, fd pref.FieldDescriptor, mmap pref.Map) error {
- var value [][2]text.Value
- switch input.Type() {
- case 0:
- case text.Message:
- value = input.Message()
default:
- return errors.New("%v contains invalid value: %v", fd.FullName(), input)
+ return d.unexpectedTokenError(tok)
}
- val := mmap.NewValue()
- if err := o.unmarshalMessage(value, val.Message()); err != nil {
- return err
- }
- mmap.Set(pkey, val)
- return nil
}
-// unmarshalMapScalarValue unmarshals given scalar-type text.Value into a protoreflect.Map
-// for the given MapKey.
-func unmarshalMapScalarValue(input text.Value, pkey pref.MapKey, fd pref.FieldDescriptor, mmap pref.Map) error {
- var val pref.Value
- if input.Type() == 0 {
- val = fd.Default()
- } else {
- var err error
- val, err = unmarshalScalar(input, fd)
+// unmarshalMapEntry unmarshals one map entry into given protoreflect.Map. An
+// entry is a textproto message containing {key: <kvalue>, value: <mvalue>}.
+func (d decoder) unmarshalMapEntry(fd pref.FieldDescriptor, mmap pref.Map, unmarshalMapValue func() (pref.Value, error)) error {
+ var key pref.MapKey
+ var pval pref.Value
+Loop:
+ for {
+ // Read field name.
+ tok, err := d.Read()
if err != nil {
return err
}
+ switch tok.Kind() {
+ case text.Name:
+ if tok.NameKind() != text.IdentName {
+ if !d.opts.DiscardUnknown {
+ return d.newError(tok.Pos(), "unknown map entry field %q", tok.RawString())
+ }
+ d.skipValue()
+ continue Loop
+ }
+ // Continue below.
+ case text.MessageClose:
+ break Loop
+ default:
+ return d.unexpectedTokenError(tok)
+ }
+
+ name := tok.IdentName()
+ switch name {
+ case "key":
+ if !tok.HasSeparator() {
+ return d.syntaxError(tok.Pos(), "missing field separator :")
+ }
+ if key.IsValid() {
+ return d.newError(tok.Pos(), `map entry "key" cannot be repeated`)
+ }
+ val, err := d.unmarshalScalar(fd.MapKey())
+ if err != nil {
+ return err
+ }
+ key = val.MapKey()
+
+ case "value":
+ if kind := fd.MapValue().Kind(); (kind != pref.MessageKind) && (kind != pref.GroupKind) {
+ if !tok.HasSeparator() {
+ return d.syntaxError(tok.Pos(), "missing field separator :")
+ }
+ }
+ if pval.IsValid() {
+ return d.newError(tok.Pos(), `map entry "value" cannot be repeated`)
+ }
+ pval, err = unmarshalMapValue()
+ if err != nil {
+ return err
+ }
+
+ default:
+ if !d.opts.DiscardUnknown {
+ return d.newError(tok.Pos(), "unknown map entry field %q", name)
+ }
+ d.skipValue()
+ }
}
- mmap.Set(pkey, val)
+
+ if !key.IsValid() {
+ key = fd.MapKey().Default().MapKey()
+ }
+ if !pval.IsValid() {
+ switch fd.MapValue().Kind() {
+ case pref.MessageKind, pref.GroupKind:
+ // If value field is not set for message/group types, construct an
+ // empty one as default.
+ pval = mmap.NewValue()
+ default:
+ pval = fd.MapValue().Default()
+ }
+ }
+ mmap.Set(key, pval)
return nil
}
-// isExpandedAny returns true if given [][2]text.Value may be an expanded Any that contains only one
-// field with key type of text.String type and value type of text.Message.
-func isExpandedAny(tmsg [][2]text.Value) bool {
- if len(tmsg) != 1 {
- return false
+// unmarshalAny unmarshals an Any textproto. It can either be in expanded form
+// or non-expanded form.
+func (d decoder) unmarshalAny(m pref.Message, checkDelims bool) error {
+ var typeURL string
+ var bValue []byte
+
+ // hasFields tracks which valid fields have been seen in the loop below in
+ // order to flag an error if there are duplicates or conflicts. It may
+ // contain the strings "type_url", "value" and "expanded". The literal
+ // "expanded" is used to indicate that the expanded form has been
+ // encountered already.
+ hasFields := map[string]bool{}
+
+ if checkDelims {
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+
+ if tok.Kind() != text.MessageOpen {
+ return d.unexpectedTokenError(tok)
+ }
}
- field := tmsg[0]
- return field[0].Type() == text.String && field[1].Type() == text.Message
-}
+Loop:
+ for {
+ // Read field name. Can only have 3 possible field names, i.e. type_url,
+ // value and type URL name inside [].
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ if typ := tok.Kind(); typ != text.Name {
+ if checkDelims {
+ if typ == text.MessageClose {
+ break Loop
+ }
+ } else if typ == text.EOF {
+ break Loop
+ }
+ return d.unexpectedTokenError(tok)
+ }
-// unmarshalAny unmarshals an expanded Any textproto. This method assumes that the given
-// tfield has key type of text.String and value type of text.Message.
-func (o UnmarshalOptions) unmarshalAny(tfield [2]text.Value, m pref.Message) error {
- typeURL := tfield[0].String()
- value := tfield[1].Message()
+ switch tok.NameKind() {
+ case text.IdentName:
+ // Both type_url and value fields require field separator :.
+ if !tok.HasSeparator() {
+ return d.syntaxError(tok.Pos(), "missing field separator :")
+ }
- mt, err := o.Resolver.FindMessageByURL(typeURL)
- if err != nil {
- return errors.New("unable to resolve message [%v]: %v", typeURL, err)
- }
- // Create new message for the embedded message type and unmarshal the
- // value into it.
- m2 := mt.New()
- if err := o.unmarshalMessage(value, m2); err != nil {
- return err
- }
- // Serialize the embedded message and assign the resulting bytes to the value field.
- b, err := proto.MarshalOptions{
- AllowPartial: true, // never check required fields inside an Any
- Deterministic: true,
- }.Marshal(m2.Interface())
- if err != nil {
- return err
+ switch tok.IdentName() {
+ case "type_url":
+ if hasFields["type_url"] {
+ return d.newError(tok.Pos(), "duplicate Any type_url field")
+ }
+ if hasFields["expanded"] {
+ return d.newError(tok.Pos(), "conflict with [%s] field", typeURL)
+ }
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ var ok bool
+ typeURL, ok = tok.String()
+ if !ok {
+ return d.newError(tok.Pos(), "invalid Any type_url: %v", tok.RawString())
+ }
+ hasFields["type_url"] = true
+
+ case "value":
+ if hasFields["value"] {
+ return d.newError(tok.Pos(), "duplicate Any value field")
+ }
+ if hasFields["expanded"] {
+ return d.newError(tok.Pos(), "conflict with [%s] field", typeURL)
+ }
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ s, ok := tok.String()
+ if !ok {
+ return d.newError(tok.Pos(), "invalid Any value: %v", tok.RawString())
+ }
+ bValue = []byte(s)
+ hasFields["value"] = true
+
+ default:
+ if !d.opts.DiscardUnknown {
+ return d.newError(tok.Pos(), "invalid field name %q in google.protobuf.Any message", tok.RawString())
+ }
+ }
+
+ case text.TypeName:
+ if hasFields["expanded"] {
+ return d.newError(tok.Pos(), "cannot have more than one type")
+ }
+ if hasFields["type_url"] {
+ return d.newError(tok.Pos(), "conflict with type_url field")
+ }
+ typeURL = tok.TypeName()
+ var err error
+ bValue, err = d.unmarshalExpandedAny(typeURL, tok.Pos())
+ if err != nil {
+ return err
+ }
+ hasFields["expanded"] = true
+
+ default:
+ if !d.opts.DiscardUnknown {
+ return d.newError(tok.Pos(), "invalid field name %q in google.protobuf.Any message", tok.RawString())
+ }
+ }
}
fds := m.Descriptor().Fields()
- fdType := fds.ByNumber(fieldnum.Any_TypeUrl)
- fdValue := fds.ByNumber(fieldnum.Any_Value)
-
- m.Set(fdType, pref.ValueOfString(typeURL))
- m.Set(fdValue, pref.ValueOfBytes(b))
-
+ if len(typeURL) > 0 {
+ m.Set(fds.ByNumber(fieldnum.Any_TypeUrl), pref.ValueOfString(typeURL))
+ }
+ if len(bValue) > 0 {
+ m.Set(fds.ByNumber(fieldnum.Any_Value), pref.ValueOfBytes(bValue))
+ }
return nil
}
+
+func (d decoder) unmarshalExpandedAny(typeURL string, pos int) ([]byte, error) {
+ mt, err := d.opts.Resolver.FindMessageByURL(typeURL)
+ if err != nil {
+ return nil, d.newError(pos, "unable to resolve message [%v]: %v", typeURL, err)
+ }
+ // Create new message for the embedded message type and unmarshal the value
+ // field into it.
+ m := mt.New()
+ if err := d.unmarshalMessage(m, true); err != nil {
+ return nil, err
+ }
+ // Serialize the embedded message and return the resulting bytes.
+ b, err := proto.MarshalOptions{
+ AllowPartial: true, // Never check required fields inside an Any.
+ Deterministic: true,
+ }.Marshal(m.Interface())
+ if err != nil {
+ return nil, d.newError(pos, "error in marshaling message into Any.value: %v", err)
+ }
+ return b, nil
+}
+
+// skipValue makes the decoder parse a field value in order to advance the read
+// to the next field. It relies on Read returning an error if the types are not
+// in valid sequence.
+func (d decoder) skipValue() error {
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ // Only need to continue reading for messages and lists.
+ switch tok.Kind() {
+ case text.MessageOpen:
+ return d.skipMessageValue()
+
+ case text.ListOpen:
+ for {
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ switch tok.Kind() {
+ case text.ListClose:
+ return nil
+ case text.MessageOpen:
+ return d.skipMessageValue()
+ default:
+ // Skip items. This will not validate whether skipped values are
+ // of the same type or not, same behavior as C++
+ // TextFormat::Parser::AllowUnknownField(true) version 3.8.0.
+ if err := d.skipValue(); err != nil {
+ return err
+ }
+ }
+ }
+ }
+ return nil
+}
+
+// skipMessageValue makes the decoder parse and skip over all fields in a
+// message. It assumes that the previous read type is MessageOpen.
+func (d decoder) skipMessageValue() error {
+ for {
+ tok, err := d.Read()
+ if err != nil {
+ return err
+ }
+ switch tok.Kind() {
+ case text.MessageClose:
+ return nil
+ case text.Name:
+ if err := d.skipValue(); err != nil {
+ return err
+ }
+ }
+ }
+}
diff --git a/encoding/prototext/decode_test.go b/encoding/prototext/decode_test.go
index e35d146..9150740 100644
--- a/encoding/prototext/decode_test.go
+++ b/encoding/prototext/decode_test.go
@@ -6,6 +6,7 @@
import (
"math"
+ "strings"
"testing"
"google.golang.org/protobuf/encoding/prototext"
@@ -27,7 +28,7 @@
inputMessage proto.Message
inputText string
wantMessage proto.Message
- wantErr bool // TODO: Verify error message content.
+ wantErr string // Expected error substring.
skip bool
}{{
desc: "proto2 empty message",
@@ -125,7 +126,7 @@
desc: "case sensitive",
inputMessage: &pb3.Scalars{},
inputText: `S_BOOL: true`,
- wantErr: true,
+ wantErr: "unknown field: S_BOOL",
}, {
desc: "proto3 scalars",
inputMessage: &pb3.Scalars{},
@@ -162,17 +163,17 @@
desc: "string with invalid UTF-8",
inputMessage: &pb3.Scalars{},
inputText: `s_string: "abc\xff"`,
- wantErr: true,
+ wantErr: "(line 1:11): contains invalid UTF-8",
}, {
desc: "proto2 message contains unknown field",
inputMessage: &pb2.Scalars{},
inputText: "unknown_field: 123",
- wantErr: true,
+ wantErr: "unknown field",
}, {
desc: "proto3 message contains unknown field",
inputMessage: &pb3.Scalars{},
inputText: "unknown_field: 456",
- wantErr: true,
+ wantErr: "unknown field",
}, {
desc: "proto2 message contains discarded unknown field",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
@@ -188,111 +189,104 @@
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb2.Scalars{},
inputText: `13:"hello"`,
- wantErr: true,
+ wantErr: "cannot specify field by number",
}, {
desc: "proto3 message cannot parse field number",
umo: prototext.UnmarshalOptions{DiscardUnknown: true},
inputMessage: &pb3.Scalars{},
inputText: `13:"goodbye"`,
- wantErr: true,
+ wantErr: "cannot specify field by number",
}, {
desc: "proto2 numeric key field",
inputMessage: &pb2.Scalars{},
inputText: "1: true",
- wantErr: true,
+ wantErr: "cannot specify field by number",
}, {
desc: "proto3 numeric key field",
inputMessage: &pb3.Scalars{},
inputText: "1: true",
- wantErr: true,
+ wantErr: "cannot specify field by number",
}, {
desc: "invalid bool value",
inputMessage: &pb3.Scalars{},
inputText: "s_bool: 123",
- wantErr: true,
+ wantErr: "invalid value for bool",
}, {
desc: "invalid int32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_int32: not_a_num",
- wantErr: true,
+ wantErr: "invalid value for int32",
}, {
desc: "invalid int64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_int64: 'not a num either'",
- wantErr: true,
+ wantErr: "invalid value for int64",
}, {
desc: "invalid uint32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_fixed32: -42",
- wantErr: true,
+ wantErr: "invalid value for fixed32",
}, {
desc: "invalid uint64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_uint64: -47",
- wantErr: true,
+ wantErr: "invalid value for uint64",
}, {
desc: "invalid sint32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sint32: '42'",
- wantErr: true,
+ wantErr: "invalid value for sint32",
}, {
desc: "invalid sint64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sint64: '-47'",
- wantErr: true,
+ wantErr: "invalid value for sint64",
}, {
desc: "invalid fixed32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_fixed32: -42",
- wantErr: true,
+ wantErr: "invalid value for fixed32",
}, {
desc: "invalid fixed64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_fixed64: -42",
- wantErr: true,
+ wantErr: "invalid value for fixed64",
}, {
desc: "invalid sfixed32 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sfixed32: 'not valid'",
- wantErr: true,
+ wantErr: "invalid value for sfixed32",
}, {
desc: "invalid sfixed64 value",
inputMessage: &pb3.Scalars{},
inputText: "s_sfixed64: bad",
- wantErr: true,
+ wantErr: "invalid value for sfixed64",
}, {
- desc: "float positive infinity",
- inputMessage: &pb3.Scalars{},
- inputText: "s_float: inf",
- wantMessage: &pb3.Scalars{
- SFloat: float32(math.Inf(1)),
+ desc: "conformance: FloatFieldMaxValue",
+ inputMessage: &pb2.Scalars{},
+ inputText: `opt_float: 3.4028235e+38`,
+ wantMessage: &pb2.Scalars{
+ OptFloat: proto.Float32(3.40282347e+38),
},
}, {
- desc: "float negative infinity",
- inputMessage: &pb3.Scalars{},
- inputText: "s_float: -inf",
- wantMessage: &pb3.Scalars{
- SFloat: float32(math.Inf(-1)),
+ desc: "conformance: FloatFieldLargerThanUint64",
+ inputMessage: &pb2.Scalars{},
+ inputText: `opt_float: 18446744073709551616`,
+ wantMessage: &pb2.Scalars{
+ OptFloat: proto.Float32(1.84467441e+19),
},
}, {
- desc: "double positive infinity",
- inputMessage: &pb3.Scalars{},
- inputText: "s_double: inf",
- wantMessage: &pb3.Scalars{
- SDouble: math.Inf(1),
- },
- }, {
- desc: "double negative infinity",
- inputMessage: &pb3.Scalars{},
- inputText: "s_double: -inf",
- wantMessage: &pb3.Scalars{
- SDouble: math.Inf(-1),
+ desc: "conformance: FloatFieldTooLarge",
+ inputMessage: &pb2.Scalars{},
+ inputText: `opt_float: 3.4028235e+39`,
+ wantMessage: &pb2.Scalars{
+ OptFloat: proto.Float32(float32(math.Inf(1))),
},
}, {
desc: "invalid string value",
inputMessage: &pb3.Scalars{},
inputText: "s_string: invalid_string",
- wantErr: true,
+ wantErr: "invalid value for string type",
}, {
desc: "proto2 bytes set to empty string",
inputMessage: &pb2.Scalars{},
@@ -312,7 +306,7 @@
opt_bool: true
opt_bool: false
`,
- wantErr: true,
+ wantErr: `(line 3:1): non-repeated field "opt_bool" is repeated`,
}, {
desc: "proto2 more duplicate singular field",
inputMessage: &pb2.Scalars{},
@@ -321,14 +315,14 @@
opt_string: "hello"
opt_bool: false
`,
- wantErr: true,
+ wantErr: `(line 4:1): non-repeated field "opt_bool" is repeated`,
}, {
desc: "proto2 invalid singular field",
inputMessage: &pb2.Scalars{},
inputText: `
opt_bool: [true, false]
`,
- wantErr: true,
+ wantErr: "(line 2:11): unexpected token: [",
}, {
desc: "proto3 duplicate singular field",
inputMessage: &pb3.Scalars{},
@@ -336,7 +330,7 @@
s_bool: false
s_bool: true
`,
- wantErr: true,
+ wantErr: `non-repeated field "s_bool" is repeated`,
}, {
desc: "proto3 more duplicate singular field",
inputMessage: &pb3.Scalars{},
@@ -345,7 +339,7 @@
s_string: ""
s_bool: true
`,
- wantErr: true,
+ wantErr: `non-repeated field "s_bool" is repeated`,
}, {
desc: "proto2 enum",
inputMessage: &pb2.Enums{},
@@ -386,7 +380,7 @@
opt_enum: UNNAMED
opt_nested_enum: UNNAMED_TOO
`,
- wantErr: true,
+ wantErr: "invalid value for enum type: UNNAMED",
}, {
desc: "proto3 enum name value",
inputMessage: &pb3.Enums{},
@@ -432,10 +426,21 @@
Optgroup: &pb2.Nests_OptGroup{},
},
}, {
+ desc: "message fields with no field separator",
+ inputMessage: &pb2.Nests{},
+ inputText: `
+opt_nested {}
+OptGroup {}
+`,
+ wantMessage: &pb2.Nests{
+ OptNested: &pb2.Nested{},
+ Optgroup: &pb2.Nests_OptGroup{},
+ },
+ }, {
desc: "group field name",
inputMessage: &pb2.Nests{},
inputText: `optgroup: {}`,
- wantErr: true,
+ wantErr: "unknown field: optgroup",
}, {
desc: "proto2 nested messages",
inputMessage: &pb2.Nests{},
@@ -488,7 +493,7 @@
s_string: "abc\xff"
}
`,
- wantErr: true,
+ wantErr: "contains invalid UTF-8",
}, {
desc: "oneof set to empty string",
inputMessage: &pb3.Oneofs{},
@@ -545,7 +550,7 @@
oneof_enum: ZERO
oneof_string: "hello"
`,
- wantErr: true,
+ wantErr: `error parsing "oneof_string", oneof pb3.Oneofs.union is already set`,
}, {
desc: "repeated scalar using same field name",
inputMessage: &pb2.Repeats{},
@@ -580,7 +585,7 @@
desc: "repeated contains invalid UTF-8",
inputMessage: &pb2.Repeats{},
inputText: `rpt_string: "abc\xff"`,
- wantErr: true,
+ wantErr: "contains invalid UTF-8",
}, {
desc: "repeated enums",
inputMessage: &pb2.Enums{},
@@ -643,6 +648,75 @@
},
},
}, {
+ desc: "repeated message fields without field separator",
+ inputMessage: &pb2.Nests{},
+ inputText: `
+rpt_nested {
+ opt_string: "repeat nested one"
+}
+rpt_nested: [
+ {
+ opt_string: "repeat nested two"
+ },
+ {}
+]
+`,
+ wantMessage: &pb2.Nests{
+ RptNested: []*pb2.Nested{
+ {
+ OptString: proto.String("repeat nested one"),
+ },
+ {
+ OptString: proto.String("repeat nested two"),
+ },
+ {},
+ },
+ },
+ }, {
+ desc: "bools",
+ inputMessage: &pb2.Repeats{},
+ inputText: `
+rpt_bool: [ True, true, t, 1, False, false, f, 0 ]
+`,
+ wantMessage: &pb2.Repeats{
+ RptBool: []bool{true, true, true, true, false, false, false, false},
+ },
+ }, {
+ desc: "special floats and doubles",
+ inputMessage: &pb2.Repeats{},
+ inputText: `
+rpt_float: [ inf, Inf, infinity, InFiniTy, -inf, -inF, -infinitY, -InfinitY, nan, NaN, Nan ],
+rpt_double: [ inf, Inf, infinity, InFiniTy, -inf, -inF, -infinitY, -InfinitY, nan, NaN, Nan ],
+`,
+ wantMessage: &pb2.Repeats{
+ RptFloat: []float32{
+ float32(math.Inf(1)),
+ float32(math.Inf(1)),
+ float32(math.Inf(1)),
+ float32(math.Inf(1)),
+ float32(math.Inf(-1)),
+ float32(math.Inf(-1)),
+ float32(math.Inf(-1)),
+ float32(math.Inf(-1)),
+ float32(math.NaN()),
+ float32(math.NaN()),
+ float32(math.NaN()),
+ },
+ RptDouble: []float64{
+ math.Inf(1),
+ math.Inf(1),
+ math.Inf(1),
+ math.Inf(1),
+ math.Inf(-1),
+ math.Inf(-1),
+ math.Inf(-1),
+ math.Inf(-1),
+ math.NaN(),
+ math.NaN(),
+ math.NaN(),
+ },
+ },
+ }, {
desc: "map fields 1",
inputMessage: &pb3.Maps{},
inputText: `
@@ -650,7 +724,7 @@
key: -101
value: "-101"
}
-int32_to_str: {
+int32_to_str {
key: 0
value: "zero"
}
@@ -662,7 +736,7 @@
key: 255
value: "0xff"
}
-bool_to_uint32: {
+bool_to_uint32 {
key: true
value: 42
}
@@ -708,7 +782,7 @@
inputText: `
str_to_nested: {
key: "nested_one"
- value: {
+ value {
s_string: "nested in a map"
}
}
@@ -783,7 +857,7 @@
value: "cero"
}
`,
- wantErr: true,
+ wantErr: `map entry "key" cannot be repeated`,
}, {
desc: "map contains duplicate value fields",
inputMessage: &pb3.Maps{},
@@ -794,7 +868,7 @@
value: "uno"
}
`,
- wantErr: true,
+ wantErr: `map entry "value" cannot be repeated`,
}, {
desc: "map contains missing key",
inputMessage: &pb3.Maps{},
@@ -899,7 +973,7 @@
value: "abc\xff"
}
`,
- wantErr: true,
+ wantErr: "contains invalid UTF-8",
}, {
desc: "map field key contains invalid UTF-8",
inputMessage: &pb3.Maps{},
@@ -908,7 +982,7 @@
value: {}
}
`,
- wantErr: true,
+ wantErr: "contains invalid UTF-8",
}, {
desc: "map contains unknown field",
inputMessage: &pb3.Maps{},
@@ -919,7 +993,7 @@
unknown: "bad"
}
`,
- wantErr: true,
+ wantErr: `(line 5:3): unknown map entry field "unknown"`,
}, {
desc: "map contains extension-like key field",
inputMessage: &pb3.Maps{},
@@ -929,7 +1003,7 @@
value: "ten"
}
`,
- wantErr: true,
+ wantErr: `unknown map entry field "[key]"`,
}, {
desc: "map contains invalid key",
inputMessage: &pb3.Maps{},
@@ -939,7 +1013,7 @@
value: "cero"
}
`,
- wantErr: true,
+ wantErr: "(line 3:8): invalid value for int32 type",
}, {
desc: "map contains invalid value",
inputMessage: &pb3.Maps{},
@@ -949,7 +1023,7 @@
value: 101
}
`,
- wantErr: true,
+ wantErr: "(line 4:10): invalid value for string type",
}, {
desc: "map contains invalid message value",
inputMessage: &pb3.Maps{},
@@ -959,7 +1033,7 @@
value: 1
}
`,
- wantErr: true,
+ wantErr: "syntax error (line 4:10): unexpected token: 1",
}, {
desc: "map using mix of [] and repeated",
inputMessage: &pb3.Maps{},
@@ -996,7 +1070,7 @@
}, {
desc: "required fields not set",
inputMessage: &pb2.Requireds{},
- wantErr: true,
+ wantErr: "required field",
}, {
desc: "required field set",
inputMessage: &pb2.PartialRequired{},
@@ -1019,7 +1093,7 @@
ReqString: proto.String("hello"),
ReqEnum: pb2.Enum_ONE.Enum(),
},
- wantErr: true,
+ wantErr: "required field",
}, {
desc: "required fields partially set with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@@ -1062,7 +1136,7 @@
wantMessage: &pb2.IndirectRequired{
OptNested: &pb2.NestedWithRequired{},
},
- wantErr: true,
+ wantErr: "required field",
}, {
desc: "indirect required field with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@@ -1088,7 +1162,7 @@
{},
},
},
- wantErr: true,
+ wantErr: "required field",
}, {
desc: "indirect required field in repeated with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@@ -1129,7 +1203,7 @@
},
},
},
- wantErr: true,
+ wantErr: "required field",
}, {
desc: "indirect required field in map with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@@ -1163,7 +1237,7 @@
OneofNested: &pb2.NestedWithRequired{},
},
},
- wantErr: true,
+ wantErr: "required field",
}, {
desc: "indirect required field in oneof with AllowPartial",
umo: prototext.UnmarshalOptions{AllowPartial: true},
@@ -1217,7 +1291,7 @@
desc: "extension field contains invalid UTF-8",
inputMessage: &pb2.Extensions{},
inputText: `[pb2.opt_ext_string]: "abc\xff"`,
- wantErr: true,
+ wantErr: "contains invalid UTF-8",
}, {
desc: "extensions of repeated fields",
inputMessage: &pb2.Extensions{},
@@ -1313,7 +1387,7 @@
desc: "invalid extension field name",
inputMessage: &pb2.Extensions{},
inputText: "[pb2.invalid_message_field]: true",
- wantErr: true,
+ wantErr: "unknown field",
}, {
desc: "MessageSet",
inputMessage: &pb2.MessageSet{},
@@ -1366,7 +1440,7 @@
opt_string: "not a messageset extension"
}
`,
- wantErr: true,
+ wantErr: "unknown field: [pb2.FakeMessageSetExtension]",
skip: !flags.ProtoLegacy,
}, {
desc: "not real MessageSet 3",
@@ -1474,18 +1548,18 @@
s_string: "abc\xff"
}
`,
- wantErr: true,
+ wantErr: "contains invalid UTF-8",
}, {
desc: "Any expanded with unregistered type",
umo: prototext.UnmarshalOptions{Resolver: new(preg.Types)},
inputMessage: &anypb.Any{},
inputText: `[SomeMessage]: {}`,
- wantErr: true,
+ wantErr: "unable to resolve message [SomeMessage]",
}, {
desc: "Any expanded with invalid value",
inputMessage: &anypb.Any{},
inputText: `[pb2.Nested]: 123`,
- wantErr: true,
+ wantErr: "unexpected token: 123",
}, {
desc: "Any expanded with unknown fields",
inputMessage: &anypb.Any{},
@@ -1493,7 +1567,7 @@
[pb2.Nested]: {}
unknown: ""
`,
- wantErr: true,
+ wantErr: `invalid field name "unknown" in google.protobuf.Any message`,
}, {
desc: "Any contains expanded and unexpanded fields",
inputMessage: &anypb.Any{},
@@ -1501,7 +1575,7 @@
[pb2.Nested]: {}
type_url: "pb2.Nested"
`,
- wantErr: true,
+ wantErr: "(line 3:1): conflict with [pb2.Nested] field",
}, {
desc: "weak fields",
inputMessage: &testpb.TestWeak{},
@@ -1516,7 +1590,7 @@
desc: "weak fields; unknown field",
inputMessage: &testpb.TestWeak{},
inputText: `weak_message1:{a:1} weak_message2:{a:1}`,
- wantErr: true, // weak_message2 is unknown since the package containing it is not imported
+ wantErr: "unknown field: weak_message2", // weak_message2 is unknown since the package containing it is not imported
skip: !flags.ProtoLegacy,
}}
@@ -1527,11 +1601,17 @@
}
t.Run(tt.desc, func(t *testing.T) {
err := tt.umo.Unmarshal([]byte(tt.inputText), tt.inputMessage)
- if err != nil && !tt.wantErr {
- t.Errorf("Unmarshal() returned error: %v\n\n", err)
+ if err != nil {
+ if tt.wantErr == "" {
+ t.Errorf("Unmarshal() got unexpected error: %v", err)
+ } else if !strings.Contains(err.Error(), tt.wantErr) {
+ t.Errorf("Unmarshal() error got %q, want %q", err, tt.wantErr)
+ }
+ return
}
- if err == nil && tt.wantErr {
- t.Error("Unmarshal() got nil error, want error\n\n")
+ if tt.wantErr != "" {
+ t.Errorf("Unmarshal() got nil error, want error %q", tt.wantErr)
+ return
}
if tt.wantMessage != nil && !proto.Equal(tt.inputMessage, tt.wantMessage) {
t.Errorf("Unmarshal()\n<got>\n%v\n<want>\n%v\n", tt.inputMessage, tt.wantMessage)
diff --git a/encoding/prototext/encode.go b/encoding/prototext/encode.go
index 1fa5fe4..a2211e9 100644
--- a/encoding/prototext/encode.go
+++ b/encoding/prototext/encode.go
@@ -7,6 +7,7 @@
import (
"fmt"
"sort"
+ "strconv"
"unicode/utf8"
"google.golang.org/protobuf/internal/encoding/messageset"
@@ -90,6 +91,9 @@
// MarshalOptions object. Do not depend on the output being stable. It may
// change over time across different versions of the program.
func (o MarshalOptions) Marshal(m proto.Message) ([]byte, error) {
+ const outputASCII = false
+ var delims = [2]byte{'{', '}'}
+
if o.Multiline && o.Indent == "" {
o.Indent = defaultIndent
}
@@ -97,41 +101,52 @@
o.Resolver = protoregistry.GlobalTypes
}
- v, err := o.marshalMessage(m.ProtoReflect())
+ internalEnc, err := text.NewEncoder(o.Indent, delims, outputASCII)
if err != nil {
return nil, err
}
- delims := [2]byte{'{', '}'}
- const outputASCII = false
- b, err := text.Marshal(v, o.Indent, delims, outputASCII)
+ enc := encoder{internalEnc, o}
+ err = enc.marshalMessage(m.ProtoReflect(), false)
if err != nil {
return nil, err
}
+ out := enc.Bytes()
+ if len(o.Indent) > 0 && len(out) > 0 {
+ out = append(out, '\n')
+ }
if o.AllowPartial {
- return b, nil
+ return out, nil
}
- return b, proto.IsInitialized(m)
+ return out, proto.IsInitialized(m)
}
-// marshalMessage converts a protoreflect.Message to a text.Value.
-func (o MarshalOptions) marshalMessage(m pref.Message) (text.Value, error) {
+type encoder struct {
+ *text.Encoder
+ opts MarshalOptions
+}
+
+// marshalMessage marshals the given protoreflect.Message.
+func (e encoder) marshalMessage(m pref.Message, inclDelims bool) error {
messageDesc := m.Descriptor()
if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
- return text.Value{}, errors.New("no support for proto1 MessageSets")
+ return errors.New("no support for proto1 MessageSets")
+ }
+
+ if inclDelims {
+ e.StartMessage()
+ defer e.EndMessage()
}
// Handle Any expansion.
if messageDesc.FullName() == "google.protobuf.Any" {
- if msg, err := o.marshalAny(m); err == nil {
- // Return as is if no error.
- return msg, nil
+ if e.marshalAny(m) {
+ return nil
}
- // Otherwise continue on to marshal Any as a regular message.
+ // If unable to expand, continue on to marshal Any as a regular message.
}
- // Handle known fields.
- var msgFields [][2]text.Value
+ // Marshal known fields.
fieldDescs := messageDesc.Fields()
size := fieldDescs.Len()
for i := 0; i < size; {
@@ -142,262 +157,254 @@
} else {
i++
}
+
if fd == nil || !m.Has(fd) {
continue
}
- name := text.ValueOf(fd.Name())
+ name := fd.Name()
// Use type name for group field name.
if fd.Kind() == pref.GroupKind {
- name = text.ValueOf(fd.Message().Name())
+ name = fd.Message().Name()
}
- pval := m.Get(fd)
- var err error
- msgFields, err = o.appendField(msgFields, name, pval, fd)
- if err != nil {
- return text.Value{}, err
+ val := m.Get(fd)
+ if err := e.marshalField(string(name), val, fd); err != nil {
+ return err
}
}
- // Handle extensions.
- var err error
- msgFields, err = o.appendExtensions(msgFields, m)
- if err != nil {
- return text.Value{}, err
+ // Marshal extensions.
+ if err := e.marshalExtensions(m); err != nil {
+ return err
}
- // Handle unknown fields.
- if o.EmitUnknown {
- msgFields = appendUnknown(msgFields, m.GetUnknown())
+ // Marshal unknown fields.
+ if e.opts.EmitUnknown {
+ e.marshalUnknown(m.GetUnknown())
}
- return text.ValueOf(msgFields), nil
+ return nil
}
-// appendField marshals a protoreflect.Value and appends it to the given [][2]text.Value.
-func (o MarshalOptions) appendField(msgFields [][2]text.Value, name text.Value, pval pref.Value, fd pref.FieldDescriptor) ([][2]text.Value, error) {
+// marshalField marshals the given field with protoreflect.Value.
+func (e encoder) marshalField(name string, val pref.Value, fd pref.FieldDescriptor) error {
switch {
case fd.IsList():
- items, err := o.marshalList(pval.List(), fd)
- if err != nil {
- return msgFields, err
- }
-
- for _, item := range items {
- msgFields = append(msgFields, [2]text.Value{name, item})
- }
+ return e.marshalList(name, val.List(), fd)
case fd.IsMap():
- items, err := o.marshalMap(pval.Map(), fd)
- if err != nil {
- return msgFields, err
- }
-
- for _, item := range items {
- msgFields = append(msgFields, [2]text.Value{name, item})
- }
+ return e.marshalMap(name, val.Map(), fd)
default:
- tval, err := o.marshalSingular(pval, fd)
- if err != nil {
- return msgFields, err
- }
- msgFields = append(msgFields, [2]text.Value{name, tval})
+ e.WriteName(name)
+ return e.marshalSingular(val, fd)
}
-
- return msgFields, nil
}
-// marshalSingular converts a non-repeated field value to text.Value.
-// This includes all scalar types, enums, messages, and groups.
-func (o MarshalOptions) marshalSingular(val pref.Value, fd pref.FieldDescriptor) (text.Value, error) {
+// marshalSingular marshals the given non-repeated field value. This includes
+// all scalar types, enums, messages, and groups.
+func (e encoder) marshalSingular(val pref.Value, fd pref.FieldDescriptor) error {
kind := fd.Kind()
switch kind {
- case pref.BoolKind,
- pref.Int32Kind, pref.Sint32Kind, pref.Uint32Kind,
- pref.Int64Kind, pref.Sint64Kind, pref.Uint64Kind,
- pref.Sfixed32Kind, pref.Fixed32Kind,
- pref.Sfixed64Kind, pref.Fixed64Kind,
- pref.FloatKind, pref.DoubleKind,
- pref.BytesKind:
- return text.ValueOf(val.Interface()), nil
+ case pref.BoolKind:
+ e.WriteBool(val.Bool())
case pref.StringKind:
s := val.String()
if !utf8.ValidString(s) {
- return text.Value{}, errors.InvalidUTF8(string(fd.FullName()))
+ return errors.InvalidUTF8(string(fd.FullName()))
}
- return text.ValueOf(s), nil
+ e.WriteString(s)
+
+ case pref.Int32Kind, pref.Int64Kind,
+ pref.Sint32Kind, pref.Sint64Kind,
+ pref.Sfixed32Kind, pref.Sfixed64Kind:
+ e.WriteInt(val.Int())
+
+ case pref.Uint32Kind, pref.Uint64Kind,
+ pref.Fixed32Kind, pref.Fixed64Kind:
+ e.WriteUint(val.Uint())
+
+ case pref.FloatKind:
+		// Encoder.WriteFloat handles the special numbers NaN and infinities.
+ e.WriteFloat(val.Float(), 32)
+
+ case pref.DoubleKind:
+		// Encoder.WriteFloat handles the special numbers NaN and infinities.
+ e.WriteFloat(val.Float(), 64)
+
+ case pref.BytesKind:
+ e.WriteString(string(val.Bytes()))
case pref.EnumKind:
num := val.Enum()
if desc := fd.Enum().Values().ByNumber(num); desc != nil {
- return text.ValueOf(desc.Name()), nil
+ e.WriteLiteral(string(desc.Name()))
+ } else {
+ // Use numeric value if there is no enum description.
+ e.WriteInt(int64(num))
}
- // Use numeric value if there is no enum description.
- return text.ValueOf(int32(num)), nil
case pref.MessageKind, pref.GroupKind:
- return o.marshalMessage(val.Message())
- }
+ return e.marshalMessage(val.Message(), true)
- panic(fmt.Sprintf("%v has unknown kind: %v", fd.FullName(), kind))
+ default:
+ panic(fmt.Sprintf("%v has unknown kind: %v", fd.FullName(), kind))
+ }
+ return nil
}
-// marshalList converts a protoreflect.List to []text.Value.
-func (o MarshalOptions) marshalList(list pref.List, fd pref.FieldDescriptor) ([]text.Value, error) {
+// marshalList marshals the given protoreflect.List as multiple name-value fields.
+func (e encoder) marshalList(name string, list pref.List, fd pref.FieldDescriptor) error {
size := list.Len()
- values := make([]text.Value, 0, size)
-
for i := 0; i < size; i++ {
- item := list.Get(i)
- val, err := o.marshalSingular(item, fd)
- if err != nil {
- // Return already marshaled values.
- return values, err
+ e.WriteName(name)
+ if err := e.marshalSingular(list.Get(i), fd); err != nil {
+ return err
}
- values = append(values, val)
}
-
- return values, nil
+ return nil
}
-var (
- mapKeyName = text.ValueOf(pref.Name("key"))
- mapValueName = text.ValueOf(pref.Name("value"))
-)
-
-// marshalMap converts a protoreflect.Map to []text.Value.
-func (o MarshalOptions) marshalMap(mmap pref.Map, fd pref.FieldDescriptor) ([]text.Value, error) {
- // values is a list of messages.
- values := make([]text.Value, 0, mmap.Len())
-
+// marshalMap marshals the given protoreflect.Map as multiple name-value fields.
+func (e encoder) marshalMap(name string, mmap pref.Map, fd pref.FieldDescriptor) error {
var err error
mapsort.Range(mmap, fd.MapKey().Kind(), func(key pref.MapKey, val pref.Value) bool {
- var keyTxtVal text.Value
- keyTxtVal, err = o.marshalSingular(key.Value(), fd.MapKey())
+ e.WriteName(name)
+ e.StartMessage()
+ defer e.EndMessage()
+
+ e.WriteName("key")
+ err = e.marshalSingular(key.Value(), fd.MapKey())
if err != nil {
return false
}
- var valTxtVal text.Value
- valTxtVal, err = o.marshalSingular(val, fd.MapValue())
+
+ e.WriteName("value")
+ err = e.marshalSingular(val, fd.MapValue())
if err != nil {
return false
}
- // Map entry (message) contains 2 fields, first field for key and second field for value.
- msg := text.ValueOf([][2]text.Value{
- {mapKeyName, keyTxtVal},
- {mapValueName, valTxtVal},
- })
- values = append(values, msg)
- err = nil
return true
})
- if err != nil {
- return nil, err
- }
-
- return values, nil
+ return err
}
-// appendExtensions marshals extension fields and appends them to the given [][2]text.Value.
-func (o MarshalOptions) appendExtensions(msgFields [][2]text.Value, m pref.Message) ([][2]text.Value, error) {
- var err error
- var entries [][2]text.Value
+// marshalExtensions marshals extension fields.
+func (e encoder) marshalExtensions(m pref.Message) error {
+ type entry struct {
+ key string
+ value pref.Value
+ desc pref.FieldDescriptor
+ }
+
+ // Get a sorted list based on field key first.
+ var entries []entry
m.Range(func(fd pref.FieldDescriptor, v pref.Value) bool {
if !fd.IsExtension() {
return true
}
-
// For MessageSet extensions, the name used is the parent message.
name := fd.FullName()
if messageset.IsMessageSetExtension(fd) {
name = name.Parent()
}
-
- // Use string type to produce [name] format.
- tname := text.ValueOf(string(name))
- entries, err = o.appendField(entries, tname, v, fd)
- if err != nil {
- return false
- }
- err = nil
+ entries = append(entries, entry{
+ key: string(name),
+ value: v,
+ desc: fd,
+ })
return true
})
- if err != nil {
- return msgFields, err
- }
-
- // Sort extensions lexicographically and append to output.
- sort.SliceStable(entries, func(i, j int) bool {
- return entries[i][0].String() < entries[j][0].String()
+ // Sort extensions lexicographically.
+ sort.Slice(entries, func(i, j int) bool {
+ return entries[i].key < entries[j].key
})
- return append(msgFields, entries...), nil
+
+ // Write out sorted list.
+ for _, entry := range entries {
+ // Extension field name is the proto field name enclosed in [].
+ name := "[" + entry.key + "]"
+ if err := e.marshalField(name, entry.value, entry.desc); err != nil {
+ return err
+ }
+ }
+ return nil
}
-// appendUnknown parses the given []byte and appends field(s) into the given fields slice.
+// marshalUnknown parses the given []byte and marshals fields out.
// This function assumes proper encoding in the given []byte.
-func appendUnknown(fields [][2]text.Value, b []byte) [][2]text.Value {
+func (e encoder) marshalUnknown(b []byte) {
+ const dec = 10
+ const hex = 16
for len(b) > 0 {
- var value interface{}
num, wtype, n := wire.ConsumeTag(b)
b = b[n:]
+ e.WriteName(strconv.FormatInt(int64(num), dec))
switch wtype {
case wire.VarintType:
- value, n = wire.ConsumeVarint(b)
+ var v uint64
+ v, n = wire.ConsumeVarint(b)
+ e.WriteUint(v)
case wire.Fixed32Type:
- value, n = wire.ConsumeFixed32(b)
+ var v uint32
+ v, n = wire.ConsumeFixed32(b)
+ e.WriteLiteral("0x" + strconv.FormatUint(uint64(v), hex))
case wire.Fixed64Type:
- value, n = wire.ConsumeFixed64(b)
+ var v uint64
+ v, n = wire.ConsumeFixed64(b)
+ e.WriteLiteral("0x" + strconv.FormatUint(v, hex))
case wire.BytesType:
- value, n = wire.ConsumeBytes(b)
+ var v []byte
+ v, n = wire.ConsumeBytes(b)
+ e.WriteString(string(v))
case wire.StartGroupType:
+ e.StartMessage()
var v []byte
v, n = wire.ConsumeGroup(num, b)
- var msg [][2]text.Value
- value = appendUnknown(msg, v)
+ e.marshalUnknown(v)
+ e.EndMessage()
default:
- panic(fmt.Sprintf("error parsing unknown field wire type: %v", wtype))
+ panic(fmt.Sprintf("prototext: error parsing unknown field wire type: %v", wtype))
}
- fields = append(fields, [2]text.Value{text.ValueOf(uint32(num)), text.ValueOf(value)})
b = b[n:]
}
- return fields
}
-// marshalAny converts a google.protobuf.Any protoreflect.Message to a text.Value.
-func (o MarshalOptions) marshalAny(m pref.Message) (text.Value, error) {
- fds := m.Descriptor().Fields()
+// marshalAny marshals the given google.protobuf.Any message in expanded form.
+// It returns true if it was able to marshal, else false.
+func (e encoder) marshalAny(any pref.Message) bool {
+ // Construct the embedded message.
+ fds := any.Descriptor().Fields()
fdType := fds.ByNumber(fieldnum.Any_TypeUrl)
- fdValue := fds.ByNumber(fieldnum.Any_Value)
-
- typeURL := m.Get(fdType).String()
- value := m.Get(fdValue)
-
- emt, err := o.Resolver.FindMessageByURL(typeURL)
+ typeURL := any.Get(fdType).String()
+ mt, err := e.opts.Resolver.FindMessageByURL(typeURL)
if err != nil {
- return text.Value{}, err
+ return false
}
- em := emt.New().Interface()
+ m := mt.New().Interface()
+
+ // Unmarshal bytes into embedded message.
+ fdValue := fds.ByNumber(fieldnum.Any_Value)
+ value := any.Get(fdValue)
err = proto.UnmarshalOptions{
AllowPartial: true,
- Resolver: o.Resolver,
- }.Unmarshal(value.Bytes(), em)
+ Resolver: e.opts.Resolver,
+ }.Unmarshal(value.Bytes(), m)
if err != nil {
- return text.Value{}, err
+ return false
}
- msg, err := o.marshalMessage(em.ProtoReflect())
+ // Get current encoder position. If marshaling fails, reset encoder output
+ // back to this position.
+ pos := e.Snapshot()
+
+ // Field name is the proto field name enclosed in [].
+ e.WriteName("[" + typeURL + "]")
+ err = e.marshalMessage(m.ProtoReflect(), true)
if err != nil {
- return text.Value{}, err
+ e.Reset(pos)
+ return false
}
- // Expanded Any field value contains only a single field with the type_url field value as the
- // field name in [] and a text marshaled field value of the embedded message.
- msgFields := [][2]text.Value{
- {
- text.ValueOf(typeURL),
- msg,
- },
- }
- return text.ValueOf(msgFields), nil
+ return true
}
diff --git a/encoding/prototext/encode_test.go b/encoding/prototext/encode_test.go
index 33daea0..db113bc 100644
--- a/encoding/prototext/encode_test.go
+++ b/encoding/prototext/encode_test.go
@@ -845,7 +845,7 @@
m.ProtoReflect().SetUnknown(pack.Message{
pack.Tag{101, pack.VarintType}, pack.Bool(true),
pack.Tag{102, pack.VarintType}, pack.Varint(0xff),
- pack.Tag{103, pack.Fixed32Type}, pack.Uint32(47),
+ pack.Tag{103, pack.Fixed32Type}, pack.Uint32(0x47),
pack.Tag{104, pack.Fixed64Type}, pack.Int64(0xdeadbeef),
}.Marshal())
return m
@@ -853,8 +853,8 @@
want: `opt_string: "this message contains unknown fields"
101: 1
102: 255
-103: 47
-104: 3735928559
+103: 0x47
+104: 0xdeadbeef
`,
}, {
desc: "unknown length-delimited",
@@ -1206,6 +1206,93 @@
want: `type_url: "foo/pb2.Nested"
value: "\x80"
`,
+ }, {
+ desc: "Any expanded in another message",
+ input: func() *pb2.KnownTypes {
+ m1 := &pb2.Nested{
+ OptString: proto.String("message inside Any of another Any field"),
+ }
+ b1, err := proto.MarshalOptions{Deterministic: true}.Marshal(m1)
+ if err != nil {
+ t.Fatalf("error in binary marshaling message for Any.value: %v", err)
+ }
+ m2 := &anypb.Any{
+ TypeUrl: "pb2.Nested",
+ Value: b1,
+ }
+ b2, err := proto.MarshalOptions{Deterministic: true}.Marshal(m2)
+ if err != nil {
+ t.Fatalf("error in binary marshaling message for Any.value: %v", err)
+ }
+ return &pb2.KnownTypes{
+ OptAny: &anypb.Any{
+ TypeUrl: "google.protobuf.Any",
+ Value: b2,
+ },
+ }
+ }(),
+ want: `opt_any: {
+ [google.protobuf.Any]: {
+ [pb2.Nested]: {
+ opt_string: "message inside Any of another Any field"
+ }
+ }
+}
+`,
+ }, {
+ desc: "Any not expanded due to invalid UTF-8",
+ input: func() *pb2.KnownTypes {
+ m := &pb2.Nested{
+ OptString: proto.String("invalid UTF-8 abc\xff"),
+ }
+ b, err := proto.MarshalOptions{Deterministic: true}.Marshal(m)
+ if err != nil {
+ t.Fatalf("error in binary marshaling message for Any.value: %v", err)
+ }
+ return &pb2.KnownTypes{
+ OptAny: &anypb.Any{
+ TypeUrl: "pb2.Nested",
+ Value: b,
+ },
+ }
+ }(),
+ want: `opt_any: {
+ type_url: "pb2.Nested"
+ value: "\n\x12invalid UTF-8 abc\xff"
+}
+`,
+ }, {
+ desc: "Any inside Any not expanded",
+ input: func() *pb2.KnownTypes {
+ m1 := &pb2.Nested{
+ OptString: proto.String("invalid UTF-8 abc\xff"),
+ }
+ b1, err := proto.MarshalOptions{Deterministic: true}.Marshal(m1)
+ if err != nil {
+ t.Fatalf("error in binary marshaling message for Any.value: %v", err)
+ }
+ m2 := &anypb.Any{
+ TypeUrl: "pb2.Nested",
+ Value: b1,
+ }
+ b2, err := proto.MarshalOptions{Deterministic: true}.Marshal(m2)
+ if err != nil {
+ t.Fatalf("error in binary marshaling message for Any.value: %v", err)
+ }
+ return &pb2.KnownTypes{
+ OptAny: &anypb.Any{
+ TypeUrl: "google.protobuf.Any",
+ Value: b2,
+ },
+ }
+ }(),
+ want: `opt_any: {
+ [google.protobuf.Any]: {
+ type_url: "pb2.Nested"
+ value: "\n\x12invalid UTF-8 abc\xff"
+ }
+}
+`,
}}
for _, tt := range tests {
diff --git a/internal/conformance/failing_tests_text_format.txt b/internal/conformance/failing_tests_text_format.txt
index f2a30e8..570abde 100644
--- a/internal/conformance/failing_tests_text_format.txt
+++ b/internal/conformance/failing_tests_text_format.txt
@@ -1,9 +1 @@
Recommended.Proto3.ProtobufInput.MessageUnknownFields_Print.TextFormatOutput
-Required.Proto3.TextFormatInput.FloatFieldLargerThanUint64.ProtobufOutput
-Required.Proto3.TextFormatInput.FloatFieldLargerThanUint64.TextFormatOutput
-Required.Proto3.TextFormatInput.FloatFieldMaxValue.ProtobufOutput
-Required.Proto3.TextFormatInput.FloatFieldMaxValue.TextFormatOutput
-Required.Proto3.TextFormatInput.FloatFieldNaNValue.ProtobufOutput
-Required.Proto3.TextFormatInput.FloatFieldNaNValue.TextFormatOutput
-Required.Proto3.TextFormatInput.FloatFieldTooLarge.ProtobufOutput
-Required.Proto3.TextFormatInput.FloatFieldTooLarge.TextFormatOutput
diff --git a/internal/encoding/defval/default.go b/internal/encoding/defval/default.go
index c1d81a1..fdd9b13 100644
--- a/internal/encoding/defval/default.go
+++ b/internal/encoding/defval/default.go
@@ -175,13 +175,11 @@
func unmarshalBytes(s string) ([]byte, bool) {
// Bytes values use the same escaping as the text format,
// however they lack the surrounding double quotes.
- // TODO: Export unmarshalString in the text package to avoid this hack.
- v, err := ptext.Unmarshal([]byte(`["` + s + `"]:0`))
- if err == nil && len(v.Message()) == 1 {
- s := v.Message()[0][0].String()
- return []byte(s), true
+ v, err := ptext.UnmarshalString(`"` + s + `"`)
+ if err != nil {
+ return nil, false
}
- return nil, false
+ return []byte(v), true
}
// marshalBytes serializes bytes by using C escaping.
diff --git a/internal/encoding/text/decode.go b/internal/encoding/text/decode.go
index c0513a8..8ab2eb4 100644
--- a/internal/encoding/text/decode.go
+++ b/internal/encoding/text/decode.go
@@ -6,238 +6,524 @@
import (
"bytes"
+ "fmt"
"io"
"regexp"
"strconv"
"unicode/utf8"
"google.golang.org/protobuf/internal/errors"
- "google.golang.org/protobuf/reflect/protoreflect"
)
-type syntaxError struct{ error }
+// Decoder is a token-based textproto decoder.
+type Decoder struct {
+ // lastCall is last method called, either readCall or peekCall.
+ // Initial value is readCall.
+ lastCall call
-func newSyntaxError(f string, x ...interface{}) error {
- return syntaxError{errors.New(f, x...)}
-}
+ // lastToken contains the last read token.
+ lastToken Token
-// Unmarshal parses b as the proto text format.
-// It returns a Value, which is always of the Message type.
-func Unmarshal(b []byte) (Value, error) {
- p := decoder{in: b}
- p.consume(0) // trim leading spaces or comments
- v, err := p.unmarshalMessage(false)
- if err != nil {
- if e, ok := err.(syntaxError); ok {
- b = b[:len(b)-len(p.in)] // consumed input
- line := bytes.Count(b, []byte("\n")) + 1
- if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
- b = b[i+1:]
- }
- column := utf8.RuneCount(b) + 1 // ignore multi-rune characters
- err = errors.New("syntax error (line %d:%d): %v", line, column, e.error)
- }
- return Value{}, err
- }
- if len(p.in) > 0 {
- return Value{}, errors.New("%d bytes of unconsumed input", len(p.in))
- }
- return v, nil
-}
+ // lastErr contains the last read error.
+ lastErr error
-type decoder struct {
+ // openStack is a stack containing the byte characters for MessageOpen and
+ // ListOpen kinds. The top of stack represents the message or the list that
+ // the current token is nested in. An empty stack means the current token is
+ // at the top level message. The characters '{' and '<' both represent the
+ // MessageOpen kind.
+ openStack []byte
+
+ // orig is used in reporting line and column.
+ orig []byte
+ // in contains the unconsumed input.
in []byte
}
-func (p *decoder) unmarshalList() (Value, error) {
- b := p.in
- var elems []Value
- if err := p.consumeChar('[', "at start of list"); err != nil {
- return Value{}, err
+// NewDecoder returns a Decoder that reads from b. Both the original input
+// (kept for line/column reporting) and the unconsumed input start out as b.
+func NewDecoder(b []byte) *Decoder {
+	d := new(Decoder)
+	d.orig, d.in = b, b
+	return d
+}
+
+// call specifies which Decoder method was invoked.
+type call uint8
+
+const (
+ // readCall is the zero value, so a new Decoder starts as if Read was the
+ // last call.
+ readCall call = iota
+ // peekCall marks that the last call was Peek.
+ peekCall
+)
+
+// Peek looks ahead and returns the next token and error without advancing a read.
+func (d *Decoder) Peek() (Token, error) {
+ defer func() { d.lastCall = peekCall }()
+ if d.lastCall == readCall {
+ d.lastToken, d.lastErr = d.Read()
}
- if len(p.in) > 0 && p.in[0] != ']' {
- for len(p.in) > 0 {
- v, err := p.unmarshalValue()
- if err != nil {
- return Value{}, err
+ return d.lastToken, d.lastErr
+}
+
+// Read advances the decoder and returns the token it lands on.
+// If the previous call was Peek, the token and error cached by that Peek are
+// returned instead of parsing again. It will return an error if there is no
+// valid token.
+func (d *Decoder) Read() (Token, error) {
+	defer func() { d.lastCall = readCall }()
+	if d.lastCall == peekCall {
+		return d.lastToken, d.lastErr
+	}
+
+	next, err := d.parseNext(d.lastToken.Kind())
+	if err != nil {
+		return Token{}, err
+	}
+
+	// Separators are not surfaced to the caller: skip one and return the
+	// token that follows it.
+	if next.kind == comma || next.kind == semicolon {
+		if next, err = d.parseNext(next.kind); err != nil {
+			return Token{}, err
+		}
+	}
+	d.lastToken = next
+	return next, nil
+}
+
+const (
+ mismatchedFmt = "mismatched close character %q"
+ unexpectedFmt = "unexpected character %q"
+)
+
+// parseNext parses the next Token based on given last kind.
+func (d *Decoder) parseNext(lastKind Kind) (Token, error) {
+ // Trim leading spaces.
+ d.consume(0)
+ isEOF := false
+ if len(d.in) == 0 {
+ isEOF = true
+ }
+
+ switch lastKind {
+ case EOF:
+ return d.consumeToken(EOF, 0, 0), nil
+
+ case bof:
+ // Start of top level message. Next token can be EOF or Name.
+ if isEOF {
+ return d.consumeToken(EOF, 0, 0), nil
+ }
+ return d.parseFieldName()
+
+ case Name:
+ // Next token can be MessageOpen, ListOpen or Scalar.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case '{', '<':
+ d.pushOpenStack(ch)
+ return d.consumeToken(MessageOpen, 1, 0), nil
+ case '[':
+ d.pushOpenStack(ch)
+ return d.consumeToken(ListOpen, 1, 0), nil
+ default:
+ return d.parseScalar()
+ }
+
+ case Scalar:
+ openKind, closeCh := d.currentOpenKind()
+ switch openKind {
+ case bof:
+ // Top level message.
+ // Next token can be EOF, comma, semicolon or Name.
+ if isEOF {
+ return d.consumeToken(EOF, 0, 0), nil
}
- elems = append(elems, v)
- if !p.tryConsumeChar(',') {
+ switch d.in[0] {
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ case ';':
+ return d.consumeToken(semicolon, 1, 0), nil
+ default:
+ return d.parseFieldName()
+ }
+
+ case MessageOpen:
+ // Next token can be MessageClose, comma, semicolon or Name.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case closeCh:
+ d.popOpenStack()
+ return d.consumeToken(MessageClose, 1, 0), nil
+ case otherCloseChar[closeCh]:
+ return Token{}, d.newSyntaxError(mismatchedFmt, ch)
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ case ';':
+ return d.consumeToken(semicolon, 1, 0), nil
+ default:
+ return d.parseFieldName()
+ }
+
+		case ListOpen:
+			// Next token can be ListClose or comma.
+			// Input may end right after a list element (e.g. `name: [1`), so
+			// guard against EOF before indexing d.in.
+			if isEOF {
+				return Token{}, io.ErrUnexpectedEOF
+			}
+			switch ch := d.in[0]; ch {
+			case ']':
+				d.popOpenStack()
+				return d.consumeToken(ListClose, 1, 0), nil
+			case ',':
+				return d.consumeToken(comma, 1, 0), nil
+			default:
+				return Token{}, d.newSyntaxError(unexpectedFmt, ch)
+			}
+ }
+
+ case MessageOpen:
+ // Next token can be MessageClose or Name.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ _, closeCh := d.currentOpenKind()
+ switch ch := d.in[0]; ch {
+ case closeCh:
+ d.popOpenStack()
+ return d.consumeToken(MessageClose, 1, 0), nil
+ case otherCloseChar[closeCh]:
+ return Token{}, d.newSyntaxError(mismatchedFmt, ch)
+ default:
+ return d.parseFieldName()
+ }
+
+ case MessageClose:
+ openKind, closeCh := d.currentOpenKind()
+ switch openKind {
+ case bof:
+ // Top level message.
+ // Next token can be EOF, comma, semicolon or Name.
+ if isEOF {
+ return d.consumeToken(EOF, 0, 0), nil
+ }
+ switch ch := d.in[0]; ch {
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ case ';':
+ return d.consumeToken(semicolon, 1, 0), nil
+ default:
+ return d.parseFieldName()
+ }
+
+ case MessageOpen:
+ // Next token can be MessageClose, comma, semicolon or Name.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case closeCh:
+ d.popOpenStack()
+ return d.consumeToken(MessageClose, 1, 0), nil
+ case otherCloseChar[closeCh]:
+ return Token{}, d.newSyntaxError(mismatchedFmt, ch)
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ case ';':
+ return d.consumeToken(semicolon, 1, 0), nil
+ default:
+ return d.parseFieldName()
+ }
+
+ case ListOpen:
+ // Next token can be ListClose or comma
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case closeCh:
+ d.popOpenStack()
+ return d.consumeToken(ListClose, 1, 0), nil
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ default:
+ return Token{}, d.newSyntaxError(unexpectedFmt, ch)
+ }
+ }
+
+ case ListOpen:
+ // Next token can be ListClose, MessageOpen or Scalar.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case ']':
+ d.popOpenStack()
+ return d.consumeToken(ListClose, 1, 0), nil
+ case '{', '<':
+ d.pushOpenStack(ch)
+ return d.consumeToken(MessageOpen, 1, 0), nil
+ default:
+ return d.parseScalar()
+ }
+
+ case ListClose:
+ openKind, closeCh := d.currentOpenKind()
+ switch openKind {
+ case bof:
+ // Top level message.
+ // Next token can be EOF, comma, semicolon or Name.
+ if isEOF {
+ return d.consumeToken(EOF, 0, 0), nil
+ }
+ switch ch := d.in[0]; ch {
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ case ';':
+ return d.consumeToken(semicolon, 1, 0), nil
+ default:
+ return d.parseFieldName()
+ }
+
+ case MessageOpen:
+ // Next token can be MessageClose, comma, semicolon or Name.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case closeCh:
+ d.popOpenStack()
+ return d.consumeToken(MessageClose, 1, 0), nil
+ case otherCloseChar[closeCh]:
+ return Token{}, d.newSyntaxError(mismatchedFmt, ch)
+ case ',':
+ return d.consumeToken(comma, 1, 0), nil
+ case ';':
+ return d.consumeToken(semicolon, 1, 0), nil
+ default:
+ return d.parseFieldName()
+ }
+
+ default:
+ // It is not possible to have this case. Let it panic below.
+ }
+
+ case comma, semicolon:
+ openKind, closeCh := d.currentOpenKind()
+ switch openKind {
+ case bof:
+ // Top level message. Next token can be EOF or Name.
+ if isEOF {
+ return d.consumeToken(EOF, 0, 0), nil
+ }
+ return d.parseFieldName()
+
+ case MessageOpen:
+ // Next token can be MessageClose or Name.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
+ }
+ switch ch := d.in[0]; ch {
+ case closeCh:
+ d.popOpenStack()
+ return d.consumeToken(MessageClose, 1, 0), nil
+ case otherCloseChar[closeCh]:
+ return Token{}, d.newSyntaxError(mismatchedFmt, ch)
+ default:
+ return d.parseFieldName()
+ }
+
+ case ListOpen:
+ if lastKind == semicolon {
+ // It is not possible to have this case as the logic here
+ // should not have produced a semicolon Token when inside a
+ // list. Let it panic below.
break
}
- }
- }
- if err := p.consumeChar(']', "at end of list"); err != nil {
- return Value{}, err
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(elems, b[:len(b):len(b)]), nil
-}
-
-func (p *decoder) unmarshalMessage(checkDelims bool) (Value, error) {
- b := p.in
- var items [][2]Value
- delims := [2]byte{'{', '}'}
- if len(p.in) > 0 && p.in[0] == '<' {
- delims = [2]byte{'<', '>'}
- }
- if checkDelims {
- if err := p.consumeChar(delims[0], "at start of message"); err != nil {
- return Value{}, err
- }
- }
- for len(p.in) > 0 {
- if p.in[0] == '}' || p.in[0] == '>' {
- break
- }
- k, err := p.unmarshalKey()
- if err != nil {
- return Value{}, err
- }
- if !p.tryConsumeChar(':') && len(p.in) > 0 && p.in[0] != '{' && p.in[0] != '<' {
- return Value{}, newSyntaxError("expected ':' after message key")
- }
- v, err := p.unmarshalValue()
- if err != nil {
- return Value{}, err
- }
- if p.tryConsumeChar(';') || p.tryConsumeChar(',') {
- // always optional
- }
- items = append(items, [2]Value{k, v})
- }
- if checkDelims {
- if err := p.consumeChar(delims[1], "at end of message"); err != nil {
- return Value{}, err
- }
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(items, b[:len(b):len(b)]), nil
-}
-
-// unmarshalKey parses the key, which may be a Name, String, or Uint.
-func (p *decoder) unmarshalKey() (v Value, err error) {
- if p.tryConsumeChar('[') {
- if len(p.in) == 0 {
- return Value{}, io.ErrUnexpectedEOF
- }
- if p.in[0] == '\'' || p.in[0] == '"' {
- // Historically, Go's parser allowed a string for the Any type URL.
- // This is specific to Go and contrary to the C++ implementation,
- // which does not support strings for the Any type URL.
- v, err = p.unmarshalString()
- if err != nil {
- return Value{}, err
+ // Next token can be MessageOpen or Scalar.
+ if isEOF {
+ return Token{}, io.ErrUnexpectedEOF
}
- } else {
- v, err = p.unmarshalURL()
- if err != nil {
- return Value{}, err
+ switch ch := d.in[0]; ch {
+ case '{', '<':
+ d.pushOpenStack(ch)
+ return d.consumeToken(MessageOpen, 1, 0), nil
+ default:
+ return d.parseScalar()
}
}
- if err := p.consumeChar(']', "at end of extension name"); err != nil {
- return Value{}, err
- }
- return v, nil
}
- v, err = p.unmarshalName()
- if err == nil {
- return v, nil
- }
- v, err = p.unmarshalNumberKey()
- if err == nil {
- return v, nil
- }
- return Value{}, err
+
+ line, column := d.Position(len(d.orig) - len(d.in))
+ panic(fmt.Sprintf("Decoder.parseNext: bug at handling line %d:%d with lastKind=%v", line, column, lastKind))
}
-// unmarshalURL parses an Any type URL string. The C++ parser does not handle
-// many legal URL strings. This implementation is more liberal and allows for
-// the pattern ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`).
-func (p *decoder) unmarshalURL() (Value, error) {
- s := p.in
- var size int
- for len(s) > 0 && (s[0] == '-' || s[0] == '_' ||
- ('0' <= s[0] && s[0] <= '9') ||
- ('a' <= s[0] && s[0] <= 'z') ||
- ('A' <= s[0] && s[0] <= 'Z')) {
- s = s[1:]
- size++
- if len(s) > 0 && (s[0] == '/' || s[0] == '.') {
- s = s[1:]
- size++
- }
- }
-
- // Last character cannot be '.' or '/'.
- // Next byte should either be a delimiter or it is at the end.
- if size == 0 || p.in[size-1] == '.' || p.in[size-1] == '/' ||
- (len(s) > 0 && !isDelim(s[0])) {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
- v := rawValueOf(string(p.in[:size]), p.in[:size:size])
- p.consume(size)
- return v, nil
+var otherCloseChar = map[byte]byte{
+ '}': '>',
+ '>': '}',
}
-// unmarshalNumberKey parses field number as key. Field numbers are non-negative
-// integers.
-func (p *decoder) unmarshalNumberKey() (Value, error) {
- num, ok := parseNumber(p.in)
- if !ok || num.neg || num.typ == numFloat {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
+// currentOpenKind indicates whether current position is inside a message, list
+// or top-level message by returning MessageOpen, ListOpen or bof respectively.
+// If the returned kind is either a MessageOpen or ListOpen, it also returns the
+// corresponding closing character.
+func (d *Decoder) currentOpenKind() (Kind, byte) {
+ if len(d.openStack) == 0 {
+ return bof, 0
}
- v, err := strconv.ParseUint(string(num.value), 0, 64)
- if err != nil {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
- p.consume(num.size)
- return rawValueOf(v, num.value), nil
-}
-
-func (p *decoder) unmarshalValue() (Value, error) {
- if len(p.in) == 0 {
- return Value{}, io.ErrUnexpectedEOF
- }
- switch p.in[0] {
- case '"', '\'':
- return p.unmarshalStrings()
+ openCh := d.openStack[len(d.openStack)-1]
+ switch openCh {
+ case '{':
+ return MessageOpen, '}'
+ case '<':
+ return MessageOpen, '>'
case '[':
- return p.unmarshalList()
- case '{', '<':
- return p.unmarshalMessage(true)
- default:
- n, ok := consumeName(p.in)
- if ok && literals[string(p.in[:n])] == nil {
- v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
- p.consume(n)
- return v, nil
+ return ListOpen, ']'
+ }
+ panic(fmt.Sprintf("Decoder: openStack contains invalid byte %s", string(openCh)))
+}
+
+// pushOpenStack records ch as the innermost open message or list character.
+func (d *Decoder) pushOpenStack(ch byte) {
+	stack := append(d.openStack, ch)
+	d.openStack = stack
+}
+
+// popOpenStack drops the innermost open character. It panics if the stack is
+// empty.
+func (d *Decoder) popOpenStack() {
+	top := len(d.openStack) - 1
+	d.openStack = d.openStack[:top]
+}
+
+// parseFieldName parses field name and separator.
+func (d *Decoder) parseFieldName() (tok Token, err error) {
+ defer func() {
+ if err == nil && d.tryConsumeChar(':') {
+ tok.attrs |= hasSeparator
}
- return p.unmarshalNumber()
+ }()
+
+ // Extension or Any type URL.
+ if d.in[0] == '[' {
+ return d.parseTypeName()
+ }
+
+ // Identifier.
+ if size := parseIdent(d.in, false); size > 0 {
+ return d.consumeToken(Name, size, uint8(IdentName)), nil
+ }
+
+ // Field number. Identify if input is a valid number that is not negative
+ // and is decimal integer within 32-bit range.
+ if num := parseNumber(d.in); num.size > 0 {
+ if !num.neg && num.kind == numDec {
+ if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil {
+ return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil
+ }
+ }
+ return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size])
+ }
+
+ return Token{}, d.newSyntaxError("invalid field name: %s", errRegexp.Find(d.in))
+}
+
+// parseTypeName parses Any type URL or extension field name. The name is
+// enclosed in [ and ] characters. The C++ parser does not handle many legal URL
+// strings. This implementation is more liberal and allows for the pattern
+// `^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`. Whitespaces and comments are allowed
+// in between [ ], '.', '/' and the sub names.
+//
+// On success it returns a Name token whose raw bytes span from '[' through ']'
+// and whose str holds the name with whitespace and comments removed; d.in is
+// advanced past the closing ']' and any whitespace or comments that follow.
+// It returns io.ErrUnexpectedEOF if the input ends before the closing ']', or
+// a syntax error for a malformed name.
+func (d *Decoder) parseTypeName() (Token, error) {
+	startPos := len(d.orig) - len(d.in)
+	// Use alias s to advance first in order to use d.in for error handling.
+	// Caller already checks for [ as first character.
+	s := consume(d.in[1:], 0)
+	if len(s) == 0 {
+		return Token{}, io.ErrUnexpectedEOF
+	}
+
+	var name []byte
+	for len(s) > 0 && isTypeNameChar(s[0]) {
+		name = append(name, s[0])
+		s = s[1:]
+	}
+	s = consume(s, 0)
+
+	// startPos and len(d.orig)-len(s) are offsets into d.orig, so the text
+	// quoted in error messages must be sliced from d.orig, not from d.in.
+	var closed bool
+	for len(s) > 0 && !closed {
+		switch {
+		case s[0] == ']':
+			s = s[1:]
+			closed = true
+
+		case s[0] == '/', s[0] == '.':
+			if len(name) > 0 && (name[len(name)-1] == '/' || name[len(name)-1] == '.') {
+				return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
+					d.orig[startPos:len(d.orig)-len(s)+1])
+			}
+			name = append(name, s[0])
+			s = s[1:]
+			s = consume(s, 0)
+			for len(s) > 0 && isTypeNameChar(s[0]) {
+				name = append(name, s[0])
+				s = s[1:]
+			}
+			s = consume(s, 0)
+
+		default:
+			return Token{}, d.newSyntaxError(
+				"invalid type URL/extension field name: %s", d.orig[startPos:len(d.orig)-len(s)+1])
+		}
+	}
+
+	if !closed {
+		return Token{}, io.ErrUnexpectedEOF
+	}
+
+	// First character cannot be '.'. Last character cannot be '.' or '/'.
+	size := len(name)
+	if size == 0 || name[0] == '.' || name[size-1] == '.' || name[size-1] == '/' {
+		return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
+			d.orig[startPos:len(d.orig)-len(s)])
+	}
+
+	d.in = s
+	// endPos is taken before trailing whitespace is consumed so that raw ends
+	// at the closing ']'.
+	endPos := len(d.orig) - len(d.in)
+	d.consume(0)
+
+	return Token{
+		kind:  Name,
+		attrs: uint8(TypeName),
+		pos:   startPos,
+		raw:   d.orig[startPos:endPos],
+		str:   string(name),
+	}, nil
+}
+
+// isTypeNameChar reports whether b may appear in a segment of a type URL or
+// extension field name: an ASCII letter, a digit, '-' or '_'.
+func isTypeNameChar(b byte) bool {
+	switch {
+	case b == '-', b == '_':
+		return true
+	case '0' <= b && b <= '9':
+		return true
+	case 'a' <= b && b <= 'z', 'A' <= b && b <= 'Z':
+		return true
+	}
+	return false
+}
+
+func isWhiteSpace(b byte) bool {
+ switch b {
+ case ' ', '\n', '\r', '\t':
+ return true
+ default:
+ return false
}
}
-// unmarshalName unmarshals an unquoted proto identifier.
-// Regular expression that matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
-//
-// E.g., `field_name` => ValueOf(protoreflect.Name("field_name"))
-func (p *decoder) unmarshalName() (Value, error) {
- n, ok := consumeName(p.in)
- if !ok {
- return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in))
- }
-
- v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n])
- p.consume(n)
- return v, nil
-}
-
-func consumeName(input []byte) (int, bool) {
- var n int
+// parseIdent parses an unquoted proto identifier and returns size.
+// If allowNeg is true, it allows '-' to be the first character in the
+// identifier. This is used when parsing literal values like -infinity, etc.
+// Regular expression matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
+func parseIdent(input []byte, allowNeg bool) int {
+ var size int
s := input
if len(s) == 0 {
- return 0, false
+ return 0
+ }
+
+ if allowNeg && s[0] == '-' {
+ s = s[1:]
+ size++
+ if len(s) == 0 {
+ return 0
+ }
}
switch {
@@ -245,9 +531,9 @@
'a' <= s[0] && s[0] <= 'z',
'A' <= s[0] && s[0] <= 'Z':
s = s[1:]
- n++
+ size++
default:
- return 0, false
+ return 0
}
for len(s) > 0 && (s[0] == '_' ||
@@ -255,51 +541,110 @@
'A' <= s[0] && s[0] <= 'Z' ||
'0' <= s[0] && s[0] <= '9') {
s = s[1:]
- n++
+ size++
}
if len(s) > 0 && !isDelim(s[0]) {
- return 0, false
+ return 0
}
- return n, true
+ return size
}
-func (p *decoder) consumeChar(c byte, msg string) error {
- if p.tryConsumeChar(c) {
- return nil
+// parseScalar parses for a string, literal or number value.
+func (d *Decoder) parseScalar() (Token, error) {
+ if d.in[0] == '"' || d.in[0] == '\'' {
+ return d.parseStringValue()
}
- if len(p.in) == 0 {
- return io.ErrUnexpectedEOF
+
+ if tok, ok := d.parseLiteralValue(); ok {
+ return tok, nil
}
- return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg)
+
+ if tok, ok := d.parseNumberValue(); ok {
+ return tok, nil
+ }
+
+ return Token{}, d.newSyntaxError("invalid scalar value: %s", errRegexp.Find(d.in))
}
-func (p *decoder) tryConsumeChar(c byte) bool {
- if len(p.in) > 0 && p.in[0] == c {
- p.consume(1)
+// parseLiteralValue recognizes an identifier-like value, optionally prefixed
+// with '-'. Such literals are used for bools, special floats and enums; the
+// literal is only identified here, not interpreted. It returns false without
+// consuming input if d.in does not start with such an identifier.
+func (d *Decoder) parseLiteralValue() (Token, bool) {
+	if n := parseIdent(d.in, true); n != 0 {
+		return d.consumeToken(Scalar, n, literalValue), true
+	}
+	return Token{}, false
+}
+
+// consumeToken builds a Token of the given kind and attrs whose raw bytes are
+// the first size bytes of d.in, then consumes those bytes along with any
+// whitespace or comments that follow.
+func (d *Decoder) consumeToken(kind Kind, size int, attrs uint8) Token {
+	// pos and raw must be captured before d.in is advanced.
+	pos := len(d.orig) - len(d.in)
+	raw := d.in[:size]
+	d.consume(size)
+	return Token{kind: kind, attrs: attrs, pos: pos, raw: raw}
+}
+
+// newSyntaxError formats f and x into an error and prefixes it with the line
+// and column of the decoder's current position (the start of the unconsumed
+// input).
+func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
+	line, column := d.Position(len(d.orig) - len(d.in))
+	cause := errors.New(f, x...)
+	return errors.New("syntax error (line %d:%d): %v", line, column, cause)
+}
+
+// Position returns the 1-based line and column number for the given index
+// into the original input. It will panic if index is out of range.
+func (d *Decoder) Position(idx int) (line int, column int) {
+	prefix := d.orig[:idx]
+	line = 1 + bytes.Count(prefix, []byte("\n"))
+	// LastIndexByte yields -1 when there is no newline, which makes the line
+	// start at offset 0.
+	lineStart := bytes.LastIndexByte(prefix, '\n') + 1
+	// Columns are counted in runes, so a multi-byte rune counts once.
+	column = 1 + utf8.RuneCount(prefix[lineStart:])
+	return line, column
+}
+
+func (d *Decoder) tryConsumeChar(c byte) bool {
+ if len(d.in) > 0 && d.in[0] == c {
+ d.consume(1)
return true
}
return false
}
// consume consumes n bytes of input and any subsequent whitespace or comments.
-func (p *decoder) consume(n int) {
- p.in = p.in[n:]
- for len(p.in) > 0 {
- switch p.in[0] {
+func (d *Decoder) consume(n int) {
+ d.in = consume(d.in, n)
+ return
+}
+
+// consume consumes n bytes of input and any subsequent whitespace or comments.
+func consume(b []byte, n int) []byte {
+ b = b[n:]
+ for len(b) > 0 {
+ switch b[0] {
case ' ', '\n', '\r', '\t':
- p.in = p.in[1:]
+ b = b[1:]
case '#':
- if i := bytes.IndexByte(p.in, '\n'); i >= 0 {
- p.in = p.in[i+len("\n"):]
+ if i := bytes.IndexByte(b, '\n'); i >= 0 {
+ b = b[i+len("\n"):]
} else {
- p.in = nil
+ b = nil
}
default:
- return
+ return b
}
}
+ return b
}
// Any sequence that looks like a non-delimiter (for error reporting).
diff --git a/internal/encoding/text/decode_number.go b/internal/encoding/text/decode_number.go
new file mode 100644
index 0000000..f2d90b7
--- /dev/null
+++ b/internal/encoding/text/decode_number.go
@@ -0,0 +1,190 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+// parseNumberValue parses a number at the start of the input and returns it as
+// a Scalar token. It returns false without consuming input if the input does
+// not start with a number.
+func (d *Decoder) parseNumberValue() (Token, bool) {
+	num := parseNumber(d.in)
+	if num.size == 0 {
+		return Token{}, false
+	}
+
+	attrs := num.kind
+	if num.neg {
+		attrs |= isNegative
+	}
+
+	// str drops the optional trailing f/F float suffix; raw keeps it.
+	raw := d.in[:num.size]
+	str := raw
+	if c := raw[num.size-1]; num.kind == numFloat && (c == 'f' || c == 'F') {
+		str = raw[:num.size-1]
+	}
+
+	tok := Token{
+		kind:     Scalar,
+		attrs:    numberValue,
+		pos:      len(d.orig) - len(d.in),
+		raw:      raw,
+		str:      string(str),
+		numAttrs: attrs,
+	}
+	d.consume(num.size)
+	return tok, true
+}
+
+// Kinds of number recognized by parseNumber. Because of the division by 2,
+// the values are numDec=0, numHex=1, numOct=2 and numFloat=4, so numDec has no
+// bit set and cannot be detected with a bitmask test.
+const (
+ numDec uint8 = (1 << iota) / 2
+ numHex
+ numOct
+ numFloat
+)
+
+// number is the result of parsing out a valid number from parseNumber. It
+// contains data for doing float or integer conversion via the strconv package
+// in conjunction with the input bytes.
+type number struct {
+ // kind is one of numDec, numHex, numOct or numFloat.
+ kind uint8
+ // neg is true if the number has a leading '-'.
+ neg bool
+ // size is the number of input bytes making up the number, including any
+ // leading '-' and trailing f/F suffix. It is 0 if no number was parsed.
+ size int
+}
+
+// parseNumber constructs a number object from given input. It allows for the
+// following patterns:
+// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
+// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
+// It also returns the number of parsed bytes for the given number, 0 if it is
+// not a number.
+//
+// The number must be followed by a delimiter (per isDelim) or the end of
+// input, otherwise the zero number is returned. No numeric conversion is done
+// here; only the kind, sign and byte length are determined.
+//
+// NOTE(review): the exponent and fraction loops below accept zero digits, so
+// input such as "1e" followed by a delimiter appears to be reported as a float
+// of size 2 despite the pattern above; presumably the later strconv conversion
+// rejects it -- confirm.
+func parseNumber(input []byte) number {
+ kind := numDec
+ var size int
+ var neg bool
+
+ s := input
+ if len(s) == 0 {
+ return number{}
+ }
+
+ // Optional -
+ if s[0] == '-' {
+ neg = true
+ s = s[1:]
+ size++
+ if len(s) == 0 {
+ return number{}
+ }
+ }
+
+ // C++ allows for whitespace and comments in between the negative sign and
+ // the rest of the number. This logic currently does not but is consistent
+ // with v1.
+
+ switch {
+ case s[0] == '0':
+ if len(s) > 1 {
+ switch {
+ case s[1] == 'x' || s[1] == 'X':
+ // Parse as hex number.
+ kind = numHex
+ // n starts at 2 to account for the "0x" prefix.
+ n := 2
+ s = s[2:]
+ for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
+ ('a' <= s[0] && s[0] <= 'f') ||
+ ('A' <= s[0] && s[0] <= 'F')) {
+ s = s[1:]
+ n++
+ }
+ // "0x" with no hex digits is not a number.
+ if n == 2 {
+ return number{}
+ }
+ size += n
+
+ case '0' <= s[1] && s[1] <= '7':
+ // Parse as octal number.
+ kind = numOct
+ // n starts at 2 to account for the leading '0' and first octal digit.
+ n := 2
+ s = s[2:]
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
+ s = s[1:]
+ n++
+ }
+ size += n
+ }
+
+ // Hex and octal numbers end here; they take no fraction, exponent
+ // or float suffix.
+ if kind&(numHex|numOct) > 0 {
+ if len(s) > 0 && !isDelim(s[0]) {
+ return number{}
+ }
+ return number{kind: kind, neg: neg, size: size}
+ }
+ }
+ // A lone leading '0' that is neither hex nor octal: consume it and fall
+ // through to the fraction/exponent/suffix handling.
+ s = s[1:]
+ size++
+
+ case '1' <= s[0] && s[0] <= '9':
+ n := 1
+ s = s[1:]
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+ s = s[1:]
+ n++
+ }
+ size += n
+
+ case s[0] == '.':
+ // Set kind to numFloat to signify the intent to parse as float. And
+ // that it needs to have other digits after '.'.
+ kind = numFloat
+
+ default:
+ return number{}
+ }
+
+ // . followed by 0 or more digits.
+ if len(s) > 0 && s[0] == '.' {
+ n := 1
+ s = s[1:]
+ // If decimal point was before any digits, it should be followed by
+ // other digits.
+ if len(s) == 0 && kind == numFloat {
+ return number{}
+ }
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+ s = s[1:]
+ n++
+ }
+ size += n
+ kind = numFloat
+ }
+
+ // e or E followed by an optional - or + and 1 or more digits.
+ // len(s) >= 2 guarantees s[0] below is valid after dropping the e/E.
+ if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
+ kind = numFloat
+ s = s[1:]
+ n := 1
+ if s[0] == '+' || s[0] == '-' {
+ s = s[1:]
+ n++
+ if len(s) == 0 {
+ return number{}
+ }
+ }
+ for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+ s = s[1:]
+ n++
+ }
+ size += n
+ }
+
+ // Optional suffix f or F for floats.
+ if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
+ kind = numFloat
+ s = s[1:]
+ size++
+ }
+
+ // Check that next byte is a delimiter or it is at the end.
+ if len(s) > 0 && !isDelim(s[0]) {
+ return number{}
+ }
+
+ return number{kind: kind, neg: neg, size: size}
+}
diff --git a/internal/encoding/text/decode_string.go b/internal/encoding/text/decode_string.go
new file mode 100644
index 0000000..020c8c7
--- /dev/null
+++ b/internal/encoding/text/decode_string.go
@@ -0,0 +1,162 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text
+
+import (
+ "bytes"
+ "io"
+ "strconv"
+ "strings"
+ "unicode"
+ "unicode/utf16"
+ "unicode/utf8"
+
+ "google.golang.org/protobuf/internal/strs"
+)
+
+// parseStringValue parses a string field value into a single Scalar token.
+// Unlike parseString, it accepts a run of back-to-back string literals and
+// treats them as one string with all values concatenated.
+//
+// E.g., `"foo" "bar" "baz"` => "foobarbaz"
+func (d *Decoder) parseStringValue() (Token, error) {
+	// The closing quote unambiguously ends a literal, so adjacent literals
+	// need no whitespace or other separator in between:
+	//   `"foo"'bar'"baz"` => "foobarbaz"
+	start := d.in
+	var parts []string
+	for len(d.in) > 0 {
+		if c := d.in[0]; c != '"' && c != '\'' {
+			break
+		}
+		part, err := d.parseString()
+		if err != nil {
+			return Token{}, err
+		}
+		parts = append(parts, part)
+	}
+	// d.in already points to the end of the value at this point.
+	consumed := len(start) - len(d.in)
+	return Token{
+		kind:  Scalar,
+		attrs: stringValue,
+		pos:   len(d.orig) - len(start),
+		raw:   start[:consumed],
+		str:   strings.Join(parts, ""),
+	}, nil
+}
+
+// parseString parses a string value enclosed in " or '.
+//
+// It unescapes the contents and returns the resulting string. On success the
+// literal, plus any whitespace or comments that follow it, is consumed from
+// d.in. The first byte of d.in is taken as the quote character without being
+// checked, so callers must ensure it is " or '. It returns
+// io.ErrUnexpectedEOF if the input ends before the closing quote or in the
+// middle of an escape sequence, and a syntax error for invalid UTF-8, a raw
+// NUL or newline, or an invalid escape.
+func (d *Decoder) parseString() (string, error) {
+ in := d.in
+ if len(in) == 0 {
+ return "", io.ErrUnexpectedEOF
+ }
+ quote := in[0]
+ in = in[1:]
+ // Take the leading run that needs no unescaping directly from the input.
+ i := indexNeedEscapeInBytes(in)
+ in, out := in[i:], in[:i:i] // set cap to prevent mutations
+ for len(in) > 0 {
+ switch r, n := utf8.DecodeRune(in); {
+ case r == utf8.RuneError && n == 1:
+ return "", d.newSyntaxError("invalid UTF-8 detected")
+ case r == 0 || r == '\n':
+ return "", d.newSyntaxError("invalid character %q in string", r)
+ case r == rune(quote):
+ // Closing quote: consume everything read so far from d.in.
+ in = in[1:]
+ d.consume(len(d.in) - len(in))
+ return string(out), nil
+ case r == '\\':
+ if len(in) < 2 {
+ return "", io.ErrUnexpectedEOF
+ }
+ switch r := in[1]; r {
+ case '"', '\'', '\\', '?':
+ in, out = in[2:], append(out, r)
+ case 'a':
+ in, out = in[2:], append(out, '\a')
+ case 'b':
+ in, out = in[2:], append(out, '\b')
+ case 'n':
+ in, out = in[2:], append(out, '\n')
+ case 'r':
+ in, out = in[2:], append(out, '\r')
+ case 't':
+ in, out = in[2:], append(out, '\t')
+ case 'v':
+ in, out = in[2:], append(out, '\v')
+ case 'f':
+ in, out = in[2:], append(out, '\f')
+ case '0', '1', '2', '3', '4', '5', '6', '7':
+ // One, two, or three octal characters.
+ n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567"))
+ if n > 3 {
+ n = 3
+ }
+ // Parsing with bitSize 8 rejects values above 0377.
+ v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8)
+ if err != nil {
+ return "", d.newSyntaxError("invalid octal escape code %q in string", in[:1+n])
+ }
+ in, out = in[1+n:], append(out, byte(v))
+ case 'x':
+ // One or two hexadecimal characters.
+ n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF"))
+ if n > 2 {
+ n = 2
+ }
+ // With no hex digits (n == 0) ParseUint fails on the empty string.
+ v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8)
+ if err != nil {
+ return "", d.newSyntaxError("invalid hex escape code %q in string", in[:2+n])
+ }
+ in, out = in[2+n:], append(out, byte(v))
+ case 'u', 'U':
+ // Four or eight hexadecimal characters
+ // n is the full escape length including the leading \u or \U.
+ n := 6
+ if r == 'U' {
+ n = 10
+ }
+ if len(in) < n {
+ return "", io.ErrUnexpectedEOF
+ }
+ v, err := strconv.ParseUint(string(in[2:n]), 16, 32)
+ if utf8.MaxRune < v || err != nil {
+ return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:n])
+ }
+ in = in[n:]
+
+ r := rune(v)
+ // A UTF-16 surrogate must be followed by a second \uXXXX escape
+ // that combines with it into a valid rune.
+ if utf16.IsSurrogate(r) {
+ if len(in) < 6 {
+ return "", io.ErrUnexpectedEOF
+ }
+ v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
+ r = utf16.DecodeRune(r, rune(v))
+ if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
+ return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:6])
+ }
+ in = in[6:]
+ }
+ out = append(out, string(r)...)
+ default:
+ return "", d.newSyntaxError("invalid escape code %q in string", in[:2])
+ }
+ default:
+ // Copy this rune plus the following run that needs no unescaping.
+ i := indexNeedEscapeInBytes(in[n:])
+ in, out = in[n+i:], append(out, in[:n+i]...)
+ }
+ }
+ // Ran out of input without seeing the closing quote.
+ return "", io.ErrUnexpectedEOF
+}
+
+// indexNeedEscapeInBytes returns the index of the character that needs
+// escaping. If no characters need escaping, this returns the input length.
+// It delegates to indexNeedEscapeInString on a string view of b.
+func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
+
+// UnmarshalString returns the unescaped form of a textproto string value.
+// The value must be enclosed in single or double quotes. This is only used by
+// the internal/encoding/defval package for unmarshaling bytes.
+func UnmarshalString(s string) (string, error) {
+	return NewDecoder([]byte(s)).parseString()
+}
diff --git a/internal/encoding/text/decode_test.go b/internal/encoding/text/decode_test.go
new file mode 100644
index 0000000..41c5e6d
--- /dev/null
+++ b/internal/encoding/text/decode_test.go
@@ -0,0 +1,1883 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text_test
+
+import (
+ "fmt"
+ "io"
+ "math"
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "github.com/google/go-cmp/cmp"
+ "google.golang.org/protobuf/internal/encoding/text"
+ "google.golang.org/protobuf/internal/flags"
+)
+
+var eofErr = io.ErrUnexpectedEOF.Error() // error substring expected by test cases whose input ends prematurely
+
+type R struct { // R is the expected outcome of a single Decoder.Read call in TestDecoder.
+ // K is the expected Kind of the Token returned by Decoder.Read.
+ K text.Kind
+ // E, if set, is a substring expected in the error returned by Decoder.Read.
+ E string
+ // T holds an NT (if K is Name), an ST (if K is Scalar), or nil (all other kinds).
+ T interface{}
+ // P, if > 0, is the expected Token.Pos.
+ P int
+ // RS, if not empty, is the expected result of Token.RawString().
+ RS string
+}
+
+// NT contains the data to check a name token against (stored in R.T when R.K is Name).
+type NT struct {
+ K text.NameKind // expected kind of name: IdentName, TypeName or FieldNumber
+ // Sep is true if the name token should have a separator character, else false.
+ Sep bool
+ // S is compared against the corresponding getter's result if K is IdentName or TypeName.
+ S string
+ // N is compared against the getter's result if K is FieldNumber.
+ N int32
+}
+
+// ST contains the checkers to run against a scalar token (stored in R.T when R.K is Scalar).
+type ST struct {
+ // ok is a checker that is expected to return OK (presumably via checkOk; driver not shown here).
+ ok checker
+ // nok is a checker that is expected to return not OK (presumably via checkNok; driver not shown here).
+ nok checker
+}
+
+// checker is implemented by the wrapper types for the Token accessor calls: Str,
+// Enum, Bool, Uint64, Uint32, Int64, Int32, Float64, Float32.
+type checker interface {
+ // checkOk expects the token accessor call to return ok and its value to equal
+ // the value stored in the implementation. It returns an empty string on
+ // success, else an error message describing the failure.
+ checkOk(text.Token) string
+ // checkNok expects the token accessor call to return not ok. It returns an
+ // empty string on success, else an error message describing the failure.
+ checkNok(text.Token) string
+}
+
+// Str checks a token through Token.String; val is the value expected by checkOk.
+type Str struct{ val string }
+
+// checkOk returns "" if tok.String() succeeds and yields s.val, else a failure message.
+func (s Str) checkOk(tok text.Token) string {
+	switch actual, ok := tok.String(); {
+	case !ok:
+		return fmt.Sprintf("Token.String() returned not OK for token: %v", tok.RawString())
+	case actual != s.val:
+		return fmt.Sprintf("Token.String() got %q want %q for token: %v", actual, s.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.String() reports not ok, else a failure message.
+func (s Str) checkNok(tok text.Token) string {
+	if _, ok := tok.String(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.String() returned OK for token: %v", tok.RawString())
+}
+
+// Enum checks a token through Token.Enum; val is the value expected by checkOk.
+type Enum struct{ val string }
+
+// checkOk returns "" if tok.Enum() succeeds and yields e.val, else a failure message.
+func (e Enum) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Enum(); {
+	case !ok:
+		return fmt.Sprintf("Token.Enum() returned not OK for token: %v", tok.RawString())
+	case actual != e.val:
+		return fmt.Sprintf("Token.Enum() got %q want %q for token: %v", actual, e.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Enum() reports not ok, else a failure message.
+func (e Enum) checkNok(tok text.Token) string {
+	if _, ok := tok.Enum(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Enum() returned OK for token: %v", tok.RawString())
+}
+
+// Bool checks a token through Token.Bool; val is the value expected by checkOk.
+type Bool struct{ val bool }
+
+// checkOk returns "" if tok.Bool() succeeds and yields b.val, else a failure message.
+func (b Bool) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Bool(); {
+	case !ok:
+		return fmt.Sprintf("Token.Bool() returned not OK for token: %v", tok.RawString())
+	case actual != b.val:
+		return fmt.Sprintf("Token.Bool() got %v want %v for token: %v", actual, b.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Bool() reports not ok, else a failure message.
+func (b Bool) checkNok(tok text.Token) string {
+	if _, ok := tok.Bool(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Bool() returned OK for token: %v", tok.RawString())
+}
+
+// Uint64 checks a token through Token.Uint64; val is the value expected by checkOk.
+type Uint64 struct{ val uint64 }
+
+// checkOk returns "" if tok.Uint64() succeeds and yields n.val, else a failure message.
+func (n Uint64) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Uint64(); {
+	case !ok:
+		return fmt.Sprintf("Token.Uint64() returned not OK for token: %v", tok.RawString())
+	case actual != n.val:
+		return fmt.Sprintf("Token.Uint64() got %v want %v for token: %v", actual, n.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Uint64() reports not ok, else a failure message.
+func (n Uint64) checkNok(tok text.Token) string {
+	if _, ok := tok.Uint64(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Uint64() returned OK for token: %v", tok.RawString())
+}
+
+// Uint32 checks a token through Token.Uint32; val is the value expected by checkOk.
+type Uint32 struct{ val uint32 }
+
+// checkOk returns "" if tok.Uint32() succeeds and yields n.val, else a failure message.
+func (n Uint32) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Uint32(); {
+	case !ok:
+		return fmt.Sprintf("Token.Uint32() returned not OK for token: %v", tok.RawString())
+	case actual != n.val:
+		return fmt.Sprintf("Token.Uint32() got %v want %v for token: %v", actual, n.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Uint32() reports not ok, else a failure message.
+func (n Uint32) checkNok(tok text.Token) string {
+	if _, ok := tok.Uint32(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Uint32() returned OK for token: %v", tok.RawString())
+}
+
+// Int64 checks a token through Token.Int64; val is the value expected by checkOk.
+type Int64 struct{ val int64 }
+
+// checkOk returns "" if tok.Int64() succeeds and yields n.val, else a failure message.
+func (n Int64) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Int64(); {
+	case !ok:
+		return fmt.Sprintf("Token.Int64() returned not OK for token: %v", tok.RawString())
+	case actual != n.val:
+		return fmt.Sprintf("Token.Int64() got %v want %v for token: %v", actual, n.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Int64() reports not ok, else a failure message.
+func (n Int64) checkNok(tok text.Token) string {
+	if _, ok := tok.Int64(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Int64() returned OK for token: %v", tok.RawString())
+}
+
+// Int32 checks a token through Token.Int32; val is the value expected by checkOk.
+type Int32 struct{ val int32 }
+
+// checkOk returns "" if tok.Int32() succeeds and yields n.val, else a failure message.
+func (n Int32) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Int32(); {
+	case !ok:
+		return fmt.Sprintf("Token.Int32() returned not OK for token: %v", tok.RawString())
+	case actual != n.val:
+		return fmt.Sprintf("Token.Int32() got %v want %v for token: %v", actual, n.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Int32() reports not ok, else a failure message.
+func (n Int32) checkNok(tok text.Token) string {
+	if _, ok := tok.Int32(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Int32() returned OK for token: %v", tok.RawString())
+}
+
+// Float64 checks a token through Token.Float64; val is the value expected by checkOk.
+type Float64 struct{ val float64 }
+
+// checkOk returns "" if tok.Float64() succeeds and yields n.val, else a failure message.
+// NOTE(review): != treats -0 and +0 as equal, so the sign of a zero is not verified here.
+func (n Float64) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Float64(); {
+	case !ok:
+		return fmt.Sprintf("Token.Float64() returned not OK for token: %v", tok.RawString())
+	case math.IsNaN(actual) && math.IsNaN(n.val):
+		return "" // NaN never compares equal, so a NaN pair is accepted explicitly.
+	case actual != n.val:
+		return fmt.Sprintf("Token.Float64() got %v want %v for token: %v", actual, n.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Float64() reports not ok, else a failure message.
+func (n Float64) checkNok(tok text.Token) string {
+	if _, ok := tok.Float64(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Float64() returned OK for token: %v", tok.RawString())
+}
+
+// Float32 checks a token through Token.Float32; val is the value expected by checkOk.
+type Float32 struct{ val float32 }
+
+// checkOk returns "" if tok.Float32() succeeds and yields n.val, else a failure message.
+// NOTE(review): != treats -0 and +0 as equal, so the sign of a zero is not verified here.
+func (n Float32) checkOk(tok text.Token) string {
+	switch actual, ok := tok.Float32(); {
+	case !ok:
+		return fmt.Sprintf("Token.Float32() returned not OK for token: %v", tok.RawString())
+	case math.IsNaN(float64(actual)) && math.IsNaN(float64(n.val)):
+		return "" // NaN never compares equal, so a NaN pair is accepted explicitly.
+	case actual != n.val:
+		return fmt.Sprintf("Token.Float32() got %v want %v for token: %v", actual, n.val, tok.RawString())
+	}
+	return ""
+}
+
+// checkNok returns "" if tok.Float32() reports not ok, else a failure message.
+func (n Float32) checkNok(tok text.Token) string {
+	if _, ok := tok.Float32(); !ok {
+		return ""
+	}
+	return fmt.Sprintf("Token.Float32() returned OK for token: %v", tok.RawString())
+}
+
+func TestDecoder(t *testing.T) {
+ const space = " \n\r\t"
+ tests := []struct {
+ in string
+ // want is a list of expected Tokens returned from calling Decoder.Read.
+ // Each item makes the test code invoke Decoder.Read and compare against
+ // R.K and R.E; R.T, R.P and R.RS describe further checks when set (see R).
+ want []R
+ }{
+ {
+ in: "",
+ want: []R{{K: text.EOF}},
+ },
+ {
+ in: "# comment",
+ want: []R{{K: text.EOF}},
+ },
+ {
+ in: space + "# comment" + space,
+ want: []R{{K: text.EOF}},
+ },
+ {
+ in: space,
+ want: []R{{K: text.EOF, P: len(space)}},
+ },
+ {
+ // Calling Read after EOF will keep returning EOF for
+ // succeeding Read calls.
+ in: space,
+ want: []R{
+ {K: text.EOF},
+ {K: text.EOF},
+ {K: text.EOF},
+ },
+ },
+ {
+ // NUL is an invalid whitespace since C++ uses C-strings.
+ in: "\x00",
+ want: []R{{E: "invalid field name: \x00"}},
+ },
+
+ // Field names.
+ {
+ in: "name",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, S: "name"}, RS: "name"},
+ {E: eofErr},
+ },
+ },
+ {
+ in: space + "name:" + space,
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "name"}},
+ {E: eofErr},
+ },
+ },
+ {
+ in: space + "name" + space + ":" + space,
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "name"}},
+ {E: eofErr},
+ },
+ },
+ {
+ in: "name # comment",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, S: "name"}},
+ {E: eofErr},
+ },
+ },
+ {
+ // Comments only extend until the newline.
+ in: "# comment \nname",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, S: "name"}, P: 11},
+ },
+ },
+ {
+ in: "name # comment \n:",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "name"}},
+ },
+ },
+ {
+ in: "name123",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, S: "name123"}},
+ },
+ },
+ {
+ in: "name_123",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, S: "name_123"}},
+ },
+ },
+ {
+ in: "_123",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, S: "_123"}},
+ },
+ },
+ {
+ in: ":",
+ want: []R{{E: "syntax error (line 1:1): invalid field name: :"}},
+ },
+ {
+ in: "\n\n\n {",
+ want: []R{{E: "syntax error (line 4:2): invalid field name: {"}},
+ },
+ {
+ in: "123name",
+ want: []R{{E: "invalid field name: 123name"}},
+ },
+ {
+ in: "[type]",
+ want: []R{
+ {K: text.Name, T: NT{K: text.TypeName, S: "type"}, RS: "[type]"},
+ },
+ },
+ {
+ // V1 allows this syntax. C++ does not, however, C++ also fails if
+ // field is Any and does not contain '/'.
+ in: "[/type]",
+ want: []R{
+ {K: text.Name, T: NT{K: text.TypeName, S: "/type"}},
+ },
+ },
+ {
+ in: "[.type]",
+ want: []R{{E: "invalid type URL/extension field name: [.type]"}},
+ },
+ {
+ in: "[pkg.Foo.extension_field]",
+ want: []R{
+ {K: text.Name, T: NT{K: text.TypeName, S: "pkg.Foo.extension_field"}},
+ },
+ },
+ {
+ in: "[domain.com/type]",
+ want: []R{
+ {K: text.Name, T: NT{K: text.TypeName, S: "domain.com/type"}},
+ },
+ },
+ {
+ in: "[domain.com/pkg.type]",
+ want: []R{
+ {K: text.Name, T: NT{K: text.TypeName, S: "domain.com/pkg.type"}},
+ },
+ },
+ {
+ in: "[sub.domain.com\x2fpath\x2fto\x2fproto.package.name]",
+ want: []R{
+ {
+ K: text.Name,
+ T: NT{
+ K: text.TypeName,
+ S: "sub.domain.com/path/to/proto.package.name",
+ },
+ RS: "[sub.domain.com\x2fpath\x2fto\x2fproto.package.name]",
+ },
+ },
+ },
+ {
+ // V2 no longer allows a quoted string for the Any type URL.
+ in: `["domain.com/pkg.type"]`,
+ want: []R{{E: `invalid type URL/extension field name: ["`}},
+ },
+ {
+ // V2 no longer allows a quoted string for the Any type URL.
+ in: `['domain.com/pkg.type']`,
+ want: []R{{E: `invalid type URL/extension field name: ['`}},
+ },
+ {
+ in: "[pkg.Foo.extension_field:",
+ want: []R{{E: "invalid type URL/extension field name: [pkg.Foo.extension_field:"}},
+ },
+ {
+ // V2 no longer allows whitespace within identifier "word".
+ in: "[proto.packa ge.field]",
+ want: []R{{E: "invalid type URL/extension field name: [proto.packa g"}},
+ },
+ {
+ // V2 no longer allows comments within identifier "word".
+ in: "[proto.packa # comment\n ge.field]",
+ want: []R{{E: "invalid type URL/extension field name: [proto.packa # comment\n g"}},
+ },
+ {
+ in: "[proto.package.]",
+ want: []R{{E: "invalid type URL/extension field name: [proto.package."}},
+ },
+ {
+ in: "[proto.package/]",
+ want: []R{{E: "invalid type URL/extension field name: [proto.package/"}},
+ },
+ {
+ in: "[proto.package",
+ want: []R{{E: eofErr}},
+ },
+ {
+ in: "[" + space + "type" + space + "]" + space + ":",
+ want: []R{
+ {
+ K: text.Name,
+ T: NT{
+ K: text.TypeName,
+ Sep: true,
+ S: "type",
+ },
+ RS: "[" + space + "type" + space + "]",
+ },
+ },
+ },
+ {
+ // Whitespaces/comments are only allowed between the identifiers and separators of a type name, not within an identifier.
+ in: "[" + space + "domain" + space + "." + space + "com # comment\n" +
+ "/" + "pkg" + space + "." + space + "type" + space + "]",
+ want: []R{
+ {K: text.Name, T: NT{K: text.TypeName, S: "domain.com/pkg.type"}},
+ },
+ },
+ {
+ in: "42",
+ want: []R{
+ {K: text.Name, T: NT{K: text.FieldNumber, N: 42}},
+ },
+ },
+ {
+ in: "0x42:",
+ want: []R{{E: "invalid field number: 0x42"}},
+ },
+ {
+ in: "042:",
+ want: []R{{E: "invalid field number: 042"}},
+ },
+ {
+ in: "123.456:",
+ want: []R{{E: "invalid field number: 123.456"}},
+ },
+ {
+ in: "-123",
+ want: []R{{E: "invalid field number: -123"}},
+ },
+ {
+ // Field number > math.MaxInt32.
+ in: "2147483648:",
+ want: []R{{E: "invalid field number: 2147483648"}},
+ },
+
+ // String field value. More string parsing specific testing in
+ // TestUnmarshalString.
+ {
+ in: `name: "hello world"`,
+ want: []R{
+ {K: text.Name},
+ {
+ K: text.Scalar,
+ T: ST{ok: Str{"hello world"}, nok: Enum{}},
+ RS: `"hello world"`,
+ },
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name: 'hello'`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ },
+ },
+ {
+ in: `name: "hello'`,
+ want: []R{
+ {K: text.Name},
+ {E: eofErr},
+ },
+ },
+ {
+ in: `name: 'hello`,
+ want: []R{
+ {K: text.Name},
+ {E: eofErr},
+ },
+ },
+ {
+ // Field name without separator is ok. prototext package will need
+ // to determine that this is not valid for scalar values.
+ in: space + `name` + space + `"hello"` + space,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ },
+ },
+ {
+ in: `name'hello'`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ },
+ },
+ {
+ in: `name: ` + space + `"hello"` + space + `,`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name` + space + `:` + `"hello"` + space + `;` + space,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:"hello" , ,`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar},
+ {E: "(line 1:16): invalid field name: ,"},
+ },
+ },
+ {
+ in: `name:"hello" , ;`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar},
+ {E: "(line 1:16): invalid field name: ;"},
+ },
+ },
+ {
+ in: `name:"hello" name:'world'`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"world"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:"hello", name:"world"`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"world"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:"hello"; name:"world",`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"world"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `foo:"hello"bar:"world"`,
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "foo"}},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "bar"}},
+ {K: text.Scalar, T: ST{ok: Str{"world"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `foo:"hello"[bar]:"world"`,
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "foo"}},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.Name, T: NT{K: text.TypeName, Sep: true, S: "bar"}},
+ {K: text.Scalar, T: ST{ok: Str{"world"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:"foo"` + space + `"bar"` + space + `'qux'`,
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "name"}},
+ {K: text.Scalar, T: ST{ok: Str{"foobarqux"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:"foo"'bar'"qux"`,
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "name"}},
+ {K: text.Scalar, T: ST{ok: Str{"foobarqux"}}},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:"foo"` + space + `"bar" # comment` + "\n'qux' # comment",
+ want: []R{
+ {K: text.Name, T: NT{K: text.IdentName, Sep: true, S: "name"}},
+ {K: text.Scalar, T: ST{ok: Str{"foobarqux"}}},
+ {K: text.EOF},
+ },
+ },
+
+ // Lists.
+ {
+ in: `name: [`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {E: eofErr},
+ },
+ },
+ {
+ in: `name: []`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.ListClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name []`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.ListClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name: [,`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {E: `(line 1:8): invalid scalar value: ,`},
+ },
+ },
+ {
+ in: `name: [` + space + `"hello"` + space + `]` + space,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}, P: len(space) + 7},
+ {K: text.ListClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name: ["hello",]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {E: `invalid scalar value: ]`},
+ },
+ },
+ {
+ in: `name: ["foo"` + space + `'bar' "qux"]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"foobarqux"}}},
+ {K: text.ListClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name:` + space + `["foo",` + space + "'bar', # comment\n\n" + `"qux"]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"foo"}}},
+ {K: text.Scalar, T: ST{ok: Str{"bar"}}},
+ {K: text.Scalar, T: ST{ok: Str{"qux"}}},
+ {K: text.ListClose},
+ {K: text.EOF},
+ },
+ },
+
+ {
+ // List within list is not allowed.
+ in: `name: [[]]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {E: `syntax error (line 1:8): invalid scalar value: [`},
+ },
+ },
+ {
+ // List items need to be separated by ,.
+ in: `name: ["foo" true]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"foo"}}},
+ {E: `syntax error (line 1:14): unexpected character 't'`},
+ },
+ },
+ {
+ in: `name: ["foo"; "bar"]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"foo"}}},
+ {E: `syntax error (line 1:13): unexpected character ';'`},
+ },
+ },
+ {
+ in: `name: ["foo", true, ENUM, 1.0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"foo"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"true"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"ENUM"}}},
+ {K: text.Scalar, T: ST{ok: Float32{1.0}}},
+ {K: text.ListClose},
+ },
+ },
+
+ // Boolean literal values.
+ {
+ in: `name: True`,
+ want: []R{
+ {K: text.Name},
+ {
+ K: text.Scalar,
+ T: ST{ok: Bool{true}},
+ },
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name false`,
+ want: []R{
+ {K: text.Name},
+ {
+ K: text.Scalar,
+ T: ST{ok: Bool{false}},
+ },
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `name: [t, f, True, False, true, false, 1, 0, 0x01, 0x00, 01, 00]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // Looks like boolean but not.
+ in: `name: [tRUe, falSE, -1, -0, -0x01, -0x00, -01, -00, 0.0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.Scalar, T: ST{nok: Bool{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `foo: true[bar] false`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Bool{false}}},
+ },
+ },
+
+ // Enum field values.
+ {
+ in: space + `name: ENUM`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Enum{"ENUM"}}},
+ },
+ },
+ {
+ in: space + `name:[TRUE, FALSE, T, F, t, f]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Enum{"TRUE"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"FALSE"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"T"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"F"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"t"}}},
+ {K: text.Scalar, T: ST{ok: Enum{"f"}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `foo: Enum1[bar]:Enum2`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Enum{"Enum1"}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Enum{"Enum2"}}},
+ },
+ },
+ {
+ // Invalid enum values.
+ in: `name: [-inf, -foo, "string", 42, 1.0, 0x47]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Enum{}}},
+ {K: text.Scalar, T: ST{nok: Enum{}}},
+ {K: text.Scalar, T: ST{nok: Enum{}}},
+ {K: text.Scalar, T: ST{nok: Enum{}}},
+ {K: text.Scalar, T: ST{nok: Enum{}}},
+ {K: text.Scalar, T: ST{nok: Enum{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `name: true.`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: true.`},
+ },
+ },
+
+ // Numeric values.
+ {
+ in: `nums:42 nums:0x2A nums:052`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Uint64{42}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Uint64{42}}},
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Uint64{42}}},
+ },
+ },
+ {
+ in: `nums:[-42, -0x2a, -052]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums:[-42, -0x2a, -052]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Int64{-42}}},
+ {K: text.Scalar, T: ST{ok: Int64{-42}}},
+ {K: text.Scalar, T: ST{ok: Int64{-42}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0,0x0,00,-9876543210,9876543210,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Uint64{0}}},
+ {K: text.Scalar, T: ST{ok: Int64{0}}},
+ {K: text.Scalar, T: ST{ok: Uint64{0}}},
+ {K: text.Scalar, T: ST{ok: Int64{-9876543210}}},
+ {K: text.Scalar, T: ST{ok: Uint64{9876543210}}},
+ {K: text.Scalar, T: ST{ok: Uint64{0x0123456789abcdef}}},
+ {K: text.Scalar, T: ST{ok: Int64{-0x0123456789abcdef}}},
+ {K: text.Scalar, T: ST{ok: Uint64{01234567}}},
+ {K: text.Scalar, T: ST{ok: Int64{-01234567}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0,0x0,00,-876543210,876543210,0x01234,-0x01234,01234567,-01234567]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Uint32{0}}},
+ {K: text.Scalar, T: ST{ok: Int32{0}}},
+ {K: text.Scalar, T: ST{ok: Uint32{0}}},
+ {K: text.Scalar, T: ST{ok: Int32{-876543210}}},
+ {K: text.Scalar, T: ST{ok: Uint32{876543210}}},
+ {K: text.Scalar, T: ST{ok: Uint32{0x01234}}},
+ {K: text.Scalar, T: ST{ok: Int32{-0x01234}}},
+ {K: text.Scalar, T: ST{ok: Uint32{01234567}}},
+ {K: text.Scalar, T: ST{ok: Int32{-01234567}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [` +
+ fmt.Sprintf("%d", uint64(math.MaxUint64)) + `,` +
+ fmt.Sprintf("%d", uint32(math.MaxUint32)) + `,` +
+ fmt.Sprintf("%d", math.MaxInt64) + `,` +
+ fmt.Sprintf("%d", math.MinInt64) + `,` +
+ fmt.Sprintf("%d", math.MaxInt32) + `,` +
+ fmt.Sprintf("%d", math.MinInt32) +
+ `]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Uint64{math.MaxUint64}}},
+ {K: text.Scalar, T: ST{ok: Uint32{math.MaxUint32}}},
+ {K: text.Scalar, T: ST{ok: Int64{math.MaxInt64}}},
+ {K: text.Scalar, T: ST{ok: Int64{math.MinInt64}}},
+ {K: text.Scalar, T: ST{ok: Int32{math.MaxInt32}}},
+ {K: text.Scalar, T: ST{ok: Int32{math.MinInt32}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // Integer exceeds range.
+ in: `nums: [` +
+ `18446744073709551616,` + // max uint64 + 1
+ fmt.Sprintf("%d", uint64(math.MaxUint32+1)) + `,` +
+ fmt.Sprintf("%d", uint64(math.MaxInt64+1)) + `,` +
+ `-9223372036854775809,` + // min int64 - 1
+ fmt.Sprintf("%d", uint64(math.MaxInt32+1)) + `,` +
+ fmt.Sprintf("%d", int64(math.MinInt32-1)) + `` +
+ `]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0xbeefbeef, 0xbeefbeefbeefbeef]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {
+ K: text.Scalar,
+ T: func() ST {
+ if flags.ProtoLegacy {
+ return ST{ok: Int32{-1091584273}}
+ }
+ return ST{nok: Int32{}}
+ }(),
+ },
+ {
+ K: text.Scalar,
+ T: func() ST {
+ if flags.ProtoLegacy {
+ return ST{ok: Int64{-4688318750159552785}}
+ }
+ return ST{nok: Int64{}}
+ }(),
+ },
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float64{0.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{0.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{1.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{10.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{-0.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{-1.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{-10.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{1.0}}},
+ {K: text.Scalar, T: ST{ok: Float64{0.1e-3}}},
+ {K: text.Scalar, T: ST{ok: Float64{1.5e+5}}},
+ {K: text.Scalar, T: ST{ok: Float64{1.0e+10}}},
+ {K: text.Scalar, T: ST{ok: Float64{0.0}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float32{0.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{0.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{1.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{10.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{-0.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{-1.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{-10.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{1.0}}},
+ {K: text.Scalar, T: ST{ok: Float32{0.1e-3}}},
+ {K: text.Scalar, T: ST{ok: Float32{1.5e+5}}},
+ {K: text.Scalar, T: ST{ok: Float32{1.0e+10}}},
+ {K: text.Scalar, T: ST{ok: Float32{0.0}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0.,1f,10F,1e1,1.10]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.Scalar, T: ST{nok: Int64{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0.,1f,10F,1e1,1.10]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.Scalar, T: ST{nok: Int32{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0.,1f,10F,1e1,1.10]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [0.,1f,10F,1e1,1.10]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [` +
+ fmt.Sprintf("%g", math.MaxFloat32) + `,` +
+ fmt.Sprintf("%g", -math.MaxFloat32) + `,` +
+ fmt.Sprintf("%g", math.MaxFloat32*2) + `,` +
+ fmt.Sprintf("%g", -math.MaxFloat32*2) + `,` +
+ `3.59539e+308,` + // math.MaxFloat64 * 2
+ `-3.59539e+308,` + // -math.MaxFloat64 * 2
+ fmt.Sprintf("%d000", uint64(math.MaxUint64)) +
+ `]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.MaxFloat32)}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(-math.MaxFloat32)}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.MaxUint64) * 1000}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `nums: [` +
+ fmt.Sprintf("%g", math.MaxFloat64) + `,` +
+ fmt.Sprintf("%g", -math.MaxFloat64) + `,` +
+ `3.59539e+308,` + // math.MaxFloat64 * 2
+ `-3.59539e+308,` + // -math.MaxFloat64 * 2
+ fmt.Sprintf("%d000", uint64(math.MaxUint64)) +
+ `]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float64{math.MaxFloat64}}},
+ {K: text.Scalar, T: ST{ok: Float64{-math.MaxFloat64}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(-1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{float64(math.MaxUint64) * 1000}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // -0 is only valid for signed types. It is not valid for unsigned types.
+ in: `num: [-0, -0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{nok: Uint32{}}},
+ {K: text.Scalar, T: ST{nok: Uint64{}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // -0 is only valid for signed types. It is not valid for unsigned types.
+ in: `num: [-0, -0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Int32{0}}},
+ {K: text.Scalar, T: ST{ok: Int64{0}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // Negative zeros on float64 should preserve sign bit.
+ in: `num: [-0, -.0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float64{math.Copysign(0, -1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Copysign(0, -1)}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // Negative zeros on float32 should preserve sign bit.
+ in: `num: [-0, -.0]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Copysign(0, -1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Copysign(0, -1))}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `num: +0`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: +`},
+ },
+ },
+ {
+ in: `num: 01.1234`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: 01.1234`},
+ },
+ },
+ {
+ in: `num: 0x`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: 0x`},
+ },
+ },
+ {
+ in: `num: 0xX`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: 0xX`},
+ },
+ },
+ {
+ in: `num: 0800`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: 0800`},
+ },
+ },
+ {
+ in: `num: 1.`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Float32{1.0}}},
+ },
+ },
+ {
+ in: `num: -.`,
+ want: []R{
+ {K: text.Name},
+ {E: `invalid scalar value: -.`},
+ },
+ },
+
+ // Float special literal values, case-insensitive match.
+ {
+ in: `name:[nan, NaN, Nan, NAN]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float64{math.NaN()}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.NaN()}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.NaN()}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.NaN()}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `name:[inf, INF, infinity, Infinity, INFinity]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(1)}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `name:[-inf, -INF, -infinity, -Infinity, -INFinity]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(-1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(-1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(-1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(-1)}}},
+ {K: text.Scalar, T: ST{ok: Float64{math.Inf(-1)}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `name:[nan, NaN, Nan, NAN]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.NaN())}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.NaN())}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.NaN())}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.NaN())}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `name:[inf, INF, infinity, Infinity, INFinity]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(1))}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ in: `name:[-inf, -INF, -infinity, -Infinity, -INFinity]`,
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.Scalar, T: ST{ok: Float32{float32(math.Inf(-1))}}},
+ {K: text.ListClose},
+ },
+ },
+ {
+ // C++ permits this, but we currently reject this. It is easy to add
+ // if needed.
+ in: `name: -nan`,
+ want: []R{
+ {K: text.Name},
+ {K: text.Scalar, T: ST{nok: Float64{}}},
+ },
+ },
+ // Messages.
+ {
+ in: `m: {}`,
+ want: []R{
+ {K: text.Name},
+ {K: text.MessageOpen},
+ {K: text.MessageClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `m: <>`,
+ want: []R{
+ {K: text.Name},
+ {K: text.MessageOpen},
+ {K: text.MessageClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: space + `m {` + space + "\n# comment\n" + `}` + space,
+ want: []R{
+ {K: text.Name},
+ {K: text.MessageOpen},
+ {K: text.MessageClose},
+ },
+ },
+ {
+ in: `m { foo: < bar: "hello" > }`,
+ want: []R{
+ {K: text.Name, RS: "m"},
+ {K: text.MessageOpen},
+
+ {K: text.Name, RS: "foo"},
+ {K: text.MessageOpen},
+
+ {K: text.Name, RS: "bar"},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+
+ {K: text.MessageClose},
+
+ {K: text.MessageClose},
+ },
+ },
+ {
+ in: `list [ <s:"hello">, {s:"world"} ]`,
+ want: []R{
+ {K: text.Name, RS: "list"},
+ {K: text.ListOpen},
+
+ {K: text.MessageOpen},
+ {K: text.Name, RS: "s"},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+ {K: text.MessageClose},
+
+ {K: text.MessageOpen},
+ {K: text.Name, RS: "s"},
+ {K: text.Scalar, T: ST{ok: Str{"world"}}},
+ {K: text.MessageClose},
+
+ {K: text.ListClose},
+ {K: text.EOF},
+ },
+ },
+ {
+ in: `m: { >`,
+ want: []R{
+ {K: text.Name},
+ {K: text.MessageOpen},
+ {E: `mismatched close character '>'`},
+ },
+ },
+ {
+ in: `m: <s: "hello"}`,
+ want: []R{
+ {K: text.Name},
+ {K: text.MessageOpen},
+
+ {K: text.Name},
+ {K: text.Scalar, T: ST{ok: Str{"hello"}}},
+
+ {E: `mismatched close character '}'`},
+ },
+ },
+ {
+ in: `{}`,
+ want: []R{{E: `invalid field name: {`}},
+ },
+ {
+ in: `
+m: {
+ foo: true;
+ bar: {
+ enum: ENUM
+ list: [ < >, { } ] ;
+ }
+ [qux]: "end"
+}
+ `,
+ want: []R{
+ {K: text.Name},
+ {K: text.MessageOpen},
+
+ {K: text.Name, RS: "foo"},
+ {K: text.Scalar, T: ST{ok: Bool{true}}},
+
+ {K: text.Name, RS: "bar"},
+ {K: text.MessageOpen},
+
+ {K: text.Name, RS: "enum"},
+ {K: text.Scalar, T: ST{ok: Enum{"ENUM"}}},
+
+ {K: text.Name, RS: "list"},
+ {K: text.ListOpen},
+ {K: text.MessageOpen},
+ {K: text.MessageClose},
+ {K: text.MessageOpen},
+ {K: text.MessageClose},
+ {K: text.ListClose},
+
+ {K: text.MessageClose},
+
+ {K: text.Name, RS: "[qux]"},
+ {K: text.Scalar, T: ST{ok: Str{"end"}}},
+
+ {K: text.MessageClose},
+ {K: text.EOF},
+ },
+ },
+
+ // Other syntax errors.
+ {
+ in: "x: -",
+ want: []R{
+ {K: text.Name},
+ {E: `syntax error (line 1:4): invalid scalar value: -`},
+ },
+ },
+ {
+ in: "x:[\"💩\"x",
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"💩"}}, P: 3},
+ {E: `syntax error (line 1:7)`},
+ },
+ },
+ {
+ in: "x:\n\n[\"🔥🔥🔥\"x",
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"🔥🔥🔥"}}, P: 5},
+ {E: `syntax error (line 3:7)`},
+ },
+ },
+ {
+ // multi-rune emojis; could be column:8
+ in: "x:[\"👍🏻👍🏿\"x",
+ want: []R{
+ {K: text.Name},
+ {K: text.ListOpen},
+ {K: text.Scalar, T: ST{ok: Str{"👍🏻👍🏿"}}, P: 3},
+ {E: `syntax error (line 1:10)`},
+ },
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+ tc := tc
+ dec := text.NewDecoder([]byte(tc.in))
+ for i, want := range tc.want {
+ peekTok, peekErr := dec.Peek()
+ tok, err := dec.Read()
+ if err != nil {
+ if want.E == "" {
+ errorf(t, tc.in, "Read() got unexpected error: %v", err)
+ } else if !strings.Contains(err.Error(), want.E) {
+ errorf(t, tc.in, "Read() got %q, want %q", err, want.E)
+ }
+ return
+ }
+ if want.E != "" {
+ errorf(t, tc.in, "Read() got nil error, want %q", want.E)
+ return
+ }
+ gotK := tok.Kind()
+ if gotK != want.K {
+ errorf(t, tc.in, "Read() got %v, want %v", gotK, want.K)
+ return
+ }
+ checkToken(t, tok, i, want, tc.in)
+ if !cmp.Equal(tok, peekTok, cmp.Comparer(text.TokenEquals)) {
+ errorf(t, tc.in, "Peek() %+v != Read() token %+v", peekTok, tok)
+ }
+ if err != peekErr {
+ errorf(t, tc.in, "Peek() error %v != Read() error %v", err, peekErr)
+ }
+ }
+ })
+ }
+}
+
+// checkToken verifies tok against the expectations recorded in r and reports
+// every mismatch through errorf. idx is the index of r within the test case's
+// want list and in is the test input; both are only used to label failures.
+//
+// Token.Pos is checked only when r.P > 0, Token.RawString only when r.RS is
+// non-empty, and the kind-specific details only when r.T is set.
+func checkToken(t *testing.T, tok text.Token, idx int, r R, in string) {
+	// Validate Token.Pos() if R.P is set.
+	if r.P > 0 {
+		if got := tok.Pos(); got != r.P {
+			errorf(t, in, "want#%d: Token.Pos() got %v want %v", idx, got, r.P)
+		}
+	}
+
+	// Validate Token.RawString if R.RS is set.
+	if len(r.RS) > 0 {
+		if got := tok.RawString(); got != r.RS {
+			// Report the expected raw string, not the expected position.
+			errorf(t, in, "want#%d: Token.RawString() got %v want %v", idx, got, r.RS)
+		}
+	}
+
+	// Skip checking for Token details if r.T is not set.
+	if r.T == nil {
+		return
+	}
+
+	switch tok.Kind() {
+	case text.Name:
+		want := r.T.(NT)
+		kind := tok.NameKind()
+		if kind != want.K {
+			errorf(t, in, "want#%d: Token.NameKind() got %v want %v", idx, kind, want.K)
+			return
+		}
+		// Only call the accessor that matches the name kind; the others are
+		// documented to panic on a token of the wrong kind.
+		switch kind {
+		case text.IdentName:
+			if got := tok.IdentName(); got != want.S {
+				errorf(t, in, "want#%d: Token.IdentName() got %v want %v", idx, got, want.S)
+			}
+		case text.TypeName:
+			if got := tok.TypeName(); got != want.S {
+				errorf(t, in, "want#%d: Token.TypeName() got %v want %v", idx, got, want.S)
+			}
+		case text.FieldNumber:
+			if got := tok.FieldNumber(); got != want.N {
+				errorf(t, in, "want#%d: Token.FieldNumber() got %v want %v", idx, got, want.N)
+			}
+		}
+
+	case text.Scalar:
+		want := r.T.(ST)
+		// ok lists an accessor expected to succeed, nok one expected to fail;
+		// each check returns a non-empty message on mismatch.
+		if ok := want.ok; ok != nil {
+			if err := ok.checkOk(tok); err != "" {
+				errorf(t, in, "want#%d: %s", idx, err)
+			}
+		}
+		if nok := want.nok; nok != nil {
+			if err := nok.checkNok(tok); err != "" {
+				errorf(t, in, "want#%d: %s", idx, err)
+			}
+		}
+	}
+}
+
+// errorf reports a test failure through t.Errorf, prefixing the formatted
+// message with the test input so a failure can be matched to its test case.
+// It is marked as a test helper so the failure is attributed to the caller.
+func errorf(t *testing.T, in string, fmtStr string, args ...interface{}) {
+	t.Helper()
+	// The input is always the first format argument, followed by the caller's.
+	all := make([]interface{}, 0, len(args)+1)
+	all = append(all, in)
+	all = append(all, args...)
+	t.Errorf("input:\n%s\n~end~\n"+fmtStr, all...)
+}
+
+func TestUnmarshalString(t *testing.T) {
+ tests := []struct {
+ in string
+ // want is expected string result.
+ want string
+ // err is expected error substring from calling DecodeString if set.
+ err string
+ }{
+ {
+ in: func() string {
+ var b []byte
+ for i := 0; i < utf8.RuneSelf; i++ {
+ switch i {
+ case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
+ default:
+ b = append(b, byte(i))
+ }
+ }
+ return "'" + string(b) + "'"
+ }(),
+ want: "\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f",
+ },
+ {
+ in: "'\xde\xad\xbe\xef'",
+ err: `invalid UTF-8 detected`,
+ },
+ {
+ // Valid UTF-8 wire encoding, but sub-optimal encoding.
+ in: "'\xc0\x80'",
+ err: "invalid UTF-8 detected",
+ },
+ {
+ // Valid UTF-8 wire encoding, but invalid rune (surrogate pair).
+ in: "'\xed\xa0\x80'",
+ err: "invalid UTF-8 detected",
+ },
+ {
+ // Valid UTF-8 wire encoding, but invalid rune (above max rune).
+ in: "'\xf7\xbf\xbf\xbf'",
+ err: "invalid UTF-8 detected",
+ },
+ {
+ // Valid UTF-8 wire encoding of the RuneError rune.
+ in: "'\xef\xbf\xbd'",
+ want: string(utf8.RuneError),
+ },
+ {
+ in: "'hello\u1234world'",
+ want: "hello\u1234world",
+ },
+ {
+ in: `'\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`,
+ want: "\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff",
+ },
+ {
+ in: `str: '\8'`,
+ err: `invalid escape code "\\8" in string`,
+ },
+ {
+ in: `'\1x'`,
+ want: "\001x",
+ },
+ {
+ in: `'\12x'`,
+ want: "\012x",
+ },
+ {
+ in: `'\123x'`,
+ want: "\123x",
+ },
+ {
+ in: `'\1234x'`,
+ want: "\1234x",
+ },
+ {
+ in: `'\1'`,
+ want: "\001",
+ },
+ {
+ in: `'\12'`,
+ want: "\012",
+ },
+ {
+ in: `'\123'`,
+ want: "\123",
+ },
+ {
+ in: `'\1234'`,
+ want: "\1234",
+ },
+ {
+ in: `'\377'`,
+ want: "\377",
+ },
+ {
+ // Overflow octal escape.
+ in: `'\400'`,
+ err: `invalid octal escape code "\\400" in string`,
+ },
+ {
+ in: `'\xfx'`,
+ want: "\x0fx",
+ },
+ {
+ in: `'\xffx'`,
+ want: "\xffx",
+ },
+ {
+ in: `'\xfffx'`,
+ want: "\xfffx",
+ },
+ {
+ in: `'\xf'`,
+ want: "\x0f",
+ },
+ {
+ in: `'\xff'`,
+ want: "\xff",
+ },
+ {
+ in: `'\xfff'`,
+ want: "\xfff",
+ },
+ {
+ in: `'\xz'`,
+ err: `invalid hex escape code "\\x" in string`,
+ },
+ {
+ in: `'\uPo'`,
+ err: eofErr,
+ },
+ {
+ in: `'\uPoo'`,
+ err: `invalid Unicode escape code "\\uPoo'" in string`,
+ },
+ {
+ in: `str: '\uPoop'`,
+ err: `invalid Unicode escape code "\\uPoop" in string`,
+ },
+ {
+ // Unmatched surrogate pair.
+ in: `str: '\uDEAD'`,
+ err: `unexpected EOF`, // trying to read other half
+ },
+ {
+ // Surrogate pair with invalid other half.
+ in: `str: '\uDEAD\u0000'`,
+ err: `invalid Unicode escape code "\\u0000" in string`,
+ },
+ {
+ // Properly matched surrogate pair.
+ in: `'\uD800\uDEAD'`,
+ want: "𐊭",
+ },
+ {
+ // Overflow on Unicode rune.
+ in: `'\U00110000'`,
+ err: `invalid Unicode escape code "\\U00110000" in string`,
+ },
+ {
+ in: `'\z'`,
+ err: `invalid escape code "\\z" in string`,
+ },
+ {
+ // Strings cannot have NUL literal since C-style strings forbid them.
+ in: "'\x00'",
+ err: `invalid character '\x00' in string`,
+ },
+ {
+ // Strings cannot have newline literal. The C++ permits them if an
+ // option is specified to allow them. In Go, we always forbid them.
+ in: "'\n'",
+ err: `invalid character '\n' in string`,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+ got, err := text.UnmarshalString(tc.in)
+ if err != nil {
+ if tc.err == "" {
+ errorf(t, tc.in, "UnmarshalString() got unexpected error: %q", err)
+ } else if !strings.Contains(err.Error(), tc.err) {
+ errorf(t, tc.in, "UnmarshalString() error got %q, want %q", err, tc.err)
+ }
+ return
+ }
+ if tc.err != "" {
+ errorf(t, tc.in, "UnmarshalString() got nil error, want %q", tc.err)
+ return
+ }
+ if got != tc.want {
+ errorf(t, tc.in, "UnmarshalString()\n[got]\n%s\n[want]\n%s", got, tc.want)
+ }
+ })
+ }
+}
+
+// TestPosition checks the line and column numbers that Decoder.Position
+// reports for byte offsets into a three-line input.
+func TestPosition(t *testing.T) {
+	const input = "0123456789\n12345\n789"
+	dec := text.NewDecoder([]byte(input))
+
+	for _, tc := range []struct {
+		pos int
+		row int
+		col int
+	}{
+		{pos: 0, row: 1, col: 1},
+		// Offset 10 is the newline that terminates the first line.
+		{pos: 10, row: 1, col: 11},
+		{pos: 11, row: 2, col: 1},
+		{pos: 18, row: 3, col: 2},
+	} {
+		t.Run("", func(t *testing.T) {
+			gotRow, gotCol := dec.Position(tc.pos)
+			if gotRow == tc.row && gotCol == tc.col {
+				return
+			}
+			t.Errorf("Position(%d) got (%d,%d) want (%d,%d)", tc.pos, gotRow, gotCol, tc.row, tc.col)
+		})
+	}
+}
diff --git a/internal/encoding/text/decode_token.go b/internal/encoding/text/decode_token.go
new file mode 100644
index 0000000..b7b4e2f
--- /dev/null
+++ b/internal/encoding/text/decode_token.go
@@ -0,0 +1,370 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+package text
+
+import (
+ "bytes"
+ "fmt"
+ "math"
+ "strconv"
+ "strings"
+
+ "google.golang.org/protobuf/internal/flags"
+)
+
+// Kind represents a token kind expressible in the textproto format.
+type Kind uint8
+
+const (
+ // Invalid is the zero value of Kind.
+ Invalid Kind = iota
+ // EOF is rendered as "eof" by Kind.String.
+ EOF
+ Name // Name indicates the field name.
+ Scalar // Scalar indicates a scalar value, e.g. "string", 47, ENUM_LITERAL, true.
+ // MessageOpen and MessageClose are rendered as "{" and "}" by Kind.String.
+ MessageOpen
+ MessageClose
+ // ListOpen and ListClose are rendered as "[" and "]" by Kind.String.
+ ListOpen
+ ListClose
+
+ // comma and semi-colon are only for parsing in between values and should not be exposed.
+ comma
+ semicolon
+
+ // bof indicates beginning of file, which is the default token
+ // kind at the beginning of parsing.
+ bof = Invalid
+)
+
+// String returns a short display form of the Kind: a word for Invalid, EOF,
+// Scalar and Name, the punctuation character for the delimiter and separator
+// kinds, and "<invalid:N>" for any other numeric value N.
+func (t Kind) String() string {
+	names := [...]string{
+		Invalid:      "<invalid>",
+		EOF:          "eof",
+		Name:         "name",
+		Scalar:       "scalar",
+		MessageOpen:  "{",
+		MessageClose: "}",
+		ListOpen:     "[",
+		ListClose:    "]",
+		comma:        ",",
+		semicolon:    ";",
+	}
+	if int(t) < len(names) {
+		return names[t]
+	}
+	return fmt.Sprintf("<invalid:%v>", uint8(t))
+}
+
+// NameKind represents different types of field names.
+//
+// The values are consecutive integers (1, 2, 3), not independent bit flags:
+// FieldNumber shares bits with both IdentName and TypeName, so a NameKind must
+// be compared for equality rather than tested with a bitwise AND.
+type NameKind uint8
+
+const (
+ IdentName NameKind = iota + 1
+ TypeName
+ FieldNumber
+)
+
+// String returns the constant's own name for IdentName, TypeName and
+// FieldNumber, and "<invalid:N>" for any other numeric value N.
+func (t NameKind) String() string {
+	if t < IdentName || t > FieldNumber {
+		return fmt.Sprintf("<invalid:%v>", uint8(t))
+	}
+	names := [...]string{
+		IdentName:   "IdentName",
+		TypeName:    "TypeName",
+		FieldNumber: "FieldNumber",
+	}
+	return names[t]
+}
+
+// Bit mask in Token.attrs to indicate if a Name token is followed by the
+// separator char ':'. The field name separator char is optional for message
+// field or repeated message field, but required for all other types. Decoder
+// simply indicates whether a Name token is followed by separator or not. It is
+// up to the prototext package to validate.
+const hasSeparator = 1 << 7
+
+// Scalar value types.
+const (
+ numberValue = iota + 1
+ stringValue
+ literalValue
+)
+
+// Bit mask in Token.numAttrs to indicate that the number is negative.
+const isNegative = 1 << 7
+
+// Token provides a parsed token kind and value. Values are provided by the
+// different accessor methods.
+//
+// Tokens can be compared field by field with TokenEquals.
+type Token struct {
+ // Kind of the Token object.
+ kind Kind
+ // attrs contains metadata for the following Kinds:
+ // Name: hasSeparator bit and one of NameKind.
+ // Scalar: one of numberValue, stringValue, literalValue.
+ // For a Name token the NameKind is recovered with attrs &^ hasSeparator.
+ attrs uint8
+ // numAttrs contains metadata for numberValue:
+ // - highest bit is whether negative or positive.
+ // - lower bits indicate one of numDec, numHex, numOct, numFloat.
+ numAttrs uint8
+ // pos provides the position of the token in the original input.
+ pos int
+ // raw bytes of the serialized token.
+ // This is a subslice into the original input.
+ raw []byte
+ // str contains parsed string for the following:
+ // - stringValue of Scalar kind
+ // - numberValue of Scalar kind
+ // - TypeName of Name kind
+ str string
+}
+
+// Kind returns the token kind. It is valid to call on any Token; the other
+// accessors should be chosen based on its result.
+func (t Token) Kind() Kind {
+ return t.kind
+}
+
+// RawString returns the raw bytes of the token, as read from the input,
+// converted to a string.
+func (t Token) RawString() string {
+ return string(t.raw)
+}
+
+// Pos returns the token position from the input.
+// NOTE(review): presumably a byte offset suitable for Decoder.Position; confirm
+// against the decoder.
+func (t Token) Pos() int {
+ return t.pos
+}
+
+// NameKind reports which kind of field name the token holds: IdentName,
+// TypeName or FieldNumber. It panics if the token is not of Name kind.
+func (t Token) NameKind() NameKind {
+	if t.kind != Name {
+		panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
+	}
+	// Clear the separator flag so that only the NameKind value remains.
+	return NameKind(t.attrs &^ hasSeparator)
+}
+
+// HasSeparator reports whether the field name is followed by the separator
+// char ':'. It panics if the token is not of Name kind.
+func (t Token) HasSeparator() bool {
+	if t.kind != Name {
+		panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
+	}
+	return t.attrs&hasSeparator != 0
+}
+
+// IdentName returns the raw identifier text of a Name token whose NameKind is
+// IdentName. It panics for any other token, including Name tokens of a
+// different NameKind.
+func (t Token) IdentName() string {
+	// NameKind values are consecutive integers rather than bit flags, so the
+	// kind must be compared for equality once the separator bit is cleared; a
+	// bitwise AND would also accept FieldNumber (3&1 != 0).
+	nameKind := NameKind(t.attrs &^ hasSeparator)
+	if t.kind == Name && nameKind == IdentName {
+		return string(t.raw)
+	}
+	panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, nameKind))
+}
+
+// TypeName returns the parsed type name of a Name token whose NameKind is
+// TypeName. It panics for any other token, including Name tokens of a
+// different NameKind.
+func (t Token) TypeName() string {
+	// NameKind values are consecutive integers rather than bit flags, so the
+	// kind must be compared for equality once the separator bit is cleared; a
+	// bitwise AND would also accept FieldNumber (3&2 != 0).
+	nameKind := NameKind(t.attrs &^ hasSeparator)
+	if t.kind == Name && nameKind == TypeName {
+		return t.str
+	}
+	panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, nameKind))
+}
+
+// FieldNumber returns the value for FieldNumber type. It returns a
+// non-negative int32 value. Caller will still need to validate for the correct
+// field number range. It panics for any other token, including Name tokens of
+// a different NameKind.
+func (t Token) FieldNumber() int32 {
+	// NameKind values are consecutive integers rather than bit flags, so the
+	// kind must be compared for equality once the separator bit is cleared; a
+	// bitwise AND against FieldNumber (3) would also accept IdentName and
+	// TypeName tokens, whose raw text then fails to parse and yields 0.
+	nameKind := NameKind(t.attrs &^ hasSeparator)
+	if t.kind != Name || nameKind != FieldNumber {
+		panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, nameKind))
+	}
+	// Following should not return an error as it had already been called right
+	// before this Token was constructed.
+	num, _ := strconv.ParseInt(string(t.raw), 10, 32)
+	return int32(num)
+}
+
+// String returns the parsed string of a Scalar token that holds a string
+// value. The second result is false, and the string empty, for any other
+// token.
+func (t Token) String() (string, bool) {
+	if t.kind == Scalar && t.attrs == stringValue {
+		return t.str, true
+	}
+	return "", false
+}
+
+// Enum returns the raw text of a Scalar literal token for use as an enum
+// literal. The second result is false if the token is not a literal or if its
+// raw text starts with '-'.
+func (t Token) Enum() (string, bool) {
+	if t.kind != Scalar || t.attrs != literalValue {
+		return "", false
+	}
+	// NOTE(review): presumably this excludes negative float literals such as
+	// -inf from being read as enum names; confirm against the literal parser.
+	if startsWithMinus := len(t.raw) > 0 && t.raw[0] == '-'; startsWithMinus {
+		return "", false
+	}
+	return string(t.raw), true
+}
+
+// Bool returns the boolean value of a Scalar token. A literal must be one of
+// the spellings in boolLits; a number must parse as the unsigned integer 0 or
+// 1. The second result is false for anything else.
+func (t Token) Bool() (bool, bool) {
+	if t.kind != Scalar {
+		return false, false
+	}
+	if t.attrs == literalValue {
+		// A missing key yields (false, false), which is the failure result.
+		b, ok := boolLits[string(t.raw)]
+		return b, ok
+	}
+	if t.attrs != numberValue {
+		return false, false
+	}
+	// Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
+	// 0x1, etc.
+	n, err := strconv.ParseUint(t.str, 0, 64)
+	if err != nil || n > 1 {
+		return false, false
+	}
+	return n == 1, true
+}
+
+// These exact boolean literals are the ones supported in C++.
+var boolLits = map[string]bool{
+ "t": true,
+ "true": true,
+ "True": true,
+ "f": false,
+ "false": false,
+ "False": false,
+}
+
+// Uint64 returns the uint64 value of a Scalar number token. The second result
+// is false if the token is not a number, is negative, is a float, or does not
+// fit in 64 bits.
+func (t Token) Uint64() (uint64, bool) {
+	if t.kind != Scalar || t.attrs != numberValue {
+		return 0, false
+	}
+	// Negative numbers and floats can never be unsigned integers.
+	if t.numAttrs&(isNegative|numFloat) != 0 {
+		return 0, false
+	}
+	// Base 0 lets strconv pick the base from the number's prefix.
+	val, err := strconv.ParseUint(t.str, 0, 64)
+	if err != nil {
+		return 0, false
+	}
+	return val, true
+}
+
+// Uint32 returns the uint32 value of a Scalar number token. The second result
+// is false if the token is not a number, is negative, is a float, or does not
+// fit in 32 bits.
+func (t Token) Uint32() (uint32, bool) {
+	if t.kind != Scalar || t.attrs != numberValue {
+		return 0, false
+	}
+	// Negative numbers and floats can never be unsigned integers.
+	if t.numAttrs&(isNegative|numFloat) != 0 {
+		return 0, false
+	}
+	// Base 0 lets strconv pick the base from the number's prefix; bitSize 32
+	// makes out-of-range values an error instead of truncating them.
+	val, err := strconv.ParseUint(t.str, 0, 32)
+	if err != nil {
+		return 0, false
+	}
+	return uint32(val), true
+}
+
+// Int64 returns the int64 value of a Scalar number token. The second result is
+// false if the token is not a number, is a float, or is out of range.
+func (t Token) Int64() (int64, bool) {
+	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
+		return 0, false
+	}
+	signed, err := strconv.ParseInt(t.str, 0, 64)
+	if err == nil {
+		return signed, true
+	}
+	// C++ accepts large positive hex numbers as negative values.
+	// This feature is here for proto1 backwards compatibility purposes.
+	if !flags.ProtoLegacy || t.numAttrs != numHex {
+		return 0, false
+	}
+	unsigned, err := strconv.ParseUint(t.str, 0, 64)
+	if err != nil {
+		return 0, false
+	}
+	// The conversion reinterprets values above the int64 maximum as negative.
+	return int64(unsigned), true
+}
+
+// Int32 returns the int32 value of a Scalar number token. The second result is
+// false if the token is not a number, is a float, or is out of range.
+func (t Token) Int32() (int32, bool) {
+	if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
+		return 0, false
+	}
+	signed, err := strconv.ParseInt(t.str, 0, 32)
+	if err == nil {
+		return int32(signed), true
+	}
+	// C++ accepts large positive hex numbers as negative values.
+	// This feature is here for proto1 backwards compatibility purposes.
+	if !flags.ProtoLegacy || t.numAttrs != numHex {
+		return 0, false
+	}
+	unsigned, err := strconv.ParseUint(t.str, 0, 32)
+	if err != nil {
+		return 0, false
+	}
+	// The conversion reinterprets values above the int32 maximum as negative.
+	return int32(unsigned), true
+}
+
+// Float64 returns the float64 value of a Scalar token. A literal must match
+// one of the floatLits keys case-insensitively; a number is parsed with
+// strconv.ParseFloat. The second result is false for anything else.
+func (t Token) Float64() (float64, bool) {
+	if t.kind != Scalar {
+		return 0, false
+	}
+	if t.attrs == literalValue {
+		// A missing key yields (0, false), which is the failure result.
+		lit, ok := floatLits[strings.ToLower(string(t.raw))]
+		return lit, ok
+	}
+	if t.attrs != numberValue {
+		return 0, false
+	}
+	val, err := strconv.ParseFloat(t.str, 64)
+	// A range error still counts as success: the value ParseFloat returned
+	// alongside it is passed through unchanged.
+	if err == nil || err.(*strconv.NumError).Err == strconv.ErrRange {
+		return val, true
+	}
+	return 0, false
+}
+
+// Float32 returns the float32 value of a Scalar token. A literal must match
+// one of the floatLits keys case-insensitively; a number is parsed as a
+// float64 and then converted. The second result is false for anything else.
+func (t Token) Float32() (float32, bool) {
+	if t.kind != Scalar {
+		return 0, false
+	}
+	if t.attrs == literalValue {
+		// A missing key yields (0, false), which is the failure result.
+		lit, ok := floatLits[strings.ToLower(string(t.raw))]
+		return float32(lit), ok
+	}
+	if t.attrs != numberValue {
+		return 0, false
+	}
+	val, err := strconv.ParseFloat(t.str, 64)
+	// Overflows are treated as (-)infinity, both when the float64 parse itself
+	// reports a range error and when the value only overflows float32.
+	if err == nil || err.(*strconv.NumError).Err == strconv.ErrRange {
+		return float32(val), true
+	}
+	return 0, false
+}
+
+// These are the supported float literals which C++ permits case-insensitive
+// variants of these.
+var floatLits = map[string]float64{
+ "nan": math.NaN(),
+ "inf": math.Inf(1),
+ "infinity": math.Inf(1),
+ "-inf": math.Inf(-1),
+ "-infinity": math.Inf(-1),
+}
+
+// TokenEquals reports whether x and y are equal in every field: kind, attrs,
+// numAttrs, pos, raw bytes and parsed string.
+func TokenEquals(x, y Token) bool {
+	if x.kind != y.kind || x.attrs != y.attrs || x.numAttrs != y.numAttrs {
+		return false
+	}
+	if x.pos != y.pos || x.str != y.str {
+		return false
+	}
+	// raw is a slice, so it has to be compared by content.
+	return bytes.Equal(x.raw, y.raw)
+}
diff --git a/internal/encoding/text/doc.go b/internal/encoding/text/doc.go
new file mode 100644
index 0000000..0ce8d6f
--- /dev/null
+++ b/internal/encoding/text/doc.go
@@ -0,0 +1,29 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package text implements the text format for protocol buffers.
+// This package has no semantic understanding for protocol buffers and is only
+// a parser and composer for the format.
+//
+// There is no formal specification for the protobuf text format, as such the
+// C++ implementation (see google::protobuf::TextFormat) is the reference
+// implementation of the text format.
+//
+// This package is neither a superset nor a subset of the C++ implementation.
+// This implementation permits a more liberal grammar in some cases to be
+// backwards compatible with the historical Go implementation.
+// Future parsings unique to Go should not be added.
+// Some grammars allowed by the C++ implementation are deliberately
+// not implemented here because they are considered a bug by the protobuf team
+// and should not be replicated.
+//
+// The Go implementation should implement a sufficient amount of the C++
+// grammar such that the default text serialization by C++ can be parsed by Go.
+// However, just because the C++ parser accepts some input does not mean that
+// the Go implementation should as well.
+//
+// The text format is almost a superset of JSON except:
+// * message keys are not quoted strings, but identifiers
+// * the top-level value must be a message without the delimiters
+package text
diff --git a/internal/encoding/text/encode.go b/internal/encoding/text/encode.go
index 6757046..a593978 100644
--- a/internal/encoding/text/encode.go
+++ b/internal/encoding/text/encode.go
@@ -5,16 +5,45 @@
package text
import (
- "regexp"
+ "math"
+ "math/bits"
+ "strconv"
"strings"
+ "unicode/utf8"
"google.golang.org/protobuf/internal/detrand"
"google.golang.org/protobuf/internal/errors"
)
-// Marshal serializes v as the proto text format, where v must be a Message.
-// In the proto text format, the top-level value is always a message where the
-// delimiters are elided.
+// encType represents an encoding type.
+type encType uint8
+
+const (
+ _ encType = (1 << iota) / 2
+ name
+ scalar
+ messageOpen
+ messageClose
+)
+
+// Encoder provides methods to write out textproto constructs and values. The user is
+// responsible for producing valid sequences of constructs and values.
+type Encoder struct {
+ encoderState
+
+ indent string
+ newline string // set to "\n" if len(indent) > 0
+ delims [2]byte
+ outputASCII bool
+}
+
+type encoderState struct {
+ lastType encType
+ indents []byte
+ out []byte
+}
+
+// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry in a List or Message
// to be preceded by the indent and trailed by a newline.
@@ -25,164 +54,214 @@
// If outputASCII is true, strings will be serialized in such a way that
// multi-byte UTF-8 sequences are escaped. This property ensures that the
// overall output is ASCII (as opposed to UTF-8).
-func Marshal(v Value, indent string, delims [2]byte, outputASCII bool) ([]byte, error) {
- p := encoder{}
+func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
+ e := &Encoder{}
if len(indent) > 0 {
if strings.Trim(indent, " \t") != "" {
return nil, errors.New("indent may only be composed of space and tab characters")
}
- p.indent = indent
- p.newline = "\n"
+ e.indent = indent
+ e.newline = "\n"
}
switch delims {
case [2]byte{0, 0}:
- p.delims = [2]byte{'{', '}'}
+ e.delims = [2]byte{'{', '}'}
case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
- p.delims = delims
+ e.delims = delims
default:
return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
}
- p.outputASCII = outputASCII
+ e.outputASCII = outputASCII
- err := p.marshalMessage(v, false)
- if err != nil {
- return nil, err
- }
- return p.out, nil
+ return e, nil
}
-type encoder struct {
- out []byte
-
- indent string
- indents []byte
- newline string // set to "\n" if len(indent) > 0
- delims [2]byte
- outputASCII bool
+// Bytes returns the content of the written bytes.
+func (e *Encoder) Bytes() []byte {
+ return e.out
}
-func (p *encoder) marshalList(v Value) error {
- if v.Type() != List {
- return errors.New("invalid type %v, expected list", v.Type())
- }
- elems := v.List()
- p.out = append(p.out, '[')
- p.indents = append(p.indents, p.indent...)
- if len(elems) > 0 {
- p.out = append(p.out, p.newline...)
- }
- for i, elem := range elems {
- p.out = append(p.out, p.indents...)
- if err := p.marshalValue(elem); err != nil {
- return err
- }
- if i < len(elems)-1 {
- p.out = append(p.out, ',')
- }
- p.out = append(p.out, p.newline...)
- }
- p.indents = p.indents[:len(p.indents)-len(p.indent)]
- if len(elems) > 0 {
- p.out = append(p.out, p.indents...)
- }
- p.out = append(p.out, ']')
- return nil
+// StartMessage writes out the '{' or '<' symbol.
+func (e *Encoder) StartMessage() {
+ e.prepareNext(messageOpen)
+ e.out = append(e.out, e.delims[0])
}
-func (p *encoder) marshalMessage(v Value, emitDelims bool) error {
- if v.Type() != Message {
- return errors.New("invalid type %v, expected message", v.Type())
+// EndMessage writes out the '}' or '>' symbol.
+func (e *Encoder) EndMessage() {
+ e.prepareNext(messageClose)
+ e.out = append(e.out, e.delims[1])
+}
+
+// WriteName writes out the field name and the separator ':'.
+// The name is appended verbatim, without quoting or validation, after any
+// spacing or indentation that prepareNext inserts.
+func (e *Encoder) WriteName(s string) {
+ e.prepareNext(name)
+ e.out = append(e.out, s...)
+ e.out = append(e.out, ':')
+}
+
+// WriteBool writes out the given boolean value.
+func (e *Encoder) WriteBool(b bool) {
+ if b {
+ e.WriteLiteral("true")
+ } else {
+ e.WriteLiteral("false")
}
- items := v.Message()
- if emitDelims {
- p.out = append(p.out, p.delims[0])
- p.indents = append(p.indents, p.indent...)
- if len(items) > 0 {
- p.out = append(p.out, p.newline...)
+}
+
+// WriteString writes out the given string value.
+func (e *Encoder) WriteString(s string) {
+ e.prepareNext(scalar)
+ e.out = appendString(e.out, s, e.outputASCII)
+}
+
+func appendString(out []byte, in string, outputASCII bool) []byte {
+ out = append(out, '"')
+ i := indexNeedEscapeInString(in)
+ in, out = in[i:], append(out, in[:i]...)
+ for len(in) > 0 {
+ switch r, n := utf8.DecodeRuneInString(in); {
+ case r == utf8.RuneError && n == 1:
+ // We do not report invalid UTF-8 because strings in the text format
+ // are used to represent both the proto string and bytes type.
+ r = rune(in[0])
+ fallthrough
+ case r < ' ' || r == '"' || r == '\\':
+ out = append(out, '\\')
+ switch r {
+ case '"', '\\':
+ out = append(out, byte(r))
+ case '\n':
+ out = append(out, 'n')
+ case '\r':
+ out = append(out, 'r')
+ case '\t':
+ out = append(out, 't')
+ default:
+ out = append(out, 'x')
+ out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
+ out = strconv.AppendUint(out, uint64(r), 16)
+ }
+ in = in[n:]
+ case outputASCII && r >= utf8.RuneSelf:
+ out = append(out, '\\')
+ if r <= math.MaxUint16 {
+ out = append(out, 'u')
+ out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
+ out = strconv.AppendUint(out, uint64(r), 16)
+ } else {
+ out = append(out, 'U')
+ out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
+ out = strconv.AppendUint(out, uint64(r), 16)
+ }
+ in = in[n:]
+ default:
+ i := indexNeedEscapeInString(in[n:])
+ in, out = in[n+i:], append(out, in[:n+i]...)
}
}
- for i, item := range items {
- p.out = append(p.out, p.indents...)
- if err := p.marshalKey(item[0]); err != nil {
- return err
+ out = append(out, '"')
+ return out
+}
+
+// indexNeedEscapeInString returns the index of the character that needs
+// escaping. If no characters need escaping, this returns the input length.
+func indexNeedEscapeInString(s string) int {
+ for i := 0; i < len(s); i++ {
+ if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
+ return i
}
- p.out = append(p.out, ':')
- if len(p.indent) > 0 {
- p.out = append(p.out, ' ')
- // For multi-line output, add a random extra space after key:
- // to make output unstable.
+ }
+ return len(s)
+}
+
+// WriteFloat writes out the given float value for given bitSize.
+func (e *Encoder) WriteFloat(n float64, bitSize int) {
+ e.prepareNext(scalar)
+ e.out = appendFloat(e.out, n, bitSize)
+}
+
+// appendFloat appends the text form of n to out and returns the extended
+// slice. NaN and the infinities are written as "nan", "inf" and "-inf"; every
+// other value uses the shortest 'g' formatting that round-trips at bitSize.
+func appendFloat(out []byte, n float64, bitSize int) []byte {
+	if math.IsNaN(n) {
+		return append(out, "nan"...)
+	}
+	if math.IsInf(n, +1) {
+		return append(out, "inf"...)
+	}
+	if math.IsInf(n, -1) {
+		return append(out, "-inf"...)
+	}
+	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
+}
+
+// WriteInt writes out the given signed integer value in base 10.
+func (e *Encoder) WriteInt(n int64) {
+	e.prepareNext(scalar)
+	// Append the digits directly to the output buffer instead of formatting
+	// an intermediate string first.
+	e.out = strconv.AppendInt(e.out, n, 10)
+}
+
+// WriteUint writes out the given unsigned integer value in base 10.
+func (e *Encoder) WriteUint(n uint64) {
+	e.prepareNext(scalar)
+	// Append the digits directly to the output buffer instead of formatting
+	// an intermediate string first.
+	e.out = strconv.AppendUint(e.out, n, 10)
+}
+
+// WriteLiteral writes out the given string as a literal value without quotes.
+// This is used for writing enum literal strings.
+func (e *Encoder) WriteLiteral(s string) {
+ e.prepareNext(scalar)
+ e.out = append(e.out, s...)
+}
+
+// prepareNext adds possible space and indentation for the next value based
+// on last encType and indent option. It also updates e.lastType to next.
+func (e *Encoder) prepareNext(next encType) {
+ defer func() {
+ e.lastType = next
+ }()
+
+ // Single line.
+ if len(e.indent) == 0 {
+ // Add space after each field before the next one.
+ if e.lastType&(scalar|messageClose) != 0 && next == name {
+ e.out = append(e.out, ' ')
+ // Add a random extra space to make output unstable.
if detrand.Bool() {
- p.out = append(p.out, ' ')
+ e.out = append(e.out, ' ')
}
}
-
- if err := p.marshalValue(item[1]); err != nil {
- return err
- }
- if i < len(items)-1 && len(p.indent) == 0 {
- p.out = append(p.out, ' ')
- // For single-line output, add a random extra space after a field
- // to make output unstable.
- if detrand.Bool() {
- p.out = append(p.out, ' ')
- }
- }
- p.out = append(p.out, p.newline...)
+ return
}
- if emitDelims {
- p.indents = p.indents[:len(p.indents)-len(p.indent)]
- if len(items) > 0 {
- p.out = append(p.out, p.indents...)
- }
- p.out = append(p.out, p.delims[1])
- }
- return nil
-}
-// This expression is more liberal than ConsumeAnyTypeUrl in C++.
-// However, the C++ parser does not handle many legal URL strings.
-// The Go implementation is more liberal to be backwards compatible with
-// the historical Go implementation which was overly liberal (and buggy).
-var urlRegexp = regexp.MustCompile(`^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`)
-
-func (p *encoder) marshalKey(v Value) error {
- switch v.Type() {
- case String:
- var err error
- p.out = append(p.out, '[')
- if len(urlRegexp.FindString(v.str)) == len(v.str) {
- p.out = append(p.out, v.str...)
- } else {
- err = p.marshalString(v)
+ // Multi-line.
+ switch {
+ case e.lastType == name:
+ e.out = append(e.out, ' ')
+ // Add a random extra space after name: to make output unstable.
+ if detrand.Bool() {
+ e.out = append(e.out, ' ')
}
- p.out = append(p.out, ']')
- return err
- case Uint:
- return p.marshalNumber(v)
- case Name:
- s, _ := v.Name()
- p.out = append(p.out, s...)
- return nil
- default:
- return errors.New("invalid type %v to encode key", v.Type())
+
+ case e.lastType == messageOpen && next != messageClose:
+ e.indents = append(e.indents, e.indent...)
+ e.out = append(e.out, '\n')
+ e.out = append(e.out, e.indents...)
+
+ case e.lastType&(scalar|messageClose) != 0:
+ if next == messageClose {
+ e.indents = e.indents[:len(e.indents)-len(e.indent)]
+ }
+ e.out = append(e.out, '\n')
+ e.out = append(e.out, e.indents...)
}
}
-func (p *encoder) marshalValue(v Value) error {
- switch v.Type() {
- case Bool, Int, Uint, Float32, Float64:
- return p.marshalNumber(v)
- case String:
- return p.marshalString(v)
- case List:
- return p.marshalList(v)
- case Message:
- return p.marshalMessage(v, true)
- case Name:
- s, _ := v.Name()
- p.out = append(p.out, s...)
- return nil
- default:
- return errors.New("invalid type %v to encode value", v.Type())
- }
+// Snapshot returns the current snapshot for use in Reset.
+// The returned encoderState is a struct copy: it records the current lengths
+// of the out and indents slices but refers to the same backing arrays as the
+// Encoder.
+func (e *Encoder) Snapshot() encoderState {
+ return e.encoderState
+}
+
+// Reset resets the Encoder to the given encoderState from a Snapshot.
+func (e *Encoder) Reset(es encoderState) {
+ e.encoderState = es
}
diff --git a/internal/encoding/text/encode_test.go b/internal/encoding/text/encode_test.go
new file mode 100644
index 0000000..b9ad92a
--- /dev/null
+++ b/internal/encoding/text/encode_test.go
@@ -0,0 +1,549 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package text_test
+
+import (
+ "math"
+ "strings"
+ "testing"
+ "unicode/utf8"
+
+ "github.com/google/go-cmp/cmp"
+ "google.golang.org/protobuf/internal/detrand"
+ "google.golang.org/protobuf/internal/encoding/text"
+)
+
+// Disable detrand to enable direct comparisons on outputs.
+func init() { detrand.Disable() }
+
+func TestEncoder(t *testing.T) {
+ tests := []encoderTestCase{
+ {
+ desc: "no-opt",
+ write: func(e *text.Encoder) {},
+ wantOut: ``,
+ wantOutIndent: ``,
+ },
+ {
+ desc: "true",
+ write: func(e *text.Encoder) {
+ e.WriteName("bool")
+ e.WriteBool(true)
+ },
+ wantOut: `bool:true`,
+ wantOutIndent: `bool: true`,
+ },
+ {
+ desc: "false",
+ write: func(e *text.Encoder) {
+ e.WriteName("bool")
+ e.WriteBool(false)
+ },
+ wantOut: `bool:false`,
+ wantOutIndent: `bool: false`,
+ },
+ {
+ desc: "bracket name",
+ write: func(e *text.Encoder) {
+ e.WriteName("[extension]")
+ e.WriteString("hello")
+ },
+ wantOut: `[extension]:"hello"`,
+ wantOutIndent: `[extension]: "hello"`,
+ },
+ {
+ desc: "numeric name",
+ write: func(e *text.Encoder) {
+ e.WriteName("01234")
+ e.WriteString("hello")
+ },
+ wantOut: `01234:"hello"`,
+ wantOutIndent: `01234: "hello"`,
+ },
+ {
+ desc: "string",
+ write: func(e *text.Encoder) {
+ e.WriteName("str")
+ e.WriteString("hello world")
+ },
+ wantOut: `str:"hello world"`,
+ wantOutIndent: `str: "hello world"`,
+ },
+ {
+ desc: "enum",
+ write: func(e *text.Encoder) {
+ e.WriteName("enum")
+ e.WriteLiteral("ENUM_VALUE")
+ },
+ wantOut: `enum:ENUM_VALUE`,
+ wantOutIndent: `enum: ENUM_VALUE`,
+ },
+ {
+ desc: "float64",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(1.0199999809265137, 64)
+ },
+ wantOut: `float64:1.0199999809265137`,
+ wantOutIndent: `float64: 1.0199999809265137`,
+ },
+ {
+ desc: "float64 max value",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(math.MaxFloat64, 64)
+ },
+ wantOut: `float64:1.7976931348623157e+308`,
+ wantOutIndent: `float64: 1.7976931348623157e+308`,
+ },
+ {
+ desc: "float64 min value",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(-math.MaxFloat64, 64)
+ },
+ wantOut: `float64:-1.7976931348623157e+308`,
+ wantOutIndent: `float64: -1.7976931348623157e+308`,
+ },
+ {
+ desc: "float64 nan",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(math.NaN(), 64)
+ },
+ wantOut: `float64:nan`,
+ wantOutIndent: `float64: nan`,
+ },
+ {
+ desc: "float64 inf",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(math.Inf(+1), 64)
+ },
+ wantOut: `float64:inf`,
+ wantOutIndent: `float64: inf`,
+ },
+ {
+ desc: "float64 -inf",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(math.Inf(-1), 64)
+ },
+ wantOut: `float64:-inf`,
+ wantOutIndent: `float64: -inf`,
+ },
+ {
+ desc: "float64 negative zero",
+ write: func(e *text.Encoder) {
+ e.WriteName("float64")
+ e.WriteFloat(math.Copysign(0, -1), 64)
+ },
+ wantOut: `float64:-0`,
+ wantOutIndent: `float64: -0`,
+ },
+ {
+ desc: "float32",
+ write: func(e *text.Encoder) {
+ e.WriteName("float")
+ e.WriteFloat(1.02, 32)
+ },
+ wantOut: `float:1.02`,
+ wantOutIndent: `float: 1.02`,
+ },
+ {
+ desc: "float32 max value",
+ write: func(e *text.Encoder) {
+ e.WriteName("float32")
+ e.WriteFloat(math.MaxFloat32, 32)
+ },
+ wantOut: `float32:3.4028235e+38`,
+ wantOutIndent: `float32: 3.4028235e+38`,
+ },
+ {
+ desc: "float32 nan",
+ write: func(e *text.Encoder) {
+ e.WriteName("float32")
+ e.WriteFloat(math.NaN(), 32)
+ },
+ wantOut: `float32:nan`,
+ wantOutIndent: `float32: nan`,
+ },
+ {
+ desc: "float32 inf",
+ write: func(e *text.Encoder) {
+ e.WriteName("float32")
+ e.WriteFloat(math.Inf(+1), 32)
+ },
+ wantOut: `float32:inf`,
+ wantOutIndent: `float32: inf`,
+ },
+ {
+ desc: "float32 -inf",
+ write: func(e *text.Encoder) {
+ e.WriteName("float32")
+ e.WriteFloat(math.Inf(-1), 32)
+ },
+ wantOut: `float32:-inf`,
+ wantOutIndent: `float32: -inf`,
+ },
+ {
+ desc: "float32 negative zero",
+ write: func(e *text.Encoder) {
+ e.WriteName("float32")
+ e.WriteFloat(math.Copysign(0, -1), 32)
+ },
+ wantOut: `float32:-0`,
+ wantOutIndent: `float32: -0`,
+ },
+ {
+ desc: "int64 max value",
+ write: func(e *text.Encoder) {
+ e.WriteName("int")
+ e.WriteInt(math.MaxInt64)
+ },
+ wantOut: `int:9223372036854775807`,
+ wantOutIndent: `int: 9223372036854775807`,
+ },
+ {
+ desc: "int64 min value",
+ write: func(e *text.Encoder) {
+ e.WriteName("int")
+ e.WriteInt(math.MinInt64)
+ },
+ wantOut: `int:-9223372036854775808`,
+ wantOutIndent: `int: -9223372036854775808`,
+ },
+ {
+ desc: "uint",
+ write: func(e *text.Encoder) {
+ e.WriteName("uint")
+ e.WriteUint(math.MaxUint64)
+ },
+ wantOut: `uint:18446744073709551615`,
+ wantOutIndent: `uint: 18446744073709551615`,
+ },
+ {
+ desc: "empty message field",
+ write: func(e *text.Encoder) {
+ e.WriteName("m")
+ e.StartMessage()
+ e.EndMessage()
+ },
+ wantOut: `m:{}`,
+ wantOutIndent: `m: {}`,
+ },
+ {
+ desc: "multiple fields",
+ write: func(e *text.Encoder) {
+ e.WriteName("bool")
+ e.WriteBool(true)
+ e.WriteName("str")
+ e.WriteString("hello")
+ e.WriteName("str")
+ e.WriteString("world")
+ e.WriteName("m")
+ e.StartMessage()
+ e.EndMessage()
+ e.WriteName("[int]")
+ e.WriteInt(49)
+ e.WriteName("float64")
+ e.WriteFloat(1.00023e4, 64)
+ e.WriteName("101")
+ e.WriteString("unknown")
+ },
+ wantOut: `bool:true str:"hello" str:"world" m:{} [int]:49 float64:10002.3 101:"unknown"`,
+ wantOutIndent: `bool: true
+str: "hello"
+str: "world"
+m: {}
+[int]: 49
+float64: 10002.3
+101: "unknown"`,
+ },
+ {
+ desc: "populated message fields",
+ write: func(e *text.Encoder) {
+ e.WriteName("m1")
+ e.StartMessage()
+ {
+ e.WriteName("str")
+ e.WriteString("hello")
+ }
+ e.EndMessage()
+
+ e.WriteName("bool")
+ e.WriteBool(true)
+
+ e.WriteName("m2")
+ e.StartMessage()
+ {
+ e.WriteName("str")
+ e.WriteString("world")
+ e.WriteName("m2-1")
+ e.StartMessage()
+ e.EndMessage()
+ e.WriteName("m2-2")
+ e.StartMessage()
+ {
+ e.WriteName("[int]")
+ e.WriteInt(49)
+ }
+ e.EndMessage()
+ e.WriteName("float64")
+ e.WriteFloat(1.00023e4, 64)
+ }
+ e.EndMessage()
+
+ e.WriteName("101")
+ e.WriteString("unknown")
+ },
+ wantOut: `m1:{str:"hello"} bool:true m2:{str:"world" m2-1:{} m2-2:{[int]:49} float64:10002.3} 101:"unknown"`,
+ wantOutIndent: `m1: {
+ str: "hello"
+}
+bool: true
+m2: {
+ str: "world"
+ m2-1: {}
+ m2-2: {
+ [int]: 49
+ }
+ float64: 10002.3
+}
+101: "unknown"`,
+ },
+ }
+
+ for _, tc := range tests {
+ t.Run(tc.desc, func(t *testing.T) {
+ runEncoderTest(t, tc, [2]byte{})
+
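+ // Run the same case again with angle brackets as the message delimiters.
+ // Test cases must not use '{' or '}' for anything but delimiters, because replaceDelims rewrites every occurrence.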
+ tc.wantOut = replaceDelims(tc.wantOut)
+ tc.wantOutIndent = replaceDelims(tc.wantOutIndent)
+ runEncoderTest(t, tc, [2]byte{'<', '>'})
+ })
+ }
+}
+
+type encoderTestCase struct {
+ desc string
+ write func(*text.Encoder)
+ wantOut string
+ wantOutIndent string
+}
+
+func runEncoderTest(t *testing.T, tc encoderTestCase, delims [2]byte) {
+ t.Helper()
+
+ if tc.wantOut != "" {
+ enc, err := text.NewEncoder("", delims, false)
+ if err != nil {
+ t.Fatalf("NewEncoder returned error: %v", err)
+ }
+ tc.write(enc)
+ got := string(enc.Bytes())
+ if got != tc.wantOut {
+ t.Errorf("(compact)\n<got>\n%v\n<want>\n%v\n", got, tc.wantOut)
+ }
+ }
+ if tc.wantOutIndent != "" {
+ enc, err := text.NewEncoder("\t", delims, false)
+ if err != nil {
+ t.Fatalf("NewEncoder returned error: %v", err)
+ }
+ tc.write(enc)
+ got, want := string(enc.Bytes()), tc.wantOutIndent
+ if got != want {
+ t.Errorf("(multi-line)\n<got>\n%v\n<want>\n%v\n<diff -want +got>\n%v\n",
+ got, want, cmp.Diff(want, got))
+ }
+ }
+}
+
+func replaceDelims(s string) string {
+ s = strings.Replace(s, "{", "<", -1)
+ return strings.Replace(s, "}", ">", -1)
+}
+
+// TestEncodeStrings checks WriteString output in both UTF-8 and ASCII-only modes.
+func TestEncodeStrings(t *testing.T) {
+ tests := []struct {
+ in string
+ wantOut string
+ wantOutASCII string
+ }{
+ {
+ in: `"`,
+ wantOut: `"\""`,
+ },
+ {
+ in: `'`,
+ wantOut: `"'"`,
+ },
+ {
+ in: "hello\u1234world",
+ wantOut: "\"hello\u1234world\"",
+ wantOutASCII: `"hello\u1234world"`,
+ },
+ {
+ // String that has as few escaped characters as possible.
+ in: func() string {
+ var b []byte
+ for i := 0; i < utf8.RuneSelf; i++ {
+ switch i {
+ case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
+ default:
+ b = append(b, byte(i))
+ }
+ }
+ return string(b)
+ }(),
+ wantOut: `"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
+ wantOutASCII: `"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
+ },
+ {
+ // Valid UTF-8 wire encoding of the RuneError rune.
+ in: string(utf8.RuneError),
+ wantOut: `"` + string(utf8.RuneError) + `"`,
+ wantOutASCII: `"\ufffd"`,
+ },
+ {
+ in: "\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff",
+ wantOut: `"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
+ wantOutASCII: `"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
+ },
+ {
+ in: "\001x",
+ wantOut: `"\x01x"`,
+ wantOutASCII: `"\x01x"`,
+ },
+ {
+ in: "\012x",
+ wantOut: `"\nx"`,
+ wantOutASCII: `"\nx"`,
+ },
+ {
+ in: "\123x",
+ wantOut: `"Sx"`,
+ wantOutASCII: `"Sx"`,
+ },
+ {
+ in: "\1234x",
+ wantOut: `"S4x"`,
+ wantOutASCII: `"S4x"`,
+ },
+ {
+ in: "\001",
+ wantOut: `"\x01"`,
+ wantOutASCII: `"\x01"`,
+ },
+ {
+ in: "\012",
+ wantOut: `"\n"`,
+ wantOutASCII: `"\n"`,
+ },
+ {
+ in: "\123",
+ wantOut: `"S"`,
+ wantOutASCII: `"S"`,
+ },
+ {
+ in: "\1234",
+ wantOut: `"S4"`,
+ wantOutASCII: `"S4"`,
+ },
+ {
+ in: "\377",
+ wantOut: `"\xff"`,
+ wantOutASCII: `"\xff"`,
+ },
+ {
+ in: "\x0fx",
+ wantOut: `"\x0fx"`,
+ wantOutASCII: `"\x0fx"`,
+ },
+ {
+ in: "\xffx",
+ wantOut: `"\xffx"`,
+ wantOutASCII: `"\xffx"`,
+ },
+ {
+ in: "\xfffx",
+ wantOut: `"\xfffx"`,
+ wantOutASCII: `"\xfffx"`,
+ },
+ {
+ in: "\x0f",
+ wantOut: `"\x0f"`,
+ wantOutASCII: `"\x0f"`,
+ },
+ {
+ in: "\xff",
+ wantOut: `"\xff"`,
+ wantOutASCII: `"\xff"`,
+ },
+ {
+ in: "\xfff",
+ wantOut: `"\xfff"`,
+ wantOutASCII: `"\xfff"`,
+ },
+ }
+ for _, tc := range tests {
+ t.Run("", func(t *testing.T) {
+ if tc.wantOut != "" {
+ runEncodeStringsTest(t, tc.in, tc.wantOut, false)
+ }
+ if tc.wantOutASCII != "" {
+ runEncodeStringsTest(t, tc.in, tc.wantOutASCII, true)
+ }
+ })
+ }
+}
+
+func runEncodeStringsTest(t *testing.T, in string, want string, outputASCII bool) {
+ t.Helper()
+
+ charType := "UTF-8"
+ if outputASCII {
+ charType = "ASCII"
+ }
+
+ enc, err := text.NewEncoder("", [2]byte{}, outputASCII)
+ if err != nil {
+ t.Fatalf("[%s] NewEncoder returned error: %v", charType, err)
+ }
+ enc.WriteString(in)
+ got := string(enc.Bytes())
+ if got != want {
+ t.Errorf("[%s] WriteString(%q)\n<got>\n%v\n<want>\n%v\n", charType, in, got, want)
+ }
+}
+
+func TestReset(t *testing.T) {
+ enc, err := text.NewEncoder("\t", [2]byte{}, false)
+ if err != nil {
+ t.Fatalf("NewEncoder returned error: %v", err)
+ }
+
+ enc.WriteName("foo")
+ pos := enc.Snapshot()
+
+ // Begin writing a message value after the name; the Reset below is expected to discard it.
+ enc.StartMessage()
+ enc.WriteName("bar")
+ enc.WriteUint(10)
+
+ // Roll back to the snapshot and write a string value instead.
+ enc.Reset(pos)
+ enc.WriteString("0123456789")
+
+ got := string(enc.Bytes())
+ want := `foo: "0123456789"`
+ if got != want {
+ t.Errorf("Reset did not restore given position:\n<got>\n%v\n<want>\n%v\n", got, want)
+ }
+}
diff --git a/internal/encoding/text/number.go b/internal/encoding/text/number.go
deleted file mode 100644
index 4e31ee1..0000000
--- a/internal/encoding/text/number.go
+++ /dev/null
@@ -1,337 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package text
-
-import (
- "bytes"
- "io"
- "math"
- "strconv"
-
- "google.golang.org/protobuf/internal/errors"
-)
-
-// marshalNumber encodes v as either a Bool, Int, Uint, Float32, or Float64.
-func (p *encoder) marshalNumber(v Value) error {
- var err error
- p.out, err = appendNumber(p.out, v)
- return err
-}
-func appendNumber(out []byte, v Value) ([]byte, error) {
- if len(v.raw) > 0 {
- switch v.Type() {
- case Bool, Int, Uint, Float32, Float64:
- return append(out, v.raw...), nil
- }
- }
- switch v.Type() {
- case Bool:
- if b, _ := v.Bool(); b {
- return append(out, "true"...), nil
- } else {
- return append(out, "false"...), nil
- }
- case Int:
- return strconv.AppendInt(out, int64(v.num), 10), nil
- case Uint:
- return strconv.AppendUint(out, uint64(v.num), 10), nil
- case Float32:
- return appendFloat(out, v, 32)
- case Float64:
- return appendFloat(out, v, 64)
- default:
- return nil, errors.New("invalid type %v, expected bool or number", v.Type())
- }
-}
-
-func appendFloat(out []byte, v Value, bitSize int) ([]byte, error) {
- switch n := math.Float64frombits(v.num); {
- case math.IsNaN(n):
- return append(out, "nan"...), nil
- case math.IsInf(n, +1):
- return append(out, "inf"...), nil
- case math.IsInf(n, -1):
- return append(out, "-inf"...), nil
- default:
- return strconv.AppendFloat(out, n, 'g', -1, bitSize), nil
- }
-}
-
-// These regular expressions were derived by reverse engineering the C++ code
-// in tokenizer.cc and text_format.cc.
-var (
- literals = map[string]interface{}{
- // These exact literals are the ones supported in C++.
- // In C++, a 1-bit unsigned integers is also allowed to represent
- // a boolean. This is handled in Value.Bool.
- "t": true,
- "true": true,
- "True": true,
- "f": false,
- "false": false,
- "False": false,
-
- // C++ permits "-nan" and the case-insensitive variants of these.
- // However, Go continues to be case-sensitive.
- "nan": math.NaN(),
- "inf": math.Inf(+1),
- "-inf": math.Inf(-1),
- }
-)
-
-// unmarshalNumber decodes a Bool, Int, Uint, or Float64 from the input.
-func (p *decoder) unmarshalNumber() (Value, error) {
- v, n, err := consumeNumber(p.in)
- p.consume(n)
- return v, err
-}
-
-func consumeNumber(in []byte) (Value, int, error) {
- if len(in) == 0 {
- return Value{}, 0, io.ErrUnexpectedEOF
- }
- if v, n := matchLiteral(in); n > 0 {
- return rawValueOf(v, in[:n]), n, nil
- }
-
- num, ok := parseNumber(in)
- if !ok {
- return Value{}, 0, newSyntaxError("invalid %q as number or bool", errRegexp.Find(in))
- }
-
- if num.typ == numFloat {
- f, err := strconv.ParseFloat(string(num.value), 64)
- if err != nil {
- return Value{}, 0, err
- }
- return rawValueOf(f, in[:num.size]), num.size, nil
- }
-
- if num.neg {
- v, err := strconv.ParseInt(string(num.value), 0, 64)
- if err != nil {
- return Value{}, 0, err
- }
- return rawValueOf(v, num.value), num.size, nil
- }
- v, err := strconv.ParseUint(string(num.value), 0, 64)
- if err != nil {
- return Value{}, 0, err
- }
- return rawValueOf(v, num.value), num.size, nil
-}
-
-func matchLiteral(in []byte) (interface{}, int) {
- switch in[0] {
- case 't', 'T':
- rest := in[1:]
- if len(rest) == 0 || isDelim(rest[0]) {
- return true, 1
- }
- if n := matchStringWithDelim("rue", rest); n > 0 {
- return true, 4
- }
- case 'f', 'F':
- rest := in[1:]
- if len(rest) == 0 || isDelim(rest[0]) {
- return false, 1
- }
- if n := matchStringWithDelim("alse", rest); n > 0 {
- return false, 5
- }
- case 'n':
- if n := matchStringWithDelim("nan", in); n > 0 {
- return math.NaN(), 3
- }
- case 'i':
- if n := matchStringWithDelim("inf", in); n > 0 {
- return math.Inf(1), 3
- }
- case '-':
- if n := matchStringWithDelim("-inf", in); n > 0 {
- return math.Inf(-1), 4
- }
- }
- return nil, 0
-}
-
-func matchStringWithDelim(s string, b []byte) int {
- if !bytes.HasPrefix(b, []byte(s)) {
- return 0
- }
-
- n := len(s)
- if n < len(b) && !isDelim(b[n]) {
- return 0
- }
- return n
-}
-
-type numType uint8
-
-const (
- numDec numType = (1 << iota) / 2
- numHex
- numOct
- numFloat
-)
-
-// number is the result of parsing out a valid number from parseNumber. It
-// contains data for doing float or integer conversion via the strconv package.
-type number struct {
- typ numType
- neg bool
- // Size of input taken up by the number. This may not be the same as
- // len(number.value).
- size int
- // Bytes for doing strconv.Parse{Float,Int,Uint} conversion.
- value []byte
-}
-
-// parseNumber constructs a number object from given input. It allows for the
-// following patterns:
-// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
-// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
-func parseNumber(input []byte) (number, bool) {
- var size int
- var neg bool
- typ := numDec
-
- s := input
- if len(s) == 0 {
- return number{}, false
- }
-
- // Optional -
- if s[0] == '-' {
- neg = true
- s = s[1:]
- size++
- if len(s) == 0 {
- return number{}, false
- }
- }
-
- // C++ allows for whitespace and comments in between the negative sign and
- // the rest of the number. This logic currently does not but is consistent
- // with v1.
-
- switch {
- case s[0] == '0':
- if len(s) > 1 {
- switch {
- case s[1] == 'x' || s[1] == 'X':
- // Parse as hex number.
- typ = numHex
- n := 2
- s = s[2:]
- for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
- ('a' <= s[0] && s[0] <= 'f') ||
- ('A' <= s[0] && s[0] <= 'F')) {
- s = s[1:]
- n++
- }
- if n == 2 {
- return number{}, false
- }
- size += n
-
- case '0' <= s[1] && s[1] <= '7':
- // Parse as octal number.
- typ = numOct
- n := 2
- s = s[2:]
- for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
- s = s[1:]
- n++
- }
- size += n
- }
-
- if typ&(numHex|numOct) > 0 {
- if len(s) > 0 && !isDelim(s[0]) {
- return number{}, false
- }
- return number{
- typ: typ,
- size: size,
- neg: neg,
- value: input[:size],
- }, true
- }
- }
- s = s[1:]
- size++
-
- case '1' <= s[0] && s[0] <= '9':
- n := 1
- s = s[1:]
- for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- n++
- }
- size += n
-
- case s[0] == '.':
- // Handled below.
-
- default:
- return number{}, false
- }
-
- // . followed by 0 or more digits.
- if len(s) > 0 && s[0] == '.' {
- typ = numFloat
- n := 1
- s = s[1:]
- for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- n++
- }
- size += n
- }
-
- // e or E followed by an optional - or + and 1 or more digits.
- if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
- typ = numFloat
- s = s[1:]
- n := 1
- if s[0] == '+' || s[0] == '-' {
- s = s[1:]
- n++
- if len(s) == 0 {
- return number{}, false
- }
- }
- for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
- s = s[1:]
- n++
- }
- size += n
- }
-
- // At this point, input[:size] contains a valid number that can be converted
- // via strconv.Parse{Float,Int,Uint}.
- value := input[:size]
-
- // Optional suffix f or F for floats.
- if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
- typ = numFloat
- s = s[1:]
- size++
- }
-
- // Check that next byte is a delimiter or it is at the end.
- if len(s) > 0 && !isDelim(s[0]) {
- return number{}, false
- }
-
- return number{
- typ: typ,
- size: size,
- neg: neg,
- value: value,
- }, true
-}
diff --git a/internal/encoding/text/string.go b/internal/encoding/text/string.go
deleted file mode 100644
index 36314ff..0000000
--- a/internal/encoding/text/string.go
+++ /dev/null
@@ -1,229 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package text
-
-import (
- "bytes"
- "io"
- "math"
- "math/bits"
- "strconv"
- "strings"
- "unicode"
- "unicode/utf16"
- "unicode/utf8"
-
- "google.golang.org/protobuf/internal/errors"
- "google.golang.org/protobuf/internal/strs"
-)
-
-func (p *encoder) marshalString(v Value) error {
- var err error
- p.out, err = appendString(p.out, v, p.outputASCII)
- return err
-}
-func appendString(out []byte, v Value, outputASCII bool) ([]byte, error) {
- if v.Type() != String {
- return nil, errors.New("invalid type %v, expected string", v.Type())
- }
- if len(v.raw) > 0 {
- return append(out, v.raw...), nil
- }
- in := v.String()
-
- out = append(out, '"')
- i := indexNeedEscapeInString(in)
- in, out = in[i:], append(out, in[:i]...)
- for len(in) > 0 {
- switch r, n := utf8.DecodeRuneInString(in); {
- case r == utf8.RuneError && n == 1:
- // We do not report invalid UTF-8 because strings in the text format
- // are used to represent both the proto string and bytes type.
- r = rune(in[0])
- fallthrough
- case r < ' ' || r == '"' || r == '\\':
- out = append(out, '\\')
- switch r {
- case '"', '\\':
- out = append(out, byte(r))
- case '\n':
- out = append(out, 'n')
- case '\r':
- out = append(out, 'r')
- case '\t':
- out = append(out, 't')
- default:
- out = append(out, 'x')
- out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
- }
- in = in[n:]
- case outputASCII && r >= utf8.RuneSelf:
- out = append(out, '\\')
- if r <= math.MaxUint16 {
- out = append(out, 'u')
- out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
- } else {
- out = append(out, 'U')
- out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
- out = strconv.AppendUint(out, uint64(r), 16)
- }
- in = in[n:]
- default:
- i := indexNeedEscapeInString(in[n:])
- in, out = in[n+i:], append(out, in[:n+i]...)
- }
- }
- out = append(out, '"')
- return out, nil
-}
-
-func (p *decoder) unmarshalString() (Value, error) {
- v, n, err := consumeString(p.in)
- p.consume(n)
- return v, err
-}
-func consumeString(in []byte) (Value, int, error) {
- in0 := in
- if len(in) == 0 {
- return Value{}, 0, io.ErrUnexpectedEOF
- }
- quote := in[0]
- if in[0] != '"' && in[0] != '\'' {
- return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
- }
- in = in[1:]
- i := indexNeedEscapeInBytes(in)
- in, out := in[i:], in[:i:i] // set cap to prevent mutations
- for len(in) > 0 {
- switch r, n := utf8.DecodeRune(in); {
- case r == utf8.RuneError && n == 1:
- return Value{}, 0, newSyntaxError("invalid UTF-8 detected")
- case r == 0 || r == '\n':
- return Value{}, 0, newSyntaxError("invalid character %q in string", r)
- case r == rune(quote):
- in = in[1:]
- n := len(in0) - len(in)
- v := rawValueOf(string(out), in0[:n:n])
- return v, n, nil
- case r == '\\':
- if len(in) < 2 {
- return Value{}, 0, io.ErrUnexpectedEOF
- }
- switch r := in[1]; r {
- case '"', '\'', '\\', '?':
- in, out = in[2:], append(out, r)
- case 'a':
- in, out = in[2:], append(out, '\a')
- case 'b':
- in, out = in[2:], append(out, '\b')
- case 'n':
- in, out = in[2:], append(out, '\n')
- case 'r':
- in, out = in[2:], append(out, '\r')
- case 't':
- in, out = in[2:], append(out, '\t')
- case 'v':
- in, out = in[2:], append(out, '\v')
- case 'f':
- in, out = in[2:], append(out, '\f')
- case '0', '1', '2', '3', '4', '5', '6', '7':
- // One, two, or three octal characters.
- n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567"))
- if n > 3 {
- n = 3
- }
- v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8)
- if err != nil {
- return Value{}, 0, newSyntaxError("invalid octal escape code %q in string", in[:1+n])
- }
- in, out = in[1+n:], append(out, byte(v))
- case 'x':
- // One or two hexadecimal characters.
- n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF"))
- if n > 2 {
- n = 2
- }
- v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8)
- if err != nil {
- return Value{}, 0, newSyntaxError("invalid hex escape code %q in string", in[:2+n])
- }
- in, out = in[2+n:], append(out, byte(v))
- case 'u', 'U':
- // Four or eight hexadecimal characters
- n := 6
- if r == 'U' {
- n = 10
- }
- if len(in) < n {
- return Value{}, 0, io.ErrUnexpectedEOF
- }
- v, err := strconv.ParseUint(string(in[2:n]), 16, 32)
- if utf8.MaxRune < v || err != nil {
- return Value{}, 0, newSyntaxError("invalid Unicode escape code %q in string", in[:n])
- }
- in = in[n:]
-
- r := rune(v)
- if utf16.IsSurrogate(r) {
- if len(in) < 6 {
- return Value{}, 0, io.ErrUnexpectedEOF
- }
- v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
- r = utf16.DecodeRune(r, rune(v))
- if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
- return Value{}, 0, newSyntaxError("invalid Unicode escape code %q in string", in[:6])
- }
- in = in[6:]
- }
- out = append(out, string(r)...)
- default:
- return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
- }
- default:
- i := indexNeedEscapeInBytes(in[n:])
- in, out = in[n+i:], append(out, in[:n+i]...)
- }
- }
- return Value{}, 0, io.ErrUnexpectedEOF
-}
-
-// unmarshalStrings unmarshals multiple strings.
-// This differs from unmarshalString since the text format allows
-// multiple back-to-back string literals where they are semantically treated
-// as a single large string with all values concatenated.
-//
-// E.g., `"foo" "bar" "baz"` => ValueOf("foobarbaz")
-func (p *decoder) unmarshalStrings() (Value, error) {
- // Note that the ending quote is sufficient to unambiguously mark the end
- // of a string. Thus, the text grammar does not require intervening
- // whitespace or control characters in-between strings.
- // Thus, the following is valid:
- // `"foo"'bar'"baz"` => ValueOf("foobarbaz")
- b := p.in
- var ss []string
- for len(p.in) > 0 && (p.in[0] == '"' || p.in[0] == '\'') {
- v, err := p.unmarshalString()
- if err != nil {
- return Value{}, err
- }
- ss = append(ss, v.String())
- }
- b = b[:len(b)-len(p.in)]
- return rawValueOf(strings.Join(ss, ""), b[:len(b):len(b)]), nil
-}
-
-// indexNeedEscapeInString returns the index of the character that needs
-// escaping. If no characters need escaping, this returns the input length.
-func indexNeedEscapeInString(s string) int {
- for i := 0; i < len(s); i++ {
- if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
- return i
- }
- }
- return len(s)
-}
-func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
diff --git a/internal/encoding/text/text_test.go b/internal/encoding/text/text_test.go
deleted file mode 100644
index ba08868..0000000
--- a/internal/encoding/text/text_test.go
+++ /dev/null
@@ -1,863 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package text
-
-import (
- "fmt"
- "math"
- "strings"
- "testing"
- "unicode/utf8"
-
- "github.com/google/go-cmp/cmp"
- "github.com/google/go-cmp/cmp/cmpopts"
- "google.golang.org/protobuf/internal/detrand"
- "google.golang.org/protobuf/internal/flags"
- "google.golang.org/protobuf/reflect/protoreflect"
-)
-
-// Disable detrand to enable direct comparisons on outputs.
-func init() { detrand.Disable() }
-
-var S = fmt.Sprintf
-var V = ValueOf
-var ID = func(n protoreflect.Name) Value { return V(n) }
-
-type Lst = []Value
-type Msg = [][2]Value
-
-func Test(t *testing.T) {
- const space = " \n\r\t"
-
- tests := []struct {
- in string
- wantVal Value
- wantOut string
- wantOutBracket string
- wantOutASCII string
- wantOutIndent string
- wantErr string
- }{{
- in: "",
- wantVal: V(Msg{}),
- wantOutIndent: "",
- }, {
- in: S("%s# hello%s", space, space),
- wantVal: V(Msg{}),
- }, {
- in: S("%s# hello\rfoo:bar", space),
- wantVal: V(Msg{}),
- }, {
- // Comments only extend until the newline.
- in: S("%s# hello\nfoo:bar", space),
- wantVal: V(Msg{{ID("foo"), ID("bar")}}),
- wantOut: "foo:bar",
- wantOutIndent: "foo: bar\n",
- }, {
- // NUL is an invalid whitespace since C++ uses C-strings.
- in: "\x00",
- wantErr: `invalid "\x00" as identifier`,
- }, {
- in: "foo:0",
- wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
- wantOut: "foo:0",
- }, {
- in: S("%sfoo%s:0", space, space),
- wantVal: V(Msg{{ID("foo"), V(uint32(0))}}),
- }, {
- in: "foo bar:0",
- wantErr: `expected ':' after message key`,
- }, {
- in: "[foo]:0",
- wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
- wantOut: "[foo]:0",
- wantOutIndent: "[foo]: 0\n",
- }, {
- in: S("%s[%sfoo%s]%s:0", space, space, space, space),
- wantVal: V(Msg{{V("foo"), V(uint32(0))}}),
- }, {
- in: "[proto.package.name]:0",
- wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
- wantOut: "[proto.package.name]:0",
- wantOutIndent: "[proto.package.name]: 0\n",
- }, {
- in: S("%s[%sproto.package.name%s]%s:0", space, space, space, space),
- wantVal: V(Msg{{V("proto.package.name"), V(uint32(0))}}),
- }, {
- in: "['sub.domain.com\x2fpath\x2fto\x2fproto.package.name']:0",
- wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
- wantOut: "[sub.domain.com/path/to/proto.package.name]:0",
- wantOutIndent: "[sub.domain.com/path/to/proto.package.name]: 0\n",
- }, {
- in: "[\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"]:0",
- wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
- }, {
- in: S("%s[%s'sub.domain.com\x2fpath\x2fto\x2fproto.package.name'%s]%s:0", space, space, space, space),
- wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
- }, {
- in: S("%s[%s\"sub.domain.com\x2fpath\x2fto\x2fproto.package.name\"%s]%s:0", space, space, space, space),
- wantVal: V(Msg{{V("sub.domain.com/path/to/proto.package.name"), V(uint32(0))}}),
- }, {
- in: `['http://example.com/path/to/proto.package.name']:0`,
- wantVal: V(Msg{{V("http://example.com/path/to/proto.package.name"), V(uint32(0))}}),
- wantOut: `["http://example.com/path/to/proto.package.name"]:0`,
- wantOutIndent: `["http://example.com/path/to/proto.package.name"]: 0` + "\n",
- }, {
- in: "[proto.package.name:0",
- wantErr: `invalid character ':', expected ']' at end of extension name`,
- }, {
- in: "[proto.package name]:0",
- wantErr: `invalid character 'n', expected ']' at end of extension name`,
- }, {
- in: `["proto.package" "name"]:0`,
- wantErr: `invalid character '"', expected ']' at end of extension name`,
- }, {
- in: `["\z"]`,
- wantErr: `invalid escape code "\\z" in string`,
- }, {
- in: "[$]",
- wantErr: `invalid "$" as identifier`,
- }, {
- in: `[proto.package.]:0`,
- wantErr: `invalid "proto.package." as identifier`,
- }, {
- in: `[/proto.package]:0`,
- wantErr: `invalid "/proto.package" as identifier`,
- }, {
- in: `[proto.package/]:0`,
- wantErr: `invalid "proto.package/" as identifier`,
- }, {
- // This parses fine, but should result in a error later since no
- // type name in proto will ever be just a number.
- in: "[20]:0",
- wantVal: V(Msg{{V("20"), V(uint32(0))}}),
- wantOut: "[20]:0",
- }, {
- in: "20:0",
- wantVal: V(Msg{{V(uint32(20)), V(uint32(0))}}),
- wantOut: "20:0",
- }, {
- in: "0x20:0",
- wantVal: V(Msg{{V(uint32(0x20)), V(uint32(0))}}),
- wantOut: "32:0",
- }, {
- in: "020:0",
- wantVal: V(Msg{{V(uint32(020)), V(uint32(0))}}),
- wantOut: "16:0",
- }, {
- in: "-20:0",
- wantErr: `invalid "-20" as identifier`,
- }, {
- in: `foo:true bar:"s" baz:{} qux:[] wib:id`,
- wantVal: V(Msg{
- {ID("foo"), V(true)},
- {ID("bar"), V("s")},
- {ID("baz"), V(Msg{})},
- {ID("qux"), V(Lst{})},
- {ID("wib"), ID("id")},
- }),
- wantOut: `foo:true bar:"s" baz:{} qux:[] wib:id`,
- wantOutIndent: "foo: true\nbar: \"s\"\nbaz: {}\nqux: []\nwib: id\n",
- }, {
- in: S(`%sfoo%s:%strue%s %sbar%s:%s"s"%s %sbaz%s:%s<>%s %squx%s:%s[]%s %swib%s:%sid%s`,
- space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space, space),
- wantVal: V(Msg{
- {ID("foo"), V(true)},
- {ID("bar"), V("s")},
- {ID("baz"), V(Msg{})},
- {ID("qux"), V(Lst{})},
- {ID("wib"), ID("id")},
- }),
- }, {
- in: `foo:true;`,
- wantVal: V(Msg{{ID("foo"), V(true)}}),
- wantOut: "foo:true",
- wantOutIndent: "foo: true\n",
- }, {
- in: `foo:true,`,
- wantVal: V(Msg{{ID("foo"), V(true)}}),
- }, {
- in: `foo:bar;,`,
- wantErr: `invalid "," as identifier`,
- }, {
- in: `foo:bar,;`,
- wantErr: `invalid ";" as identifier`,
- }, {
- in: `footrue`,
- wantErr: `unexpected EOF`,
- }, {
- in: `foo true`,
- wantErr: `expected ':' after message key`,
- }, {
- in: `foo"s"`,
- wantErr: `expected ':' after message key`,
- }, {
- in: `foo "s"`,
- wantErr: `expected ':' after message key`,
- }, {
- in: `foo{}`,
- wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
- wantOut: "foo:{}",
- wantOutBracket: "foo:<>",
- wantOutIndent: "foo: {}\n",
- }, {
- in: `foo {}`,
- wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
- }, {
- in: `foo<>`,
- wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
- }, {
- in: `foo <>`,
- wantVal: V(Msg{{ID("foo"), V(Msg{})}}),
- }, {
- in: `foo[]`,
- wantErr: `expected ':' after message key`,
- }, {
- in: `foo []`,
- wantErr: `expected ':' after message key`,
- }, {
- in: `foo:truebar:true`,
- wantErr: `invalid ":" as identifier`,
- }, {
- in: `foo:"s"bar:true`,
- wantVal: V(Msg{{ID("foo"), V("s")}, {ID("bar"), V(true)}}),
- wantOut: `foo:"s" bar:true`,
- wantOutIndent: "foo: \"s\"\nbar: true\n",
- }, {
- in: `foo:0bar:true`,
- wantErr: `invalid "0bar" as number or bool`,
- }, {
- in: `foo:{}bar:true`,
- wantVal: V(Msg{{ID("foo"), V(Msg{})}, {ID("bar"), V(true)}}),
- wantOut: "foo:{} bar:true",
- wantOutBracket: "foo:<> bar:true",
- wantOutIndent: "foo: {}\nbar: true\n",
- }, {
- in: `foo:[]bar:true`,
- wantVal: V(Msg{{ID("foo"), V(Lst{})}, {ID("bar"), V(true)}}),
- }, {
- in: `foo{bar:true}`,
- wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
- wantOut: "foo:{bar:true}",
- wantOutBracket: "foo:<bar:true>",
- wantOutIndent: "foo: {\n\tbar: true\n}\n",
- }, {
- in: `foo<bar:true>`,
- wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
- }, {
- in: `foo{bar:true,}`,
- wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
- }, {
- in: `foo{bar:true;}`,
- wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(true)}})}}),
- }, {
- in: `foo{`,
- wantErr: `unexpected EOF`,
- }, {
- in: `foo{ `,
- wantErr: `unexpected EOF`,
- }, {
- in: `foo{[`,
- wantErr: `unexpected EOF`,
- }, {
- in: `foo{[ `,
- wantErr: `unexpected EOF`,
- }, {
- in: `foo{bar:true,;}`,
- wantErr: `invalid ";" as identifier`,
- }, {
- in: `foo{bar:true;,}`,
- wantErr: `invalid "," as identifier`,
- }, {
- in: `foo<bar:{}>`,
- wantVal: V(Msg{{ID("foo"), V(Msg{{ID("bar"), V(Msg{})}})}}),
- wantOut: "foo:{bar:{}}",
- wantOutBracket: "foo:<bar:<>>",
- wantOutIndent: "foo: {\n\tbar: {}\n}\n",
- }, {
- in: `foo<bar:{>`,
- wantErr: `invalid character '>', expected '}' at end of message`,
- }, {
- in: `foo<bar:{}`,
- wantErr: `unexpected EOF`,
- }, {
- in: `arr:[]`,
- wantVal: V(Msg{{ID("arr"), V(Lst{})}}),
- wantOut: "arr:[]",
- wantOutBracket: "arr:[]",
- wantOutIndent: "arr: []\n",
- }, {
- in: `arr:[,]`,
- wantErr: `invalid "," as number or bool`,
- }, {
- in: `arr:[0 0]`,
- wantErr: `invalid character '0', expected ']' at end of list`,
- }, {
- in: `arr:["foo" "bar"]`,
- wantVal: V(Msg{{ID("arr"), V(Lst{V("foobar")})}}),
- wantOut: `arr:["foobar"]`,
- wantOutBracket: `arr:["foobar"]`,
- wantOutIndent: "arr: [\n\t\"foobar\"\n]\n",
- }, {
- in: `arr:[0,]`,
- wantErr: `invalid "]" as number or bool`,
- }, {
- in: `arr:[true,0,"",id,[],{}]`,
- wantVal: V(Msg{{ID("arr"), V(Lst{
- V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
- })}}),
- wantOut: `arr:[true,0,"",id,[],{}]`,
- wantOutBracket: `arr:[true,0,"",id,[],<>]`,
- wantOutIndent: "arr: [\n\ttrue,\n\t0,\n\t\"\",\n\tid,\n\t[],\n\t{}\n]\n",
- }, {
- in: S(`arr:[%strue%s,%s0%s,%s""%s,%sid%s,%s[]%s,%s{}%s]`,
- space, space, space, space, space, space, space, space, space, space, space, space),
- wantVal: V(Msg{{ID("arr"), V(Lst{
- V(true), V(uint32(0)), V(""), ID("id"), V(Lst{}), V(Msg{}),
- })}}),
- }, {
- in: `arr:[`,
- wantErr: `unexpected EOF`,
- }, {
- in: `{`,
- wantErr: `invalid "{" as identifier`,
- }, {
- in: `<`,
- wantErr: `invalid "<" as identifier`,
- }, {
- in: `[`,
- wantErr: "unexpected EOF",
- }, {
- in: `}`,
- wantErr: "1 bytes of unconsumed input",
- }, {
- in: `>`,
- wantErr: "1 bytes of unconsumed input",
- }, {
- in: `]`,
- wantErr: `invalid "]" as identifier`,
- }, {
- in: `str: "'"`,
- wantVal: V(Msg{{ID("str"), V(`'`)}}),
- wantOut: `str:"'"`,
- }, {
- in: `str: '"'`,
- wantVal: V(Msg{{ID("str"), V(`"`)}}),
- wantOut: `str:"\""`,
- }, {
- // String that has as few escaped characters as possible.
- in: `str: ` + func() string {
- var b []byte
- for i := 0; i < utf8.RuneSelf; i++ {
- switch i {
- case 0, '\\', '\n', '\'': // these must be escaped, so ignore them
- default:
- b = append(b, byte(i))
- }
- }
- return "'" + string(b) + "'"
- }(),
- wantVal: V(Msg{{ID("str"), V("\x01\x02\x03\x04\x05\x06\a\b\t\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~\u007f")}}),
- wantOut: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
- wantOutASCII: `str:"\x01\x02\x03\x04\x05\x06\x07\x08\t\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_` + "`abcdefghijklmnopqrstuvwxyz{|}~\x7f\"",
- }, {
- in: "str: '\xde\xad\xbe\xef'",
- wantErr: "invalid UTF-8 detected",
- }, {
- // Valid UTF-8 wire encoding, but sub-optimal encoding.
- in: "str: '\xc0\x80'",
- wantErr: "invalid UTF-8 detected",
- }, {
- // Valid UTF-8 wire encoding, but invalid rune (surrogate pair).
- in: "str: '\xed\xa0\x80'",
- wantErr: "invalid UTF-8 detected",
- }, {
- // Valid UTF-8 wire encoding, but invalid rune (above max rune).
- in: "str: '\xf7\xbf\xbf\xbf'",
- wantErr: "invalid UTF-8 detected",
- }, {
- // Valid UTF-8 wire encoding of the RuneError rune.
- in: "str: '\xef\xbf\xbd'",
- wantVal: V(Msg{{ID("str"), V(string(utf8.RuneError))}}),
- wantOut: `str:"` + string(utf8.RuneError) + `"`,
- wantOutASCII: `str:"\ufffd"`,
- }, {
- in: "str: 'hello\u1234world'",
- wantVal: V(Msg{{ID("str"), V("hello\u1234world")}}),
- wantOut: "str:\"hello\u1234world\"",
- wantOutASCII: `str:"hello\u1234world"`,
- }, {
- in: `str: '\"\'\\\?\a\b\n\r\t\v\f\1\12\123\xA\xaB\x12\uAb8f\U0010FFFF'`,
- wantVal: V(Msg{{ID("str"), V("\"'\\?\a\b\n\r\t\v\f\x01\nS\n\xab\x12\uab8f\U0010ffff")}}),
- wantOut: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12` + "\uab8f\U0010ffff" + `"`,
- wantOutASCII: `str:"\"'\\?\x07\x08\n\r\t\x0b\x0c\x01\nS\n\xab\x12\uab8f\U0010ffff"`,
- }, {
- in: `str: '`,
- wantErr: `unexpected EOF`,
- }, {
- in: `str: '\`,
- wantErr: `unexpected EOF`,
- }, {
- in: `str: '\'`,
- wantErr: `unexpected EOF`,
- }, {
- in: `str: '\8'`,
- wantErr: `invalid escape code "\\8" in string`,
- }, {
- in: `str: '\1x'`,
- wantVal: V(Msg{{ID("str"), V("\001x")}}),
- wantOut: `str:"\x01x"`,
- wantOutASCII: `str:"\x01x"`,
- }, {
- in: `str: '\12x'`,
- wantVal: V(Msg{{ID("str"), V("\012x")}}),
- wantOut: `str:"\nx"`,
- wantOutASCII: `str:"\nx"`,
- }, {
- in: `str: '\123x'`,
- wantVal: V(Msg{{ID("str"), V("\123x")}}),
- wantOut: `str:"Sx"`,
- wantOutASCII: `str:"Sx"`,
- }, {
- in: `str: '\1234x'`,
- wantVal: V(Msg{{ID("str"), V("\1234x")}}),
- wantOut: `str:"S4x"`,
- wantOutASCII: `str:"S4x"`,
- }, {
- in: `str: '\1'`,
- wantVal: V(Msg{{ID("str"), V("\001")}}),
- wantOut: `str:"\x01"`,
- wantOutASCII: `str:"\x01"`,
- }, {
- in: `str: '\12'`,
- wantVal: V(Msg{{ID("str"), V("\012")}}),
- wantOut: `str:"\n"`,
- wantOutASCII: `str:"\n"`,
- }, {
- in: `str: '\123'`,
- wantVal: V(Msg{{ID("str"), V("\123")}}),
- wantOut: `str:"S"`,
- wantOutASCII: `str:"S"`,
- }, {
- in: `str: '\1234'`,
- wantVal: V(Msg{{ID("str"), V("\1234")}}),
- wantOut: `str:"S4"`,
- wantOutASCII: `str:"S4"`,
- }, {
- in: `str: '\377'`,
- wantVal: V(Msg{{ID("str"), V("\377")}}),
- wantOut: `str:"\xff"`,
- wantOutASCII: `str:"\xff"`,
- }, {
- // Overflow octal escape.
- in: `str: '\400'`,
- wantErr: `invalid octal escape code "\\400" in string`,
- }, {
- in: `str: '\xfx'`,
- wantVal: V(Msg{{ID("str"), V("\x0fx")}}),
- wantOut: `str:"\x0fx"`,
- wantOutASCII: `str:"\x0fx"`,
- }, {
- in: `str: '\xffx'`,
- wantVal: V(Msg{{ID("str"), V("\xffx")}}),
- wantOut: `str:"\xffx"`,
- wantOutASCII: `str:"\xffx"`,
- }, {
- in: `str: '\xfffx'`,
- wantVal: V(Msg{{ID("str"), V("\xfffx")}}),
- wantOut: `str:"\xfffx"`,
- wantOutASCII: `str:"\xfffx"`,
- }, {
- in: `str: '\xf'`,
- wantVal: V(Msg{{ID("str"), V("\x0f")}}),
- wantOut: `str:"\x0f"`,
- wantOutASCII: `str:"\x0f"`,
- }, {
- in: `str: '\xff'`,
- wantVal: V(Msg{{ID("str"), V("\xff")}}),
- wantOut: `str:"\xff"`,
- wantOutASCII: `str:"\xff"`,
- }, {
- in: `str: '\xfff'`,
- wantVal: V(Msg{{ID("str"), V("\xfff")}}),
- wantOut: `str:"\xfff"`,
- wantOutASCII: `str:"\xfff"`,
- }, {
- in: `str: '\xz'`,
- wantErr: `invalid hex escape code "\\x" in string`,
- }, {
- in: `str: '\uPo'`,
- wantErr: `unexpected EOF`,
- }, {
- in: `str: '\uPoo'`,
- wantErr: `invalid Unicode escape code "\\uPoo'" in string`,
- }, {
- in: `str: '\uPoop'`,
- wantErr: `invalid Unicode escape code "\\uPoop" in string`,
- }, {
- // Unmatched surrogate pair.
- in: `str: '\uDEAD'`,
- wantErr: `unexpected EOF`, // trying to reader other half
- }, {
- // Surrogate pair with invalid other half.
- in: `str: '\uDEAD\u0000'`,
- wantErr: `invalid Unicode escape code "\\u0000" in string`,
- }, {
- // Properly matched surrogate pair.
- in: `str: '\uD800\uDEAD'`,
- wantVal: V(Msg{{ID("str"), V("𐊭")}}),
- wantOut: `str:"𐊭"`,
- wantOutASCII: `str:"\U000102ad"`,
- }, {
- // Overflow on Unicode rune.
- in: `str: '\U00110000'`,
- wantErr: `invalid Unicode escape code "\\U00110000" in string`,
- }, {
- in: `str: '\z'`,
- wantErr: `invalid escape code "\\z" in string`,
- }, {
- // Strings cannot have NUL literal since C-style strings forbid them.
- in: "str: '\x00'",
- wantErr: `invalid character '\x00' in string`,
- }, {
- // Strings cannot have newline literal. The C++ permits them if an
- // option is specified to allow them. In Go, we always forbid them.
- in: "str: '\n'",
- wantErr: `invalid character '\n' in string`,
- }, {
- in: "name: \"My name is \"\n\"elsewhere\"",
- wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
- wantOut: `name:"My name is elsewhere"`,
- wantOutASCII: `name:"My name is elsewhere"`,
- }, {
- in: "name: 'My name is '\n'elsewhere'",
- wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
- }, {
- in: "name: 'My name is '\n\"elsewhere\"",
- wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
- }, {
- in: "name: \"My name is \"\n'elsewhere'",
- wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
- }, {
- in: "name: \"My \"'name '\"is \"\n'elsewhere'",
- wantVal: V(Msg{{ID("name"), V("My name is elsewhere")}}),
- }, {
- in: `crazy:"x'"'\""\''"'z"`,
- wantVal: V(Msg{{ID("crazy"), V(`x'""''z`)}}),
- }, {
- in: `nums: [t,T,true,True,TRUE,f,F,false,False,FALSE]`,
- wantVal: V(Msg{{ID("nums"), V(Lst{
- V(true),
- ID("T"),
- V(true),
- V(true),
- ID("TRUE"),
- V(false),
- ID("F"),
- V(false),
- V(false),
- ID("FALSE"),
- })}}),
- wantOut: "nums:[true,T,true,true,TRUE,false,F,false,false,FALSE]",
- wantOutIndent: "nums: [\n\ttrue,\n\tT,\n\ttrue,\n\ttrue,\n\tTRUE,\n\tfalse,\n\tF,\n\tfalse,\n\tfalse,\n\tFALSE\n]\n",
- }, {
- in: `nums: [nan,inf,-inf,NaN,NAN,Inf,INF]`,
- wantVal: V(Msg{{ID("nums"), V(Lst{
- V(math.NaN()),
- V(math.Inf(+1)),
- V(math.Inf(-1)),
- ID("NaN"),
- ID("NAN"),
- ID("Inf"),
- ID("INF"),
- })}}),
- wantOut: "nums:[nan,inf,-inf,NaN,NAN,Inf,INF]",
- wantOutIndent: "nums: [\n\tnan,\n\tinf,\n\t-inf,\n\tNaN,\n\tNAN,\n\tInf,\n\tINF\n]\n",
- }, {
- // C++ permits this, but we currently reject this.
- in: `num: -nan`,
- wantErr: `invalid "-nan" as number or bool`,
- }, {
- in: `nums: [0,-0,-9876543210,9876543210,0x0,0x0123456789abcdef,-0x0123456789abcdef,01234567,-01234567]`,
- wantVal: V(Msg{{ID("nums"), V(Lst{
- V(uint32(0)),
- V(int32(-0)),
- V(int64(-9876543210)),
- V(uint64(9876543210)),
- V(uint32(0x0)),
- V(uint64(0x0123456789abcdef)),
- V(int64(-0x0123456789abcdef)),
- V(uint64(01234567)),
- V(int64(-01234567)),
- })}}),
- wantOut: "nums:[0,0,-9876543210,9876543210,0,81985529216486895,-81985529216486895,342391,-342391]",
- wantOutIndent: "nums: [\n\t0,\n\t0,\n\t-9876543210,\n\t9876543210,\n\t0,\n\t81985529216486895,\n\t-81985529216486895,\n\t342391,\n\t-342391\n]\n",
- }, {
- in: `nums: [0.,0f,1f,10f,-0f,-1f,-10f,1.0,0.1e-3,1.5e+5,1e10,.0]`,
- wantVal: V(Msg{{ID("nums"), V(Lst{
- V(0.0),
- V(0.0),
- V(1.0),
- V(10.0),
- V(-0.0),
- V(-1.0),
- V(-10.0),
- V(1.0),
- V(0.1e-3),
- V(1.5e+5),
- V(1.0e+10),
- V(0.0),
- })}}),
- wantOut: "nums:[0,0,1,10,0,-1,-10,1,0.0001,150000,1e+10,0]",
- wantOutIndent: "nums: [\n\t0,\n\t0,\n\t1,\n\t10,\n\t0,\n\t-1,\n\t-10,\n\t1,\n\t0.0001,\n\t150000,\n\t1e+10,\n\t0\n]\n",
- }, {
- in: `nums: [0xbeefbeef,0xbeefbeefbeefbeef]`,
- wantVal: V(Msg{{ID("nums"), func() Value {
- if flags.ProtoLegacy {
- return V(Lst{V(int32(-1091584273)), V(int64(-4688318750159552785))})
- } else {
- return V(Lst{V(uint32(0xbeefbeef)), V(uint64(0xbeefbeefbeefbeef))})
- }
- }()}}),
- }, {
- in: `num: +0`,
- wantErr: `invalid "+0" as number or bool`,
- }, {
- in: `num: 01.1234`,
- wantErr: `invalid "01.1234" as number or bool`,
- }, {
- in: `num: 0x`,
- wantErr: `invalid "0x" as number or bool`,
- }, {
- in: `num: 0xX`,
- wantErr: `invalid "0xX" as number or bool`,
- }, {
- in: `num: 0800`,
- wantErr: `invalid "0800" as number or bool`,
- }, {
- in: `num: true.`,
- wantErr: `invalid "true." as number or bool`,
- }, {
- in: `num: .`,
- wantErr: `parsing ".": invalid syntax`,
- }, {
- in: `num: -.`,
- wantErr: `parsing "-.": invalid syntax`,
- }, {
- in: `num: 1e10000`,
- wantErr: `parsing "1e10000": value out of range`,
- }, {
- in: `num: 99999999999999999999`,
- wantErr: `parsing "99999999999999999999": value out of range`,
- }, {
- in: `num: -99999999999999999999`,
- wantErr: `parsing "-99999999999999999999": value out of range`,
- }, {
- in: "x: -",
- wantErr: `syntax error (line 1:5)`,
- }, {
- in: "x:[\"💩\"x",
- wantErr: `syntax error (line 1:7)`,
- }, {
- in: "x:\n\n[\"🔥🔥🔥\"x",
- wantErr: `syntax error (line 3:7)`,
- }, {
- in: "x:[\"👍🏻👍🏿\"x",
- wantErr: `syntax error (line 1:10)`, // multi-rune emojis; could be column:8
- }, {
- in: `
- firstName : "John",
- lastName : "Smith" ,
- isAlive : true,
- age : 27,
- address { # missing colon is okay for messages
- streetAddress : "21 2nd Street" ,
- city : "New York" ,
- state : "NY" ,
- postalCode : "10021-3100" ; # trailing semicolon is okay
- },
- phoneNumbers : [ {
- type : "home" ,
- number : "212 555-1234"
- } , {
- type : "office" ,
- number : "646 555-4567"
- } , {
- type : "mobile" ,
- number : "123 456-7890" , # trailing comma is okay
- } ],
- children : [] ,
- spouse : null`,
- wantVal: V(Msg{
- {ID("firstName"), V("John")},
- {ID("lastName"), V("Smith")},
- {ID("isAlive"), V(true)},
- {ID("age"), V(27.0)},
- {ID("address"), V(Msg{
- {ID("streetAddress"), V("21 2nd Street")},
- {ID("city"), V("New York")},
- {ID("state"), V("NY")},
- {ID("postalCode"), V("10021-3100")},
- })},
- {ID("phoneNumbers"), V([]Value{
- V(Msg{
- {ID("type"), V("home")},
- {ID("number"), V("212 555-1234")},
- }),
- V(Msg{
- {ID("type"), V("office")},
- {ID("number"), V("646 555-4567")},
- }),
- V(Msg{
- {ID("type"), V("mobile")},
- {ID("number"), V("123 456-7890")},
- }),
- })},
- {ID("children"), V([]Value{})},
- {ID("spouse"), V(protoreflect.Name("null"))},
- }),
- wantOut: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:{streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"} phoneNumbers:[{type:"home" number:"212 555-1234"},{type:"office" number:"646 555-4567"},{type:"mobile" number:"123 456-7890"}] children:[] spouse:null`,
- wantOutBracket: `firstName:"John" lastName:"Smith" isAlive:true age:27 address:<streetAddress:"21 2nd Street" city:"New York" state:"NY" postalCode:"10021-3100"> phoneNumbers:[<type:"home" number:"212 555-1234">,<type:"office" number:"646 555-4567">,<type:"mobile" number:"123 456-7890">] children:[] spouse:null`,
- wantOutIndent: `firstName: "John"
-lastName: "Smith"
-isAlive: true
-age: 27
-address: {
- streetAddress: "21 2nd Street"
- city: "New York"
- state: "NY"
- postalCode: "10021-3100"
-}
-phoneNumbers: [
- {
- type: "home"
- number: "212 555-1234"
- },
- {
- type: "office"
- number: "646 555-4567"
- },
- {
- type: "mobile"
- number: "123 456-7890"
- }
-]
-children: []
-spouse: null
-`,
- }}
-
- opts := cmp.Options{
- cmpopts.EquateEmpty(),
-
- // Transform composites (List and Message).
- cmp.FilterValues(func(x, y Value) bool {
- return (x.Type() == List && y.Type() == List) || (x.Type() == Message && y.Type() == Message)
- }, cmp.Transformer("", func(v Value) interface{} {
- if v.Type() == List {
- return v.List()
- } else {
- return v.Message()
- }
- })),
-
- // Compare scalars (Bool, Int, Uint, Float, String, Name).
- cmp.FilterValues(func(x, y Value) bool {
- return !(x.Type() == List && y.Type() == List) && !(x.Type() == Message && y.Type() == Message)
- }, cmp.Comparer(func(x, y Value) bool {
- if x.Type() == List || x.Type() == Message || y.Type() == List || y.Type() == Message {
- return false
- }
- // Ensure golden value is always in x variable.
- if len(x.raw) > 0 {
- x, y = y, x
- }
- switch x.Type() {
- case Bool:
- want, _ := x.Bool()
- got, ok := y.Bool()
- return got == want && ok
- case Int:
- want, _ := x.Int(true)
- got, ok := y.Int(want < math.MinInt32 || math.MaxInt32 < want)
- return got == want && ok
- case Uint:
- want, _ := x.Uint(true)
- got, ok := y.Uint(math.MaxUint32 < want)
- return got == want && ok
- case Float32, Float64:
- want, _ := x.Float(true)
- got, ok := y.Float(math.MaxFloat32 < math.Abs(want))
- if math.IsNaN(got) || math.IsNaN(want) {
- return math.IsNaN(got) == math.IsNaN(want)
- }
- return got == want && ok
- case Name:
- want, _ := x.Name()
- got, ok := y.Name()
- return got == want && ok
- default:
- return x.String() == y.String()
- }
- })),
- }
- for _, tt := range tests {
- t.Run("", func(t *testing.T) {
- if tt.in != "" || tt.wantVal.Type() != 0 || tt.wantErr != "" {
- gotVal, err := Unmarshal([]byte(tt.in))
- if err == nil {
- if tt.wantErr != "" {
- t.Errorf("Unmarshal(): got nil error, want %v", tt.wantErr)
- }
- } else {
- if tt.wantErr == "" {
- t.Errorf("Unmarshal(): got %v, want nil error", err)
- } else if !strings.Contains(err.Error(), tt.wantErr) {
- t.Errorf("Unmarshal(): got %v, want %v", err, tt.wantErr)
- }
- }
- if diff := cmp.Diff(gotVal, tt.wantVal, opts); diff != "" {
- t.Errorf("Unmarshal(): output mismatch (-got +want):\n%s", diff)
- }
- }
- if tt.wantOut != "" {
- gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, false)
- if err != nil {
- t.Errorf("Marshal(): got %v, want nil error", err)
- }
- if string(gotOut) != tt.wantOut {
- t.Errorf("Marshal():\ngot: %s\nwant: %s", gotOut, tt.wantOut)
- }
- }
- if tt.wantOutBracket != "" {
- gotOut, err := Marshal(tt.wantVal, "", [2]byte{'<', '>'}, false)
- if err != nil {
- t.Errorf("Marshal(Bracket): got %v, want nil error", err)
- }
- if string(gotOut) != tt.wantOutBracket {
- t.Errorf("Marshal(Bracket):\ngot: %s\nwant: %s", gotOut, tt.wantOutBracket)
- }
- }
- if tt.wantOutASCII != "" {
- gotOut, err := Marshal(tt.wantVal, "", [2]byte{0, 0}, true)
- if err != nil {
- t.Errorf("Marshal(ASCII): got %v, want nil error", err)
- }
- if string(gotOut) != tt.wantOutASCII {
- t.Errorf("Marshal(ASCII):\ngot: %s\nwant: %s", gotOut, tt.wantOutASCII)
- }
- }
- if tt.wantOutIndent != "" {
- gotOut, err := Marshal(tt.wantVal, "\t", [2]byte{0, 0}, false)
- if err != nil {
- t.Errorf("Marshal(Indent): got %v, want nil error", err)
- }
- if string(gotOut) != tt.wantOutIndent {
- t.Errorf("Marshal(Indent):\ngot: %s\nwant: %s", gotOut, tt.wantOutIndent)
- }
- }
- })
- }
-}
diff --git a/internal/encoding/text/value.go b/internal/encoding/text/value.go
deleted file mode 100644
index f98dbcb..0000000
--- a/internal/encoding/text/value.go
+++ /dev/null
@@ -1,334 +0,0 @@
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package text implements the text format for protocol buffers.
-// This package has no semantic understanding for protocol buffers and is only
-// a parser and composer for the format.
-//
-// There is no formal specification for the protobuf text format, as such the
-// C++ implementation (see google::protobuf::TextFormat) is the reference
-// implementation of the text format.
-//
-// This package is neither a superset nor a subset of the C++ implementation.
-// This implementation permits a more liberal grammar in some cases to be
-// backwards compatible with the historical Go implementation.
-// Future parsings unique to Go should not be added.
-// Some grammars allowed by the C++ implementation are deliberately
-// not implemented here because they are considered a bug by the protobuf team
-// and should not be replicated.
-//
-// The Go implementation should implement a sufficient amount of the C++
-// grammar such that the default text serialization by C++ can be parsed by Go.
-// However, just because the C++ parser accepts some input does not mean that
-// the Go implementation should as well.
-//
-// The text format is almost a superset of JSON except:
-// * message keys are not quoted strings, but identifiers
-// * the top-level value must be a message without the delimiters
-package text
-
-import (
- "fmt"
- "math"
- "strings"
-
- "google.golang.org/protobuf/internal/flags"
- "google.golang.org/protobuf/reflect/protoreflect"
-)
-
-// Type represents a type expressible in the text format.
-type Type uint8
-
-const (
- _ Type = iota
-
- // Bool is a boolean (e.g., "true" or "false").
- Bool
- // Int is a signed integer (e.g., "-1423").
- Int
- // Uint is an unsigned integer (e.g., "0xdeadbeef").
- Uint
- // Float32 is a 32-bit floating-point number (e.g., "1.234" or "1e38").
- // This allows encoding to differentiate the bitsize used for formatting.
- Float32
- // Float64 is a 64-bit floating-point number.
- Float64
- // String is a quoted string (e.g., `"the quick brown fox"`).
- String
- // Name is a protocol buffer identifier (e.g., `field_name`).
- Name
- // List is an ordered list of values (e.g., `[0, "one", true]`).
- List
- // Message is an ordered map of values (e.g., `{"key": null}`).
- Message
-)
-
-func (t Type) String() string {
- switch t {
- case Bool:
- return "bool"
- case Int:
- return "int"
- case Uint:
- return "uint"
- case Float32:
- return "float32"
- case Float64:
- return "float64"
- case String:
- return "string"
- case Name:
- return "name"
- case List:
- return "list"
- case Message:
- return "message"
- default:
- return "<invalid>"
- }
-}
-
-// Value contains a value of a given Type.
-type Value struct {
- typ Type
- raw []byte // raw bytes of the serialized data
- str string // only for String or Name
- num uint64 // only for Bool, Int, Uint, Float32, or Float64
- arr []Value // only for List
- obj [][2]Value // only for Message
-}
-
-// ValueOf returns a Value for a given Go value:
-// bool => Bool
-// int32, int64 => Int
-// uint32, uint64 => Uint
-// float32 => Float32
-// float64 => Float64
-// string, []byte => String
-// protoreflect.Name => Name
-// []Value => List
-// [][2]Value => Message
-//
-// ValueOf panics if the Go type is not one of the above.
-func ValueOf(v interface{}) Value {
- switch v := v.(type) {
- case bool:
- if v {
- return Value{typ: Bool, num: 1}
- } else {
- return Value{typ: Bool, num: 0}
- }
- case int32:
- return Value{typ: Int, num: uint64(v)}
- case int64:
- return Value{typ: Int, num: uint64(v)}
- case uint32:
- return Value{typ: Uint, num: uint64(v)}
- case uint64:
- return Value{typ: Uint, num: uint64(v)}
- case float32:
- // Store as float64 bits.
- return Value{typ: Float32, num: math.Float64bits(float64(v))}
- case float64:
- return Value{typ: Float64, num: math.Float64bits(float64(v))}
- case string:
- return Value{typ: String, str: string(v)}
- case []byte:
- return Value{typ: String, str: string(v)}
- case protoreflect.Name:
- return Value{typ: Name, str: string(v)}
- case []Value:
- return Value{typ: List, arr: v}
- case [][2]Value:
- return Value{typ: Message, obj: v}
- default:
- panic(fmt.Sprintf("invalid type %T", v))
- }
-}
-func rawValueOf(v interface{}, raw []byte) Value {
- v2 := ValueOf(v)
- v2.raw = raw
- return v2
-}
-
-// Type is the type of the value. When parsing, this is a best-effort guess
-// at the resulting type. However, there are ambiguities as to the exact type
-// of the value (e.g., "false" is either a bool or a name).
-// Thus, some of the types are convertible with each other.
-// The Bool, Int, Uint, Float32, Float64, and Name methods return a boolean to
-// report whether the conversion was successful.
-func (v Value) Type() Type {
- return v.typ
-}
-
-// Bool returns v as a bool and reports whether the conversion succeeded.
-func (v Value) Bool() (x bool, ok bool) {
- switch v.typ {
- case Bool:
- return v.num > 0, true
- case Uint, Int:
- // C++ allows a 1-bit unsigned integer (e.g., "0", "1", or "0x1").
- if len(v.raw) > 0 && v.raw[0] != '-' && v.num < 2 {
- return v.num > 0, true
- }
- }
- return false, false
-}
-
-// Int returns v as an int64 of the specified precision and reports whether
-// the conversion succeeded.
-func (v Value) Int(b64 bool) (x int64, ok bool) {
- switch v.typ {
- case Int:
- n := int64(v.num)
- if b64 || (math.MinInt32 <= n && n <= math.MaxInt32) {
- return int64(n), true
- }
- case Uint:
- n := uint64(v.num)
- if (!b64 && n <= math.MaxInt32) || (b64 && n <= math.MaxInt64) {
- return int64(n), true
- }
- // C++ accepts large positive hex numbers as negative values.
- // This feature is here for proto1 backwards compatibility purposes.
- if flags.ProtoLegacy && len(v.raw) > 1 && v.raw[0] == '0' && v.raw[1] == 'x' {
- if !b64 {
- return int64(int32(n)), n <= math.MaxUint32
- }
- // if !b64 && n <= math.MaxUint32 {
- // return int64(int32(n)), true
- // }
- return int64(n), true
- }
- }
- return 0, false
-}
-
-// Uint returns v as an uint64 of the specified precision and reports whether
-// the conversion succeeded.
-func (v Value) Uint(b64 bool) (x uint64, ok bool) {
- switch v.typ {
- case Int:
- n := int64(v.num)
- if len(v.raw) > 0 && v.raw[0] != '-' && (b64 || n <= math.MaxUint32) {
- return uint64(n), true
- }
- case Uint:
- n := uint64(v.num)
- if b64 || n <= math.MaxUint32 {
- return uint64(n), true
- }
- }
- return 0, false
-}
-
-// Float returns v as a float64 of the specified precision and reports whether
-// the conversion succeeded.
-func (v Value) Float(b64 bool) (x float64, ok bool) {
- switch v.typ {
- case Int:
- return float64(int64(v.num)), true // possibly lossy, but allowed
- case Uint:
- return float64(uint64(v.num)), true // possibly lossy, but allowed
- case Float32, Float64:
- n := math.Float64frombits(v.num)
- if math.IsNaN(n) || math.IsInf(n, 0) {
- return float64(n), true
- }
- if b64 || math.Abs(n) <= math.MaxFloat32 {
- return float64(n), true
- }
- }
- return 0, false
-}
-
-// String returns v as a string if the Type is String.
-// Otherwise, this returns a formatted string of v for debugging purposes.
-//
-// Since String is used to represent both text and binary, it is not validated
-// to contain valid UTF-8. When using this value with the string type in proto,
-// it is the user's responsibility perform additional UTF-8 validation.
-func (v Value) String() string {
- if v.typ != String {
- return v.stringValue()
- }
- return v.str
-}
-func (v Value) stringValue() string {
- switch v.typ {
- case Bool, Int, Uint, Float32, Float64, Name:
- return string(v.Raw())
- case List:
- var ss []string
- for _, v := range v.List() {
- ss = append(ss, v.String())
- }
- return "[" + strings.Join(ss, ",") + "]"
- case Message:
- var ss []string
- for _, v := range v.Message() {
- k := v[0].String()
- if v[0].Type() == String {
- k = "[" + k + "]"
- }
- ss = append(ss, k+":"+v[1].String())
- }
- return "{" + strings.Join(ss, ",") + "}"
- default:
- return "<invalid>"
- }
-}
-
-// Name returns the field name or enum value name and reports whether the value
-// can be treated as an identifier.
-func (v Value) Name() (protoreflect.Name, bool) {
- switch v.typ {
- case Bool, Float32, Float64:
- // Ambiguity arises in unmarshalValue since "nan" may interpreted as
- // either a Name type (for enum values) or a Float32/Float64 type.
- // Similarly, "true" may be interpreted as either a Name or Bool type.
- n := protoreflect.Name(v.raw)
- if n.IsValid() {
- return n, true
- }
- case Name:
- return protoreflect.Name(v.str), true
- }
- return "", false
-}
-
-// List returns the elements of v and panics if the Type is not List.
-// Mutations on the return value may not be observable from the Raw method.
-func (v Value) List() []Value {
- if v.typ != List {
- panic("value is not a list")
- }
- return v.arr
-}
-
-// Message returns the items of v and panics if the Type is not Message.
-// The [2]Value represents a key and value pair, where the key is either
-// a Name (representing a field name), a String (representing extension field
-// names or the Any type URL), or an Uint for unknown fields.
-//
-// Mutations on the return value may not be observable from the Raw method.
-func (v Value) Message() [][2]Value {
- if v.typ != Message {
- panic("value is not a message")
- }
- return v.obj
-}
-
-// Raw returns the raw representation of the value.
-// The returned value may alias the input given to Unmarshal.
-func (v Value) Raw() []byte {
- if len(v.raw) > 0 {
- return v.raw
- }
- p := encoder{}
- if err := p.marshalValue(v); err != nil {
- return []byte("<invalid>")
- }
- return p.out
-}