|  | // Copyright 2018 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | package text | 
|  |  | 
|  | import ( | 
|  | "bytes" | 
|  | "io" | 
|  | "regexp" | 
|  | "unicode/utf8" | 
|  |  | 
|  | "github.com/golang/protobuf/v2/internal/errors" | 
|  | "github.com/golang/protobuf/v2/reflect/protoreflect" | 
|  | ) | 
|  |  | 
|  | type syntaxError struct{ error } | 
|  |  | 
|  | func newSyntaxError(f string, x ...interface{}) error { | 
|  | return syntaxError{errors.New(f, x...)} | 
|  | } | 
|  |  | 
|  | // Unmarshal parses b as the proto text format. | 
|  | // It returns a Value, which is always of the Message type. | 
|  | func Unmarshal(b []byte) (Value, error) { | 
|  | p := decoder{in: b} | 
|  | p.consume(0) // trim leading spaces or comments | 
|  | v, err := p.unmarshalMessage(false) | 
|  | if !p.nerr.Merge(err) { | 
|  | if e, ok := err.(syntaxError); ok { | 
|  | b = b[:len(b)-len(p.in)] // consumed input | 
|  | line := bytes.Count(b, []byte("\n")) + 1 | 
|  | if i := bytes.LastIndexByte(b, '\n'); i >= 0 { | 
|  | b = b[i+1:] | 
|  | } | 
|  | column := utf8.RuneCount(b) + 1 // ignore multi-rune characters | 
|  | err = errors.New("syntax error (line %d:%d): %v", line, column, e.error) | 
|  | } | 
|  | return Value{}, err | 
|  | } | 
|  | if len(p.in) > 0 { | 
|  | return Value{}, errors.New("%d bytes of unconsumed input", len(p.in)) | 
|  | } | 
|  | return v, p.nerr.E | 
|  | } | 
|  |  | 
// decoder carries the state of one Unmarshal call. Its methods parse from
// the front of in and shrink it as input is consumed.
type decoder struct {
	nerr errors.NonFatal // errors passed to Merge while decoding; Unmarshal returns nerr.E on success
	in   []byte          // input that has not been consumed yet
}
|  |  | 
|  | func (p *decoder) unmarshalList() (Value, error) { | 
|  | b := p.in | 
|  | var elems []Value | 
|  | if err := p.consumeChar('[', "at start of list"); err != nil { | 
|  | return Value{}, err | 
|  | } | 
|  | if len(p.in) > 0 && p.in[0] != ']' { | 
|  | for len(p.in) > 0 { | 
|  | v, err := p.unmarshalValue() | 
|  | if !p.nerr.Merge(err) { | 
|  | return Value{}, err | 
|  | } | 
|  | elems = append(elems, v) | 
|  | if !p.tryConsumeChar(',') { | 
|  | break | 
|  | } | 
|  | } | 
|  | } | 
|  | if err := p.consumeChar(']', "at end of list"); err != nil { | 
|  | return Value{}, err | 
|  | } | 
|  | b = b[:len(b)-len(p.in)] | 
|  | return rawValueOf(elems, b[:len(b):len(b)]), nil | 
|  | } | 
|  |  | 
|  | func (p *decoder) unmarshalMessage(checkDelims bool) (Value, error) { | 
|  | b := p.in | 
|  | var items [][2]Value | 
|  | delims := [2]byte{'{', '}'} | 
|  | if len(p.in) > 0 && p.in[0] == '<' { | 
|  | delims = [2]byte{'<', '>'} | 
|  | } | 
|  | if checkDelims { | 
|  | if err := p.consumeChar(delims[0], "at start of message"); err != nil { | 
|  | return Value{}, err | 
|  | } | 
|  | } | 
|  | for len(p.in) > 0 { | 
|  | if p.in[0] == '}' || p.in[0] == '>' { | 
|  | break | 
|  | } | 
|  | k, err := p.unmarshalKey() | 
|  | if !p.nerr.Merge(err) { | 
|  | return Value{}, err | 
|  | } | 
|  | if !p.tryConsumeChar(':') && len(p.in) > 0 && p.in[0] != '{' && p.in[0] != '<' { | 
|  | return Value{}, newSyntaxError("expected ':' after message key") | 
|  | } | 
|  | v, err := p.unmarshalValue() | 
|  | if !p.nerr.Merge(err) { | 
|  | return Value{}, err | 
|  | } | 
|  | if p.tryConsumeChar(';') || p.tryConsumeChar(',') { | 
|  | // always optional | 
|  | } | 
|  | items = append(items, [2]Value{k, v}) | 
|  | } | 
|  | if checkDelims { | 
|  | if err := p.consumeChar(delims[1], "at end of message"); err != nil { | 
|  | return Value{}, err | 
|  | } | 
|  | } | 
|  | b = b[:len(b)-len(p.in)] | 
|  | return rawValueOf(items, b[:len(b):len(b)]), nil | 
|  | } | 
|  |  | 
// urlRegexp matches the unquoted name that unmarshalKey accepts inside a
// bracketed key: one or more segments made of ASCII letters, digits, '-' or
// '_', joined by '.' or '/'.
//
// This expression is more liberal than ConsumeAnyTypeUrl in C++.
// However, the C++ parser does not handle many legal URL strings.
// The Go implementation is more liberal to be backwards compatible with
// the historical Go implementation which was overly liberal (and buggy).
var urlRegexp = regexp.MustCompile(`^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`)
|  |  | 
|  | // unmarshalKey parses the key, which may be a Name, String, or Uint. | 
|  | func (p *decoder) unmarshalKey() (v Value, err error) { | 
|  | if p.tryConsumeChar('[') { | 
|  | if len(p.in) == 0 { | 
|  | return Value{}, io.ErrUnexpectedEOF | 
|  | } | 
|  | if p.in[0] == '\'' || p.in[0] == '"' { | 
|  | // Historically, Go's parser allowed a string for the Any type URL. | 
|  | // This is specific to Go and contrary to the C++ implementation, | 
|  | // which does not support strings for the Any type URL. | 
|  | v, err = p.unmarshalString() | 
|  | if !p.nerr.Merge(err) { | 
|  | return Value{}, err | 
|  | } | 
|  | } else if n := matchWithDelim(urlRegexp, p.in); n > 0 { | 
|  | v = rawValueOf(string(p.in[:n]), p.in[:n:n]) | 
|  | p.consume(n) | 
|  | } else { | 
|  | return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in)) | 
|  | } | 
|  | if err := p.consumeChar(']', "at end of extension name"); err != nil { | 
|  | return Value{}, err | 
|  | } | 
|  | return v, nil | 
|  | } | 
|  | if matchWithDelim(intRegexp, p.in) > 0 && p.in[0] != '-' { | 
|  | return p.unmarshalNumber() | 
|  | } | 
|  | return p.unmarshalName() | 
|  | } | 
|  |  | 
|  | func (p *decoder) unmarshalValue() (Value, error) { | 
|  | if len(p.in) == 0 { | 
|  | return Value{}, io.ErrUnexpectedEOF | 
|  | } | 
|  | switch p.in[0] { | 
|  | case '"', '\'': | 
|  | return p.unmarshalStrings() | 
|  | case '[': | 
|  | return p.unmarshalList() | 
|  | case '{', '<': | 
|  | return p.unmarshalMessage(true) | 
|  | default: | 
|  | n := matchWithDelim(nameRegexp, p.in) // zero if no match | 
|  | if n > 0 && literals[string(p.in[:n])] == nil { | 
|  | return p.unmarshalName() | 
|  | } | 
|  | return p.unmarshalNumber() | 
|  | } | 
|  | } | 
|  |  | 
// nameRegexp matches all valid proto identifiers at the start of the input:
// an ASCII letter or '_' followed by any number of ASCII letters, digits,
// or '_'.
var nameRegexp = regexp.MustCompile(`^[_a-zA-Z][_a-zA-Z0-9]*`)
|  |  | 
|  | // unmarshalName unmarshals an unquoted identifier. | 
|  | // | 
|  | // E.g., `field_name` => ValueOf(protoreflect.Name("field_name")) | 
|  | func (p *decoder) unmarshalName() (Value, error) { | 
|  | if n := matchWithDelim(nameRegexp, p.in); n > 0 { | 
|  | v := rawValueOf(protoreflect.Name(p.in[:n]), p.in[:n:n]) | 
|  | p.consume(n) | 
|  | return v, nil | 
|  | } | 
|  | return Value{}, newSyntaxError("invalid %q as identifier", errRegexp.Find(p.in)) | 
|  | } | 
|  |  | 
|  | func (p *decoder) consumeChar(c byte, msg string) error { | 
|  | if p.tryConsumeChar(c) { | 
|  | return nil | 
|  | } | 
|  | if len(p.in) == 0 { | 
|  | return io.ErrUnexpectedEOF | 
|  | } | 
|  | return newSyntaxError("invalid character %q, expected %q %s", p.in[0], c, msg) | 
|  | } | 
|  |  | 
|  | func (p *decoder) tryConsumeChar(c byte) bool { | 
|  | if len(p.in) > 0 && p.in[0] == c { | 
|  | p.consume(1) | 
|  | return true | 
|  | } | 
|  | return false | 
|  | } | 
|  |  | 
|  | // consume consumes n bytes of input and any subsequent whitespace or comments. | 
|  | func (p *decoder) consume(n int) { | 
|  | p.in = p.in[n:] | 
|  | for len(p.in) > 0 { | 
|  | switch p.in[0] { | 
|  | case ' ', '\n', '\r', '\t': | 
|  | p.in = p.in[1:] | 
|  | case '#': | 
|  | if i := bytes.IndexByte(p.in, '\n'); i >= 0 { | 
|  | p.in = p.in[i+len("\n"):] | 
|  | } else { | 
|  | p.in = nil | 
|  | } | 
|  | default: | 
|  | return | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
// errRegexp extracts the offending text quoted in error messages: any
// sequence that looks like a non-delimiter (1 to 32 ASCII letters, digits,
// or any of "-+._"), or otherwise a single character.
var errRegexp = regexp.MustCompile("^([-+._a-zA-Z0-9]{1,32}|.)")
|  |  | 
// matchWithDelim matches r with the input b and verifies that the match
// terminates with a delimiter of some form (e.g., r"[^-+_.a-zA-Z0-9]").
// As a special case, EOF is considered a delimiter.
//
// It returns the length of the match, or zero if r does not match or the
// match is immediately followed by a non-delimiter byte.
func matchWithDelim(r *regexp.Regexp, b []byte) int {
	n := len(r.Find(b))
	if n >= len(b) {
		return n // the match runs up to EOF
	}
	// The byte right after the match must be a delimiter.
	switch c := b[n]; {
	case c == '-', c == '+', c == '.', c == '_',
		'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9':
		return 0
	}
	return n
}