encoding/prototext: allow whitespace and comments between minus sign and number in negative numeric literal

The text format specification[1] indicates that whitespace and comments
may appear after a minus sign and before the subsequent numeric component
in negative number literals. But the Go implementation does not allow
this.

This brings the Go implementation info conformance with this aspect.

Fixes golang/protobuf#1526

[1] https://protobuf.dev/reference/protobuf/textformat-spec/#parsing

Change-Id: I3996c89ee9d37cf2b7502fc6736d6e2ed6dbcf43
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/473015
Reviewed-by: Lasse Folger <lassefolger@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/encoding/prototext/decode_test.go b/encoding/prototext/decode_test.go
index eed64db..3597d9b 100644
--- a/encoding/prototext/decode_test.go
+++ b/encoding/prototext/decode_test.go
@@ -127,9 +127,9 @@
 opt_uint32: 0xff
 opt_uint64: 0xdeadbeef
 opt_sint32: -1001
-opt_sint64: -0xffff
+opt_sint64: -   0xffff
 opt_fixed64: 64
-opt_sfixed32: -32
+opt_sfixed32: -		32
 opt_float: 1.234
 opt_double: 1.23e+100
 opt_bytes: "\xe8\xb0\xb7\xe6\xad\x8c"
@@ -164,7 +164,8 @@
 s_uint32: 0xff
 s_uint64: 0xdeadbeef
 s_sint32: -1001
-s_sint64: -0xffff
+s_sint64: -  #
+             0xffff
 s_fixed64: 64
 s_sfixed32: -32
 s_float: 1.234
diff --git a/internal/encoding/text/decode.go b/internal/encoding/text/decode.go
index 427c62d..87853e7 100644
--- a/internal/encoding/text/decode.go
+++ b/internal/encoding/text/decode.go
@@ -412,12 +412,13 @@
 	// Field number. Identify if input is a valid number that is not negative
 	// and is decimal integer within 32-bit range.
 	if num := parseNumber(d.in); num.size > 0 {
+		str := num.string(d.in)
 		if !num.neg && num.kind == numDec {
-			if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil {
+			if _, err := strconv.ParseInt(str, 10, 32); err == nil {
 				return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil
 			}
 		}
-		return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size])
+		return Token{}, d.newSyntaxError("invalid field number: %s", str)
 	}
 
 	return Token{}, d.newSyntaxError("invalid field name: %s", errId(d.in))
diff --git a/internal/encoding/text/decode_number.go b/internal/encoding/text/decode_number.go
index 81a5d8c..3dc8e97 100644
--- a/internal/encoding/text/decode_number.go
+++ b/internal/encoding/text/decode_number.go
@@ -15,17 +15,12 @@
 	if num.neg {
 		numAttrs |= isNegative
 	}
-	strSize := num.size
-	last := num.size - 1
-	if num.kind == numFloat && (d.in[last] == 'f' || d.in[last] == 'F') {
-		strSize = last
-	}
 	tok := Token{
 		kind:     Scalar,
 		attrs:    numberValue,
 		pos:      len(d.orig) - len(d.in),
 		raw:      d.in[:num.size],
-		str:      string(d.in[:strSize]),
+		str:      num.string(d.in),
 		numAttrs: numAttrs,
 	}
 	d.consume(num.size)
@@ -46,6 +41,27 @@
 	kind uint8
 	neg  bool
 	size int
+	// if neg, this is the length of whitespace and comments between
+	// the minus sign and the rest fo the number literal
+	sep int
+}
+
+func (num number) string(data []byte) string {
+	strSize := num.size
+	last := num.size - 1
+	if num.kind == numFloat && (data[last] == 'f' || data[last] == 'F') {
+		strSize = last
+	}
+	if num.neg && num.sep > 0 {
+		// strip whitespace/comments between negative sign and the rest
+		strLen := strSize - num.sep
+		str := make([]byte, strLen)
+		str[0] = data[0]
+		copy(str[1:], data[num.sep+1:strSize])
+		return string(str)
+	}
+	return string(data[:strSize])
+
 }
 
 // parseNumber constructs a number object from given input. It allows for the
@@ -67,6 +83,7 @@
 	}
 
 	// Optional -
+	var sep int
 	if s[0] == '-' {
 		neg = true
 		s = s[1:]
@@ -74,12 +91,14 @@
 		if len(s) == 0 {
 			return number{}
 		}
+		// Consume any whitespace or comments between the
+		// negative sign and the rest of the number
+		lenBefore := len(s)
+		s = consume(s, 0)
+		sep = lenBefore - len(s)
+		size += sep
 	}
 
-	// C++ allows for whitespace and comments in between the negative sign and
-	// the rest of the number. This logic currently does not but is consistent
-	// with v1.
-
 	switch {
 	case s[0] == '0':
 		if len(s) > 1 {
@@ -116,7 +135,7 @@
 				if len(s) > 0 && !isDelim(s[0]) {
 					return number{}
 				}
-				return number{kind: kind, neg: neg, size: size}
+				return number{kind: kind, neg: neg, size: size, sep: sep}
 			}
 		}
 		s = s[1:]
@@ -188,5 +207,5 @@
 		return number{}
 	}
 
-	return number{kind: kind, neg: neg, size: size}
+	return number{kind: kind, neg: neg, size: size, sep: sep}
 }
diff --git a/internal/encoding/text/decode_test.go b/internal/encoding/text/decode_test.go
index a7dbad7..528b1bc 100644
--- a/internal/encoding/text/decode_test.go
+++ b/internal/encoding/text/decode_test.go
@@ -556,6 +556,14 @@
 			want: []R{{E: "invalid field number: -123"}},
 		},
 		{
+			in:   "- \t 123.321e6",
+			want: []R{{E: "invalid field number: -123.321e6"}},
+		},
+		{
+			in:   "- # negative\n 123",
+			want: []R{{E: "invalid field number: -123"}},
+		},
+		{
 			// Field number > math.MaxInt32.
 			in:   "2147483648:",
 			want: []R{{E: "invalid field number: 2147483648"}},