internal/encoding/{json,text}: improve string parsing

Previously, calls to indexNeedEscape with a type conversion from []byte
to string incurred an allocation.

Make 2 different calls instead, one for string and one for bytes.

Converting string to []byte does not incur an extra allocation;
however, the benchmark results still show it to be slower by ~3% for
textpb and 6+% for jsonpb, hence the decision to go with 2 separate
calls instead.

Results over current head:
name          old time/op    new time/op    delta
TextEncode-4    18.1ms ± 2%    18.3ms ± 2%     ~     (p=0.065 n=10+9)
TextDecode-4     233ms ± 3%     102ms ± 1%  -56.34%  (p=0.000 n=9+10)
JSONEncode-4    10.4ms ± 2%    10.5ms ± 0%   +0.56%  (p=0.019 n=9+9)
JSONDecode-4     870ms ± 2%     354ms ± 4%  -59.33%  (p=0.000 n=9+10)

name          old alloc/op   new alloc/op   delta
TextEncode-4    28.9MB ± 0%    28.9MB ± 0%   +0.00%  (p=0.000 n=10+9)
TextDecode-4    1.16GB ± 0%    0.03GB ± 0%  -97.44%  (p=0.000 n=9+10)
JSONEncode-4    3.94MB ± 0%    3.94MB ± 0%   +0.00%  (p=0.000 n=10+10)
JSONDecode-4    3.35GB ± 0%    0.01GB ± 0%  -99.83%  (p=0.000 n=10+10)

name          old allocs/op  new allocs/op  delta
TextEncode-4     73.5k ± 0%     73.5k ± 0%     ~     (all equal)
TextDecode-4      278k ± 0%      255k ± 0%   -8.26%  (p=0.000 n=9+10)
JSONEncode-4     63.8k ± 0%     63.8k ± 0%     ~     (all equal)
JSONDecode-4      247k ± 0%      210k ± 0%  -14.92%  (p=0.000 n=10+10)

Change-Id: Ibc64e9a7827ec1fffa213eb79f60497950203700
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/172239
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/internal/encoding/json/string.go b/internal/encoding/json/string.go
index fbfb09f..7f70cff 100644
--- a/internal/encoding/json/string.go
+++ b/internal/encoding/json/string.go
@@ -18,7 +18,7 @@
 func appendString(out []byte, in string) ([]byte, error) {
 	var nerr errors.NonFatal
 	out = append(out, '"')
-	i := indexNeedEscape(in)
+	i := indexNeedEscapeInString(in)
 	in, out = in[i:], append(out, in[:i]...)
 	for len(in) > 0 {
 		switch r, n := utf8.DecodeRuneInString(in); {
@@ -47,7 +47,7 @@
 			}
 			in = in[n:]
 		default:
-			i := indexNeedEscape(in[n:])
+			i := indexNeedEscapeInString(in[n:])
 			in, out = in[n+i:], append(out, in[:n+i]...)
 		}
 	}
@@ -65,7 +65,7 @@
 		return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
 	}
 	in = in[1:]
-	i := indexNeedEscape(string(in))
+	i := indexNeedEscapeInBytes(in)
 	in, out := in[i:], in[:i:i] // set cap to prevent mutations
 	for len(in) > 0 {
 		switch r, n := utf8.DecodeRune(in); {
@@ -123,16 +123,16 @@
 				return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
 			}
 		default:
-			i := indexNeedEscape(string(in[n:]))
+			i := indexNeedEscapeInBytes(in[n:])
 			in, out = in[n+i:], append(out, in[:n+i]...)
 		}
 	}
 	return "", 0, io.ErrUnexpectedEOF
 }
 
-// indexNeedEscape returns the index of the next character that needs escaping.
-// If no characters need escaping, this returns the input length.
-func indexNeedEscape(s string) int {
+// indexNeedEscapeInString returns the index of the first character that needs
+// escaping. If no characters need escaping, this returns the input length.
+func indexNeedEscapeInString(s string) int {
 	for i, r := range s {
 		if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
 			return i
@@ -140,3 +140,18 @@
 	}
 	return len(s)
 }
+
+// indexNeedEscapeInBytes returns the index of the first character that needs
+// escaping. If no characters need escaping, this returns the input length.
+// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
+// resolved.
+func indexNeedEscapeInBytes(b []byte) int {
+	for i := 0; i < len(b); {
+		r, n := utf8.DecodeRune(b[i:])
+		if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
+			return i
+		}
+		i += n
+	}
+	return len(b)
+}
diff --git a/internal/encoding/text/string.go b/internal/encoding/text/string.go
index 0274295..6a0b730 100644
--- a/internal/encoding/text/string.go
+++ b/internal/encoding/text/string.go
@@ -33,7 +33,7 @@
 	in := v.String()
 
 	out = append(out, '"')
-	i := indexNeedEscape(in)
+	i := indexNeedEscapeInString(in)
 	in, out = in[i:], append(out, in[:i]...)
 	for len(in) > 0 {
 		switch r, n := utf8.DecodeRuneInString(in); {
@@ -72,7 +72,7 @@
 			}
 			in = in[n:]
 		default:
-			i := indexNeedEscape(in[n:])
+			i := indexNeedEscapeInString(in[n:])
 			in, out = in[n+i:], append(out, in[:n+i]...)
 		}
 	}
@@ -96,7 +96,7 @@
 		return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
 	}
 	in = in[1:]
-	i := indexNeedEscape(string(in))
+	i := indexNeedEscapeInBytes(in)
 	in, out := in[i:], in[:i:i] // set cap to prevent mutations
 	for len(in) > 0 {
 		switch r, n := utf8.DecodeRune(in); {
@@ -185,7 +185,7 @@
 				return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
 			}
 		default:
-			i := indexNeedEscape(string(in[n:]))
+			i := indexNeedEscapeInBytes(in[n:])
 			in, out = in[n+i:], append(out, in[:n+i]...)
 		}
 	}
@@ -217,9 +217,9 @@
 	return rawValueOf(strings.Join(ss, ""), b[:len(b):len(b)]), nil
 }
 
-// indexNeedEscape returns the index of the next character that needs escaping.
-// If no characters need escaping, this returns the input length.
-func indexNeedEscape(s string) int {
+// indexNeedEscapeInString returns the index of the first character that needs
+// escaping. If no characters need escaping, this returns the input length.
+func indexNeedEscapeInString(s string) int {
 	for i := 0; i < len(s); i++ {
 		if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
 			return i
@@ -227,3 +227,18 @@
 	}
 	return len(s)
 }
+
+// indexNeedEscapeInBytes returns the index of the first character that needs
+// escaping. If no characters need escaping, this returns the input length.
+// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
+// resolved.
+func indexNeedEscapeInBytes(b []byte) int {
+	for i := 0; i < len(b); {
+		c, size := utf8.DecodeRune(b[i:])
+		if c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
+			return i
+		}
+		i += size
+	}
+	return len(b)
+}