internal/encoding/{json,text}: improve string parsing
Previous calls to indexNeedEscape with a type conversion from []byte
to string incur an allocation.
Make 2 different calls instead, one for string and one for bytes.
Converting string to []byte does not incur an extra allocation;
however, the benchmark results still show that approach to be slower
by ~3% for textpb and 6+% for jsonpb, so this change goes with 2
separate calls instead.
Results over current head:
name old time/op new time/op delta
TextEncode-4 18.1ms ± 2% 18.3ms ± 2% ~ (p=0.065 n=10+9)
TextDecode-4 233ms ± 3% 102ms ± 1% -56.34% (p=0.000 n=9+10)
JSONEncode-4 10.4ms ± 2% 10.5ms ± 0% +0.56% (p=0.019 n=9+9)
JSONDecode-4 870ms ± 2% 354ms ± 4% -59.33% (p=0.000 n=9+10)
name old alloc/op new alloc/op delta
TextEncode-4 28.9MB ± 0% 28.9MB ± 0% +0.00% (p=0.000 n=10+9)
TextDecode-4 1.16GB ± 0% 0.03GB ± 0% -97.44% (p=0.000 n=9+10)
JSONEncode-4 3.94MB ± 0% 3.94MB ± 0% +0.00% (p=0.000 n=10+10)
JSONDecode-4 3.35GB ± 0% 0.01GB ± 0% -99.83% (p=0.000 n=10+10)
name old allocs/op new allocs/op delta
TextEncode-4 73.5k ± 0% 73.5k ± 0% ~ (all equal)
TextDecode-4 278k ± 0% 255k ± 0% -8.26% (p=0.000 n=9+10)
JSONEncode-4 63.8k ± 0% 63.8k ± 0% ~ (all equal)
JSONDecode-4 247k ± 0% 210k ± 0% -14.92% (p=0.000 n=10+10)
Change-Id: Ibc64e9a7827ec1fffa213eb79f60497950203700
Reviewed-on: https://go-review.googlesource.com/c/protobuf/+/172239
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
diff --git a/internal/encoding/json/string.go b/internal/encoding/json/string.go
index fbfb09f..7f70cff 100644
--- a/internal/encoding/json/string.go
+++ b/internal/encoding/json/string.go
@@ -18,7 +18,7 @@
func appendString(out []byte, in string) ([]byte, error) {
var nerr errors.NonFatal
out = append(out, '"')
- i := indexNeedEscape(in)
+ i := indexNeedEscapeInString(in)
in, out = in[i:], append(out, in[:i]...)
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
@@ -47,7 +47,7 @@
}
in = in[n:]
default:
- i := indexNeedEscape(in[n:])
+ i := indexNeedEscapeInString(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
@@ -65,7 +65,7 @@
return "", 0, d.newSyntaxError("invalid character %q at start of string", in[0])
}
in = in[1:]
- i := indexNeedEscape(string(in))
+ i := indexNeedEscapeInBytes(in)
in, out := in[i:], in[:i:i] // set cap to prevent mutations
for len(in) > 0 {
switch r, n := utf8.DecodeRune(in); {
@@ -123,16 +123,16 @@
return "", 0, d.newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
- i := indexNeedEscape(string(in[n:]))
+ i := indexNeedEscapeInBytes(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
return "", 0, io.ErrUnexpectedEOF
}
-// indexNeedEscape returns the index of the next character that needs escaping.
-// If no characters need escaping, this returns the input length.
-func indexNeedEscape(s string) int {
+// indexNeedEscapeInString returns the index of the character that needs
+// escaping. If no characters need escaping, this returns the input length.
+func indexNeedEscapeInString(s string) int {
for i, r := range s {
if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
return i
@@ -140,3 +140,18 @@
}
return len(s)
}
+
+// indexNeedEscapeInBytes returns the index of the character that needs
+// escaping. If no characters need escaping, this returns the input length.
+// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
+// resolved.
+func indexNeedEscapeInBytes(b []byte) int {
+ for i := 0; i < len(b); {
+ r, n := utf8.DecodeRune(b[i:])
+ if r < ' ' || r == '\\' || r == '"' || r == utf8.RuneError {
+ return i
+ }
+ i += n
+ }
+ return len(b)
+}
diff --git a/internal/encoding/text/string.go b/internal/encoding/text/string.go
index 0274295..6a0b730 100644
--- a/internal/encoding/text/string.go
+++ b/internal/encoding/text/string.go
@@ -33,7 +33,7 @@
in := v.String()
out = append(out, '"')
- i := indexNeedEscape(in)
+ i := indexNeedEscapeInString(in)
in, out = in[i:], append(out, in[:i]...)
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
@@ -72,7 +72,7 @@
}
in = in[n:]
default:
- i := indexNeedEscape(in[n:])
+ i := indexNeedEscapeInString(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
@@ -96,7 +96,7 @@
return Value{}, 0, newSyntaxError("invalid character %q at start of string", in[0])
}
in = in[1:]
- i := indexNeedEscape(string(in))
+ i := indexNeedEscapeInBytes(in)
in, out := in[i:], in[:i:i] // set cap to prevent mutations
for len(in) > 0 {
switch r, n := utf8.DecodeRune(in); {
@@ -185,7 +185,7 @@
return Value{}, 0, newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
- i := indexNeedEscape(string(in[n:]))
+ i := indexNeedEscapeInBytes(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
@@ -217,9 +217,9 @@
return rawValueOf(strings.Join(ss, ""), b[:len(b):len(b)]), nil
}
-// indexNeedEscape returns the index of the next character that needs escaping.
-// If no characters need escaping, this returns the input length.
-func indexNeedEscape(s string) int {
+// indexNeedEscapeInString returns the index of the character that needs
+// escaping. If no characters need escaping, this returns the input length.
+func indexNeedEscapeInString(s string) int {
for i := 0; i < len(s); i++ {
if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
return i
@@ -227,3 +227,18 @@
}
return len(s)
}
+
+// indexNeedEscapeInBytes returns the index of the character that needs
+// escaping. If no characters need escaping, this returns the input length.
+// TODO: Remove this duplicate function when https://golang.org/issue/31506 gets
+// resolved.
+func indexNeedEscapeInBytes(b []byte) int {
+ for i := 0; i < len(b); {
+ c, size := utf8.DecodeRune(b[i:])
+ if c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
+ return i
+ }
+ i += size
+ }
+ return len(b)
+}