bytes: make IndexRune faster
re-implement IndexRune by IndexByte and Index which are well optimized
to get performance gain.
name old time/op new time/op delta
IndexRune/10-4 53.2ns ± 1% 29.1ns ± 1% -45.32% (p=0.008 n=5+5)
IndexRune/32-4 191ns ± 1% 27ns ± 1% -85.75% (p=0.008 n=5+5)
IndexRune/4K-4 23.5µs ± 1% 1.0µs ± 1% -95.77% (p=0.008 n=5+5)
IndexRune/4M-4 23.8ms ± 0% 1.0ms ± 2% -95.90% (p=0.008 n=5+5)
IndexRune/64M-4 384ms ± 1% 15ms ± 1% -95.98% (p=0.008 n=5+5)
IndexRuneASCII/10-4 61.5ns ± 0% 10.3ns ± 4% -83.17% (p=0.008 n=5+5)
IndexRuneASCII/32-4 203ns ± 0% 11ns ± 5% -94.68% (p=0.008 n=5+5)
IndexRuneASCII/4K-4 23.4µs ± 0% 0.3µs ± 2% -98.60% (p=0.008 n=5+5)
IndexRuneASCII/4M-4 24.0ms ± 1% 0.3ms ± 1% -98.60% (p=0.008 n=5+5)
IndexRuneASCII/64M-4 386ms ± 2% 6ms ± 1% -98.57% (p=0.008 n=5+5)
name old speed new speed delta
IndexRune/10-4 188MB/s ± 1% 344MB/s ± 1% +82.91% (p=0.008 n=5+5)
IndexRune/32-4 167MB/s ± 0% 1175MB/s ± 1% +603.52% (p=0.008 n=5+5)
IndexRune/4K-4 174MB/s ± 1% 4117MB/s ± 1% +2262.71% (p=0.008 n=5+5)
IndexRune/4M-4 176MB/s ± 0% 4299MB/s ± 2% +2340.46% (p=0.008 n=5+5)
IndexRune/64M-4 175MB/s ± 1% 4354MB/s ± 1% +2388.57% (p=0.008 n=5+5)
IndexRuneASCII/10-4 163MB/s ± 0% 968MB/s ± 4% +494.66% (p=0.008 n=5+5)
IndexRuneASCII/32-4 157MB/s ± 0% 2974MB/s ± 4% +1788.59% (p=0.008 n=5+5)
IndexRuneASCII/4K-4 175MB/s ± 0% 12481MB/s ± 2% +7027.71% (p=0.008 n=5+5)
IndexRuneASCII/4M-4 175MB/s ± 1% 12510MB/s ± 1% +7061.15% (p=0.008 n=5+5)
IndexRuneASCII/64M-4 174MB/s ± 2% 12143MB/s ± 1% +6881.70% (p=0.008 n=5+5)
Change-Id: I0632eadb83937c2a9daa7f0ce79df1dee64f992e
Reviewed-on: https://go-review.googlesource.com/28537
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go
index c35a1c0..3286ca3 100644
--- a/src/bytes/bytes.go
+++ b/src/bytes/bytes.go
@@ -131,14 +131,12 @@
// It returns the byte index of the first occurrence in s of the given rune.
// It returns -1 if rune is not present in s.
func IndexRune(s []byte, r rune) int {
- for i := 0; i < len(s); {
- r1, size := utf8.DecodeRune(s[i:])
- if r == r1 {
- return i
- }
- i += size
+ if r < utf8.RuneSelf {
+ return IndexByte(s, byte(r))
}
- return -1
+ var b [utf8.UTFMax]byte
+ n := utf8.EncodeRune(b[:], r)
+ return Index(s, b[:n])
}
// IndexAny interprets s as a sequence of UTF-8-encoded Unicode code points.
diff --git a/src/bytes/bytes_amd64.go b/src/bytes/bytes_amd64.go
index e8be28b..b683e67 100644
--- a/src/bytes/bytes_amd64.go
+++ b/src/bytes/bytes_amd64.go
@@ -4,6 +4,8 @@
package bytes
+//go:noescape
+
// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
// indexShortStr requires 2 <= len(c) <= shortStringLen
func indexShortStr(s, c []byte) int // ../runtime/asm_$GOARCH.s
diff --git a/src/bytes/bytes_test.go b/src/bytes/bytes_test.go
index c48f662..e9a022b 100644
--- a/src/bytes/bytes_test.go
+++ b/src/bytes/bytes_test.go
@@ -354,6 +354,20 @@
t.Errorf(`IndexRune(%q, '%c') = %v`, tt.a, r, pos)
}
}
+
+ haystack := []byte("test世界")
+
+ allocs := testing.AllocsPerRun(1000, func() {
+ if i := IndexRune(haystack, 's'); i != 2 {
+ t.Fatalf("'s' at %d; want 2", i)
+ }
+ if i := IndexRune(haystack, '世'); i != 4 {
+ t.Fatalf("'世' at %d; want 4", i)
+ }
+ })
+ if allocs != 0 {
+ t.Errorf(`expected no allocations, got %f`, allocs)
+ }
}
var bmbuf []byte
@@ -404,6 +418,44 @@
}
}
+func BenchmarkIndexRune(b *testing.B) {
+ benchBytes(b, indexSizes, bmIndexRune(IndexRune))
+}
+
+func BenchmarkIndexRuneASCII(b *testing.B) {
+ benchBytes(b, indexSizes, bmIndexRuneASCII(IndexRune))
+}
+
+func bmIndexRuneASCII(index func([]byte, rune) int) func(b *testing.B, n int) {
+ return func(b *testing.B, n int) {
+ buf := bmbuf[0:n]
+ buf[n-1] = 'x'
+ for i := 0; i < b.N; i++ {
+ j := index(buf, 'x')
+ if j != n-1 {
+ b.Fatal("bad index", j)
+ }
+ }
+ buf[n-1] = '\x00'
+ }
+}
+
+func bmIndexRune(index func([]byte, rune) int) func(b *testing.B, n int) {
+ return func(b *testing.B, n int) {
+ buf := bmbuf[0:n]
+ utf8.EncodeRune(buf[n-3:], '世')
+ for i := 0; i < b.N; i++ {
+ j := index(buf, '世')
+ if j != n-3 {
+ b.Fatal("bad index", j)
+ }
+ }
+ buf[n-3] = '\x00'
+ buf[n-2] = '\x00'
+ buf[n-1] = '\x00'
+ }
+}
+
func BenchmarkEqual(b *testing.B) {
b.Run("0", func(b *testing.B) {
var buf [4]byte