strings: add asm version of Index() for short strings on amd64
Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):
name old time/op new time/op delta
IndexRune-4 57.4ns ± 0% 57.5ns ± 0% +0.10% (p=0.000 n=20+19)
IndexRuneFastPath-4 20.4ns ± 0% 20.4ns ± 0% ~ (all samples are equal)
Index-4 21.0ns ± 0% 21.8ns ± 0% +3.81% (p=0.000 n=20+20)
LastIndex-4 7.07ns ± 1% 6.98ns ± 0% -1.21% (p=0.000 n=20+16)
IndexByte-4 18.3ns ± 0% 18.3ns ± 0% ~ (all samples are equal)
IndexHard1-4 1.46ms ± 0% 0.39ms ± 0% -73.06% (p=0.000 n=16+16)
IndexHard2-4 1.46ms ± 0% 0.30ms ± 0% -79.55% (p=0.000 n=18+18)
IndexHard3-4 1.46ms ± 0% 0.66ms ± 0% -54.68% (p=0.000 n=19+19)
LastIndexHard1-4 1.46ms ± 0% 1.46ms ± 0% -0.01% (p=0.036 n=18+20)
LastIndexHard2-4 1.46ms ± 0% 1.46ms ± 0% ~ (p=0.588 n=19+19)
LastIndexHard3-4 1.46ms ± 0% 1.46ms ± 0% ~ (p=0.283 n=17+20)
IndexTorture-4 11.1µs ± 0% 11.1µs ± 0% +0.01% (p=0.000 n=18+17)
Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 33d641e..2ba3d3d 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1725,6 +1725,168 @@
JMP loop
+// TODO: Also use this in bytes.Index
+TEXT strings·indexShortStr(SB),NOSPLIT,$0-40
+ MOVQ s+0(FP), DI
+ MOVQ s_len+8(FP), CX
+ MOVQ c+16(FP), AX
+ MOVQ c_len+24(FP), BX
+ CMPQ BX, CX
+ JA fail
+ CMPQ BX, $2
+ JA _3_or_more
+ MOVW (AX), AX
+ LEAQ -1(DI)(CX*1), CX
+loop2:
+ MOVW (DI), SI
+ CMPW SI,AX
+ JZ success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop2
+ JMP fail
+_3_or_more:
+ CMPQ BX, $3
+ JA _4_or_more
+ MOVW 1(AX), DX
+ MOVW (AX), AX
+ LEAQ -2(DI)(CX*1), CX
+loop3:
+ MOVW (DI), SI
+ CMPW SI,AX
+ JZ partial_success3
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop3
+ JMP fail
+partial_success3:
+ MOVW 1(DI), SI
+ CMPW SI,DX
+ JZ success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop3
+ JMP fail
+_4_or_more:
+ CMPQ BX, $4
+ JA _5_or_more
+ MOVL (AX), AX
+ LEAQ -3(DI)(CX*1), CX
+loop4:
+ MOVL (DI), SI
+ CMPL SI,AX
+ JZ success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop4
+ JMP fail
+_5_or_more:
+ CMPQ BX, $7
+ JA _8_or_more
+ LEAQ 1(DI)(CX*1), CX
+ SUBQ BX, CX
+ MOVL -4(AX)(BX*1), DX
+ MOVL (AX), AX
+loop5to7:
+ MOVL (DI), SI
+ CMPL SI,AX
+ JZ partial_success5to7
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop5to7
+ JMP fail
+partial_success5to7:
+ MOVL -4(BX)(DI*1), SI
+ CMPL SI,DX
+ JZ success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop5to7
+ JMP fail
+_8_or_more:
+ CMPQ BX, $8
+ JA _9_or_more
+ MOVQ (AX), AX
+ LEAQ -7(DI)(CX*1), CX
+loop8:
+ MOVQ (DI), SI
+ CMPQ SI,AX
+ JZ success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop8
+ JMP fail
+_9_or_more:
+ CMPQ BX, $16
+ JA _16_or_more
+ LEAQ 1(DI)(CX*1), CX
+ SUBQ BX, CX
+ MOVQ -8(AX)(BX*1), DX
+ MOVQ (AX), AX
+loop9to15:
+ MOVQ (DI), SI
+ CMPQ SI,AX
+ JZ partial_success9to15
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop9to15
+ JMP fail
+partial_success9to15:
+ MOVQ -8(BX)(DI*1), SI
+ CMPQ SI,DX
+ JZ success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop9to15
+ JMP fail
+_16_or_more:
+ CMPQ BX, $16
+ JA _17_to_31
+ MOVOU (AX), X1
+ LEAQ -15(DI)(CX*1), CX
+loop16:
+ MOVOU (DI), X2
+ PCMPEQB X1, X2
+ PMOVMSKB X2, SI
+ CMPQ SI, $0xffff
+ JE success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop16
+ JMP fail
+_17_to_31:
+ LEAQ 1(DI)(CX*1), CX
+ SUBQ BX, CX
+ MOVOU -16(AX)(BX*1), X0
+ MOVOU (AX), X1
+loop17to31:
+ MOVOU (DI), X2
+ PCMPEQB X1,X2
+ PMOVMSKB X2, SI
+ CMPQ SI, $0xffff
+ JE partial_success17to31
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop17to31
+ JMP fail
+partial_success17to31:
+ MOVOU -16(BX)(DI*1), X3
+ PCMPEQB X0, X3
+ PMOVMSKB X3, SI
+ CMPQ SI, $0xffff
+ JE success
+ ADDQ $1,DI
+ CMPQ DI,CX
+ JB loop17to31
+fail:
+ MOVQ $-1, ret+32(FP)
+ RET
+success:
+ SUBQ s+0(FP), DI
+ MOVQ DI, ret+32(FP)
+ RET
+
+
TEXT bytes·IndexByte(SB),NOSPLIT,$0-40
MOVQ s+0(FP), SI
MOVQ s_len+8(FP), BX
diff --git a/src/strings/strings.go b/src/strings/strings.go
index dd51dab..37d5647 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -143,43 +143,6 @@
return IndexRune(s, r) >= 0
}
-// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
-func Index(s, sep string) int {
- n := len(sep)
- switch {
- case n == 0:
- return 0
- case n == 1:
- return IndexByte(s, sep[0])
- case n == len(s):
- if sep == s {
- return 0
- }
- return -1
- case n > len(s):
- return -1
- }
- // Rabin-Karp search
- hashsep, pow := hashStr(sep)
- var h uint32
- for i := 0; i < n; i++ {
- h = h*primeRK + uint32(s[i])
- }
- if h == hashsep && s[:n] == sep {
- return 0
- }
- for i := n; i < len(s); {
- h *= primeRK
- h += uint32(s[i])
- h -= pow * uint32(s[i-n])
- i++
- if h == hashsep && s[i-n:i] == sep {
- return i - n
- }
- }
- return -1
-}
-
// LastIndex returns the index of the last instance of sep in s, or -1 if sep is not present in s.
func LastIndex(s, sep string) int {
n := len(sep)
diff --git a/src/strings/strings_amd64.go b/src/strings/strings_amd64.go
new file mode 100644
index 0000000..376113f
--- /dev/null
+++ b/src/strings/strings_amd64.go
@@ -0,0 +1,49 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package strings
+
+// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
+// indexShortStr requires 2 <= len(c) <= shortStringLen
+func indexShortStr(s, c string) int // ../runtime/asm_$GOARCH.s
+const shortStringLen = 31
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep string) int {
+ n := len(sep)
+ switch {
+ case n == 0:
+ return 0
+ case n == 1:
+ return IndexByte(s, sep[0])
+ case n <= shortStringLen:
+ return indexShortStr(s, sep)
+ case n == len(s):
+ if sep == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ }
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && s[:n] == sep {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && s[i-n:i] == sep {
+ return i - n
+ }
+ }
+ return -1
+}
diff --git a/src/strings/strings_generic.go b/src/strings/strings_generic.go
new file mode 100644
index 0000000..811cb80
--- /dev/null
+++ b/src/strings/strings_generic.go
@@ -0,0 +1,47 @@
+// Copyright 2015 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64
+
+package strings
+
+// TODO: implements short string optimization on non amd64 platforms
+// and get rid of strings_amd64.go
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep string) int {
+ n := len(sep)
+ switch {
+ case n == 0:
+ return 0
+ case n == 1:
+ return IndexByte(s, sep[0])
+ case n == len(s):
+ if sep == s {
+ return 0
+ }
+ return -1
+ case n > len(s):
+ return -1
+ }
+ // Rabin-Karp search
+ hashsep, pow := hashStr(sep)
+ var h uint32
+ for i := 0; i < n; i++ {
+ h = h*primeRK + uint32(s[i])
+ }
+ if h == hashsep && s[:n] == sep {
+ return 0
+ }
+ for i := n; i < len(s); {
+ h *= primeRK
+ h += uint32(s[i])
+ h -= pow * uint32(s[i-n])
+ i++
+ if h == hashsep && s[i-n:i] == sep {
+ return i - n
+ }
+ }
+ return -1
+}
diff --git a/src/strings/strings_test.go b/src/strings/strings_test.go
index 4e21dea..49f55fe 100644
--- a/src/strings/strings_test.go
+++ b/src/strings/strings_test.go
@@ -59,6 +59,59 @@
{"abc", "b", 1},
{"abc", "c", 2},
{"abc", "x", -1},
+ // test special cases in Index() for short strings
+ {"", "ab", -1},
+ {"bc", "ab", -1},
+ {"ab", "ab", 0},
+ {"xab", "ab", 1},
+ {"xab"[:2], "ab", -1},
+ {"", "abc", -1},
+ {"xbc", "abc", -1},
+ {"abc", "abc", 0},
+ {"xabc", "abc", 1},
+ {"xabc"[:3], "abc", -1},
+ {"xabxc", "abc", -1},
+ {"", "abcd", -1},
+ {"xbcd", "abcd", -1},
+ {"abcd", "abcd", 0},
+ {"xabcd", "abcd", 1},
+ {"xyabcd"[:5], "abcd", -1},
+ {"xbcqq", "abcqq", -1},
+ {"abcqq", "abcqq", 0},
+ {"xabcqq", "abcqq", 1},
+ {"xyabcqq"[:6], "abcqq", -1},
+ {"xabxcqq", "abcqq", -1},
+ {"xabcqxq", "abcqq", -1},
+ {"", "01234567", -1},
+ {"32145678", "01234567", -1},
+ {"01234567", "01234567", 0},
+ {"x01234567", "01234567", 1},
+ {"xx01234567"[:9], "01234567", -1},
+ {"", "0123456789", -1},
+ {"3214567844", "0123456789", -1},
+ {"0123456789", "0123456789", 0},
+ {"x0123456789", "0123456789", 1},
+ {"xyz0123456789"[:12], "0123456789", -1},
+ {"x01234567x89", "0123456789", -1},
+ {"", "0123456789012345", -1},
+ {"3214567889012345", "0123456789012345", -1},
+ {"0123456789012345", "0123456789012345", 0},
+ {"x0123456789012345", "0123456789012345", 1},
+ {"", "01234567890123456789", -1},
+ {"32145678890123456789", "01234567890123456789", -1},
+ {"01234567890123456789", "01234567890123456789", 0},
+ {"x01234567890123456789", "01234567890123456789", 1},
+ {"xyz01234567890123456789"[:22], "01234567890123456789", -1},
+ {"", "0123456789012345678901234567890", -1},
+ {"321456788901234567890123456789012345678911", "0123456789012345678901234567890", -1},
+ {"0123456789012345678901234567890", "0123456789012345678901234567890", 0},
+ {"x0123456789012345678901234567890", "0123456789012345678901234567890", 1},
+ {"xyz0123456789012345678901234567890"[:33], "0123456789012345678901234567890", -1},
+ {"", "01234567890123456789012345678901", -1},
+ {"32145678890123456789012345678901234567890211", "01234567890123456789012345678901", -1},
+ {"01234567890123456789012345678901", "01234567890123456789012345678901", 0},
+ {"x01234567890123456789012345678901", "01234567890123456789012345678901", 1},
+ {"xyz01234567890123456789012345678901"[:34], "01234567890123456789012345678901", -1},
}
var lastIndexTests = []IndexTest{