blob: 77d5970152a43b5ab03e1642eb393fbc3832d9dd [file] [log] [blame]
Michael Mundaycfd89162016-11-04 16:30:12 -04001// Copyright 2016 The Go Authors. All rights reserved.
Ilya Tocar44f18542016-04-28 17:34:24 +03002// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package bytes
6
Martin Möhrmann69972ae2017-04-03 22:38:09 +02007import "internal/cpu"
8
Hiroshi Iokae10286a2016-09-06 08:09:27 +09009//go:noescape
10
Ilya Tocar44f18542016-04-28 17:34:24 +030011// indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s.
12// indexShortStr requires 2 <= len(c) <= shortStringLen
Martin Möhrmann69972ae2017-04-03 22:38:09 +020013func indexShortStr(s, c []byte) int // ../runtime/asm_amd64.s
14func countByte(s []byte, c byte) int // ../runtime/asm_amd64.s
Ilya Tocar0cff2192016-04-28 17:39:55 +030015
16var shortStringLen int
17
18func init() {
Martin Möhrmann69972ae2017-04-03 22:38:09 +020019 if cpu.X86.HasAVX2 {
Ilya Tocar0cff2192016-04-28 17:39:55 +030020 shortStringLen = 63
21 } else {
22 shortStringLen = 31
23 }
24}
Ilya Tocar44f18542016-04-28 17:34:24 +030025
26// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
27func Index(s, sep []byte) int {
28 n := len(sep)
29 switch {
30 case n == 0:
31 return 0
32 case n == 1:
33 return IndexByte(s, sep[0])
Ilya Tocar44f18542016-04-28 17:34:24 +030034 case n == len(s):
35 if Equal(sep, s) {
36 return 0
37 }
38 return -1
39 case n > len(s):
40 return -1
Ilya Tocarf31492f2016-10-21 23:23:48 +030041 case n <= shortStringLen:
42 // Use brute force when s and sep both are small
43 if len(s) <= 64 {
44 return indexShortStr(s, sep)
45 }
46 c := sep[0]
47 i := 0
48 t := s[:len(s)-n+1]
49 fails := 0
50 for i < len(t) {
51 if t[i] != c {
52 // IndexByte skips 16/32 bytes per iteration,
53 // so it's faster than indexShortStr.
54 o := IndexByte(t[i:], c)
55 if o < 0 {
56 return -1
57 }
58 i += o
59 }
60 if Equal(s[i:i+n], sep) {
61 return i
62 }
63 fails++
64 i++
65 // Switch to indexShortStr when IndexByte produces too many false positives.
66 // Too many means more that 1 error per 8 characters.
67 // Allow some errors in the beginning.
68 if fails > (i+16)/8 {
69 r := indexShortStr(s[i:], sep)
70 if r >= 0 {
71 return r + i
72 }
73 return -1
74 }
75 }
76 return -1
Ilya Tocar44f18542016-04-28 17:34:24 +030077 }
78 // Rabin-Karp search
79 hashsep, pow := hashStr(sep)
80 var h uint32
81 for i := 0; i < n; i++ {
82 h = h*primeRK + uint32(s[i])
83 }
84 if h == hashsep && Equal(s[:n], sep) {
85 return 0
86 }
87 for i := n; i < len(s); {
88 h *= primeRK
89 h += uint32(s[i])
90 h -= pow * uint32(s[i-n])
91 i++
92 if h == hashsep && Equal(s[i-n:i], sep) {
93 return i - n
94 }
95 }
96 return -1
97}
98
Josselin Costanzi01cd22c2017-03-19 12:18:08 +010099// Count counts the number of non-overlapping instances of sep in s.
100// If sep is an empty slice, Count returns 1 + the number of Unicode code points in s.
101func Count(s, sep []byte) int {
Martin Möhrmann69972ae2017-04-03 22:38:09 +0200102 if len(sep) == 1 && cpu.X86.HasPOPCNT {
Josselin Costanzi01cd22c2017-03-19 12:18:08 +0100103 return countByte(s, sep[0])
104 }
105 return countGeneric(s, sep)
106}
107
Ilya Tocar44f18542016-04-28 17:34:24 +0300108// primeRK is the prime base used in Rabin-Karp algorithm.
109const primeRK = 16777619
110
111// hashStr returns the hash and the appropriate multiplicative
112// factor for use in Rabin-Karp algorithm.
113func hashStr(sep []byte) (uint32, uint32) {
114 hash := uint32(0)
115 for i := 0; i < len(sep); i++ {
116 hash = hash*primeRK + uint32(sep[i])
117 }
118 var pow, sq uint32 = 1, primeRK
119 for i := len(sep); i > 0; i >>= 1 {
120 if i&1 != 0 {
121 pow *= sq
122 }
123 sq *= sq
124 }
125 return hash, pow
126}