Michael Munday | cfd8916 | 2016-11-04 16:30:12 -0400 | [diff] [blame] | 1 | // Copyright 2016 The Go Authors. All rights reserved. |
Ilya Tocar | 44f1854 | 2016-04-28 17:34:24 +0300 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | package bytes |
| 6 | |
Martin Möhrmann | 69972ae | 2017-04-03 22:38:09 +0200 | [diff] [blame^] | 7 | import "internal/cpu" |
| 8 | |
Hiroshi Ioka | e10286a | 2016-09-06 08:09:27 +0900 | [diff] [blame] | 9 | //go:noescape |
| 10 | |
Ilya Tocar | 44f1854 | 2016-04-28 17:34:24 +0300 | [diff] [blame] | 11 | // indexShortStr returns the index of the first instance of c in s, or -1 if c is not present in s. |
| 12 | // indexShortStr requires 2 <= len(c) <= shortStringLen |
Martin Möhrmann | 69972ae | 2017-04-03 22:38:09 +0200 | [diff] [blame^] | 13 | func indexShortStr(s, c []byte) int // ../runtime/asm_amd64.s |
| 14 | func countByte(s []byte, c byte) int // ../runtime/asm_amd64.s |
Ilya Tocar | 0cff219 | 2016-04-28 17:39:55 +0300 | [diff] [blame] | 15 | |
| 16 | var shortStringLen int |
| 17 | |
| 18 | func init() { |
Martin Möhrmann | 69972ae | 2017-04-03 22:38:09 +0200 | [diff] [blame^] | 19 | if cpu.X86.HasAVX2 { |
Ilya Tocar | 0cff219 | 2016-04-28 17:39:55 +0300 | [diff] [blame] | 20 | shortStringLen = 63 |
| 21 | } else { |
| 22 | shortStringLen = 31 |
| 23 | } |
| 24 | } |
Ilya Tocar | 44f1854 | 2016-04-28 17:34:24 +0300 | [diff] [blame] | 25 | |
| 26 | // Index returns the index of the first instance of sep in s, or -1 if sep is not present in s. |
| 27 | func Index(s, sep []byte) int { |
| 28 | n := len(sep) |
| 29 | switch { |
| 30 | case n == 0: |
| 31 | return 0 |
| 32 | case n == 1: |
| 33 | return IndexByte(s, sep[0]) |
Ilya Tocar | 44f1854 | 2016-04-28 17:34:24 +0300 | [diff] [blame] | 34 | case n == len(s): |
| 35 | if Equal(sep, s) { |
| 36 | return 0 |
| 37 | } |
| 38 | return -1 |
| 39 | case n > len(s): |
| 40 | return -1 |
Ilya Tocar | f31492f | 2016-10-21 23:23:48 +0300 | [diff] [blame] | 41 | case n <= shortStringLen: |
| 42 | // Use brute force when s and sep both are small |
| 43 | if len(s) <= 64 { |
| 44 | return indexShortStr(s, sep) |
| 45 | } |
| 46 | c := sep[0] |
| 47 | i := 0 |
| 48 | t := s[:len(s)-n+1] |
| 49 | fails := 0 |
| 50 | for i < len(t) { |
| 51 | if t[i] != c { |
| 52 | // IndexByte skips 16/32 bytes per iteration, |
| 53 | // so it's faster than indexShortStr. |
| 54 | o := IndexByte(t[i:], c) |
| 55 | if o < 0 { |
| 56 | return -1 |
| 57 | } |
| 58 | i += o |
| 59 | } |
| 60 | if Equal(s[i:i+n], sep) { |
| 61 | return i |
| 62 | } |
| 63 | fails++ |
| 64 | i++ |
| 65 | // Switch to indexShortStr when IndexByte produces too many false positives. |
| 66 | // Too many means more that 1 error per 8 characters. |
| 67 | // Allow some errors in the beginning. |
| 68 | if fails > (i+16)/8 { |
| 69 | r := indexShortStr(s[i:], sep) |
| 70 | if r >= 0 { |
| 71 | return r + i |
| 72 | } |
| 73 | return -1 |
| 74 | } |
| 75 | } |
| 76 | return -1 |
Ilya Tocar | 44f1854 | 2016-04-28 17:34:24 +0300 | [diff] [blame] | 77 | } |
| 78 | // Rabin-Karp search |
| 79 | hashsep, pow := hashStr(sep) |
| 80 | var h uint32 |
| 81 | for i := 0; i < n; i++ { |
| 82 | h = h*primeRK + uint32(s[i]) |
| 83 | } |
| 84 | if h == hashsep && Equal(s[:n], sep) { |
| 85 | return 0 |
| 86 | } |
| 87 | for i := n; i < len(s); { |
| 88 | h *= primeRK |
| 89 | h += uint32(s[i]) |
| 90 | h -= pow * uint32(s[i-n]) |
| 91 | i++ |
| 92 | if h == hashsep && Equal(s[i-n:i], sep) { |
| 93 | return i - n |
| 94 | } |
| 95 | } |
| 96 | return -1 |
| 97 | } |
| 98 | |
Josselin Costanzi | 01cd22c | 2017-03-19 12:18:08 +0100 | [diff] [blame] | 99 | // Count counts the number of non-overlapping instances of sep in s. |
| 100 | // If sep is an empty slice, Count returns 1 + the number of Unicode code points in s. |
| 101 | func Count(s, sep []byte) int { |
Martin Möhrmann | 69972ae | 2017-04-03 22:38:09 +0200 | [diff] [blame^] | 102 | if len(sep) == 1 && cpu.X86.HasPOPCNT { |
Josselin Costanzi | 01cd22c | 2017-03-19 12:18:08 +0100 | [diff] [blame] | 103 | return countByte(s, sep[0]) |
| 104 | } |
| 105 | return countGeneric(s, sep) |
| 106 | } |
| 107 | |
Ilya Tocar | 44f1854 | 2016-04-28 17:34:24 +0300 | [diff] [blame] | 108 | // primeRK is the prime base used in Rabin-Karp algorithm. |
| 109 | const primeRK = 16777619 |
| 110 | |
| 111 | // hashStr returns the hash and the appropriate multiplicative |
| 112 | // factor for use in Rabin-Karp algorithm. |
| 113 | func hashStr(sep []byte) (uint32, uint32) { |
| 114 | hash := uint32(0) |
| 115 | for i := 0; i < len(sep); i++ { |
| 116 | hash = hash*primeRK + uint32(sep[i]) |
| 117 | } |
| 118 | var pow, sq uint32 = 1, primeRK |
| 119 | for i := len(sep); i > 0; i >>= 1 { |
| 120 | if i&1 != 0 { |
| 121 | pow *= sq |
| 122 | } |
| 123 | sq *= sq |
| 124 | } |
| 125 | return hash, pow |
| 126 | } |