| // Copyright 2018 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package bytealg |
| |
| import ( |
| "internal/cpu" |
| "unsafe" |
| ) |
| |
| // Offsets into internal/cpu records for use in assembly. |
| const ( |
| offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2) |
| offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42) |
| offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2) |
| offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT) |
| |
| offsetS390xHasVX = unsafe.Offsetof(cpu.S390X.HasVX) |
| ) |
| |
| // MaxLen is the maximum length of the string to be searched for (argument b) in Index. |
| // If MaxLen is not 0, make sure MaxLen >= 4. |
| var MaxLen int |
| |
| // FIXME: the logic of HashStrBytes, HashStrRevBytes, IndexRabinKarpBytes and HashStr, HashStrRev, |
| // IndexRabinKarp are exactly the same, except that the types are different. Can we eliminate |
| // three of them without causing allocation? |
| |
| // PrimeRK is the prime base used in Rabin-Karp algorithm. |
| const PrimeRK = 16777619 |
| |
| // HashStrBytes returns the hash and the appropriate multiplicative |
| // factor for use in Rabin-Karp algorithm. |
| func HashStrBytes(sep []byte) (uint32, uint32) { |
| hash := uint32(0) |
| for i := 0; i < len(sep); i++ { |
| hash = hash*PrimeRK + uint32(sep[i]) |
| } |
| var pow, sq uint32 = 1, PrimeRK |
| for i := len(sep); i > 0; i >>= 1 { |
| if i&1 != 0 { |
| pow *= sq |
| } |
| sq *= sq |
| } |
| return hash, pow |
| } |
| |
| // HashStr returns the hash and the appropriate multiplicative |
| // factor for use in Rabin-Karp algorithm. |
| func HashStr(sep string) (uint32, uint32) { |
| hash := uint32(0) |
| for i := 0; i < len(sep); i++ { |
| hash = hash*PrimeRK + uint32(sep[i]) |
| } |
| var pow, sq uint32 = 1, PrimeRK |
| for i := len(sep); i > 0; i >>= 1 { |
| if i&1 != 0 { |
| pow *= sq |
| } |
| sq *= sq |
| } |
| return hash, pow |
| } |
| |
| // HashStrRevBytes returns the hash of the reverse of sep and the |
| // appropriate multiplicative factor for use in Rabin-Karp algorithm. |
| func HashStrRevBytes(sep []byte) (uint32, uint32) { |
| hash := uint32(0) |
| for i := len(sep) - 1; i >= 0; i-- { |
| hash = hash*PrimeRK + uint32(sep[i]) |
| } |
| var pow, sq uint32 = 1, PrimeRK |
| for i := len(sep); i > 0; i >>= 1 { |
| if i&1 != 0 { |
| pow *= sq |
| } |
| sq *= sq |
| } |
| return hash, pow |
| } |
| |
| // HashStrRev returns the hash of the reverse of sep and the |
| // appropriate multiplicative factor for use in Rabin-Karp algorithm. |
| func HashStrRev(sep string) (uint32, uint32) { |
| hash := uint32(0) |
| for i := len(sep) - 1; i >= 0; i-- { |
| hash = hash*PrimeRK + uint32(sep[i]) |
| } |
| var pow, sq uint32 = 1, PrimeRK |
| for i := len(sep); i > 0; i >>= 1 { |
| if i&1 != 0 { |
| pow *= sq |
| } |
| sq *= sq |
| } |
| return hash, pow |
| } |
| |
| // IndexRabinKarpBytes uses the Rabin-Karp search algorithm to return the index of the |
| // first occurrence of substr in s, or -1 if not present. |
| func IndexRabinKarpBytes(s, sep []byte) int { |
| // Rabin-Karp search |
| hashsep, pow := HashStrBytes(sep) |
| n := len(sep) |
| var h uint32 |
| for i := 0; i < n; i++ { |
| h = h*PrimeRK + uint32(s[i]) |
| } |
| if h == hashsep && Equal(s[:n], sep) { |
| return 0 |
| } |
| for i := n; i < len(s); { |
| h *= PrimeRK |
| h += uint32(s[i]) |
| h -= pow * uint32(s[i-n]) |
| i++ |
| if h == hashsep && Equal(s[i-n:i], sep) { |
| return i - n |
| } |
| } |
| return -1 |
| } |
| |
| // IndexRabinKarp uses the Rabin-Karp search algorithm to return the index of the |
| // first occurrence of substr in s, or -1 if not present. |
| func IndexRabinKarp(s, substr string) int { |
| // Rabin-Karp search |
| hashss, pow := HashStr(substr) |
| n := len(substr) |
| var h uint32 |
| for i := 0; i < n; i++ { |
| h = h*PrimeRK + uint32(s[i]) |
| } |
| if h == hashss && s[:n] == substr { |
| return 0 |
| } |
| for i := n; i < len(s); { |
| h *= PrimeRK |
| h += uint32(s[i]) |
| h -= pow * uint32(s[i-n]) |
| i++ |
| if h == hashss && s[i-n:i] == substr { |
| return i - n |
| } |
| } |
| return -1 |
| } |