strings: add asm version of Index() for short strings on amd64

Currently we have special case for 1-byte strings,
This extends this to strings shorter than 32 bytes on amd64.
Results (broadwell):

name                 old time/op  new time/op  delta
IndexRune-4          57.4ns ± 0%  57.5ns ± 0%   +0.10%        (p=0.000 n=20+19)
IndexRuneFastPath-4  20.4ns ± 0%  20.4ns ± 0%     ~     (all samples are equal)
Index-4              21.0ns ± 0%  21.8ns ± 0%   +3.81%        (p=0.000 n=20+20)
LastIndex-4          7.07ns ± 1%  6.98ns ± 0%   -1.21%        (p=0.000 n=20+16)
IndexByte-4          18.3ns ± 0%  18.3ns ± 0%     ~     (all samples are equal)
IndexHard1-4         1.46ms ± 0%  0.39ms ± 0%  -73.06%        (p=0.000 n=16+16)
IndexHard2-4         1.46ms ± 0%  0.30ms ± 0%  -79.55%        (p=0.000 n=18+18)
IndexHard3-4         1.46ms ± 0%  0.66ms ± 0%  -54.68%        (p=0.000 n=19+19)
LastIndexHard1-4     1.46ms ± 0%  1.46ms ± 0%   -0.01%        (p=0.036 n=18+20)
LastIndexHard2-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.588 n=19+19)
LastIndexHard3-4     1.46ms ± 0%  1.46ms ± 0%     ~           (p=0.283 n=17+20)
IndexTorture-4       11.1µs ± 0%  11.1µs ± 0%   +0.01%        (p=0.000 n=18+17)

Change-Id: I892781549f558f698be4e41f9f568e3d0611efb5
Reviewed-on: https://go-review.googlesource.com/16430
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
diff --git a/src/strings/strings_generic.go b/src/strings/strings_generic.go
new file mode 100644
index 0000000..811cb80
--- /dev/null
+++ b/src/strings/strings_generic.go
@@ -0,0 +1,47 @@
+// Copyright 2015 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !amd64
+
+package strings
+
+// TODO: implements short string optimization on non amd64 platforms
+// and get rid of strings_amd64.go
+
+// Index returns the index of the first instance of sep in s, or -1 if sep is not present in s.
+func Index(s, sep string) int {
+	n := len(sep)
+	switch {
+	case n == 0:
+		return 0
+	case n == 1:
+		return IndexByte(s, sep[0])
+	case n == len(s):
+		if sep == s {
+			return 0
+		}
+		return -1
+	case n > len(s):
+		return -1
+	}
+	// Rabin-Karp search
+	hashsep, pow := hashStr(sep)
+	var h uint32
+	for i := 0; i < n; i++ {
+		h = h*primeRK + uint32(s[i])
+	}
+	if h == hashsep && s[:n] == sep {
+		return 0
+	}
+	for i := n; i < len(s); {
+		h *= primeRK
+		h += uint32(s[i])
+		h -= pow * uint32(s[i-n])
+		i++
+		if h == hashsep && s[i-n:i] == sep {
+			return i - n
+		}
+	}
+	return -1
+}