runtime: Implement faster equals for strings and bytes.

(amd64)
benchmark           old ns/op    new ns/op    delta
BenchmarkEqual0            16            6  -63.15%
BenchmarkEqual9            22            7  -65.37%
BenchmarkEqual32           36            9  -74.91%
BenchmarkEqual4K         2187          120  -94.51%

benchmark            old MB/s     new MB/s  speedup
BenchmarkEqual9        392.22      1134.38    2.89x
BenchmarkEqual32       866.72      3457.39    3.99x
BenchmarkEqual4K      1872.73     33998.87   18.15x

(386)
benchmark           old ns/op    new ns/op    delta
BenchmarkEqual0            16            5  -63.85%
BenchmarkEqual9            22            7  -67.84%
BenchmarkEqual32           34           12  -64.94%
BenchmarkEqual4K         2196          113  -94.85%

benchmark            old MB/s     new MB/s  speedup
BenchmarkEqual9        405.81      1260.18    3.11x
BenchmarkEqual32       919.55      2631.21    2.86x
BenchmarkEqual4K      1864.85     36072.54   19.34x

Update #3751

R=bradfitz, r, khr, dave, remyoudompheng, fullung, minux.ma, ality
CC=golang-dev
https://golang.org/cl/8056043
diff --git a/src/pkg/bytes/equal_test.go b/src/pkg/bytes/equal_test.go
new file mode 100644
index 0000000..a393d5e
--- /dev/null
+++ b/src/pkg/bytes/equal_test.go
@@ -0,0 +1,45 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+//
+// +build linux
+
+package bytes_test
+
+import (
+	. "bytes"
+	"syscall"
+	"testing"
+	"unsafe"
+)
+
+// This file tests the situation where memeq is checking
+// data very near to a page boundary.  We want to make sure
+// equal does not read across the boundary and cause a page
+// fault where it shouldn't.
+
+// This test runs only on linux.  The code being tested is
+// not OS-specific, so it does not need to be tested on all
+// operating systems.
+
+func TestEqualNearPageBoundary(t *testing.T) {
+	pagesize := syscall.Getpagesize()
+	b := make([]byte, 4*pagesize)
+	i := pagesize
+	for ; uintptr(unsafe.Pointer(&b[i]))%uintptr(pagesize) != 0; i++ {
+	}
+	syscall.Mprotect(b[i-pagesize:i], 0)
+	syscall.Mprotect(b[i+pagesize:i+2*pagesize], 0)
+
+	// both of these should fault
+	//pagesize += int(b[i-1])
+	//pagesize += int(b[i+pagesize])
+
+	for j := 0; j < pagesize; j++ {
+		b[i+j] = 'A'
+	}
+	for j := 0; j <= pagesize; j++ {
+		Equal(b[i:i+j], b[i+pagesize-j:i+pagesize])
+		Equal(b[i+pagesize-j:i+pagesize], b[i:i+j])
+	}
+}