slices: inline search into its callers

Constructing a lambda and calling the search function has shown to not
perform well, being on par with the non-generic sort.Search. Inlining
gives a major performance boost

```
~/exp$ benchstat gotip_master.bench gotip_inlined.bench
name                     old time/op  new time/op  delta
SearchFloats/Size16-4    20.1ns ± 6%   6.2ns ± 1%  -69.24%  (p=0.000
n=9+9)
SearchFloats/Size32-4    22.2ns ± 3%   7.2ns ± 1%  -67.69%  (p=0.000
n=9+9)
SearchFloats/Size64-4    25.8ns ± 6%   8.2ns ± 1%  -68.36%  (p=0.000
n=9+10)
SearchFloats/Size128-4   28.9ns ± 7%   9.2ns ± 0%  -68.30%  (p=0.000
n=10+10)
SearchFloats/Size512-4   34.8ns ± 6%  11.2ns ± 0%  -67.83%  (p=0.000
n=10+10)
SearchFloats/Size1024-4  36.9ns ± 3%  12.2ns ± 0%  -66.99%  (p=0.000
n=9+9)
```

The story is similar for BinarySearchFunc, even though less severe

```
~/exp$ benchstat gotip_master_structs.bench gotip_inlined_structs.bench
name                            old time/op  new time/op  delta
BinarySearchFuncStruct/Size16-4    28.9ns ± 2%  19.0ns ± 4%  -34.18%  (p=0.000 n=9+10)
BinarySearchFuncStruct/Size32-4    33.8ns ± 1%  22.0ns ± 5%  -35.03%  (p=0.000 n=9+10)
BinarySearchFuncStruct/Size64-4    39.0ns ± 1%  23.4ns ± 1%  -39.98%  (p=0.000 n=10+9)
BinarySearchFuncStruct/Size128-4   43.8ns ± 1%  26.0ns ± 0%  -40.78%  (p=0.000 n=9+8)
BinarySearchFuncStruct/Size512-4   54.0ns ± 0%  31.5ns ± 1%  -41.58%  (p=0.000 n=10+10)
BinarySearchFuncStruct/Size1024-4  59.0ns ± 0%  34.1ns ± 0%  -42.30%  (p=0.000 n=9+9)
```

Change-Id: Idf695f6415f7d1f9b69827305bb87061c0fc1e1b
GitHub-Last-Rev: 26e17ba6da5ddd6fadfb8264c51cf33b4d0efc3e
GitHub-Pull-Request: golang/exp#51
Reviewed-on: https://go-review.googlesource.com/c/exp/+/459037
Run-TryBot: Eli Bendersky <eliben@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Eli Bendersky <eliben@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Nicolas Hillegeer <aktau@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Keith Randall <khr@google.com>
diff --git a/slices/sort.go b/slices/sort.go
index 35a5d8f..c5e4a6a 100644
--- a/slices/sort.go
+++ b/slices/sort.go
@@ -62,15 +62,22 @@
 // sort order; it also returns a bool saying whether the target is really found
 // in the slice. The slice must be sorted in increasing order.
 func BinarySearch[E constraints.Ordered](x []E, target E) (int, bool) {
-	// search returns the leftmost position where f returns true, or len(x) if f
-	// returns false for all x. This is the insertion position for target in x,
-	// and could point to an element that's either == target or not.
-	pos := search(len(x), func(i int) bool { return x[i] >= target })
-	if pos >= len(x) || x[pos] != target {
-		return pos, false
-	} else {
-		return pos, true
+	// Inlining is faster than calling BinarySearchFunc with a lambda.
+	n := len(x)
+	// Define x[-1] < target and x[n] >= target.
+	// Invariant: x[i-1] < target, x[j] >= target.
+	i, j := 0, n
+	for i < j {
+		h := int(uint(i+j) >> 1) // avoid overflow when computing h
+		// i ≤ h < j
+		if x[h] < target {
+			i = h + 1 // preserves x[i-1] < target
+		} else {
+			j = h // preserves x[j] >= target
+		}
 	}
+	// i == j, x[i-1] < target, and x[j] (= x[i]) >= target  =>  answer is i.
+	return i, i < n && x[i] == target
 }
 
 // BinarySearchFunc works like BinarySearch, but uses a custom comparison
@@ -79,29 +86,21 @@
 // parameters: 0 if a == b, a negative number if a < b and a positive number if
 // a > b.
 func BinarySearchFunc[E any](x []E, target E, cmp func(E, E) int) (int, bool) {
-	pos := search(len(x), func(i int) bool { return cmp(x[i], target) >= 0 })
-	if pos >= len(x) || cmp(x[pos], target) != 0 {
-		return pos, false
-	} else {
-		return pos, true
-	}
-}
-
-func search(n int, f func(int) bool) int {
-	// Define f(-1) == false and f(n) == true.
-	// Invariant: f(i-1) == false, f(j) == true.
+	n := len(x)
+	// Define cmp(x[-1], target) < 0 and cmp(x[n], target) >= 0 .
+	// Invariant: cmp(x[i - 1], target) < 0, cmp(x[j], target) >= 0.
 	i, j := 0, n
 	for i < j {
 		h := int(uint(i+j) >> 1) // avoid overflow when computing h
 		// i ≤ h < j
-		if !f(h) {
-			i = h + 1 // preserves f(i-1) == false
+		if cmp(x[h], target) < 0 {
+			i = h + 1 // preserves cmp(x[i - 1], target) < 0
 		} else {
-			j = h // preserves f(j) == true
+			j = h // preserves cmp(x[j], target) >= 0
 		}
 	}
-	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
-	return i
+	// i == j, cmp(x[i-1], target) < 0, and cmp(x[j], target) (= cmp(x[i], target)) >= 0  =>  answer is i.
+	return i, i < n && cmp(x[i], target) == 0
 }
 
 type sortedHint int // hint for pdqsort when choosing the pivot
diff --git a/slices/sort_benchmark_test.go b/slices/sort_benchmark_test.go
index 88b753d..ee49f66 100644
--- a/slices/sort_benchmark_test.go
+++ b/slices/sort_benchmark_test.go
@@ -5,6 +5,7 @@
 package slices
 
 import (
+	"fmt"
 	"math/rand"
 	"sort"
 	"strings"
@@ -200,3 +201,38 @@
 		SortFunc(ss, lessFunc)
 	}
 }
+
+func BenchmarkBinarySearchFloats(b *testing.B) {
+	for _, size := range []int{16, 32, 64, 128, 512, 1024} {
+		b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
+			floats := make([]float64, size)
+			for i := range floats {
+				floats[i] = float64(i)
+			}
+			midpoint := len(floats) / 2
+			needle := (floats[midpoint] + floats[midpoint+1]) / 2
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				BinarySearch(floats, needle)
+			}
+		})
+	}
+}
+
+func BenchmarkBinarySearchFuncStruct(b *testing.B) {
+	for _, size := range []int{16, 32, 64, 128, 512, 1024} {
+		b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
+			structs := make([]*myStruct, size)
+			for i := range structs {
+				structs[i] = &myStruct{n: i}
+			}
+			midpoint := len(structs) / 2
+			needle := &myStruct{n: (structs[midpoint].n + structs[midpoint+1].n) / 2}
+			lessFunc := func(a, b *myStruct) int { return a.n - b.n }
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				BinarySearchFunc(structs, needle, lessFunc)
+			}
+		})
+	}
+}