slices: inline search into its callers
Constructing a lambda and calling the search function has shown to not
perform well, being on par with the non-generic sort.Search. Inlining
gives a major performance boost
```
~/exp$ benchstat gotip_master.bench gotip_inlined.bench
name old time/op new time/op delta
SearchFloats/Size16-4 20.1ns ± 6% 6.2ns ± 1% -69.24% (p=0.000
n=9+9)
SearchFloats/Size32-4 22.2ns ± 3% 7.2ns ± 1% -67.69% (p=0.000
n=9+9)
SearchFloats/Size64-4 25.8ns ± 6% 8.2ns ± 1% -68.36% (p=0.000
n=9+10)
SearchFloats/Size128-4 28.9ns ± 7% 9.2ns ± 0% -68.30% (p=0.000
n=10+10)
SearchFloats/Size512-4 34.8ns ± 6% 11.2ns ± 0% -67.83% (p=0.000
n=10+10)
SearchFloats/Size1024-4 36.9ns ± 3% 12.2ns ± 0% -66.99% (p=0.000
n=9+9)
```
The story is similar for BinarySearchFunc, even though less severe
```
~/exp$ benchstat gotip_master_structs.bench gotip_inlined_structs.bench
name old time/op new time/op delta
BinarySearchFuncStruct/Size16-4 28.9ns ± 2% 19.0ns ± 4% -34.18% (p=0.000 n=9+10)
BinarySearchFuncStruct/Size32-4 33.8ns ± 1% 22.0ns ± 5% -35.03% (p=0.000 n=9+10)
BinarySearchFuncStruct/Size64-4 39.0ns ± 1% 23.4ns ± 1% -39.98% (p=0.000 n=10+9)
BinarySearchFuncStruct/Size128-4 43.8ns ± 1% 26.0ns ± 0% -40.78% (p=0.000 n=9+8)
BinarySearchFuncStruct/Size512-4 54.0ns ± 0% 31.5ns ± 1% -41.58% (p=0.000 n=10+10)
BinarySearchFuncStruct/Size1024-4 59.0ns ± 0% 34.1ns ± 0% -42.30% (p=0.000 n=9+9)
```
Change-Id: Idf695f6415f7d1f9b69827305bb87061c0fc1e1b
GitHub-Last-Rev: 26e17ba6da5ddd6fadfb8264c51cf33b4d0efc3e
GitHub-Pull-Request: golang/exp#51
Reviewed-on: https://go-review.googlesource.com/c/exp/+/459037
Run-TryBot: Eli Bendersky <eliben@google.com>
Reviewed-by: Ian Lance Taylor <iant@google.com>
Reviewed-by: Eli Bendersky <eliben@google.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Nicolas Hillegeer <aktau@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Ian Lance Taylor <iant@google.com>
Reviewed-by: Keith Randall <khr@google.com>
diff --git a/slices/sort.go b/slices/sort.go
index 35a5d8f..c5e4a6a 100644
--- a/slices/sort.go
+++ b/slices/sort.go
@@ -62,15 +62,22 @@
// sort order; it also returns a bool saying whether the target is really found
// in the slice. The slice must be sorted in increasing order.
func BinarySearch[E constraints.Ordered](x []E, target E) (int, bool) {
- // search returns the leftmost position where f returns true, or len(x) if f
- // returns false for all x. This is the insertion position for target in x,
- // and could point to an element that's either == target or not.
- pos := search(len(x), func(i int) bool { return x[i] >= target })
- if pos >= len(x) || x[pos] != target {
- return pos, false
- } else {
- return pos, true
+ // Inlining is faster than calling BinarySearchFunc with a lambda.
+ n := len(x)
+ // Define x[-1] < target and x[n] >= target.
+ // Invariant: x[i-1] < target, x[j] >= target.
+ i, j := 0, n
+ for i < j {
+ h := int(uint(i+j) >> 1) // avoid overflow when computing h
+ // i ≤ h < j
+ if x[h] < target {
+ i = h + 1 // preserves x[i-1] < target
+ } else {
+ j = h // preserves x[j] >= target
+ }
}
+ // i == j, x[i-1] < target, and x[j] (= x[i]) >= target => answer is i.
+ return i, i < n && x[i] == target
}
// BinarySearchFunc works like BinarySearch, but uses a custom comparison
@@ -79,29 +86,21 @@
// parameters: 0 if a == b, a negative number if a < b and a positive number if
// a > b.
func BinarySearchFunc[E any](x []E, target E, cmp func(E, E) int) (int, bool) {
- pos := search(len(x), func(i int) bool { return cmp(x[i], target) >= 0 })
- if pos >= len(x) || cmp(x[pos], target) != 0 {
- return pos, false
- } else {
- return pos, true
- }
-}
-
-func search(n int, f func(int) bool) int {
- // Define f(-1) == false and f(n) == true.
- // Invariant: f(i-1) == false, f(j) == true.
+ n := len(x)
+ // Define cmp(x[-1], target) < 0 and cmp(x[n], target) >= 0 .
+ // Invariant: cmp(x[i - 1], target) < 0, cmp(x[j], target) >= 0.
i, j := 0, n
for i < j {
h := int(uint(i+j) >> 1) // avoid overflow when computing h
// i ≤ h < j
- if !f(h) {
- i = h + 1 // preserves f(i-1) == false
+ if cmp(x[h], target) < 0 {
+ i = h + 1 // preserves cmp(x[i - 1], target) < 0
} else {
- j = h // preserves f(j) == true
+ j = h // preserves cmp(x[j], target) >= 0
}
}
- // i == j, f(i-1) == false, and f(j) (= f(i)) == true => answer is i.
- return i
+ // i == j, cmp(x[i-1], target) < 0, and cmp(x[j], target) (= cmp(x[i], target)) >= 0 => answer is i.
+ return i, i < n && cmp(x[i], target) == 0
}
type sortedHint int // hint for pdqsort when choosing the pivot
diff --git a/slices/sort_benchmark_test.go b/slices/sort_benchmark_test.go
index 88b753d..ee49f66 100644
--- a/slices/sort_benchmark_test.go
+++ b/slices/sort_benchmark_test.go
@@ -5,6 +5,7 @@
package slices
import (
+ "fmt"
"math/rand"
"sort"
"strings"
@@ -200,3 +201,38 @@
SortFunc(ss, lessFunc)
}
}
+
+func BenchmarkBinarySearchFloats(b *testing.B) {
+ for _, size := range []int{16, 32, 64, 128, 512, 1024} {
+ b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
+ floats := make([]float64, size)
+ for i := range floats {
+ floats[i] = float64(i)
+ }
+ midpoint := len(floats) / 2
+ needle := (floats[midpoint] + floats[midpoint+1]) / 2
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ BinarySearch(floats, needle)
+ }
+ })
+ }
+}
+
+func BenchmarkBinarySearchFuncStruct(b *testing.B) {
+ for _, size := range []int{16, 32, 64, 128, 512, 1024} {
+ b.Run(fmt.Sprintf("Size%d", size), func(b *testing.B) {
+ structs := make([]*myStruct, size)
+ for i := range structs {
+ structs[i] = &myStruct{n: i}
+ }
+ midpoint := len(structs) / 2
+ needle := &myStruct{n: (structs[midpoint].n + structs[midpoint+1].n) / 2}
+ lessFunc := func(a, b *myStruct) int { return a.n - b.n }
+ b.ResetTimer()
+ for i := 0; i < b.N; i++ {
+ BinarySearchFunc(structs, needle, lessFunc)
+ }
+ })
+ }
+}