slices: initial implementation of sorting functions

Implements golang/go#47619 in the exp/slices package as a
testing ground prior to inclusion in the standard library.

Relies on the modified sorting function code generator proposed
in https://go-review.googlesource.com/c/go/+/353069 to
automatically generate the code of the sorting functions.

Benchmark comparing sort.Ints with the generic Sort function
added in this CL to sort a slice of int:

name           old time/op  new time/op  delta
Sort-8         12.0ms ± 1%   6.5ms ± 1%  -46.02%  (p=0.000 n=9+10)

Benchmark comparing sort.Sort with SortFunc to sort a slice of
struct pointers based on one field in the struct:

name           old time/op  new time/op  delta
SortStructs-8  18.6ms ± 2%  15.9ms ± 3%  -14.43%  (p=0.000 n=10+10)

Change-Id: Ic301aae7e5b8f99144e39b8a77fde897779588ed
Reviewed-on: https://go-review.googlesource.com/c/exp/+/378134
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Trust: Cody Oss <codyoss@google.com>
Trust: Jeremy Faller <jeremy@golang.org>
diff --git a/slices/sort.go b/slices/sort.go
new file mode 100644
index 0000000..64f334f
--- /dev/null
+++ b/slices/sort.go
@@ -0,0 +1,95 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package slices
+
+import "constraints"
+
+// Sort sorts a slice of any ordered type in ascending order.
+func Sort[Elem constraints.Ordered](x []Elem) {
+	n := len(x)
+	quickSortOrdered(x, 0, n, maxDepth(n))
+}
+
+// SortFunc sorts the slice x in ascending order as determined by the less function.
+// This sort is not guaranteed to be stable.
+func SortFunc[Elem any](x []Elem, less func(a, b Elem) bool) {
+	n := len(x)
+	quickSortLessFunc(x, 0, n, maxDepth(n), less)
+}
+
+// SortStableFunc sorts the slice x while keeping the original order of equal
+// elements, using less to compare elements.
+func SortStableFunc[Elem any](x []Elem, less func(a, b Elem) bool) {
+	stableLessFunc(x, len(x), less)
+}
+
+// IsSorted reports whether x is sorted in ascending order.
+func IsSorted[Elem constraints.Ordered](x []Elem) bool {
+	for i := len(x) - 1; i > 0; i-- {
+		if x[i] < x[i-1] {
+			return false
+		}
+	}
+	return true
+}
+
+// IsSortedFunc reports whether x is sorted in ascending order, with less as the
+// comparison function.
+func IsSortedFunc[Elem any](x []Elem, less func(a, b Elem) bool) bool {
+	for i := len(x) - 1; i > 0; i-- {
+		if less(x[i], x[i-1]) {
+			return false
+		}
+	}
+	return true
+}
+
+// BinarySearch searches for target in a sorted slice and returns the smallest
+// index at which target is found. If the target is not found, the index at
+// which it could be inserted into the slice is returned; therefore, if the
+// intention is to find target itself a separate check for equality with the
+// element at the returned index is required.
+func BinarySearch[Elem constraints.Ordered](x []Elem, target Elem) int {
+	return search(len(x), func(i int) bool { return x[i] >= target })
+}
+
+// BinarySearchFunc uses binary search to find and return the smallest index i
+// in [0, n) at which ok(x[i]) is true, where n is len(x), assuming that on the
+// range [0, n), ok(x[i]) == true implies ok(x[i+1]) == true. That is,
+// BinarySearchFunc requires that ok is false for some (possibly empty) prefix
+// of x and then true for the (possibly empty) remainder; BinarySearchFunc
+// returns the first true index. If there is no such index, it returns n.
+// (Note that the "not found" return value is not -1 as in, for instance,
+// strings.Index.) BinarySearchFunc calls ok only on elements of x.
+func BinarySearchFunc[Elem any](x []Elem, ok func(Elem) bool) int {
+	return search(len(x), func(i int) bool { return ok(x[i]) })
+}
+
+// maxDepth returns a threshold at which quicksort should switch
+// to heapsort. It returns 2*ceil(lg(n+1)).
+func maxDepth(n int) int {
+	var depth int
+	for i := n; i > 0; i >>= 1 {
+		depth++
+	}
+	return depth * 2
+}
+
+func search(n int, f func(int) bool) int {
+	// Define f(-1) == false and f(n) == true.
+	// Invariant: f(i-1) == false, f(j) == true.
+	i, j := 0, n
+	for i < j {
+		h := int(uint(i+j) >> 1) // avoid overflow when computing h
+		// i ≤ h < j
+		if !f(h) {
+			i = h + 1 // preserves f(i-1) == false
+		} else {
+			j = h // preserves f(j) == true
+		}
+	}
+	// i == j, f(i-1) == false, and f(j) (= f(i)) == true  =>  answer is i.
+	return i
+}
diff --git a/slices/sort_benchmark_test.go b/slices/sort_benchmark_test.go
new file mode 100644
index 0000000..5d363af
--- /dev/null
+++ b/slices/sort_benchmark_test.go
@@ -0,0 +1,116 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package slices
+
+import (
+	"math/rand"
+	"sort"
+	"testing"
+)
+
+// These benchmarks compare sorting a large slice of int with sort.Ints vs.
+// slices.Sort.
+func makeRandomInts(n int) []int {
+	rand.Seed(42)
+	ints := make([]int, n)
+	for i := 0; i < n; i++ {
+		ints[i] = rand.Intn(n)
+	}
+	return ints
+}
+
+const N = 100_000
+
+func BenchmarkSortInts(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		ints := makeRandomInts(N)
+		b.StartTimer()
+		sort.Ints(ints)
+	}
+}
+
+func BenchmarkSlicesSort(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		ints := makeRandomInts(N)
+		b.StartTimer()
+		Sort(ints)
+	}
+}
+
+// Since we're benchmarking these sorts against each other, make sure that they
+// generate similar results.
+func TestIntSorts(t *testing.T) {
+	ints := makeRandomInts(200)
+	ints2 := Clone(ints)
+
+	sort.Ints(ints)
+	Sort(ints2)
+
+	for i := range ints {
+		if ints[i] != ints2[i] {
+			t.Fatalf("ints2 mismatch at %d; %d != %d", i, ints[i], ints2[i])
+		}
+	}
+}
+
+// These benchmarks compare sorting a slice of structs with sort.Sort vs.
+// slices.SortFunc.
+type myStruct struct {
+	a, b, c, d string
+	n          int
+}
+
+type myStructs []*myStruct
+
+func (s myStructs) Len() int           { return len(s) }
+func (s myStructs) Less(i, j int) bool { return s[i].n < s[j].n }
+func (s myStructs) Swap(i, j int)      { s[i], s[j] = s[j], s[i] }
+
+func makeRandomStructs(n int) myStructs {
+	rand.Seed(42)
+	structs := make([]*myStruct, n)
+	for i := 0; i < n; i++ {
+		structs[i] = &myStruct{n: rand.Intn(n)}
+	}
+	return structs
+}
+
+func TestStructSorts(t *testing.T) {
+	ss := makeRandomStructs(200)
+	ss2 := make([]*myStruct, len(ss))
+	for i := range ss {
+		ss2[i] = &myStruct{n: ss[i].n}
+	}
+
+	sort.Sort(ss)
+	SortFunc(ss2, func(a, b *myStruct) bool { return a.n < b.n })
+
+	for i := range ss {
+		if *ss[i] != *ss2[i] {
+			t.Fatalf("ints2 mismatch at %d; %v != %v", i, *ss[i], *ss2[i])
+		}
+	}
+}
+
+func BenchmarkSortStructs(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		ss := makeRandomStructs(N)
+		b.StartTimer()
+		sort.Sort(ss)
+	}
+}
+
+func BenchmarkSortFuncStructs(b *testing.B) {
+	lessFunc := func(a, b *myStruct) bool { return a.n < b.n }
+	for i := 0; i < b.N; i++ {
+		b.StopTimer()
+		ss := makeRandomStructs(N)
+		b.StartTimer()
+		SortFunc(ss, lessFunc)
+	}
+}
diff --git a/slices/sort_test.go b/slices/sort_test.go
new file mode 100644
index 0000000..4f3145a
--- /dev/null
+++ b/slices/sort_test.go
@@ -0,0 +1,182 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package slices
+
+import (
+	"math"
+	"math/rand"
+	"testing"
+)
+
+var ints = [...]int{74, 59, 238, -784, 9845, 959, 905, 0, 0, 42, 7586, -5467984, 7586}
+var float64s = [...]float64{74.3, 59.0, math.Inf(1), 238.2, -784.0, 2.3, math.NaN(), math.NaN(), math.Inf(-1), 9845.768, -959.7485, 905, 7.8, 7.8}
+var strs = [...]string{"", "Hello", "foo", "bar", "foo", "f00", "%*&^*&^&", "***"}
+
+func TestSortIntSlice(t *testing.T) {
+	data := ints[:]
+	Sort(data)
+	if !IsSorted(data) {
+		t.Errorf("sorted %v", ints)
+		t.Errorf("   got %v", data)
+	}
+}
+
+func TestSortFuncIntSlice(t *testing.T) {
+	data := ints[:]
+	SortFunc(data, func(a, b int) bool { return a < b })
+	if !IsSorted(data) {
+		t.Errorf("sorted %v", ints)
+		t.Errorf("   got %v", data)
+	}
+}
+
+func TestSortFloat64Slice(t *testing.T) {
+	data := float64s[:]
+	Sort(data)
+	if !IsSorted(data) {
+		t.Errorf("sorted %v", float64s)
+		t.Errorf("   got %v", data)
+	}
+}
+
+func TestSortStringSlice(t *testing.T) {
+	data := strs[:]
+	Sort(data)
+	if !IsSorted(data) {
+		t.Errorf("sorted %v", strs)
+		t.Errorf("   got %v", data)
+	}
+}
+
+func TestSortLarge_Random(t *testing.T) {
+	n := 1000000
+	if testing.Short() {
+		n /= 100
+	}
+	data := make([]int, n)
+	for i := 0; i < len(data); i++ {
+		data[i] = rand.Intn(100)
+	}
+	if IsSorted(data) {
+		t.Fatalf("terrible rand.rand")
+	}
+	Sort(data)
+	if !IsSorted(data) {
+		t.Errorf("sort didn't sort - 1M ints")
+	}
+}
+
+type intPair struct {
+	a, b int
+}
+
+type intPairs []intPair
+
+// Pairs compare on a only.
+func intPairLess(x, y intPair) bool {
+	return x.a < y.a
+}
+
+// initB records each element's initial position in its b field.
+func (d intPairs) initB() {
+	for i := range d {
+		d[i].b = i
+	}
+}
+
+// inOrder reports whether elements with equal a kept their original relative order.
+func (d intPairs) inOrder() bool {
+	lastA, lastB := -1, 0
+	for i := 0; i < len(d); i++ {
+		if lastA != d[i].a {
+			lastA = d[i].a
+			lastB = d[i].b
+			continue
+		}
+		if d[i].b <= lastB {
+			return false
+		}
+		lastB = d[i].b
+	}
+	return true
+}
+
+func TestStability(t *testing.T) {
+	n, m := 100000, 1000
+	if testing.Short() {
+		n, m = 1000, 100
+	}
+	data := make(intPairs, n)
+
+	// random distribution
+	for i := 0; i < len(data); i++ {
+		data[i].a = rand.Intn(m)
+	}
+	if IsSortedFunc(data, intPairLess) {
+		t.Fatalf("terrible rand.rand")
+	}
+	data.initB()
+	SortStableFunc(data, intPairLess)
+	if !IsSortedFunc(data, intPairLess) {
+		t.Errorf("Stable didn't sort %d ints", n)
+	}
+	if !data.inOrder() {
+		t.Errorf("Stable wasn't stable on %d ints", n)
+	}
+
+	// already sorted
+	data.initB()
+	SortStableFunc(data, intPairLess)
+	if !IsSortedFunc(data, intPairLess) {
+		t.Errorf("Stable shuffled sorted %d ints (order)", n)
+	}
+	if !data.inOrder() {
+		t.Errorf("Stable shuffled sorted %d ints (stability)", n)
+	}
+
+	// sorted reversed
+	for i := 0; i < len(data); i++ {
+		data[i].a = len(data) - i
+	}
+	data.initB()
+	SortStableFunc(data, intPairLess)
+	if !IsSortedFunc(data, intPairLess) {
+		t.Errorf("Stable didn't sort %d ints", n)
+	}
+	if !data.inOrder() {
+		t.Errorf("Stable wasn't stable on %d ints", n)
+	}
+}
+
+func TestBinarySearch(t *testing.T) {
+	data := []string{"aa", "ad", "ca", "xy"}
+	tests := []struct {
+		target string
+		want   int
+	}{
+		{"aa", 0},
+		{"ab", 1},
+		{"ad", 1},
+		{"ax", 2},
+		{"ca", 2},
+		{"cc", 3},
+		{"dd", 3},
+		{"xy", 3},
+		{"zz", 4},
+	}
+	for _, tt := range tests {
+		t.Run(tt.target, func(t *testing.T) {
+			i := BinarySearch(data, tt.target)
+			if i != tt.want {
+				t.Errorf("BinarySearch want %d, got %d", tt.want, i)
+			}
+
+			j := BinarySearchFunc(data, func(s string) bool { return s >= tt.target })
+			if j != tt.want {
+				t.Errorf("BinarySearchFunc want %d, got %d", tt.want, j)
+			}
+		})
+	}
+}
diff --git a/slices/zsortfunc.go b/slices/zsortfunc.go
new file mode 100644
index 0000000..82f156f
--- /dev/null
+++ b/slices/zsortfunc.go
@@ -0,0 +1,342 @@
+// Code generated by gen_sort_variants.go; DO NOT EDIT.
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package slices
+
+// insertionSortLessFunc sorts data[a:b] using insertion sort.
+func insertionSortLessFunc[Elem any](data []Elem, a, b int, less func(a, b Elem) bool) {
+	for i := a + 1; i < b; i++ {
+		for j := i; j > a && less(data[j], data[j-1]); j-- {
+			data[j], data[j-1] = data[j-1], data[j]
+		}
+	}
+}
+
+// siftDownLessFunc implements the heap property on data[lo:hi].
+// first is an offset into the array where the root of the heap lies.
+func siftDownLessFunc[Elem any](data []Elem, lo, hi, first int, less func(a, b Elem) bool) {
+	root := lo
+	for {
+		child := 2*root + 1
+		if child >= hi {
+			break
+		}
+		if child+1 < hi && less(data[first+child], data[first+child+1]) {
+			child++
+		}
+		if !less(data[first+root], data[first+child]) {
+			return
+		}
+		data[first+root], data[first+child] = data[first+child], data[first+root]
+		root = child
+	}
+}
+
+func heapSortLessFunc[Elem any](data []Elem, a, b int, less func(a, b Elem) bool) {
+	first := a
+	lo := 0
+	hi := b - a
+
+	// Build heap with greatest element at top.
+	for i := (hi - 1) / 2; i >= 0; i-- {
+		siftDownLessFunc(data, i, hi, first, less)
+	}
+
+	// Pop elements, largest first, into end of data.
+	for i := hi - 1; i >= 0; i-- {
+		data[first], data[first+i] = data[first+i], data[first]
+		siftDownLessFunc(data, lo, i, first, less)
+	}
+}
+
+// Quicksort, loosely following Bentley and McIlroy,
+// "Engineering a Sort Function" SP&E November 1993.
+
+// medianOfThreeLessFunc moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
+func medianOfThreeLessFunc[Elem any](data []Elem, m1, m0, m2 int, less func(a, b Elem) bool) {
+	// sort 3 elements
+	if less(data[m1], data[m0]) {
+		data[m1], data[m0] = data[m0], data[m1]
+	}
+	// data[m0] <= data[m1]
+	if less(data[m2], data[m1]) {
+		data[m2], data[m1] = data[m1], data[m2]
+		// data[m0] <= data[m2] && data[m1] < data[m2]
+		if less(data[m1], data[m0]) {
+			data[m1], data[m0] = data[m0], data[m1]
+		}
+	}
+	// now data[m0] <= data[m1] <= data[m2]
+}
+
+func swapRangeLessFunc[Elem any](data []Elem, a, b, n int, less func(a, b Elem) bool) {
+	for i := 0; i < n; i++ {
+		data[a+i], data[b+i] = data[b+i], data[a+i]
+	}
+}
+
+func doPivotLessFunc[Elem any](data []Elem, lo, hi int, less func(a, b Elem) bool) (midlo, midhi int) {
+	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
+	if hi-lo > 40 {
+		// Tukey's "Ninther" median of three medians of three.
+		s := (hi - lo) / 8
+		medianOfThreeLessFunc(data, lo, lo+s, lo+2*s, less)
+		medianOfThreeLessFunc(data, m, m-s, m+s, less)
+		medianOfThreeLessFunc(data, hi-1, hi-1-s, hi-1-2*s, less)
+	}
+	medianOfThreeLessFunc(data, lo, m, hi-1, less)
+
+	// Invariants are:
+	//	data[lo] = pivot (set up by ChoosePivot)
+	//	data[lo < i < a] < pivot
+	//	data[a <= i < b] <= pivot
+	//	data[b <= i < c] unexamined
+	//	data[c <= i < hi-1] > pivot
+	//	data[hi-1] >= pivot
+	pivot := lo
+	a, c := lo+1, hi-1
+
+	for ; a < c && less(data[a], data[pivot]); a++ {
+	}
+	b := a
+	for {
+		for ; b < c && !less(data[pivot], data[b]); b++ { // data[b] <= pivot
+		}
+		for ; b < c && less(data[pivot], data[c-1]); c-- { // data[c-1] > pivot
+		}
+		if b >= c {
+			break
+		}
+		// data[b] > pivot; data[c-1] <= pivot
+		data[b], data[c-1] = data[c-1], data[b]
+		b++
+		c--
+	}
+	// If hi-c<3 then there are duplicates (by property of median of nine).
+	// Let's be a bit more conservative, and set border to 5.
+	protect := hi-c < 5
+	if !protect && hi-c < (hi-lo)/4 {
+		// Let's test some points for equality to pivot
+		dups := 0
+		if !less(data[pivot], data[hi-1]) { // data[hi-1] = pivot
+			data[c], data[hi-1] = data[hi-1], data[c]
+			c++
+			dups++
+		}
+		if !less(data[b-1], data[pivot]) { // data[b-1] = pivot
+			b--
+			dups++
+		}
+		// m-lo = (hi-lo)/2 > 6
+		// b-lo > (hi-lo)*3/4-1 > 8
+		// ==> m < b ==> data[m] <= pivot
+		if !less(data[m], data[pivot]) { // data[m] = pivot
+			data[m], data[b-1] = data[b-1], data[m]
+			b--
+			dups++
+		}
+		// if at least 2 points are equal to pivot, assume skewed distribution
+		protect = dups > 1
+	}
+	if protect {
+		// Protect against a lot of duplicates
+		// Add invariant:
+		//	data[a <= i < b] unexamined
+		//	data[b <= i < c] = pivot
+		for {
+			for ; a < b && !less(data[b-1], data[pivot]); b-- { // data[b] == pivot
+			}
+			for ; a < b && less(data[a], data[pivot]); a++ { // data[a] < pivot
+			}
+			if a >= b {
+				break
+			}
+			// data[a] == pivot; data[b-1] < pivot
+			data[a], data[b-1] = data[b-1], data[a]
+			a++
+			b--
+		}
+	}
+	// Swap pivot into middle
+	data[pivot], data[b-1] = data[b-1], data[pivot]
+	return b - 1, c
+}
+
+func quickSortLessFunc[Elem any](data []Elem, a, b, maxDepth int, less func(a, b Elem) bool) {
+	for b-a > 12 { // Use ShellSort for slices <= 12 elements
+		if maxDepth == 0 {
+			heapSortLessFunc(data, a, b, less)
+			return
+		}
+		maxDepth--
+		mlo, mhi := doPivotLessFunc(data, a, b, less)
+		// Avoiding recursion on the larger subproblem guarantees
+		// a stack depth of at most lg(b-a).
+		if mlo-a < b-mhi {
+			quickSortLessFunc(data, a, mlo, maxDepth, less)
+			a = mhi // i.e., quickSortLessFunc(data, mhi, b)
+		} else {
+			quickSortLessFunc(data, mhi, b, maxDepth, less)
+			b = mlo // i.e., quickSortLessFunc(data, a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Do ShellSort pass with gap 6
+		// It could be written in this simplified form because b-a <= 12
+		for i := a + 6; i < b; i++ {
+			if less(data[i], data[i-6]) {
+				data[i], data[i-6] = data[i-6], data[i]
+			}
+		}
+		insertionSortLessFunc(data, a, b, less)
+	}
+}
+
+func stableLessFunc[Elem any](data []Elem, n int, less func(a, b Elem) bool) {
+	blockSize := 20 // must be > 0
+	a, b := 0, blockSize
+	for b <= n {
+		insertionSortLessFunc(data, a, b, less)
+		a = b
+		b += blockSize
+	}
+	insertionSortLessFunc(data, a, n, less)
+
+	for blockSize < n {
+		a, b = 0, 2*blockSize
+		for b <= n {
+			symMergeLessFunc(data, a, a+blockSize, b, less)
+			a = b
+			b += 2 * blockSize
+		}
+		if m := a + blockSize; m < n {
+			symMergeLessFunc(data, a, m, n, less)
+		}
+		blockSize *= 2
+	}
+}
+
+// symMergeLessFunc merges the two sorted subsequences data[a:m] and data[m:b] using
+// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
+// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
+// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
+// Computer Science, pages 714-723. Springer, 2004.
+//
+// Let M = m-a and N = b-m. Without loss of generality, M < N.
+// The recursion depth is bound by ceil(log(N+M)).
+// The algorithm needs O(M*log(N/M + 1)) calls to less.
+// The algorithm needs O((M+N)*log(M)) element swaps.
+//
+// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
+// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
+// in the paper carries through for Swap operations, especially as the block
+// swapping rotate uses only O(M+N) Swaps.
+//
+// symMerge assumes non-degenerate arguments: a < m && m < b.
+// Having the caller check this condition eliminates many leaf recursion calls,
+// which improves performance.
+func symMergeLessFunc[Elem any](data []Elem, a, m, b int, less func(a, b Elem) bool) {
+	// Avoid unnecessary recursions of symMerge
+	// by direct insertion of data[a] into data[m:b]
+	// if data[a:m] only contains one element.
+	if m-a == 1 {
+		// Use binary search to find the lowest index i
+		// such that data[i] >= data[a] for m <= i < b.
+		// Exit the search loop with i == b in case no such index exists.
+		i := m
+		j := b
+		for i < j {
+			h := int(uint(i+j) >> 1)
+			if less(data[h], data[a]) {
+				i = h + 1
+			} else {
+				j = h
+			}
+		}
+		// Swap values until data[a] reaches the position before i.
+		for k := a; k < i-1; k++ {
+			data[k], data[k+1] = data[k+1], data[k]
+		}
+		return
+	}
+
+	// Avoid unnecessary recursions of symMerge
+	// by direct insertion of data[m] into data[a:m]
+	// if data[m:b] only contains one element.
+	if b-m == 1 {
+		// Use binary search to find the lowest index i
+		// such that data[i] > data[m] for a <= i < m.
+		// Exit the search loop with i == m in case no such index exists.
+		i := a
+		j := m
+		for i < j {
+			h := int(uint(i+j) >> 1)
+			if !less(data[m], data[h]) {
+				i = h + 1
+			} else {
+				j = h
+			}
+		}
+		// Swap values until data[m] reaches the position i.
+		for k := m; k > i; k-- {
+			data[k], data[k-1] = data[k-1], data[k]
+		}
+		return
+	}
+
+	mid := int(uint(a+b) >> 1)
+	n := mid + m
+	var start, r int
+	if m > mid {
+		start = n - b
+		r = mid
+	} else {
+		start = a
+		r = m
+	}
+	p := n - 1
+
+	for start < r {
+		c := int(uint(start+r) >> 1)
+		if !less(data[p-c], data[c]) {
+			start = c + 1
+		} else {
+			r = c
+		}
+	}
+
+	end := n - start
+	if start < m && m < end {
+		rotateLessFunc(data, start, m, end, less)
+	}
+	if a < start && start < mid {
+		symMergeLessFunc(data, a, start, mid, less)
+	}
+	if mid < end && end < b {
+		symMergeLessFunc(data, mid, end, b, less)
+	}
+}
+
+// rotateLessFunc rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
+// Data of the form 'x u v y' is changed to 'x v u y'.
+// rotateLessFunc performs at most b-a many element swaps,
+// and it assumes non-degenerate arguments: a < m && m < b.
+func rotateLessFunc[Elem any](data []Elem, a, m, b int, less func(a, b Elem) bool) {
+	i := m - a
+	j := b - m
+
+	for i != j {
+		if i > j {
+			swapRangeLessFunc(data, m-i, m, j, less)
+			i -= j
+		} else {
+			swapRangeLessFunc(data, m-i, m+j-i, i, less)
+			j -= i
+		}
+	}
+	// i == j
+	swapRangeLessFunc(data, m-i, m, i, less)
+}
diff --git a/slices/zsortordered.go b/slices/zsortordered.go
new file mode 100644
index 0000000..1667de0
--- /dev/null
+++ b/slices/zsortordered.go
@@ -0,0 +1,344 @@
+// Code generated by gen_sort_variants.go; DO NOT EDIT.
+
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package slices
+
+import "constraints"
+
+// insertionSortOrdered sorts data[a:b] using insertion sort.
+func insertionSortOrdered[Elem constraints.Ordered](data []Elem, a, b int) {
+	for i := a + 1; i < b; i++ {
+		for j := i; j > a && (data[j] < data[j-1]); j-- {
+			data[j], data[j-1] = data[j-1], data[j]
+		}
+	}
+}
+
+// siftDownOrdered implements the heap property on data[lo:hi].
+// first is an offset into the array where the root of the heap lies.
+func siftDownOrdered[Elem constraints.Ordered](data []Elem, lo, hi, first int) {
+	root := lo
+	for {
+		child := 2*root + 1
+		if child >= hi {
+			break
+		}
+		if child+1 < hi && (data[first+child] < data[first+child+1]) {
+			child++
+		}
+		if !(data[first+root] < data[first+child]) {
+			return
+		}
+		data[first+root], data[first+child] = data[first+child], data[first+root]
+		root = child
+	}
+}
+
+func heapSortOrdered[Elem constraints.Ordered](data []Elem, a, b int) {
+	first := a
+	lo := 0
+	hi := b - a
+
+	// Build heap with greatest element at top.
+	for i := (hi - 1) / 2; i >= 0; i-- {
+		siftDownOrdered(data, i, hi, first)
+	}
+
+	// Pop elements, largest first, into end of data.
+	for i := hi - 1; i >= 0; i-- {
+		data[first], data[first+i] = data[first+i], data[first]
+		siftDownOrdered(data, lo, i, first)
+	}
+}
+
+// Quicksort, loosely following Bentley and McIlroy,
+// "Engineering a Sort Function" SP&E November 1993.
+
+// medianOfThreeOrdered moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
+func medianOfThreeOrdered[Elem constraints.Ordered](data []Elem, m1, m0, m2 int) {
+	// sort 3 elements
+	if data[m1] < data[m0] {
+		data[m1], data[m0] = data[m0], data[m1]
+	}
+	// data[m0] <= data[m1]
+	if data[m2] < data[m1] {
+		data[m2], data[m1] = data[m1], data[m2]
+		// data[m0] <= data[m2] && data[m1] < data[m2]
+		if data[m1] < data[m0] {
+			data[m1], data[m0] = data[m0], data[m1]
+		}
+	}
+	// now data[m0] <= data[m1] <= data[m2]
+}
+
+func swapRangeOrdered[Elem constraints.Ordered](data []Elem, a, b, n int) {
+	for i := 0; i < n; i++ {
+		data[a+i], data[b+i] = data[b+i], data[a+i]
+	}
+}
+
+func doPivotOrdered[Elem constraints.Ordered](data []Elem, lo, hi int) (midlo, midhi int) {
+	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
+	if hi-lo > 40 {
+		// Tukey's "Ninther" median of three medians of three.
+		s := (hi - lo) / 8
+		medianOfThreeOrdered(data, lo, lo+s, lo+2*s)
+		medianOfThreeOrdered(data, m, m-s, m+s)
+		medianOfThreeOrdered(data, hi-1, hi-1-s, hi-1-2*s)
+	}
+	medianOfThreeOrdered(data, lo, m, hi-1)
+
+	// Invariants are:
+	//	data[lo] = pivot (set up by ChoosePivot)
+	//	data[lo < i < a] < pivot
+	//	data[a <= i < b] <= pivot
+	//	data[b <= i < c] unexamined
+	//	data[c <= i < hi-1] > pivot
+	//	data[hi-1] >= pivot
+	pivot := lo
+	a, c := lo+1, hi-1
+
+	for ; a < c && (data[a] < data[pivot]); a++ {
+	}
+	b := a
+	for {
+		for ; b < c && !(data[pivot] < data[b]); b++ { // data[b] <= pivot
+		}
+		for ; b < c && (data[pivot] < data[c-1]); c-- { // data[c-1] > pivot
+		}
+		if b >= c {
+			break
+		}
+		// data[b] > pivot; data[c-1] <= pivot
+		data[b], data[c-1] = data[c-1], data[b]
+		b++
+		c--
+	}
+	// If hi-c<3 then there are duplicates (by property of median of nine).
+	// Let's be a bit more conservative, and set border to 5.
+	protect := hi-c < 5
+	if !protect && hi-c < (hi-lo)/4 {
+		// Let's test some points for equality to pivot
+		dups := 0
+		if !(data[pivot] < data[hi-1]) { // data[hi-1] = pivot
+			data[c], data[hi-1] = data[hi-1], data[c]
+			c++
+			dups++
+		}
+		if !(data[b-1] < data[pivot]) { // data[b-1] = pivot
+			b--
+			dups++
+		}
+		// m-lo = (hi-lo)/2 > 6
+		// b-lo > (hi-lo)*3/4-1 > 8
+		// ==> m < b ==> data[m] <= pivot
+		if !(data[m] < data[pivot]) { // data[m] = pivot
+			data[m], data[b-1] = data[b-1], data[m]
+			b--
+			dups++
+		}
+		// if at least 2 points are equal to pivot, assume skewed distribution
+		protect = dups > 1
+	}
+	if protect {
+		// Protect against a lot of duplicates
+		// Add invariant:
+		//	data[a <= i < b] unexamined
+		//	data[b <= i < c] = pivot
+		for {
+			for ; a < b && !(data[b-1] < data[pivot]); b-- { // data[b] == pivot
+			}
+			for ; a < b && (data[a] < data[pivot]); a++ { // data[a] < pivot
+			}
+			if a >= b {
+				break
+			}
+			// data[a] == pivot; data[b-1] < pivot
+			data[a], data[b-1] = data[b-1], data[a]
+			a++
+			b--
+		}
+	}
+	// Swap pivot into middle
+	data[pivot], data[b-1] = data[b-1], data[pivot]
+	return b - 1, c
+}
+
+func quickSortOrdered[Elem constraints.Ordered](data []Elem, a, b, maxDepth int) {
+	for b-a > 12 { // Use ShellSort for slices <= 12 elements
+		if maxDepth == 0 {
+			heapSortOrdered(data, a, b)
+			return
+		}
+		maxDepth--
+		mlo, mhi := doPivotOrdered(data, a, b)
+		// Avoiding recursion on the larger subproblem guarantees
+		// a stack depth of at most lg(b-a).
+		if mlo-a < b-mhi {
+			quickSortOrdered(data, a, mlo, maxDepth)
+			a = mhi // i.e., quickSortOrdered(data, mhi, b)
+		} else {
+			quickSortOrdered(data, mhi, b, maxDepth)
+			b = mlo // i.e., quickSortOrdered(data, a, mlo)
+		}
+	}
+	if b-a > 1 {
+		// Do ShellSort pass with gap 6
+		// It could be written in this simplified form because b-a <= 12
+		for i := a + 6; i < b; i++ {
+			if data[i] < data[i-6] {
+				data[i], data[i-6] = data[i-6], data[i]
+			}
+		}
+		insertionSortOrdered(data, a, b)
+	}
+}
+
+func stableOrdered[Elem constraints.Ordered](data []Elem, n int) {
+	blockSize := 20 // must be > 0
+	a, b := 0, blockSize
+	for b <= n {
+		insertionSortOrdered(data, a, b)
+		a = b
+		b += blockSize
+	}
+	insertionSortOrdered(data, a, n)
+
+	for blockSize < n {
+		a, b = 0, 2*blockSize
+		for b <= n {
+			symMergeOrdered(data, a, a+blockSize, b)
+			a = b
+			b += 2 * blockSize
+		}
+		if m := a + blockSize; m < n {
+			symMergeOrdered(data, a, m, n)
+		}
+		blockSize *= 2
+	}
+}
+
+// symMergeOrdered merges the two sorted subsequences data[a:m] and data[m:b] using
+// the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
+// Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
+// Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
+// Computer Science, pages 714-723. Springer, 2004.
+//
+// Let M = m-a and N = b-m. Without loss of generality, M < N.
+// The recursion depth is bound by ceil(log(N+M)).
+// The algorithm needs O(M*log(N/M + 1)) element comparisons.
+// The algorithm needs O((M+N)*log(M)) element swaps.
+//
+// The paper gives O((M+N)*log(M)) as the number of assignments assuming a
+// rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
+// in the paper carries through for Swap operations, especially as the block
+// swapping rotate uses only O(M+N) Swaps.
+//
+// symMerge assumes non-degenerate arguments: a < m && m < b.
+// Having the caller check this condition eliminates many leaf recursion calls,
+// which improves performance.
+func symMergeOrdered[Elem constraints.Ordered](data []Elem, a, m, b int) {
+	// Avoid unnecessary recursions of symMerge
+	// by direct insertion of data[a] into data[m:b]
+	// if data[a:m] only contains one element.
+	if m-a == 1 {
+		// Use binary search to find the lowest index i
+		// such that data[i] >= data[a] for m <= i < b.
+		// Exit the search loop with i == b in case no such index exists.
+		i := m
+		j := b
+		for i < j {
+			h := int(uint(i+j) >> 1)
+			if data[h] < data[a] {
+				i = h + 1
+			} else {
+				j = h
+			}
+		}
+		// Swap values until data[a] reaches the position before i.
+		for k := a; k < i-1; k++ {
+			data[k], data[k+1] = data[k+1], data[k]
+		}
+		return
+	}
+
+	// Avoid unnecessary recursions of symMerge
+	// by direct insertion of data[m] into data[a:m]
+	// if data[m:b] only contains one element.
+	if b-m == 1 {
+		// Use binary search to find the lowest index i
+		// such that data[i] > data[m] for a <= i < m.
+		// Exit the search loop with i == m in case no such index exists.
+		i := a
+		j := m
+		for i < j {
+			h := int(uint(i+j) >> 1)
+			if !(data[m] < data[h]) {
+				i = h + 1
+			} else {
+				j = h
+			}
+		}
+		// Swap values until data[m] reaches the position i.
+		for k := m; k > i; k-- {
+			data[k], data[k-1] = data[k-1], data[k]
+		}
+		return
+	}
+
+	mid := int(uint(a+b) >> 1)
+	n := mid + m
+	var start, r int
+	if m > mid {
+		start = n - b
+		r = mid
+	} else {
+		start = a
+		r = m
+	}
+	p := n - 1
+
+	for start < r {
+		c := int(uint(start+r) >> 1)
+		if !(data[p-c] < data[c]) {
+			start = c + 1
+		} else {
+			r = c
+		}
+	}
+
+	end := n - start
+	if start < m && m < end {
+		rotateOrdered(data, start, m, end)
+	}
+	if a < start && start < mid {
+		symMergeOrdered(data, a, start, mid)
+	}
+	if mid < end && end < b {
+		symMergeOrdered(data, mid, end, b)
+	}
+}
+
+// rotateOrdered rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
+// Data of the form 'x u v y' is changed to 'x v u y'.
+// rotateOrdered performs at most b-a many element swaps,
+// and it assumes non-degenerate arguments: a < m && m < b.
+func rotateOrdered[Elem constraints.Ordered](data []Elem, a, m, b int) {
+	i := m - a
+	j := b - m
+
+	for i != j {
+		if i > j {
+			swapRangeOrdered(data, m-i, m, j)
+			i -= j
+		} else {
+			swapRangeOrdered(data, m-i, m+j-i, i)
+			j -= i
+		}
+	}
+	// i == j
+	swapRangeOrdered(data, m-i, m, i)
+}