| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package sort provides primitives for sorting slices and user-defined |
| // collections. |
| package sort |
| |
| // A type, typically a collection, that satisfies sort.Interface can be |
| // sorted by the routines in this package. The methods require that the |
| // elements of the collection be enumerated by an integer index. |
| type Interface interface { |
| // Len is the number of elements in the collection. |
| Len() int |
| // Less reports whether the element with |
| // index i should sort before the element with index j. |
| Less(i, j int) bool |
| // Swap swaps the elements with indexes i and j. |
| Swap(i, j int) |
| } |
| |
| func min(a, b int) int { |
| if a < b { |
| return a |
| } |
| return b |
| } |
| |
| // Insertion sort |
| func insertionSort(data Interface, a, b int) { |
| for i := a + 1; i < b; i++ { |
| for j := i; j > a && data.Less(j, j-1); j-- { |
| data.Swap(j, j-1) |
| } |
| } |
| } |
| |
| // siftDown implements the heap property on data[lo, hi). |
| // first is an offset into the array where the root of the heap lies. |
| func siftDown(data Interface, lo, hi, first int) { |
| root := lo |
| for { |
| child := 2*root + 1 |
| if child >= hi { |
| break |
| } |
| if child+1 < hi && data.Less(first+child, first+child+1) { |
| child++ |
| } |
| if !data.Less(first+root, first+child) { |
| return |
| } |
| data.Swap(first+root, first+child) |
| root = child |
| } |
| } |
| |
| func heapSort(data Interface, a, b int) { |
| first := a |
| lo := 0 |
| hi := b - a |
| |
| // Build heap with greatest element at top. |
| for i := (hi - 1) / 2; i >= 0; i-- { |
| siftDown(data, i, hi, first) |
| } |
| |
| // Pop elements, largest first, into end of data. |
| for i := hi - 1; i >= 0; i-- { |
| data.Swap(first, first+i) |
| siftDown(data, lo, i, first) |
| } |
| } |
| |
| // Quicksort, following Bentley and McIlroy, |
| // ``Engineering a Sort Function,'' SP&E November 1993. |
| |
| // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. |
| func medianOfThree(data Interface, m1, m0, m2 int) { |
| // sort 3 elements |
| if data.Less(m1, m0) { |
| data.Swap(m1, m0) |
| } |
| // data[m0] <= data[m1] |
| if data.Less(m2, m1) { |
| data.Swap(m2, m1) |
| // data[m0] <= data[m2] && data[m1] < data[m2] |
| if data.Less(m1, m0) { |
| data.Swap(m1, m0) |
| } |
| } |
| // now data[m0] <= data[m1] <= data[m2] |
| } |
| |
| func swapRange(data Interface, a, b, n int) { |
| for i := 0; i < n; i++ { |
| data.Swap(a+i, b+i) |
| } |
| } |
| |
| func doPivot(data Interface, lo, hi int) (midlo, midhi int) { |
| m := lo + (hi-lo)/2 // Written like this to avoid integer overflow. |
| if hi-lo > 40 { |
| // Tukey's ``Ninther,'' median of three medians of three. |
| s := (hi - lo) / 8 |
| medianOfThree(data, lo, lo+s, lo+2*s) |
| medianOfThree(data, m, m-s, m+s) |
| medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) |
| } |
| medianOfThree(data, lo, m, hi-1) |
| |
| // Invariants are: |
| // data[lo] = pivot (set up by ChoosePivot) |
| // data[lo <= i < a] = pivot |
| // data[a <= i < b] < pivot |
| // data[b <= i < c] is unexamined |
| // data[c <= i < d] > pivot |
| // data[d <= i < hi] = pivot |
| // |
| // Once b meets c, can swap the "= pivot" sections |
| // into the middle of the slice. |
| pivot := lo |
| a, b, c, d := lo+1, lo+1, hi, hi |
| for { |
| for b < c { |
| if data.Less(b, pivot) { // data[b] < pivot |
| b++ |
| } else if !data.Less(pivot, b) { // data[b] = pivot |
| data.Swap(a, b) |
| a++ |
| b++ |
| } else { |
| break |
| } |
| } |
| for b < c { |
| if data.Less(pivot, c-1) { // data[c-1] > pivot |
| c-- |
| } else if !data.Less(c-1, pivot) { // data[c-1] = pivot |
| data.Swap(c-1, d-1) |
| c-- |
| d-- |
| } else { |
| break |
| } |
| } |
| if b >= c { |
| break |
| } |
| // data[b] > pivot; data[c-1] < pivot |
| data.Swap(b, c-1) |
| b++ |
| c-- |
| } |
| |
| n := min(b-a, a-lo) |
| swapRange(data, lo, b-n, n) |
| |
| n = min(hi-d, d-c) |
| swapRange(data, c, hi-n, n) |
| |
| return lo + b - a, hi - (d - c) |
| } |
| |
| func quickSort(data Interface, a, b, maxDepth int) { |
| for b-a > 7 { |
| if maxDepth == 0 { |
| heapSort(data, a, b) |
| return |
| } |
| maxDepth-- |
| mlo, mhi := doPivot(data, a, b) |
| // Avoiding recursion on the larger subproblem guarantees |
| // a stack depth of at most lg(b-a). |
| if mlo-a < b-mhi { |
| quickSort(data, a, mlo, maxDepth) |
| a = mhi // i.e., quickSort(data, mhi, b) |
| } else { |
| quickSort(data, mhi, b, maxDepth) |
| b = mlo // i.e., quickSort(data, a, mlo) |
| } |
| } |
| if b-a > 1 { |
| insertionSort(data, a, b) |
| } |
| } |
| |
| // Sort sorts data. |
| // It makes one call to data.Len to determine n, and O(n*log(n)) calls to |
| // data.Less and data.Swap. The sort is not guaranteed to be stable. |
| func Sort(data Interface) { |
| // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached. |
| n := data.Len() |
| maxDepth := 0 |
| for i := n; i > 0; i >>= 1 { |
| maxDepth++ |
| } |
| maxDepth *= 2 |
| quickSort(data, 0, n, maxDepth) |
| } |
| |
| type reverse struct { |
| // This embedded Interface permits Reverse to use the methods of |
| // another Interface implementation. |
| Interface |
| } |
| |
| // Less returns the opposite of the embedded implementation's Less method. |
| func (r reverse) Less(i, j int) bool { |
| return r.Interface.Less(j, i) |
| } |
| |
| // Reverse returns the reverse order for data. |
| func Reverse(data Interface) Interface { |
| return &reverse{data} |
| } |
| |
| // IsSorted reports whether data is sorted. |
| func IsSorted(data Interface) bool { |
| n := data.Len() |
| for i := n - 1; i > 0; i-- { |
| if data.Less(i, i-1) { |
| return false |
| } |
| } |
| return true |
| } |
| |
| // Convenience types for common cases |
| |
| // IntSlice attaches the methods of Interface to []int, sorting in increasing order. |
| type IntSlice []int |
| |
| func (p IntSlice) Len() int { return len(p) } |
| func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] } |
| func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } |
| |
| // Sort is a convenience method. |
| func (p IntSlice) Sort() { Sort(p) } |
| |
| // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order. |
| type Float64Slice []float64 |
| |
| func (p Float64Slice) Len() int { return len(p) } |
| func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) } |
| func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } |
| |
| // isNaN is a copy of math.IsNaN to avoid a dependency on the math package. |
| func isNaN(f float64) bool { |
| return f != f |
| } |
| |
| // Sort is a convenience method. |
| func (p Float64Slice) Sort() { Sort(p) } |
| |
| // StringSlice attaches the methods of Interface to []string, sorting in increasing order. |
| type StringSlice []string |
| |
| func (p StringSlice) Len() int { return len(p) } |
| func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] } |
| func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } |
| |
| // Sort is a convenience method. |
| func (p StringSlice) Sort() { Sort(p) } |
| |
| // Convenience wrappers for common cases |
| |
| // Ints sorts a slice of ints in increasing order. |
| func Ints(a []int) { Sort(IntSlice(a)) } |
| |
| // Float64s sorts a slice of float64s in increasing order. |
| func Float64s(a []float64) { Sort(Float64Slice(a)) } |
| |
| // Strings sorts a slice of strings in increasing order. |
| func Strings(a []string) { Sort(StringSlice(a)) } |
| |
| // IntsAreSorted tests whether a slice of ints is sorted in increasing order. |
| func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) } |
| |
| // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order. |
| func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) } |
| |
| // StringsAreSorted tests whether a slice of strings is sorted in increasing order. |
| func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) } |
| |
| // Notes on stable sorting: |
| // The used algorithms are simple and provable correct on all input and use |
| // only logarithmic additional stack space. They perform well if compared |
| // experimentally to other stable in-place sorting algorithms. |
| // |
| // Remarks on other algorithms evaluated: |
| // - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++: |
| // Not faster. |
| // - GCC's __rotate for block rotations: Not faster. |
| // - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen |
| // and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40: |
| // The given algorithms are in-place, number of Swap and Assignments |
| // grow as n log n but the algorithm is not stable. |
| // - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and |
| // V. Raman in Algorithmica (1996) 16, 115-160: |
| // This algorithm either needs additional 2n bits or works only if there |
| // are enough different elements available to encode some permutations |
| // which have to be undone later (so not stable an any input). |
| // - All the optimal in-place sorting/merging algorithms I found are either |
| // unstable or rely on enough different elements in each step to encode the |
| // performed block rearrangements. See also "In-Place Merging Algorithms", |
| // Denham Coates-Evely, Department of Computer Science, Kings College, |
| // January 2004 and the reverences in there. |
| // - Often "optimal" algorithms are optimal in the number of assignments |
| // but Interface has only Swap as operation. |
| |
| // Stable sorts data while keeping the original order of equal elements. |
| // |
| // It makes one call to data.Len to determine n, O(n*log(n)) calls to |
| // data.Less and O(n*log(n)*log(n)) calls to data.Swap. |
| func Stable(data Interface) { |
| n := data.Len() |
| blockSize := 20 // must be > 0 |
| a, b := 0, blockSize |
| for b <= n { |
| insertionSort(data, a, b) |
| a = b |
| b += blockSize |
| } |
| insertionSort(data, a, n) |
| |
| for blockSize < n { |
| a, b = 0, 2*blockSize |
| for b <= n { |
| symMerge(data, a, a+blockSize, b) |
| a = b |
| b += 2 * blockSize |
| } |
| if m := a + blockSize; m < n { |
| symMerge(data, a, m, n) |
| } |
| blockSize *= 2 |
| } |
| } |
| |
| // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using |
| // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum |
| // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz |
| // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in |
| // Computer Science, pages 714-723. Springer, 2004. |
| // |
| // Let M = m-a and N = b-n. Wolog M < N. |
| // The recursion depth is bound by ceil(log(N+M)). |
| // The algorithm needs O(M*log(N/M + 1)) calls to data.Less. |
| // The algorithm needs O((M+N)*log(M)) calls to data.Swap. |
| // |
| // The paper gives O((M+N)*log(M)) as the number of assignments assuming a |
| // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation |
| // in the paper carries through for Swap operations, especially as the block |
| // swapping rotate uses only O(M+N) Swaps. |
| // |
| // symMerge assumes non-degenerate arguments: a < m && m < b. |
| // Having the caller check this condition eliminates many leaf recursion calls, |
| // which improves performance. |
| func symMerge(data Interface, a, m, b int) { |
| // Avoid unnecessary recursions of symMerge |
| // by direct insertion of data[a] into data[m:b] |
| // if data[a:m] only contains one element. |
| if m-a == 1 { |
| // Use binary search to find the lowest index i |
| // such that data[i] >= data[a] for m <= i < b. |
| // Exit the search loop with i == b in case no such index exists. |
| i := m |
| j := b |
| for i < j { |
| h := i + (j-i)/2 |
| if data.Less(h, a) { |
| i = h + 1 |
| } else { |
| j = h |
| } |
| } |
| // Swap values until data[a] reaches the position before i. |
| for k := a; k < i-1; k++ { |
| data.Swap(k, k+1) |
| } |
| return |
| } |
| |
| // Avoid unnecessary recursions of symMerge |
| // by direct insertion of data[m] into data[a:m] |
| // if data[m:b] only contains one element. |
| if b-m == 1 { |
| // Use binary search to find the lowest index i |
| // such that data[i] > data[m] for a <= i < m. |
| // Exit the search loop with i == m in case no such index exists. |
| i := a |
| j := m |
| for i < j { |
| h := i + (j-i)/2 |
| if !data.Less(m, h) { |
| i = h + 1 |
| } else { |
| j = h |
| } |
| } |
| // Swap values until data[m] reaches the position i. |
| for k := m; k > i; k-- { |
| data.Swap(k, k-1) |
| } |
| return |
| } |
| |
| mid := a + (b-a)/2 |
| n := mid + m |
| var start, r int |
| if m > mid { |
| start = n - b |
| r = mid |
| } else { |
| start = a |
| r = m |
| } |
| p := n - 1 |
| |
| for start < r { |
| c := start + (r-start)/2 |
| if !data.Less(p-c, c) { |
| start = c + 1 |
| } else { |
| r = c |
| } |
| } |
| |
| end := n - start |
| if start < m && m < end { |
| rotate(data, start, m, end) |
| } |
| if a < start && start < mid { |
| symMerge(data, a, start, mid) |
| } |
| if mid < end && end < b { |
| symMerge(data, mid, end, b) |
| } |
| } |
| |
| // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data: |
| // Data of the form 'x u v y' is changed to 'x v u y'. |
| // Rotate performs at most b-a many calls to data.Swap. |
| // Rotate assumes non-degenerate arguments: a < m && m < b. |
| func rotate(data Interface, a, m, b int) { |
| i := m - a |
| j := b - m |
| |
| for i != j { |
| if i > j { |
| swapRange(data, m-i, m, j) |
| i -= j |
| } else { |
| swapRange(data, m-i, m+j-i, i) |
| j -= i |
| } |
| } |
| // i == j |
| swapRange(data, m-i, m, i) |
| } |
| |
| /* |
| Complexity of Stable Sorting |
| |
| |
| Complexity of block swapping rotation |
| |
| Each Swap puts one new element into its correct, final position. |
| Elements which reach their final position are no longer moved. |
| Thus block swapping rotation needs |u|+|v| calls to Swaps. |
| This is best possible as each element might need a move. |
| |
| Pay attention when comparing to other optimal algorithms which |
| typically count the number of assignments instead of swaps: |
| E.g. the optimal algorithm of Dudzinski and Dydek for in-place |
| rotations uses O(u + v + gcd(u,v)) assignments which is |
| better than our O(3 * (u+v)) as gcd(u,v) <= u. |
| |
| |
| Stable sorting by SymMerge and BlockSwap rotations |
| |
| SymMerg complexity for same size input M = N: |
| Calls to Less: O(M*log(N/M+1)) = O(N*log(2)) = O(N) |
| Calls to Swap: O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N)) |
| |
| (The following argument does not fuzz over a missing -1 or |
| other stuff which does not impact the final result). |
| |
| Let n = data.Len(). Assume n = 2^k. |
| |
| Plain merge sort performs log(n) = k iterations. |
| On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i. |
| |
| Thus iteration i of merge sort performs: |
| Calls to Less O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n) |
| Calls to Swap O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i) |
| |
| In total k = log(n) iterations are performed; so in total: |
| Calls to Less O(log(n) * n) |
| Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n) |
| = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n)) |
| |
| |
| Above results should generalize to arbitrary n = 2^k + p |
| and should not be influenced by the initial insertion sort phase: |
| Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of |
| size bs at n/bs blocks: O(bs*n) Swaps and Less during insertion sort. |
| Merge sort iterations start at i = log(bs). With t = log(bs) constant: |
| Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n) |
| = O(n * log(n)) |
| Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n)) |
| |
| */ |