suffixarray: faster creation algorithm
This implements the algorithm qsufsort using the sort package
as a sorting primitive. Its worst-case performance is O(N*log(N)), and it
uses only an additional slice of N ints of memory during creation.
Benchmarks (seconds):
old new
10k nulls 149 0.044
1M English corpus 32.0 3.6
R=gri, gri1
CC=golang-dev
https://golang.org/cl/3752044
diff --git a/src/pkg/index/suffixarray/suffixarray_test.go b/src/pkg/index/suffixarray/suffixarray_test.go
index 659bce0..b3486a9 100644
--- a/src/pkg/index/suffixarray/suffixarray_test.go
+++ b/src/pkg/index/suffixarray/suffixarray_test.go
@@ -5,6 +5,7 @@
package suffixarray
import (
+ "bytes"
"container/vector"
"regexp"
"sort"
@@ -204,9 +205,26 @@
}
+// index is used to hide the sort.Interface
+type index Index
+
+func (x *index) Len() int { return len(x.sa) }
+func (x *index) Less(i, j int) bool { return bytes.Compare(x.at(i), x.at(j)) < 0 }
+func (x *index) Swap(i, j int) { x.sa[i], x.sa[j] = x.sa[j], x.sa[i] }
+func (a *index) at(i int) []byte { return a.data[a.sa[i]:] }
+
+
+func testConstruction(t *testing.T, tc *testCase, x *Index) {
+ if !sort.IsSorted((*index)(x)) {
+ t.Errorf("testConstruction failed %s", tc.name)
+ }
+}
+
+
func TestIndex(t *testing.T) {
for _, tc := range testCases {
x := New([]byte(tc.source))
+ testConstruction(t, &tc, x)
testLookups(t, &tc, x, 0)
testLookups(t, &tc, x, 1)
testLookups(t, &tc, x, 10)