regexp: fix performance bug, make anchored searches fail fast.
The bug was that for an anchored pattern such as ^x, the prefix
scan ignored the anchor, and could scan the whole file if there was
no x present. The fix is to do prefix matching after the anchor;
the cost miniscule; the speedups huge.
R=rsc, gri
CC=golang-dev
https://golang.org/cl/3837042
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go
index 8f115aa..3b2c489 100644
--- a/src/pkg/regexp/all_test.go
+++ b/src/pkg/regexp/all_test.go
@@ -377,3 +377,49 @@
re.ReplaceAllString(x, "")
}
}
+
+func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ re := MustCompile("^zbc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
+
+func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ for i := 0; i < 15; i++ {
+ x = append(x, x...)
+ }
+ re := MustCompile("^zbc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
+
+func BenchmarkAnchoredShortMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ re := MustCompile("^.bc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}
+
+func BenchmarkAnchoredLongMatch(b *testing.B) {
+ b.StopTimer()
+ x := []byte("abcdefghijklmnopqrstuvwxyz")
+ for i := 0; i < 15; i++ {
+ x = append(x, x...)
+ }
+ re := MustCompile("^.bc(d|e)")
+ b.StartTimer()
+ for i := 0; i < b.N; i++ {
+ re.Match(x)
+ }
+}