regexp: fix LiteralPrefix for certain onepass progs
The prefix computation for onepass was incorrectly returning
complete=true when it encountered a beginning-of-text empty width match
(^) in the middle of an expression.
Fix by returning complete only when the prefix is followed by $ and then
an accepting state.
Fixes #11175.
Change-Id: Ie9c4cf5f76c1d2c904a6fb2f016cedb265c19fde
Reviewed-on: https://go-review.googlesource.com/16200
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/src/regexp/all_test.go b/src/regexp/all_test.go
index ebe31d7..88391ff 100644
--- a/src/regexp/all_test.go
+++ b/src/regexp/all_test.go
@@ -359,6 +359,19 @@
{`!@#$%^&*()_+-=[{]}\|,<.>/?~`, `!@#\$%\^&\*\(\)_\+-=\[\{\]\}\\\|,<\.>/\?~`, `!@#`, false},
}
+var literalPrefixTests = []MetaTest{
+ // See golang.org/issue/11175.
+ // output is unused.
+ {`^0^0$`, ``, `0`, false},
+ {`^0^`, ``, ``, false},
+ {`^0$`, ``, `0`, true},
+ {`$0^`, ``, ``, false},
+ {`$0$`, ``, ``, false},
+ {`^^0$$`, ``, ``, false},
+ {`^$^$`, ``, ``, false},
+ {`$$0^^`, ``, ``, false},
+}
+
func TestQuoteMeta(t *testing.T) {
for _, tc := range metaTests {
// Verify that QuoteMeta returns the expected string.
@@ -390,7 +403,7 @@
}
func TestLiteralPrefix(t *testing.T) {
- for _, tc := range metaTests {
+ for _, tc := range append(metaTests, literalPrefixTests...) {
// Literal method needs to scan the pattern.
re := MustCompile(tc.pattern)
str, complete := re.LiteralPrefix()
diff --git a/src/regexp/onepass.go b/src/regexp/onepass.go
index e6f4285..2bd81e3 100644
--- a/src/regexp/onepass.go
+++ b/src/regexp/onepass.go
@@ -59,7 +59,12 @@
buf.WriteRune(i.Rune[0])
pc, i = i.Out, &p.Inst[i.Out]
}
- return buf.String(), i.Op == syntax.InstEmptyWidth && (syntax.EmptyOp(i.Arg))&syntax.EmptyBeginText != 0, pc
+ if i.Op == syntax.InstEmptyWidth &&
+ syntax.EmptyOp(i.Arg)&syntax.EmptyEndText != 0 &&
+ p.Inst[i.Out].Op == syntax.InstMatch {
+ complete = true
+ }
+ return buf.String(), complete, pc
}
// OnePassNext selects the next actionable state of the prog, based on the input character.