special case: recognize '[^\n]' and make it as fast as '.'
R=rsc
DELTA=25 (23 added, 1 deleted, 1 changed)
OCL=32793
CL=32799
diff --git a/src/pkg/regexp/all_test.go b/src/pkg/regexp/all_test.go
index 23c2200..0d16b24 100644
--- a/src/pkg/regexp/all_test.go
+++ b/src/pkg/regexp/all_test.go
@@ -27,6 +27,7 @@
`[]`,
`[abc]`,
`[^1234]`,
+ `[^\n]`,
}
// TODO: nice to do this with a map
@@ -72,6 +73,7 @@
tester{ `[a-z]+`, "abcd", vec{0,4} },
tester{ `[^a-z]+`, "ab1234cd", vec{2,6} },
tester{ `[a\-\]z]+`, "az]-bcz", vec{0,4} },
+ tester{ `[^\n]+`, "abcd\n", vec{0,4} },
tester{ `[日本語]+`, "日本語日本語", vec{0,18} },
tester{ `()`, "", vec{0,0, 0,0} },
tester{ `(a)`, "a", vec{0,1, 0,1} },
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index 1ab9246..745a3ae 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -87,7 +87,8 @@
_EOT; // '$' end of text
_CHAR; // 'a' regular character
_CHARCLASS; // [a-z] character class
- _ANY; // '.' any character
+ _ANY; // '.' any character including newline
+ _NOTNL; // [^\n] special case: any character but newline
_BRA; // '(' parenthesized expression
_EBRA; // ')'; end of '(' parenthesized expression
_ALT; // '|' alternation
@@ -200,6 +201,14 @@
func (any *_Any) kind() int { return _ANY }
func (any *_Any) print() { print("any") }
+// --- NOTNL any character but newline
+type _NotNl struct {
+ common
+}
+
+func (notnl *_NotNl) kind() int { return _NOTNL }
+func (notnl *_NotNl) print() { print("notnl") }
+
// --- BRA parenthesized expression
type _Bra struct {
common;
@@ -305,7 +314,6 @@
func (p *parser) charClass() instr {
cc := newCharClass();
- p.re.add(cc);
if p.c() == '^' {
cc.negate = true;
p.nextc();
@@ -317,6 +325,14 @@
if left >= 0 {
p.re.setError(ErrBadRange);
}
+ // Is it [^\n]?
+ if cc.negate && cc.ranges.Len() == 2 &&
+ cc.ranges.At(0) == '\n' && cc.ranges.At(1) == '\n' {
+ nl := new(_NotNl);
+ p.re.add(nl);
+ return nl;
+ }
+ p.re.add(cc);
return cc;
case '-': // do this before backslash processing
p.re.setError(ErrBadRange);
@@ -680,6 +696,10 @@
if c != endOfFile {
s[out] = addState(s[out], st.inst.next(), st.match)
}
+ case _NOTNL:
+ if c != endOfFile && c != '\n' {
+ s[out] = addState(s[out], st.inst.next(), st.match)
+ }
case _BRA:
n := st.inst.(*_Bra).n;
st.match[2*n] = pos;