From 79475d55878ee129a94cd669a43c0c0df84969df Mon Sep 17 00:00:00 2001 From: Rob Pike Date: Wed, 5 Aug 2009 14:40:34 -0700 Subject: special case: recognize '[^\n]' and make it as fast as '.' R=rsc DELTA=25 (23 added, 1 deleted, 1 changed) OCL=32793 CL=32799 --- src/pkg/regexp/regexp.go | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'src/pkg/regexp/regexp.go') diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index 1ab9246f6..745a3ae72 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -87,7 +87,8 @@ const ( _EOT; // '$' end of text _CHAR; // 'a' regular character _CHARCLASS; // [a-z] character class - _ANY; // '.' any character + _ANY; // '.' any character including newline + _NOTNL; // [^\n] special case: any character but newline _BRA; // '(' parenthesized expression _EBRA; // ')'; end of '(' parenthesized expression _ALT; // '|' alternation @@ -200,6 +201,14 @@ type _Any struct { func (any *_Any) kind() int { return _ANY } func (any *_Any) print() { print("any") } +// --- NOTNL any character but newline +type _NotNl struct { + common +} + +func (notnl *_NotNl) kind() int { return _NOTNL } +func (notnl *_NotNl) print() { print("notnl") } + // --- BRA parenthesized expression type _Bra struct { common; @@ -305,7 +314,6 @@ func specialcclass(c int) bool { func (p *parser) charClass() instr { cc := newCharClass(); - p.re.add(cc); if p.c() == '^' { cc.negate = true; p.nextc(); @@ -317,6 +325,14 @@ func (p *parser) charClass() instr { if left >= 0 { p.re.setError(ErrBadRange); } + // Is it [^\n]? + if cc.negate && cc.ranges.Len() == 2 && + cc.ranges.At(0) == '\n' && cc.ranges.At(1) == '\n' { + nl := new(_NotNl); + p.re.add(nl); + return nl; + } + p.re.add(cc); return cc; case '-': // do this before backslash processing p.re.setError(ErrBadRange); @@ -680,6 +696,10 @@ func (re *Regexp) doExecute(str string, pos int) []int { if c != endOfFile { s[out] = addState(s[out], st.inst.next(), st.match) } + case _NOTNL: + if c != endOfFile && c != '\n' { + s[out] = addState(s[out], st.inst.next(), st.match) + } case _BRA: n := st.inst.(*_Bra).n; st.match[2*n] = pos; -- cgit v1.2.3