diff options
author | Stephen Ma <stephenm@golang.org> | 2009-08-09 19:30:47 -0700 |
---|---|---|
committer | Stephen Ma <stephenm@golang.org> | 2009-08-09 19:30:47 -0700 |
commit | 06bd359d0e7f92e197a46637b6af5dab794e50e9 (patch) | |
tree | e2a57a5f492c643901db909871e48f9b247f8c09 /src/pkg/regexp/regexp.go | |
parent | e2c9bb805e50f83d4b6f9129b6fa8e4a4733db2b (diff) | |
download | golang-06bd359d0e7f92e197a46637b6af5dab794e50e9.tar.gz |
Add methods AllMatches, AllMatchesString, AllMatchesIter,
AllMatchesStringIter, based on sawn and sawzall functions in sawzall.
APPROVED=rsc
DELTA=218 (218 added, 0 deleted, 0 changed)
OCL=32408
CL=32949
Diffstat (limited to 'src/pkg/regexp/regexp.go')
-rw-r--r-- | src/pkg/regexp/regexp.go | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index 6b8b1bf86..f78bf864e 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -951,3 +951,122 @@ func QuoteMeta(s string) string { return string(b[0:j]); } +// Find matches in slice b if b is non-nil, otherwise find matches in string s. +func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int)) { + var end int; + if b == nil { + end = len(s); + } else { + end = len(b); + } + + for pos, i, prevMatchEnd := 0, 0, -1; i < n && pos <= end; { + matches := re.doExecute(s, b, pos); + if len(matches) == 0 { + break; + } + + accept := true; + if matches[1] == pos { + // We've found an empty match. + if matches[0] == prevMatchEnd { + // We don't allow an empty match right + // after a previous match, so ignore it. + accept = false; + } + var rune, width int; + if b == nil { + rune, width = utf8.DecodeRuneInString(s[pos:end]); + } else { + rune, width = utf8.DecodeRune(b[pos:end]); + } + if width > 0 { + pos += width; + } else { + pos = end + 1; + } + } else { + pos = matches[1]; + } + prevMatchEnd = matches[1]; + + if accept { + deliver(matches[0], matches[1]); + i++; + } + } +} + +// AllMatches slices the byte slice b into substrings that are successive +// matches of the Regexp within b. If n > 0, the function returns at most n +// matches. Text that does not match the expression will be skipped. Empty +// matches abutting a preceding match are ignored. The function returns a slice +// containing the matching substrings. +func (re *Regexp) AllMatches(b []byte, n int) [][]byte { + if n <= 0 { + n = len(b) + 1; + } + result := make([][]byte, n); + i := 0; + re.allMatches("", b, n, func(start, end int) { + result[i] = b[start:end]; + i++; + }); + return result[0:i]; +} + +// AllMatchesString slices the string s into substrings that are successive +// matches of the Regexp within s. If n > 0, the function returns at most n +// matches. Text that does not match the expression will be skipped. Empty +// matches abutting a preceding match are ignored. The function returns a slice +// containing the matching substrings. +func (re *Regexp) AllMatchesString(s string, n int) []string { + if n <= 0 { + n = len(s) + 1; + } + result := make([]string, n); + i := 0; + re.allMatches(s, nil, n, func(start, end int) { + result[i] = s[start:end]; + i++; + }); + return result[0:i]; +} + +// AllMatchesIter slices the byte slice b into substrings that are successive +// matches of the Regexp within b. If n > 0, the function returns at most n +// matches. Text that does not match the expression will be skipped. Empty +// matches abutting a preceding match are ignored. The function returns a +// channel that iterates over the matching substrings. +func (re *Regexp) AllMatchesIter(b []byte, n int) (<-chan []byte) { + if n <= 0 { + n = len(b) + 1; + } + c := make(chan []byte, 10); + go func() { + re.allMatches("", b, n, func(start, end int) { + c <- b[start:end]; + }); + close(c); + }(); + return c; +} + +// AllMatchesStringIter slices the string s into substrings that are successive +// matches of the Regexp within s. If n > 0, the function returns at most n +// matches. Text that does not match the expression will be skipped. Empty +// matches abutting a preceding match are ignored. The function returns a +// channel that iterates over the matching substrings. +func (re *Regexp) AllMatchesStringIter(s string, n int) (<-chan string) { + if n <= 0 { + n = len(s) + 1; + } + c := make(chan string, 10); + go func() { + re.allMatches(s, nil, n, func(start, end int) { + c <- s[start:end]; + }); + close(c); + }(); + return c; +} |