summaryrefslogtreecommitdiff
path: root/src/pkg/regexp/regexp.go
diff options
context:
space:
mode:
author Stephen Ma <stephenm@golang.org> 2009-08-09 19:30:47 -0700
committer Stephen Ma <stephenm@golang.org> 2009-08-09 19:30:47 -0700
commit 06bd359d0e7f92e197a46637b6af5dab794e50e9 (patch)
tree e2a57a5f492c643901db909871e48f9b247f8c09 /src/pkg/regexp/regexp.go
parent e2c9bb805e50f83d4b6f9129b6fa8e4a4733db2b (diff)
download golang-06bd359d0e7f92e197a46637b6af5dab794e50e9.tar.gz
Add methods AllMatches, AllMatchesString, AllMatchesIter,
AllMatchesStringIter, based on the sawn and sawzall functions in Sawzall.

APPROVED=rsc DELTA=218 (218 added, 0 deleted, 0 changed) OCL=32408 CL=32949
Diffstat (limited to 'src/pkg/regexp/regexp.go')
-rw-r--r-- src/pkg/regexp/regexp.go 119
1 file changed, 119 insertions, 0 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index 6b8b1bf86..f78bf864e 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -951,3 +951,122 @@ func QuoteMeta(s string) string {
return string(b[0:j]);
}
+// allMatches is the scanning loop shared by the AllMatches* methods.  It
+// searches the byte slice b when b is non-nil and the string s otherwise,
+// calling deliver(start, end) once for every accepted match.  Scanning stops
+// after n matches have been delivered, when no further match is found, or
+// when the search position has moved past the end of the input.
+func (re *Regexp) allMatches(s string, b []byte, n int, deliver func(int, int)) {
+	// limit is the length of whichever input is actually being searched.
+	limit := len(b);
+	if b == nil {
+		limit = len(s);
+	}
+
+	cursor := 0;	// offset at which the next search starts
+	lastEnd := -1;	// end offset of the previous match; -1 before the first
+	for found := 0; found < n && cursor <= limit; {
+		m := re.doExecute(s, b, cursor);
+		if len(m) == 0 {
+			break;
+		}
+		start, stop := m[0], m[1];
+
+		skip := false;
+		if stop != cursor {
+			// The match ends beyond the search start: resume from its end.
+			cursor = stop;
+		} else {
+			// We've found an empty match.  It is dropped when it sits
+			// right after the previous match.
+			skip = start == lastEnd;
+
+			// Step over one UTF-8 sequence so the next search makes
+			// progress; a zero width means no input is left, so move
+			// the cursor past the end to terminate the loop.
+			var rune, width int;
+			if b == nil {
+				rune, width = utf8.DecodeRuneInString(s[cursor:limit]);
+			} else {
+				rune, width = utf8.DecodeRune(b[cursor:limit]);
+			}
+			if width > 0 {
+				cursor += width;
+			} else {
+				cursor = limit + 1;
+			}
+		}
+		lastEnd = stop;
+
+		if !skip {
+			deliver(start, stop);
+			found++;
+		}
+	}
+}
+
+// AllMatches slices the byte slice b into substrings that are successive
+// matches of the Regexp within b. If n > 0, the function returns at most n
+// matches; if n <= 0, the number of matches is not limited. Text that does
+// not match the expression will be skipped. Empty matches abutting a
+// preceding match are ignored. The function returns a slice containing the
+// matching substrings; each element is a sub-slice of b, not a copy.
+func (re *Regexp) AllMatches(b []byte, n int) [][]byte {
+	// The result is allocated up front with n slots.  The unlimited case
+	// already treats len(b)+1 as the upper bound on the number of matches,
+	// so apply the same cap to a positive n: otherwise a large n supplied
+	// by the caller would force an oversized allocation for a short input.
+	limit := len(b) + 1;
+	if n <= 0 || n > limit {
+		n = limit;
+	}
+	result := make([][]byte, n);
+	i := 0;
+	re.allMatches("", b, n, func(start, end int) {
+		result[i] = b[start:end];
+		i++;
+	});
+	// Only the first i slots were filled.
+	return result[0:i];
+}
+
+// AllMatchesString slices the string s into substrings that are successive
+// matches of the Regexp within s. If n > 0, the function returns at most n
+// matches; if n <= 0, the number of matches is not limited. Text that does
+// not match the expression will be skipped. Empty matches abutting a
+// preceding match are ignored. The function returns a slice containing the
+// matching substrings.
+func (re *Regexp) AllMatchesString(s string, n int) []string {
+	// The result is allocated up front with n slots.  The unlimited case
+	// already treats len(s)+1 as the upper bound on the number of matches,
+	// so apply the same cap to a positive n: otherwise a large n supplied
+	// by the caller would force an oversized allocation for a short input.
+	limit := len(s) + 1;
+	if n <= 0 || n > limit {
+		n = limit;
+	}
+	result := make([]string, n);
+	i := 0;
+	re.allMatches(s, nil, n, func(start, end int) {
+		result[i] = s[start:end];
+		i++;
+	});
+	// Only the first i slots were filled.
+	return result[0:i];
+}
+
+// AllMatchesIter delivers, over a channel, the substrings of the byte slice b
+// that are successive matches of the Regexp within b. If n > 0, at most n
+// matches are sent; otherwise the limit is len(b) + 1. Text that does not
+// match the expression is skipped, and empty matches abutting a preceding
+// match are ignored. The matches are sent by a goroutine started here, which
+// closes the channel after the last one. The channel buffers 10 matches, so
+// the sender blocks when the receiver falls that far behind.
+func (re *Regexp) AllMatchesIter(b []byte, n int) (<-chan []byte) {
+	limit := n;
+	if limit <= 0 {
+		limit = len(b) + 1;
+	}
+	out := make(chan []byte, 10);
+	// send forwards one match, as a sub-slice of b, to the channel.
+	send := func(start, end int) {
+		out <- b[start:end];
+	};
+	go func() {
+		re.allMatches("", b, limit, send);
+		close(out);
+	}();
+	return out;
+}
+
+// AllMatchesStringIter delivers, over a channel, the substrings of the string
+// s that are successive matches of the Regexp within s. If n > 0, at most n
+// matches are sent; otherwise the limit is len(s) + 1. Text that does not
+// match the expression is skipped, and empty matches abutting a preceding
+// match are ignored. The matches are sent by a goroutine started here, which
+// closes the channel after the last one. The channel buffers 10 matches, so
+// the sender blocks when the receiver falls that far behind.
+func (re *Regexp) AllMatchesStringIter(s string, n int) (<-chan string) {
+	limit := n;
+	if limit <= 0 {
+		limit = len(s) + 1;
+	}
+	out := make(chan string, 10);
+	// send forwards one matching substring of s to the channel.
+	send := func(start, end int) {
+		out <- s[start:end];
+	};
+	go func() {
+		re.allMatches(s, nil, limit, send);
+		close(out);
+	}();
+	return out;
+}