diff options
author | Michael Stapelberg <stapelberg@debian.org> | 2013-03-04 21:27:36 +0100 |
---|---|---|
committer | Michael Stapelberg <michael@stapelberg.de> | 2013-03-04 21:27:36 +0100 |
commit | 04b08da9af0c450d645ab7389d1467308cfc2db8 (patch) | |
tree | db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/regexp.go | |
parent | 917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff) | |
download | golang-04b08da9af0c450d645ab7389d1467308cfc2db8.tar.gz |
Imported Upstream version 1.1~hg20130304upstream/1.1_hg20130304
Diffstat (limited to 'src/pkg/regexp/regexp.go')
-rw-r--r-- | src/pkg/regexp/regexp.go | 74 |
1 files changed, 66 insertions, 8 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go index 87e6b1c61..3aa16dec6 100644 --- a/src/pkg/regexp/regexp.go +++ b/src/pkg/regexp/regexp.go @@ -130,6 +130,14 @@ func CompilePOSIX(expr string) (*Regexp, error) { return compile(expr, syntax.POSIX, true) } +// Longest makes future searches prefer the leftmost-longest match. +// That is, when matching against text, the regexp returns a match that +// begins as early as possible in the input (leftmost), and among those +// it chooses a match that is as long as possible. +func (re *Regexp) Longest() { + re.longest = true +} + func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) { re, err := syntax.Parse(expr, mode) if err != nil { @@ -441,7 +449,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string { } // ReplaceAllStringFunc returns a copy of src in which all matches of the -// Regexp have been replaced by the return value of of function repl applied +// Regexp have been replaced by the return value of function repl applied // to the matched substring. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { @@ -539,7 +547,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte { } // ReplaceAllFunc returns a copy of src in which all matches of the -// Regexp have been replaced by the return value of of function repl applied +// Regexp have been replaced by the return value of function repl applied // to the matched byte slice. The replacement returned by repl is substituted // directly, without using Expand. func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { @@ -686,8 +694,9 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) { // FindReaderIndex returns a two-element slice of integers defining the // location of the leftmost match of the regular expression in text read from -// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return -// value of nil indicates no match. +// the RuneReader. The match text was found in the input stream at +// byte offset loc[0] through loc[1]-1. +// A return value of nil indicates no match. func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) { a := re.doExecute(r, nil, "", 0, 2) if a == nil { @@ -719,7 +728,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // append, Expand replaces variables in the template with corresponding // matches drawn from src. The match slice should have been returned by // FindSubmatchIndex. -// +// // In the template, a variable is denoted by a substring of the form // $name or ${name}, where name is a non-empty sequence of letters, // digits, and underscores. A purely numeric name like $1 refers to @@ -727,10 +736,10 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte { // capturing parentheses named with the (?P<name>...) syntax. A // reference to an out of range or unmatched index or a name that is not // present in the regular expression is replaced with an empty slice. -// +// // In the $name form, name is taken to be as long as possible: $1x is // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0. -// +// // To insert a literal $ in the output, use $$ in the template. func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte { return re.expand(dst, string(template), src, "", match) @@ -766,7 +775,7 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m } template = rest if num >= 0 { - if 2*num+1 < len(match) { + if 2*num+1 < len(match) && match[2*num] >= 0 { if bsrc != nil { dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...) } else { @@ -1047,3 +1056,52 @@ func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { } return result } + +// Split slices s into substrings separated by the expression and returns a slice of +// the substrings between those expression matches. +// +// The slice returned by this method consists of all the substrings of s +// not contained in the slice returned by FindAllString. When called on an expression +// that contains no metacharacters, it is equivalent to strings.SplitN. +// +// Example: +// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5) +// // s: ["", "b", "b", "c", "cadaaae"] +// +// The count determines the number of substrings to return: +// n > 0: at most n substrings; the last substring will be the unsplit remainder. +// n == 0: the result is nil (zero substrings) +// n < 0: all substrings +func (re *Regexp) Split(s string, n int) []string { + + if n == 0 { + return nil + } + + if len(re.expr) > 0 && len(s) == 0 { + return []string{""} + } + + matches := re.FindAllStringIndex(s, n) + strings := make([]string, 0, len(matches)) + + beg := 0 + end := 0 + for _, match := range matches { + if n > 0 && len(strings) >= n-1 { + break + } + + end = match[0] + if match[1] != 0 { + strings = append(strings, s[beg:end]) + } + beg = match[1] + } + + if end != len(s) { + strings = append(strings, s[beg:]) + } + + return strings +} |