Imported Upstream version 1.1~hg20130304 [upstream/1.1_hg20130304]

author: Michael Stapelberg <stapelberg@debian.org> 2013-03-04 21:27:36 +0100
committer: Michael Stapelberg <michael@stapelberg.de> 2013-03-04 21:27:36 +0100
commit: 04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
tree: db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/regexp.go
parent: 917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
download: golang-04b08da9af0c450d645ab7389d1467308cfc2db8.tar.gz
1 files changed, 66 insertions, 8 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index 87e6b1c61..3aa16dec6 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -130,6 +130,14 @@ func CompilePOSIX(expr string) (*Regexp, error) {
 	return compile(expr, syntax.POSIX, true)
 }
 
+// Longest makes future searches prefer the leftmost-longest match.
+// That is, when matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses a match that is as long as possible.
+func (re *Regexp) Longest() {
+	re.longest = true // modifies re in place; this method only ever sets the flag, never clears it
+}
+
 func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
 	re, err := syntax.Parse(expr, mode)
 	if err != nil {
@@ -441,7 +449,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
 }
 
 // ReplaceAllStringFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of of function repl applied
+// Regexp have been replaced by the return value of function repl applied
 // to the matched substring.  The replacement returned by repl is substituted
 // directly, without using Expand.
 func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
@@ -539,7 +547,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
 }
 
 // ReplaceAllFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of of function repl applied
+// Regexp have been replaced by the return value of function repl applied
 // to the matched byte slice.  The replacement returned by repl is substituted
 // directly, without using Expand.
 func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
@@ -686,8 +694,9 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
 
 // FindReaderIndex returns a two-element slice of integers defining the
 // location of the leftmost match of the regular expression in text read from
-// the RuneReader.  The match itself is at s[loc[0]:loc[1]].  A return
-// value of nil indicates no match.
+// the RuneReader.  The match text was found in the input stream at
+// byte offset loc[0] through loc[1]-1.
+// A return value of nil indicates no match.
 func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
 	a := re.doExecute(r, nil, "", 0, 2)
 	if a == nil {
@@ -719,7 +728,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
 // append, Expand replaces variables in the template with corresponding
 // matches drawn from src.  The match slice should have been returned by
 // FindSubmatchIndex.
-// 
+//
 // In the template, a variable is denoted by a substring of the form
 // $name or ${name}, where name is a non-empty sequence of letters,
 // digits, and underscores.  A purely numeric name like $1 refers to
@@ -727,10 +736,10 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
 // capturing parentheses named with the (?P<name>...) syntax.  A
 // reference to an out of range or unmatched index or a name that is not
 // present in the regular expression is replaced with an empty slice.
-// 
+//
 // In the $name form, name is taken to be as long as possible: $1x is
 // equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
-// 
+//
 // To insert a literal $ in the output, use $$ in the template.
 func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
 	return re.expand(dst, string(template), src, "", match)
@@ -766,7 +775,7 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
 		}
 		template = rest
 		if num >= 0 {
-			if 2*num+1 < len(match) {
+			if 2*num+1 < len(match) && match[2*num] >= 0 {
 				if bsrc != nil {
 					dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
 				} else {
@@ -1047,3 +1056,52 @@ func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
 	}
 	return result
 }
+
+// Split slices s into substrings separated by the expression and returns a slice of
+// the substrings between those expression matches.
+//
+// The slice returned by this method consists of all the substrings of s
+// not contained in the slice returned by FindAllString. When called on an expression
+// that contains no metacharacters, it is equivalent to strings.SplitN.
+//
+// Example:
+//   s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
+//   // s: ["", "b", "b", "c", "cadaaae"]
+//
+// The count n determines the number of substrings to return:
+//   n > 0: at most n substrings; the last substring will be the unsplit remainder.
+//   n == 0: the result is nil (zero substrings)
+//   n < 0: all substrings
+func (re *Regexp) Split(s string, n int) []string {
+
+	if n == 0 {
+		return nil
+	}
+
+	if len(re.expr) > 0 && len(s) == 0 { // empty input with a non-empty re.expr: exactly one empty substring
+		return []string{""}
+	}
+
+	matches := re.FindAllStringIndex(s, n)
+	strings := make([]string, 0, len(matches)) // NOTE: this local shadows the strings package name (if imported) inside Split
+
+	beg := 0 // start of the next substring to emit
+	end := 0 // start of the most recently examined match
+	for _, match := range matches {
+		if n > 0 && len(strings) >= n-1 { // keep the last slot for the unsplit remainder appended below
+			break
+		}
+
+		end = match[0]
+		if match[1] != 0 { // a match ending at offset 0 is an empty match at the start of s: emit nothing for it
+			strings = append(strings, s[beg:end])
+		}
+		beg = match[1]
+	}
+
+	if end != len(s) { // the tail is dropped only when the last examined match began at the very end of s
+		strings = append(strings, s[beg:])
+	}
+
+	return strings
+}
author	Michael Stapelberg <stapelberg@debian.org>	2013-03-04 21:27:36 +0100
committer	Michael Stapelberg <michael@stapelberg.de>	2013-03-04 21:27:36 +0100
commit	04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
tree	db247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/regexp.go
parent	917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
download	golang-04b08da9af0c450d645ab7389d1467308cfc2db8.tar.gz