summaryrefslogtreecommitdiff
path: root/src/pkg/regexp/regexp.go
diff options
context:
space:
mode:
authorMichael Stapelberg <stapelberg@debian.org>2013-03-04 21:27:36 +0100
committerMichael Stapelberg <michael@stapelberg.de>2013-03-04 21:27:36 +0100
commit04b08da9af0c450d645ab7389d1467308cfc2db8 (patch)
treedb247935fa4f2f94408edc3acd5d0d4f997aa0d8 /src/pkg/regexp/regexp.go
parent917c5fb8ec48e22459d77e3849e6d388f93d3260 (diff)
downloadgolang-04b08da9af0c450d645ab7389d1467308cfc2db8.tar.gz
Imported Upstream version 1.1~hg20130304upstream/1.1_hg20130304
Diffstat (limited to 'src/pkg/regexp/regexp.go')
-rw-r--r--src/pkg/regexp/regexp.go74
1 files changed, 66 insertions, 8 deletions
diff --git a/src/pkg/regexp/regexp.go b/src/pkg/regexp/regexp.go
index 87e6b1c61..3aa16dec6 100644
--- a/src/pkg/regexp/regexp.go
+++ b/src/pkg/regexp/regexp.go
@@ -130,6 +130,14 @@ func CompilePOSIX(expr string) (*Regexp, error) {
return compile(expr, syntax.POSIX, true)
}
+// Longest makes future searches prefer the leftmost-longest match.
+// That is, when matching against text, the regexp returns a match that
+// begins as early as possible in the input (leftmost), and among those
+// it chooses a match that is as long as possible.
+func (re *Regexp) Longest() {
+ re.longest = true
+}
+
func compile(expr string, mode syntax.Flags, longest bool) (*Regexp, error) {
re, err := syntax.Parse(expr, mode)
if err != nil {
@@ -441,7 +449,7 @@ func (re *Regexp) ReplaceAllLiteralString(src, repl string) string {
}
// ReplaceAllStringFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of of function repl applied
+// Regexp have been replaced by the return value of function repl applied
// to the matched substring. The replacement returned by repl is substituted
// directly, without using Expand.
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
@@ -539,7 +547,7 @@ func (re *Regexp) ReplaceAllLiteral(src, repl []byte) []byte {
}
// ReplaceAllFunc returns a copy of src in which all matches of the
-// Regexp have been replaced by the return value of of function repl applied
+// Regexp have been replaced by the return value of function repl applied
// to the matched byte slice. The replacement returned by repl is substituted
// directly, without using Expand.
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
@@ -686,8 +694,9 @@ func (re *Regexp) FindStringIndex(s string) (loc []int) {
// FindReaderIndex returns a two-element slice of integers defining the
// location of the leftmost match of the regular expression in text read from
-// the RuneReader. The match itself is at s[loc[0]:loc[1]]. A return
-// value of nil indicates no match.
+// the RuneReader. The match text was found in the input stream at
+// byte offset loc[0] through loc[1]-1.
+// A return value of nil indicates no match.
func (re *Regexp) FindReaderIndex(r io.RuneReader) (loc []int) {
a := re.doExecute(r, nil, "", 0, 2)
if a == nil {
@@ -719,7 +728,7 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// append, Expand replaces variables in the template with corresponding
// matches drawn from src. The match slice should have been returned by
// FindSubmatchIndex.
-//
+//
// In the template, a variable is denoted by a substring of the form
// $name or ${name}, where name is a non-empty sequence of letters,
// digits, and underscores. A purely numeric name like $1 refers to
@@ -727,10 +736,10 @@ func (re *Regexp) FindSubmatch(b []byte) [][]byte {
// capturing parentheses named with the (?P<name>...) syntax. A
// reference to an out of range or unmatched index or a name that is not
// present in the regular expression is replaced with an empty slice.
-//
+//
// In the $name form, name is taken to be as long as possible: $1x is
// equivalent to ${1x}, not ${1}x, and, $10 is equivalent to ${10}, not ${1}0.
-//
+//
// To insert a literal $ in the output, use $$ in the template.
func (re *Regexp) Expand(dst []byte, template []byte, src []byte, match []int) []byte {
return re.expand(dst, string(template), src, "", match)
@@ -766,7 +775,7 @@ func (re *Regexp) expand(dst []byte, template string, bsrc []byte, src string, m
}
template = rest
if num >= 0 {
- if 2*num+1 < len(match) {
+ if 2*num+1 < len(match) && match[2*num] >= 0 {
if bsrc != nil {
dst = append(dst, bsrc[match[2*num]:match[2*num+1]]...)
} else {
@@ -1047,3 +1056,52 @@ func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
}
return result
}
+
+// Split slices s into substrings separated by the expression and returns a slice of
+// the substrings between those expression matches.
+//
+// The slice returned by this method consists of all the substrings of s
+// not contained in the slice returned by FindAllString. When called on an expression
+// that contains no metacharacters, it is equivalent to strings.SplitN.
+//
+// Example:
+// s := regexp.MustCompile("a*").Split("abaabaccadaaae", 5)
+// // s: ["", "b", "b", "c", "cadaaae"]
+//
+// The count determines the number of substrings to return:
+// n > 0: at most n substrings; the last substring will be the unsplit remainder.
+// n == 0: the result is nil (zero substrings)
+// n < 0: all substrings
+func (re *Regexp) Split(s string, n int) []string {
+
+ if n == 0 {
+ return nil
+ }
+
+ if len(re.expr) > 0 && len(s) == 0 {
+ return []string{""}
+ }
+
+ matches := re.FindAllStringIndex(s, n)
+ strings := make([]string, 0, len(matches))
+
+ beg := 0
+ end := 0
+ for _, match := range matches {
+ if n > 0 && len(strings) >= n-1 {
+ break
+ }
+
+ end = match[0]
+ if match[1] != 0 {
+ strings = append(strings, s[beg:end])
+ }
+ beg = match[1]
+ }
+
+ if end != len(s) {
+ strings = append(strings, s[beg:])
+ }
+
+ return strings
+}