diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/regexp/syntax/regexp.go | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4 (upstream/1.4)
Diffstat (limited to 'src/pkg/regexp/syntax/regexp.go')
-rw-r--r-- | src/pkg/regexp/syntax/regexp.go | 319 |
1 files changed, 0 insertions, 319 deletions
// diff --git a/src/pkg/regexp/syntax/regexp.go b/src/pkg/regexp/syntax/regexp.go
// deleted file mode 100644
// index 329a90e01..000000000
// --- a/src/pkg/regexp/syntax/regexp.go
// +++ /dev/null
// @@ -1,319 +0,0 @@

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package syntax

// Note to implementers:
// In this package, re is always a *Regexp and r is always a rune.

import (
	"bytes"
	"strconv"
	"strings"
	"unicode"
)

// A Regexp is a node in a regular expression syntax tree.
//
// Which fields are meaningful depends on Op; the per-field comments name
// the operators that use each one.
type Regexp struct {
	Op       Op         // operator
	Flags    Flags      // flags; this file consults FoldCase, NonGreedy and WasDollar
	Sub      []*Regexp  // subexpressions, if any
	Sub0     [1]*Regexp // storage for short Sub
	Rune     []rune     // matched runes, for OpLiteral, OpCharClass (for OpCharClass: a list of lo, hi range pairs)
	Rune0    [2]rune    // storage for short Rune
	Min, Max int        // min, max for OpRepeat (Max == -1 means no upper limit)
	Cap      int        // capturing index, for OpCapture
	Name     string     // capturing name, for OpCapture (empty when the group is unnamed)
}

// An Op is a single regular expression operator.
type Op uint8

// Operators are listed in precedence order, tightest binding to weakest.
// Character class operators are listed simplest to most complex
// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
- -const ( - OpNoMatch Op = 1 + iota // matches no strings - OpEmptyMatch // matches empty string - OpLiteral // matches Runes sequence - OpCharClass // matches Runes interpreted as range pair list - OpAnyCharNotNL // matches any character - OpAnyChar // matches any character - OpBeginLine // matches empty string at beginning of line - OpEndLine // matches empty string at end of line - OpBeginText // matches empty string at beginning of text - OpEndText // matches empty string at end of text - OpWordBoundary // matches word boundary `\b` - OpNoWordBoundary // matches word non-boundary `\B` - OpCapture // capturing subexpression with index Cap, optional name Name - OpStar // matches Sub[0] zero or more times - OpPlus // matches Sub[0] one or more times - OpQuest // matches Sub[0] zero or one times - OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit) - OpConcat // matches concatenation of Subs - OpAlternate // matches alternation of Subs -) - -const opPseudo Op = 128 // where pseudo-ops start - -// Equal returns true if x and y have identical structure. -func (x *Regexp) Equal(y *Regexp) bool { - if x == nil || y == nil { - return x == y - } - if x.Op != y.Op { - return false - } - switch x.Op { - case OpEndText: - // The parse flags remember whether this is \z or \Z. 
- if x.Flags&WasDollar != y.Flags&WasDollar { - return false - } - - case OpLiteral, OpCharClass: - if len(x.Rune) != len(y.Rune) { - return false - } - for i, r := range x.Rune { - if r != y.Rune[i] { - return false - } - } - - case OpAlternate, OpConcat: - if len(x.Sub) != len(y.Sub) { - return false - } - for i, sub := range x.Sub { - if !sub.Equal(y.Sub[i]) { - return false - } - } - - case OpStar, OpPlus, OpQuest: - if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - - case OpRepeat: - if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - - case OpCapture: - if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - } - return true -} - -// writeRegexp writes the Perl syntax for the regular expression re to b. -func writeRegexp(b *bytes.Buffer, re *Regexp) { - switch re.Op { - default: - b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">") - case OpNoMatch: - b.WriteString(`[^\x00-\x{10FFFF}]`) - case OpEmptyMatch: - b.WriteString(`(?:)`) - case OpLiteral: - if re.Flags&FoldCase != 0 { - b.WriteString(`(?i:`) - } - for _, r := range re.Rune { - escape(b, r, false) - } - if re.Flags&FoldCase != 0 { - b.WriteString(`)`) - } - case OpCharClass: - if len(re.Rune)%2 != 0 { - b.WriteString(`[invalid char class]`) - break - } - b.WriteRune('[') - if len(re.Rune) == 0 { - b.WriteString(`^\x00-\x{10FFFF}`) - } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune { - // Contains 0 and MaxRune. Probably a negated class. - // Print the gaps. 
- b.WriteRune('^') - for i := 1; i < len(re.Rune)-1; i += 2 { - lo, hi := re.Rune[i]+1, re.Rune[i+1]-1 - escape(b, lo, lo == '-') - if lo != hi { - b.WriteRune('-') - escape(b, hi, hi == '-') - } - } - } else { - for i := 0; i < len(re.Rune); i += 2 { - lo, hi := re.Rune[i], re.Rune[i+1] - escape(b, lo, lo == '-') - if lo != hi { - b.WriteRune('-') - escape(b, hi, hi == '-') - } - } - } - b.WriteRune(']') - case OpAnyCharNotNL: - b.WriteString(`(?-s:.)`) - case OpAnyChar: - b.WriteString(`(?s:.)`) - case OpBeginLine: - b.WriteRune('^') - case OpEndLine: - b.WriteRune('$') - case OpBeginText: - b.WriteString(`\A`) - case OpEndText: - if re.Flags&WasDollar != 0 { - b.WriteString(`(?-m:$)`) - } else { - b.WriteString(`\z`) - } - case OpWordBoundary: - b.WriteString(`\b`) - case OpNoWordBoundary: - b.WriteString(`\B`) - case OpCapture: - if re.Name != "" { - b.WriteString(`(?P<`) - b.WriteString(re.Name) - b.WriteRune('>') - } else { - b.WriteRune('(') - } - if re.Sub[0].Op != OpEmptyMatch { - writeRegexp(b, re.Sub[0]) - } - b.WriteRune(')') - case OpStar, OpPlus, OpQuest, OpRepeat: - if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 { - b.WriteString(`(?:`) - writeRegexp(b, sub) - b.WriteString(`)`) - } else { - writeRegexp(b, sub) - } - switch re.Op { - case OpStar: - b.WriteRune('*') - case OpPlus: - b.WriteRune('+') - case OpQuest: - b.WriteRune('?') - case OpRepeat: - b.WriteRune('{') - b.WriteString(strconv.Itoa(re.Min)) - if re.Max != re.Min { - b.WriteRune(',') - if re.Max >= 0 { - b.WriteString(strconv.Itoa(re.Max)) - } - } - b.WriteRune('}') - } - if re.Flags&NonGreedy != 0 { - b.WriteRune('?') - } - case OpConcat: - for _, sub := range re.Sub { - if sub.Op == OpAlternate { - b.WriteString(`(?:`) - writeRegexp(b, sub) - b.WriteString(`)`) - } else { - writeRegexp(b, sub) - } - } - case OpAlternate: - for i, sub := range re.Sub { - if i > 0 { - b.WriteRune('|') - } - writeRegexp(b, sub) - } - } -} - -func (re *Regexp) String() 
string { - var b bytes.Buffer - writeRegexp(&b, re) - return b.String() -} - -const meta = `\.+*?()|[]{}^$` - -func escape(b *bytes.Buffer, r rune, force bool) { - if unicode.IsPrint(r) { - if strings.IndexRune(meta, r) >= 0 || force { - b.WriteRune('\\') - } - b.WriteRune(r) - return - } - - switch r { - case '\a': - b.WriteString(`\a`) - case '\f': - b.WriteString(`\f`) - case '\n': - b.WriteString(`\n`) - case '\r': - b.WriteString(`\r`) - case '\t': - b.WriteString(`\t`) - case '\v': - b.WriteString(`\v`) - default: - if r < 0x100 { - b.WriteString(`\x`) - s := strconv.FormatInt(int64(r), 16) - if len(s) == 1 { - b.WriteRune('0') - } - b.WriteString(s) - break - } - b.WriteString(`\x{`) - b.WriteString(strconv.FormatInt(int64(r), 16)) - b.WriteString(`}`) - } -} - -// MaxCap walks the regexp to find the maximum capture index. -func (re *Regexp) MaxCap() int { - m := 0 - if re.Op == OpCapture { - m = re.Cap - } - for _, sub := range re.Sub { - if n := sub.MaxCap(); m < n { - m = n - } - } - return m -} - -// CapNames walks the regexp to find the names of capturing groups. -func (re *Regexp) CapNames() []string { - names := make([]string, re.MaxCap()+1) - re.capNames(names) - return names -} - -func (re *Regexp) capNames(names []string) { - if re.Op == OpCapture { - names[re.Cap] = re.Name - } - for _, sub := range re.Sub { - sub.capNames(names) - } -} |