diff options
Diffstat (limited to 'src/pkg/exp/regexp/syntax/regexp.go')
-rw-r--r-- | src/pkg/exp/regexp/syntax/regexp.go | 284 |
1 files changed, 0 insertions, 284 deletions
diff --git a/src/pkg/exp/regexp/syntax/regexp.go b/src/pkg/exp/regexp/syntax/regexp.go deleted file mode 100644 index 00a4addef..000000000 --- a/src/pkg/exp/regexp/syntax/regexp.go +++ /dev/null @@ -1,284 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package syntax parses regular expressions into syntax trees. -// WORK IN PROGRESS. -package syntax - -// Note to implementers: -// In this package, re is always a *Regexp and r is always a rune. - -import ( - "bytes" - "strconv" - "strings" - "unicode" -) - -// A Regexp is a node in a regular expression syntax tree. -type Regexp struct { - Op Op // operator - Flags Flags - Sub []*Regexp // subexpressions, if any - Sub0 [1]*Regexp // storage for short Sub - Rune []int // matched runes, for OpLiteral, OpCharClass - Rune0 [2]int // storage for short Rune - Min, Max int // min, max for OpRepeat - Cap int // capturing index, for OpCapture - Name string // capturing name, for OpCapture -} - -// An Op is a single regular expression operator. -type Op uint8 - -// Operators are listed in precedence order, tightest binding to weakest. -// Character class operators are listed simplest to most complex -// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar). - -const ( - OpNoMatch Op = 1 + iota // matches no strings - OpEmptyMatch // matches empty string - OpLiteral // matches Runes sequence - OpCharClass // matches Runes interpreted as range pair list - OpAnyCharNotNL // matches any character - OpAnyChar // matches any character - OpBeginLine // matches empty string at beginning of line - OpEndLine // matches empty string at end of line - OpBeginText // matches empty string at beginning of text - OpEndText // matches empty string at end of text - OpWordBoundary // matches word boundary `\b` - OpNoWordBoundary // matches word non-boundary `\B` - OpCapture // capturing subexpression with index Cap, optional name Name - OpStar // matches Sub[0] zero or more times - OpPlus // matches Sub[0] one or more times - OpQuest // matches Sub[0] zero or one times - OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit) - OpConcat // matches concatenation of Subs - OpAlternate // matches alternation of Subs -) - -const opPseudo Op = 128 // where pseudo-ops start - -// Equal returns true if x and y have identical structure. -func (x *Regexp) Equal(y *Regexp) bool { - if x == nil || y == nil { - return x == y - } - if x.Op != y.Op { - return false - } - switch x.Op { - case OpEndText: - // The parse flags remember whether this is \z or \Z. - if x.Flags&WasDollar != y.Flags&WasDollar { - return false - } - - case OpLiteral, OpCharClass: - if len(x.Rune) != len(y.Rune) { - return false - } - for i, r := range x.Rune { - if r != y.Rune[i] { - return false - } - } - - case OpAlternate, OpConcat: - if len(x.Sub) != len(y.Sub) { - return false - } - for i, sub := range x.Sub { - if !sub.Equal(y.Sub[i]) { - return false - } - } - - case OpStar, OpPlus, OpQuest: - if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - - case OpRepeat: - if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - - case OpCapture: - if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) { - return false - } - } - return true -} - -// writeRegexp writes the Perl syntax for the regular expression re to b. -func writeRegexp(b *bytes.Buffer, re *Regexp) { - switch re.Op { - default: - b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">") - case OpNoMatch: - b.WriteString(`[^\x00-\x{10FFFF}]`) - case OpEmptyMatch: - b.WriteString(`(?:)`) - case OpLiteral: - if re.Flags&FoldCase != 0 { - b.WriteString(`(?i:`) - } - for _, r := range re.Rune { - escape(b, r, false) - } - if re.Flags&FoldCase != 0 { - b.WriteString(`)`) - } - case OpCharClass: - if len(re.Rune)%2 != 0 { - b.WriteString(`[invalid char class]`) - break - } - b.WriteRune('[') - if len(re.Rune) == 0 { - b.WriteString(`^\x00-\x{10FFFF}`) - } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune { - // Contains 0 and MaxRune. Probably a negated class. - // Print the gaps. - b.WriteRune('^') - for i := 1; i < len(re.Rune)-1; i += 2 { - lo, hi := re.Rune[i]+1, re.Rune[i+1]-1 - escape(b, lo, lo == '-') - if lo != hi { - b.WriteRune('-') - escape(b, hi, hi == '-') - } - } - } else { - for i := 0; i < len(re.Rune); i += 2 { - lo, hi := re.Rune[i], re.Rune[i+1] - escape(b, lo, lo == '-') - if lo != hi { - b.WriteRune('-') - escape(b, hi, hi == '-') - } - } - } - b.WriteRune(']') - case OpAnyCharNotNL: - b.WriteString(`[^\n]`) - case OpAnyChar: - b.WriteRune('.') - case OpBeginLine: - b.WriteRune('^') - case OpEndLine: - b.WriteRune('$') - case OpBeginText: - b.WriteString(`\A`) - case OpEndText: - b.WriteString(`\z`) - case OpWordBoundary: - b.WriteString(`\b`) - case OpNoWordBoundary: - b.WriteString(`\B`) - case OpCapture: - if re.Name != "" { - b.WriteString(`(?P<`) - b.WriteString(re.Name) - b.WriteRune('>') - } else { - b.WriteRune('(') - } - if re.Sub[0].Op != OpEmptyMatch { - writeRegexp(b, re.Sub[0]) - } - b.WriteRune(')') - case OpStar, OpPlus, OpQuest, OpRepeat: - if sub := re.Sub[0]; sub.Op > OpCapture { - b.WriteString(`(?:`) - writeRegexp(b, sub) - b.WriteString(`)`) - } else { - writeRegexp(b, sub) - } - switch re.Op { - case OpStar: - b.WriteRune('*') - case OpPlus: - b.WriteRune('+') - case OpQuest: - b.WriteRune('?') - case OpRepeat: - b.WriteRune('{') - b.WriteString(strconv.Itoa(re.Min)) - if re.Max != re.Min { - b.WriteRune(',') - if re.Max >= 0 { - b.WriteString(strconv.Itoa(re.Max)) - } - } - b.WriteRune('}') - } - case OpConcat: - for _, sub := range re.Sub { - if sub.Op == OpAlternate { - b.WriteString(`(?:`) - writeRegexp(b, sub) - b.WriteString(`)`) - } else { - writeRegexp(b, sub) - } - } - case OpAlternate: - for i, sub := range re.Sub { - if i > 0 { - b.WriteRune('|') - } - writeRegexp(b, sub) - } - } -} - -func (re *Regexp) String() string { - var b bytes.Buffer - writeRegexp(&b, re) - return b.String() -} - -const meta = `\.+*?()|[]{}^$` - -func escape(b *bytes.Buffer, r int, force bool) { - if unicode.IsPrint(r) { - if strings.IndexRune(meta, r) >= 0 || force { - b.WriteRune('\\') - } - b.WriteRune(r) - return - } - - switch r { - case '\a': - b.WriteString(`\a`) - case '\f': - b.WriteString(`\f`) - case '\n': - b.WriteString(`\n`) - case '\r': - b.WriteString(`\r`) - case '\t': - b.WriteString(`\t`) - case '\v': - b.WriteString(`\v`) - default: - if r < 0x100 { - b.WriteString(`\x`) - s := strconv.Itob(r, 16) - if len(s) == 1 { - b.WriteRune('0') - } - b.WriteString(s) - break - } - b.WriteString(`\x{`) - b.WriteString(strconv.Itob(r, 16)) - b.WriteString(`}`) - } -} |