summaryrefslogtreecommitdiff
path: root/src/pkg/regexp/syntax/regexp.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/regexp/syntax/regexp.go')
-rw-r--r--src/pkg/regexp/syntax/regexp.go319
1 files changed, 0 insertions, 319 deletions
diff --git a/src/pkg/regexp/syntax/regexp.go b/src/pkg/regexp/syntax/regexp.go
deleted file mode 100644
index 329a90e01..000000000
--- a/src/pkg/regexp/syntax/regexp.go
+++ /dev/null
@@ -1,319 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package syntax
-
-// Note to implementers:
-// In this package, re is always a *Regexp and r is always a rune.
-
-import (
- "bytes"
- "strconv"
- "strings"
- "unicode"
-)
-
-// A Regexp is a node in a regular expression syntax tree.
-type Regexp struct {
- Op Op // operator
- Flags Flags
- Sub []*Regexp // subexpressions, if any
- Sub0 [1]*Regexp // storage for short Sub
- Rune []rune // matched runes, for OpLiteral, OpCharClass
- Rune0 [2]rune // storage for short Rune
- Min, Max int // min, max for OpRepeat
- Cap int // capturing index, for OpCapture
- Name string // capturing name, for OpCapture
-}
-
-// An Op is a single regular expression operator.
-type Op uint8
-
-// Operators are listed in precedence order, tightest binding to weakest.
-// Character class operators are listed simplest to most complex
-// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
-
-const (
- OpNoMatch Op = 1 + iota // matches no strings
- OpEmptyMatch // matches empty string
- OpLiteral // matches Runes sequence
- OpCharClass // matches Runes interpreted as range pair list
- OpAnyCharNotNL // matches any character
- OpAnyChar // matches any character
- OpBeginLine // matches empty string at beginning of line
- OpEndLine // matches empty string at end of line
- OpBeginText // matches empty string at beginning of text
- OpEndText // matches empty string at end of text
- OpWordBoundary // matches word boundary `\b`
- OpNoWordBoundary // matches word non-boundary `\B`
- OpCapture // capturing subexpression with index Cap, optional name Name
- OpStar // matches Sub[0] zero or more times
- OpPlus // matches Sub[0] one or more times
- OpQuest // matches Sub[0] zero or one times
- OpRepeat // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
- OpConcat // matches concatenation of Subs
- OpAlternate // matches alternation of Subs
-)
-
-const opPseudo Op = 128 // where pseudo-ops start
-
-// Equal returns true if x and y have identical structure.
-func (x *Regexp) Equal(y *Regexp) bool {
- if x == nil || y == nil {
- return x == y
- }
- if x.Op != y.Op {
- return false
- }
- switch x.Op {
- case OpEndText:
- // The parse flags remember whether this is \z or \Z.
- if x.Flags&WasDollar != y.Flags&WasDollar {
- return false
- }
-
- case OpLiteral, OpCharClass:
- if len(x.Rune) != len(y.Rune) {
- return false
- }
- for i, r := range x.Rune {
- if r != y.Rune[i] {
- return false
- }
- }
-
- case OpAlternate, OpConcat:
- if len(x.Sub) != len(y.Sub) {
- return false
- }
- for i, sub := range x.Sub {
- if !sub.Equal(y.Sub[i]) {
- return false
- }
- }
-
- case OpStar, OpPlus, OpQuest:
- if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
- return false
- }
-
- case OpRepeat:
- if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
- return false
- }
-
- case OpCapture:
- if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
- return false
- }
- }
- return true
-}
-
-// writeRegexp writes the Perl syntax for the regular expression re to b.
-func writeRegexp(b *bytes.Buffer, re *Regexp) {
- switch re.Op {
- default:
- b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
- case OpNoMatch:
- b.WriteString(`[^\x00-\x{10FFFF}]`)
- case OpEmptyMatch:
- b.WriteString(`(?:)`)
- case OpLiteral:
- if re.Flags&FoldCase != 0 {
- b.WriteString(`(?i:`)
- }
- for _, r := range re.Rune {
- escape(b, r, false)
- }
- if re.Flags&FoldCase != 0 {
- b.WriteString(`)`)
- }
- case OpCharClass:
- if len(re.Rune)%2 != 0 {
- b.WriteString(`[invalid char class]`)
- break
- }
- b.WriteRune('[')
- if len(re.Rune) == 0 {
- b.WriteString(`^\x00-\x{10FFFF}`)
- } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
- // Contains 0 and MaxRune. Probably a negated class.
- // Print the gaps.
- b.WriteRune('^')
- for i := 1; i < len(re.Rune)-1; i += 2 {
- lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
- escape(b, lo, lo == '-')
- if lo != hi {
- b.WriteRune('-')
- escape(b, hi, hi == '-')
- }
- }
- } else {
- for i := 0; i < len(re.Rune); i += 2 {
- lo, hi := re.Rune[i], re.Rune[i+1]
- escape(b, lo, lo == '-')
- if lo != hi {
- b.WriteRune('-')
- escape(b, hi, hi == '-')
- }
- }
- }
- b.WriteRune(']')
- case OpAnyCharNotNL:
- b.WriteString(`(?-s:.)`)
- case OpAnyChar:
- b.WriteString(`(?s:.)`)
- case OpBeginLine:
- b.WriteRune('^')
- case OpEndLine:
- b.WriteRune('$')
- case OpBeginText:
- b.WriteString(`\A`)
- case OpEndText:
- if re.Flags&WasDollar != 0 {
- b.WriteString(`(?-m:$)`)
- } else {
- b.WriteString(`\z`)
- }
- case OpWordBoundary:
- b.WriteString(`\b`)
- case OpNoWordBoundary:
- b.WriteString(`\B`)
- case OpCapture:
- if re.Name != "" {
- b.WriteString(`(?P<`)
- b.WriteString(re.Name)
- b.WriteRune('>')
- } else {
- b.WriteRune('(')
- }
- if re.Sub[0].Op != OpEmptyMatch {
- writeRegexp(b, re.Sub[0])
- }
- b.WriteRune(')')
- case OpStar, OpPlus, OpQuest, OpRepeat:
- if sub := re.Sub[0]; sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
- b.WriteString(`(?:`)
- writeRegexp(b, sub)
- b.WriteString(`)`)
- } else {
- writeRegexp(b, sub)
- }
- switch re.Op {
- case OpStar:
- b.WriteRune('*')
- case OpPlus:
- b.WriteRune('+')
- case OpQuest:
- b.WriteRune('?')
- case OpRepeat:
- b.WriteRune('{')
- b.WriteString(strconv.Itoa(re.Min))
- if re.Max != re.Min {
- b.WriteRune(',')
- if re.Max >= 0 {
- b.WriteString(strconv.Itoa(re.Max))
- }
- }
- b.WriteRune('}')
- }
- if re.Flags&NonGreedy != 0 {
- b.WriteRune('?')
- }
- case OpConcat:
- for _, sub := range re.Sub {
- if sub.Op == OpAlternate {
- b.WriteString(`(?:`)
- writeRegexp(b, sub)
- b.WriteString(`)`)
- } else {
- writeRegexp(b, sub)
- }
- }
- case OpAlternate:
- for i, sub := range re.Sub {
- if i > 0 {
- b.WriteRune('|')
- }
- writeRegexp(b, sub)
- }
- }
-}
-
-func (re *Regexp) String() string {
- var b bytes.Buffer
- writeRegexp(&b, re)
- return b.String()
-}
-
-const meta = `\.+*?()|[]{}^$`
-
-func escape(b *bytes.Buffer, r rune, force bool) {
- if unicode.IsPrint(r) {
- if strings.IndexRune(meta, r) >= 0 || force {
- b.WriteRune('\\')
- }
- b.WriteRune(r)
- return
- }
-
- switch r {
- case '\a':
- b.WriteString(`\a`)
- case '\f':
- b.WriteString(`\f`)
- case '\n':
- b.WriteString(`\n`)
- case '\r':
- b.WriteString(`\r`)
- case '\t':
- b.WriteString(`\t`)
- case '\v':
- b.WriteString(`\v`)
- default:
- if r < 0x100 {
- b.WriteString(`\x`)
- s := strconv.FormatInt(int64(r), 16)
- if len(s) == 1 {
- b.WriteRune('0')
- }
- b.WriteString(s)
- break
- }
- b.WriteString(`\x{`)
- b.WriteString(strconv.FormatInt(int64(r), 16))
- b.WriteString(`}`)
- }
-}
-
-// MaxCap walks the regexp to find the maximum capture index.
-func (re *Regexp) MaxCap() int {
- m := 0
- if re.Op == OpCapture {
- m = re.Cap
- }
- for _, sub := range re.Sub {
- if n := sub.MaxCap(); m < n {
- m = n
- }
- }
- return m
-}
-
-// CapNames walks the regexp to find the names of capturing groups.
-func (re *Regexp) CapNames() []string {
- names := make([]string, re.MaxCap()+1)
- re.capNames(names)
- return names
-}
-
-func (re *Regexp) capNames(names []string) {
- if re.Op == OpCapture {
- names[re.Cap] = re.Name
- }
- for _, sub := range re.Sub {
- sub.capNames(names)
- }
-}