diff options
author | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
---|---|---|
committer | Tianon Gravi <admwiggin@gmail.com> | 2015-01-15 11:54:00 -0700 |
commit | f154da9e12608589e8d5f0508f908a0c3e88a1bb (patch) | |
tree | f8255d51e10c6f1e0ed69702200b966c9556a431 /src/pkg/regexp/syntax/prog.go | |
parent | 8d8329ed5dfb9622c82a9fbec6fd99a580f9c9f6 (diff) | |
download | golang-upstream/1.4.tar.gz |
Imported Upstream version 1.4upstream/1.4
Diffstat (limited to 'src/pkg/regexp/syntax/prog.go')
-rw-r--r-- | src/pkg/regexp/syntax/prog.go | 345 |
1 files changed, 0 insertions, 345 deletions
diff --git a/src/pkg/regexp/syntax/prog.go b/src/pkg/regexp/syntax/prog.go deleted file mode 100644 index 29bd282d0..000000000 --- a/src/pkg/regexp/syntax/prog.go +++ /dev/null @@ -1,345 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package syntax - -import ( - "bytes" - "strconv" - "unicode" -) - -// Compiled program. -// May not belong in this package, but convenient for now. - -// A Prog is a compiled regular expression program. -type Prog struct { - Inst []Inst - Start int // index of start instruction - NumCap int // number of InstCapture insts in re -} - -// An InstOp is an instruction opcode. -type InstOp uint8 - -const ( - InstAlt InstOp = iota - InstAltMatch - InstCapture - InstEmptyWidth - InstMatch - InstFail - InstNop - InstRune - InstRune1 - InstRuneAny - InstRuneAnyNotNL -) - -var instOpNames = []string{ - "InstAlt", - "InstAltMatch", - "InstCapture", - "InstEmptyWidth", - "InstMatch", - "InstFail", - "InstNop", - "InstRune", - "InstRune1", - "InstRuneAny", - "InstRuneAnyNotNL", -} - -func (i InstOp) String() string { - if uint(i) >= uint(len(instOpNames)) { - return "" - } - return instOpNames[i] -} - -// An EmptyOp specifies a kind or mixture of zero-width assertions. -type EmptyOp uint8 - -const ( - EmptyBeginLine EmptyOp = 1 << iota - EmptyEndLine - EmptyBeginText - EmptyEndText - EmptyWordBoundary - EmptyNoWordBoundary -) - -// EmptyOpContext returns the zero-width assertions -// satisfied at the position between the runes r1 and r2. -// Passing r1 == -1 indicates that the position is -// at the beginning of the text. -// Passing r2 == -1 indicates that the position is -// at the end of the text. -func EmptyOpContext(r1, r2 rune) EmptyOp { - var op EmptyOp = EmptyNoWordBoundary - var boundary byte - switch { - case IsWordChar(r1): - boundary = 1 - case r1 == '\n': - op |= EmptyBeginLine - case r1 < 0: - op |= EmptyBeginText | EmptyBeginLine - } - switch { - case IsWordChar(r2): - boundary ^= 1 - case r2 == '\n': - op |= EmptyEndLine - case r2 < 0: - op |= EmptyEndText | EmptyEndLine - } - if boundary != 0 { // IsWordChar(r1) != IsWordChar(r2) - op ^= (EmptyWordBoundary | EmptyNoWordBoundary) - } - return op -} - -// IsWordChar reports whether r is consider a ``word character'' -// during the evaluation of the \b and \B zero-width assertions. -// These assertions are ASCII-only: the word characters are [A-Za-z0-9_]. -func IsWordChar(r rune) bool { - return 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' || '0' <= r && r <= '9' || r == '_' -} - -// An Inst is a single instruction in a regular expression program. -type Inst struct { - Op InstOp - Out uint32 // all but InstMatch, InstFail - Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth - Rune []rune -} - -func (p *Prog) String() string { - var b bytes.Buffer - dumpProg(&b, p) - return b.String() -} - -// skipNop follows any no-op or capturing instructions -// and returns the resulting pc. -func (p *Prog) skipNop(pc uint32) (*Inst, uint32) { - i := &p.Inst[pc] - for i.Op == InstNop || i.Op == InstCapture { - pc = i.Out - i = &p.Inst[pc] - } - return i, pc -} - -// op returns i.Op but merges all the Rune special cases into InstRune -func (i *Inst) op() InstOp { - op := i.Op - switch op { - case InstRune1, InstRuneAny, InstRuneAnyNotNL: - op = InstRune - } - return op -} - -// Prefix returns a literal string that all matches for the -// regexp must start with. Complete is true if the prefix -// is the entire match. -func (p *Prog) Prefix() (prefix string, complete bool) { - i, _ := p.skipNop(uint32(p.Start)) - - // Avoid allocation of buffer if prefix is empty. - if i.op() != InstRune || len(i.Rune) != 1 { - return "", i.Op == InstMatch - } - - // Have prefix; gather characters. - var buf bytes.Buffer - for i.op() == InstRune && len(i.Rune) == 1 && Flags(i.Arg)&FoldCase == 0 { - buf.WriteRune(i.Rune[0]) - i, _ = p.skipNop(i.Out) - } - return buf.String(), i.Op == InstMatch -} - -// StartCond returns the leading empty-width conditions that must -// be true in any match. It returns ^EmptyOp(0) if no matches are possible. -func (p *Prog) StartCond() EmptyOp { - var flag EmptyOp - pc := uint32(p.Start) - i := &p.Inst[pc] -Loop: - for { - switch i.Op { - case InstEmptyWidth: - flag |= EmptyOp(i.Arg) - case InstFail: - return ^EmptyOp(0) - case InstCapture, InstNop: - // skip - default: - break Loop - } - pc = i.Out - i = &p.Inst[pc] - } - return flag -} - -const noMatch = -1 - -// MatchRune returns true if the instruction matches (and consumes) r. -// It should only be called when i.Op == InstRune. -func (i *Inst) MatchRune(r rune) bool { - return i.MatchRunePos(r) != noMatch -} - -// MatchRunePos checks whether the instruction matches (and consumes) r. -// If so, MatchRunePos returns the index of the matching rune pair -// (or, when len(i.Rune) == 1, rune singleton). -// If not, MatchRunePos returns -1. -// MatchRunePos should only be called when i.Op == InstRune. -func (i *Inst) MatchRunePos(r rune) int { - rune := i.Rune - - // Special case: single-rune slice is from literal string, not char class. - if len(rune) == 1 { - r0 := rune[0] - if r == r0 { - return 0 - } - if Flags(i.Arg)&FoldCase != 0 { - for r1 := unicode.SimpleFold(r0); r1 != r0; r1 = unicode.SimpleFold(r1) { - if r == r1 { - return 0 - } - } - } - return noMatch - } - - // Peek at the first few pairs. - // Should handle ASCII well. - for j := 0; j < len(rune) && j <= 8; j += 2 { - if r < rune[j] { - return noMatch - } - if r <= rune[j+1] { - return j / 2 - } - } - - // Otherwise binary search. - lo := 0 - hi := len(rune) / 2 - for lo < hi { - m := lo + (hi-lo)/2 - if c := rune[2*m]; c <= r { - if r <= rune[2*m+1] { - return m - } - lo = m + 1 - } else { - hi = m - } - } - return noMatch -} - -// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char. -// Since we act on runes, it would be easy to support Unicode here. -func wordRune(r rune) bool { - return r == '_' || - ('A' <= r && r <= 'Z') || - ('a' <= r && r <= 'z') || - ('0' <= r && r <= '9') -} - -// MatchEmptyWidth returns true if the instruction matches -// an empty string between the runes before and after. -// It should only be called when i.Op == InstEmptyWidth. -func (i *Inst) MatchEmptyWidth(before rune, after rune) bool { - switch EmptyOp(i.Arg) { - case EmptyBeginLine: - return before == '\n' || before == -1 - case EmptyEndLine: - return after == '\n' || after == -1 - case EmptyBeginText: - return before == -1 - case EmptyEndText: - return after == -1 - case EmptyWordBoundary: - return wordRune(before) != wordRune(after) - case EmptyNoWordBoundary: - return wordRune(before) == wordRune(after) - } - panic("unknown empty width arg") -} - -func (i *Inst) String() string { - var b bytes.Buffer - dumpInst(&b, i) - return b.String() -} - -func bw(b *bytes.Buffer, args ...string) { - for _, s := range args { - b.WriteString(s) - } -} - -func dumpProg(b *bytes.Buffer, p *Prog) { - for j := range p.Inst { - i := &p.Inst[j] - pc := strconv.Itoa(j) - if len(pc) < 3 { - b.WriteString(" "[len(pc):]) - } - if j == p.Start { - pc += "*" - } - bw(b, pc, "\t") - dumpInst(b, i) - bw(b, "\n") - } -} - -func u32(i uint32) string { - return strconv.FormatUint(uint64(i), 10) -} - -func dumpInst(b *bytes.Buffer, i *Inst) { - switch i.Op { - case InstAlt: - bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg)) - case InstAltMatch: - bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg)) - case InstCapture: - bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out)) - case InstEmptyWidth: - bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out)) - case InstMatch: - bw(b, "match") - case InstFail: - bw(b, "fail") - case InstNop: - bw(b, "nop -> ", u32(i.Out)) - case InstRune: - if i.Rune == nil { - // shouldn't happen - bw(b, "rune <nil>") - } - bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune))) - if Flags(i.Arg)&FoldCase != 0 { - bw(b, "/i") - } - bw(b, " -> ", u32(i.Out)) - case InstRune1: - bw(b, "rune1 ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out)) - case InstRuneAny: - bw(b, "any -> ", u32(i.Out)) - case InstRuneAnyNotNL: - bw(b, "anynotnl -> ", u32(i.Out)) - } -} |