summaryrefslogtreecommitdiff
path: root/src/pkg/exp/regexp/syntax/prog.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/exp/regexp/syntax/prog.go')
-rw-r--r--src/pkg/exp/regexp/syntax/prog.go182
1 files changed, 0 insertions, 182 deletions
diff --git a/src/pkg/exp/regexp/syntax/prog.go b/src/pkg/exp/regexp/syntax/prog.go
deleted file mode 100644
index 6eeb3da0c..000000000
--- a/src/pkg/exp/regexp/syntax/prog.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package syntax
-
-import (
- "bytes"
- "strconv"
-)
-
-// Compiled program.
-// May not belong in this package, but convenient for now.
-
-// A Prog is a compiled regular expression program.
-type Prog struct {
- Inst []Inst
- Start int // index of start instruction
-}
-
-// An InstOp is an instruction opcode.
-type InstOp uint8
-
-const (
- InstAlt InstOp = iota
- InstAltMatch
- InstCapture
- InstEmptyWidth
- InstMatch
- InstFail
- InstNop
- InstRune
-)
-
-// An EmptyOp specifies a kind or mixture of zero-width assertions.
-type EmptyOp uint8
-
-const (
- EmptyBeginLine EmptyOp = 1 << iota
- EmptyEndLine
- EmptyBeginText
- EmptyEndText
- EmptyWordBoundary
- EmptyNoWordBoundary
-)
-
-// An Inst is a single instruction in a regular expression program.
-type Inst struct {
- Op InstOp
- Out uint32 // all but InstMatch, InstFail
- Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
- Rune []int
-}
-
-func (p *Prog) String() string {
- var b bytes.Buffer
- dumpProg(&b, p)
- return b.String()
-}
-
-// MatchRune returns true if the instruction matches (and consumes) r.
-// It should only be called when i.Op == InstRune.
-func (i *Inst) MatchRune(r int) bool {
- rune := i.Rune
-
- // Special case: single-rune slice is from literal string, not char class.
- // TODO: Case folding.
- if len(rune) == 1 {
- return r == rune[0]
- }
-
- // Peek at the first few pairs.
- // Should handle ASCII well.
- for j := 0; j < len(rune) && j <= 8; j += 2 {
- if r < rune[j] {
- return false
- }
- if r <= rune[j+1] {
- return true
- }
- }
-
- // Otherwise binary search.
- lo := 0
- hi := len(rune) / 2
- for lo < hi {
- m := lo + (hi-lo)/2
- if c := rune[2*m]; c <= r {
- if r <= rune[2*m+1] {
- return true
- }
- lo = m + 1
- } else {
- hi = m
- }
- }
- return false
-}
-
-// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
-// Since we act on runes, it would be easy to support Unicode here.
-func wordRune(rune int) bool {
- return rune == '_' ||
- ('A' <= rune && rune <= 'Z') ||
- ('a' <= rune && rune <= 'z') ||
- ('0' <= rune && rune <= '9')
-}
-
-// MatchEmptyWidth returns true if the instruction matches
-// an empty string between the runes before and after.
-// It should only be called when i.Op == InstEmptyWidth.
-func (i *Inst) MatchEmptyWidth(before int, after int) bool {
- switch EmptyOp(i.Arg) {
- case EmptyBeginLine:
- return before == '\n' || before == -1
- case EmptyEndLine:
- return after == '\n' || after == -1
- case EmptyBeginText:
- return before == -1
- case EmptyEndText:
- return after == -1
- case EmptyWordBoundary:
- return wordRune(before) != wordRune(after)
- case EmptyNoWordBoundary:
- return wordRune(before) == wordRune(after)
- }
- panic("unknown empty width arg")
-}
-
-
-func (i *Inst) String() string {
- var b bytes.Buffer
- dumpInst(&b, i)
- return b.String()
-}
-
-func bw(b *bytes.Buffer, args ...string) {
- for _, s := range args {
- b.WriteString(s)
- }
-}
-
-func dumpProg(b *bytes.Buffer, p *Prog) {
- for j := range p.Inst {
- i := &p.Inst[j]
- pc := strconv.Itoa(j)
- if len(pc) < 3 {
- b.WriteString(" "[len(pc):])
- }
- if j == p.Start {
- pc += "*"
- }
- bw(b, pc, "\t")
- dumpInst(b, i)
- bw(b, "\n")
- }
-}
-
-func u32(i uint32) string {
- return strconv.Uitoa64(uint64(i))
-}
-
-func dumpInst(b *bytes.Buffer, i *Inst) {
- switch i.Op {
- case InstAlt:
- bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
- case InstAltMatch:
- bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
- case InstCapture:
- bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
- case InstEmptyWidth:
- bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
- case InstMatch:
- bw(b, "match")
- case InstFail:
- bw(b, "fail")
- case InstNop:
- bw(b, "nop -> ", u32(i.Out))
- case InstRune:
- if i.Rune == nil {
- // shouldn't happen
- bw(b, "rune <nil>")
- }
- bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
- }
-}