summaryrefslogtreecommitdiff
path: root/src/pkg/exp
diff options
context:
space:
mode:
Diffstat (limited to 'src/pkg/exp')
-rw-r--r--src/pkg/exp/ogle/cmd.go2
-rw-r--r--src/pkg/exp/regexp/syntax/Makefile3
-rw-r--r--src/pkg/exp/regexp/syntax/compile.go264
-rw-r--r--src/pkg/exp/regexp/syntax/parse.go754
-rw-r--r--src/pkg/exp/regexp/syntax/parse_test.go310
-rw-r--r--src/pkg/exp/regexp/syntax/prog.go182
-rw-r--r--src/pkg/exp/regexp/syntax/prog_test.go91
-rw-r--r--src/pkg/exp/regexp/syntax/regexp.go78
-rw-r--r--src/pkg/exp/regexp/syntax/simplify.go151
-rw-r--r--src/pkg/exp/regexp/syntax/simplify_test.go151
-rw-r--r--src/pkg/exp/template/Makefile5
-rw-r--r--src/pkg/exp/template/exec.go508
-rw-r--r--src/pkg/exp/template/exec_test.go342
-rw-r--r--src/pkg/exp/template/funcs.go294
-rw-r--r--src/pkg/exp/template/lex.go180
-rw-r--r--src/pkg/exp/template/lex_test.go44
-rw-r--r--src/pkg/exp/template/parse.go467
-rw-r--r--src/pkg/exp/template/parse_test.go127
-rw-r--r--src/pkg/exp/template/set.go115
-rw-r--r--src/pkg/exp/template/set_test.go101
20 files changed, 3739 insertions, 430 deletions
diff --git a/src/pkg/exp/ogle/cmd.go b/src/pkg/exp/ogle/cmd.go
index a8db523ea..ff0d24c69 100644
--- a/src/pkg/exp/ogle/cmd.go
+++ b/src/pkg/exp/ogle/cmd.go
@@ -154,7 +154,7 @@ func cmdLoad(args []byte) os.Error {
}
println("Attached to", pid)
} else {
- parts := strings.Split(path, " ", -1)
+ parts := strings.Split(path, " ")
if len(parts) == 0 {
fname = ""
} else {
diff --git a/src/pkg/exp/regexp/syntax/Makefile b/src/pkg/exp/regexp/syntax/Makefile
index 8e0b4c1e6..97d4ad6ca 100644
--- a/src/pkg/exp/regexp/syntax/Makefile
+++ b/src/pkg/exp/regexp/syntax/Makefile
@@ -6,8 +6,11 @@ include ../../../../Make.inc
TARG=exp/regexp/syntax
GOFILES=\
+ compile.go\
parse.go\
perl_groups.go\
+ prog.go\
regexp.go\
+ simplify.go\
include ../../../../Make.pkg
diff --git a/src/pkg/exp/regexp/syntax/compile.go b/src/pkg/exp/regexp/syntax/compile.go
new file mode 100644
index 000000000..ec9556fde
--- /dev/null
+++ b/src/pkg/exp/regexp/syntax/compile.go
@@ -0,0 +1,264 @@
+package syntax
+
+import (
+ "os"
+ "unicode"
+)
+
+// A patchList is a list of instruction pointers that need to be filled in (patched).
+// Because the pointers haven't been filled in yet, we can reuse their storage
+// to hold the list. It's kind of sleazy, but works well in practice.
+// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration.
+//
+// These aren't really pointers: they're integers, so we can reinterpret them
+// this way without using package unsafe. A value l denotes
+// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).
+// l == 0 denotes the empty list, okay because we start every program
+// with a fail instruction, so we'll never want to point at its output link.
+type patchList uint32
+
+func (l patchList) next(p *Prog) patchList {
+ i := &p.Inst[l>>1]
+ if l&1 == 0 {
+ return patchList(i.Out)
+ }
+ return patchList(i.Arg)
+}
+
+func (l patchList) patch(p *Prog, val uint32) {
+ for l != 0 {
+ i := &p.Inst[l>>1]
+ if l&1 == 0 {
+ l = patchList(i.Out)
+ i.Out = val
+ } else {
+ l = patchList(i.Arg)
+ i.Arg = val
+ }
+ }
+}
+
+func (l1 patchList) append(p *Prog, l2 patchList) patchList {
+ if l1 == 0 {
+ return l2
+ }
+ if l2 == 0 {
+ return l1
+ }
+
+ last := l1
+ for {
+ next := last.next(p)
+ if next == 0 {
+ break
+ }
+ last = next
+ }
+
+ i := &p.Inst[last>>1]
+ if last&1 == 0 {
+ i.Out = uint32(l2)
+ } else {
+ i.Arg = uint32(l2)
+ }
+ return l1
+}
+
+// A frag represents a compiled program fragment.
+type frag struct {
+ i uint32 // index of first instruction
+ out patchList // where to record end instruction
+}
+
+type compiler struct {
+ p *Prog
+}
+
+// Compile compiles the regexp into a program to be executed.
+func Compile(re *Regexp) (*Prog, os.Error) {
+ var c compiler
+ c.init()
+ f := c.compile(re)
+ f.out.patch(c.p, c.inst(InstMatch).i)
+ c.p.Start = int(f.i)
+ return c.p, nil
+}
+
+func (c *compiler) init() {
+ c.p = new(Prog)
+ c.inst(InstFail)
+}
+
+var anyRuneNotNL = []int{0, '\n' - 1, '\n' - 1, unicode.MaxRune}
+var anyRune = []int{0, unicode.MaxRune}
+
+func (c *compiler) compile(re *Regexp) frag {
+ switch re.Op {
+ case OpNoMatch:
+ return c.fail()
+ case OpEmptyMatch:
+ return c.nop()
+ case OpLiteral:
+ if len(re.Rune) == 0 {
+ return c.nop()
+ }
+ var f frag
+ for j := range re.Rune {
+ f1 := c.rune(re.Rune[j : j+1])
+ if j == 0 {
+ f = f1
+ } else {
+ f = c.cat(f, f1)
+ }
+ }
+ return f
+ case OpCharClass:
+ return c.rune(re.Rune)
+ case OpAnyCharNotNL:
+ return c.rune(anyRuneNotNL)
+ case OpAnyChar:
+ return c.rune(anyRune)
+ case OpBeginLine:
+ return c.empty(EmptyBeginLine)
+ case OpEndLine:
+ return c.empty(EmptyEndLine)
+ case OpBeginText:
+ return c.empty(EmptyBeginText)
+ case OpEndText:
+ return c.empty(EmptyEndText)
+ case OpWordBoundary:
+ return c.empty(EmptyWordBoundary)
+ case OpNoWordBoundary:
+ return c.empty(EmptyNoWordBoundary)
+ case OpCapture:
+ bra := c.cap(uint32(re.Cap << 1))
+ sub := c.compile(re.Sub[0])
+ ket := c.cap(uint32(re.Cap<<1 | 1))
+ return c.cat(c.cat(bra, sub), ket)
+ case OpStar:
+ return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
+ case OpPlus:
+ return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
+ case OpQuest:
+ return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0)
+ case OpConcat:
+ if len(re.Sub) == 0 {
+ return c.nop()
+ }
+ var f frag
+ for i, sub := range re.Sub {
+ if i == 0 {
+ f = c.compile(sub)
+ } else {
+ f = c.cat(f, c.compile(sub))
+ }
+ }
+ return f
+ case OpAlternate:
+ var f frag
+ for _, sub := range re.Sub {
+ f = c.alt(f, c.compile(sub))
+ }
+ return f
+ }
+ panic("regexp: unhandled case in compile")
+}
+
+func (c *compiler) inst(op InstOp) frag {
+ // TODO: impose length limit
+ f := frag{i: uint32(len(c.p.Inst))}
+ c.p.Inst = append(c.p.Inst, Inst{Op: op})
+ return f
+}
+
+func (c *compiler) nop() frag {
+ f := c.inst(InstNop)
+ f.out = patchList(f.i << 1)
+ return f
+}
+
+func (c *compiler) fail() frag {
+ return frag{}
+}
+
+func (c *compiler) cap(arg uint32) frag {
+ f := c.inst(InstCapture)
+ f.out = patchList(f.i << 1)
+ c.p.Inst[f.i].Arg = arg
+ return f
+}
+
+func (c *compiler) cat(f1, f2 frag) frag {
+ // concat of failure is failure
+ if f1.i == 0 || f2.i == 0 {
+ return frag{}
+ }
+
+ // TODO: elide nop
+
+ f1.out.patch(c.p, f2.i)
+ return frag{f1.i, f2.out}
+}
+
+func (c *compiler) alt(f1, f2 frag) frag {
+ // alt of failure is other
+ if f1.i == 0 {
+ return f2
+ }
+ if f2.i == 0 {
+ return f1
+ }
+
+ f := c.inst(InstAlt)
+ i := &c.p.Inst[f.i]
+ i.Out = f1.i
+ i.Arg = f2.i
+ f.out = f1.out.append(c.p, f2.out)
+ return f
+}
+
+func (c *compiler) quest(f1 frag, nongreedy bool) frag {
+ f := c.inst(InstAlt)
+ i := &c.p.Inst[f.i]
+ if nongreedy {
+ i.Arg = f1.i
+ f.out = patchList(f.i << 1)
+ } else {
+ i.Out = f1.i
+ f.out = patchList(f.i<<1 | 1)
+ }
+ f.out = f.out.append(c.p, f1.out)
+ return f
+}
+
+func (c *compiler) star(f1 frag, nongreedy bool) frag {
+ f := c.inst(InstAlt)
+ i := &c.p.Inst[f.i]
+ if nongreedy {
+ i.Arg = f1.i
+ f.out = patchList(f.i << 1)
+ } else {
+ i.Out = f1.i
+ f.out = patchList(f.i<<1 | 1)
+ }
+ f1.out.patch(c.p, f.i)
+ return f
+}
+
+func (c *compiler) plus(f1 frag, nongreedy bool) frag {
+ return frag{f1.i, c.star(f1, nongreedy).out}
+}
+
+func (c *compiler) empty(op EmptyOp) frag {
+ f := c.inst(InstEmptyWidth)
+ c.p.Inst[f.i].Arg = uint32(op)
+ f.out = patchList(f.i << 1)
+ return f
+}
+
+func (c *compiler) rune(rune []int) frag {
+ f := c.inst(InstRune)
+ c.p.Inst[f.i].Rune = rune
+ f.out = patchList(f.i << 1)
+ return f
+}
diff --git a/src/pkg/exp/regexp/syntax/parse.go b/src/pkg/exp/regexp/syntax/parse.go
index d04f25097..b6c91f7e1 100644
--- a/src/pkg/exp/regexp/syntax/parse.go
+++ b/src/pkg/exp/regexp/syntax/parse.go
@@ -79,28 +79,108 @@ const (
type parser struct {
flags Flags // parse mode flags
stack []*Regexp // stack of parsed expressions
- numCap int // number of capturing groups seen
+ free *Regexp
+ numCap int // number of capturing groups seen
wholeRegexp string
+ tmpClass []int // temporary char class work space
+}
+
+func (p *parser) newRegexp(op Op) *Regexp {
+ re := p.free
+ if re != nil {
+ p.free = re.Sub0[0]
+ *re = Regexp{}
+ } else {
+ re = new(Regexp)
+ }
+ re.Op = op
+ return re
+}
+
+func (p *parser) reuse(re *Regexp) {
+ re.Sub0[0] = p.free
+ p.free = re
}
// Parse stack manipulation.
// push pushes the regexp re onto the parse stack and returns the regexp.
func (p *parser) push(re *Regexp) *Regexp {
- // TODO: automatic concatenation
- // TODO: turn character class into literal
- // TODO: compute simple
+ if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
+ // Single rune.
+ if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
+ return nil
+ }
+ re.Op = OpLiteral
+ re.Rune = re.Rune[:1]
+ re.Flags = p.flags &^ FoldCase
+ } else if re.Op == OpCharClass && len(re.Rune) == 4 &&
+ re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] &&
+ unicode.SimpleFold(re.Rune[0]) == re.Rune[2] &&
+ unicode.SimpleFold(re.Rune[2]) == re.Rune[0] ||
+ re.Op == OpCharClass && len(re.Rune) == 2 &&
+ re.Rune[0]+1 == re.Rune[1] &&
+ unicode.SimpleFold(re.Rune[0]) == re.Rune[1] &&
+ unicode.SimpleFold(re.Rune[1]) == re.Rune[0] {
+ // Case-insensitive rune like [Aa] or [Δδ].
+ if p.maybeConcat(re.Rune[0], p.flags|FoldCase) {
+ return nil
+ }
+
+ // Rewrite as (case-insensitive) literal.
+ re.Op = OpLiteral
+ re.Rune = re.Rune[:1]
+ re.Flags = p.flags | FoldCase
+ } else {
+ // Incremental concatenation.
+ p.maybeConcat(-1, 0)
+ }
p.stack = append(p.stack, re)
return re
}
-// newLiteral returns a new OpLiteral Regexp with the given flags
-func newLiteral(r int, flags Flags) *Regexp {
- re := &Regexp{
- Op: OpLiteral,
- Flags: flags,
+// maybeConcat implements incremental concatenation
+// of literal runes into string nodes. The parser calls this
+// before each push, so only the top fragment of the stack
+// might need processing. Since this is called before a push,
+// the topmost literal is no longer subject to operators like *
+// (Otherwise ab* would turn into (ab)*.)
+// If r >= 0 and there's a node left over, maybeConcat uses it
+// to push r with the given flags.
+// maybeConcat reports whether r was pushed.
+func (p *parser) maybeConcat(r int, flags Flags) bool {
+ n := len(p.stack)
+ if n < 2 {
+ return false
}
+
+ re1 := p.stack[n-1]
+ re2 := p.stack[n-2]
+ if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase {
+ return false
+ }
+
+ // Push re1 into re2.
+ re2.Rune = append(re2.Rune, re1.Rune...)
+
+ // Reuse re1 if possible.
+ if r >= 0 {
+ re1.Rune = re1.Rune0[:1]
+ re1.Rune[0] = r
+ re1.Flags = flags
+ return true
+ }
+
+ p.stack = p.stack[:n-1]
+ p.reuse(re1)
+ return false // did not push r
+}
+
+// newLiteral returns a new OpLiteral Regexp with the given flags
+func (p *parser) newLiteral(r int, flags Flags) *Regexp {
+ re := p.newRegexp(OpLiteral)
+ re.Flags = flags
re.Rune0[0] = r
re.Rune = re.Rune0[:1]
return re
@@ -108,14 +188,16 @@ func newLiteral(r int, flags Flags) *Regexp {
// literal pushes a literal regexp for the rune r on the stack
// and returns that regexp.
-func (p *parser) literal(r int) *Regexp {
- return p.push(newLiteral(r, p.flags))
+func (p *parser) literal(r int) {
+ p.push(p.newLiteral(r, p.flags))
}
// op pushes a regexp with the given op onto the stack
// and returns that regexp.
func (p *parser) op(op Op) *Regexp {
- return p.push(&Regexp{Op: op, Flags: p.flags})
+ re := p.newRegexp(op)
+ re.Flags = p.flags
+ return p.push(re)
}
// repeat replaces the top stack element with itself repeated
@@ -139,12 +221,10 @@ func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (strin
return "", &Error{ErrMissingRepeatArgument, opstr}
}
sub := p.stack[n-1]
- re := &Regexp{
- Op: op,
- Min: min,
- Max: max,
- Flags: flags,
- }
+ re := p.newRegexp(op)
+ re.Min = min
+ re.Max = max
+ re.Flags = flags
re.Sub = re.Sub0[:1]
re.Sub[0] = sub
p.stack[n-1] = re
@@ -153,60 +233,385 @@ func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (strin
// concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.
func (p *parser) concat() *Regexp {
- // TODO: Flatten concats.
+ p.maybeConcat(-1, 0)
// Scan down to find pseudo-operator | or (.
i := len(p.stack)
for i > 0 && p.stack[i-1].Op < opPseudo {
i--
}
- sub := p.stack[i:]
+ subs := p.stack[i:]
p.stack = p.stack[:i]
- var re *Regexp
- switch len(sub) {
- case 0:
- re = &Regexp{Op: OpEmptyMatch}
- case 1:
- re = sub[0]
- default:
- re = &Regexp{Op: OpConcat}
- re.Sub = append(re.Sub0[:0], sub...)
+ // Empty concatenation is special case.
+ if len(subs) == 0 {
+ return p.push(p.newRegexp(OpEmptyMatch))
}
- return p.push(re)
+
+ return p.push(p.collapse(subs, OpConcat))
}
// alternate replaces the top of the stack (above the topmost '(') with its alternation.
func (p *parser) alternate() *Regexp {
- // TODO: Flatten alternates.
-
// Scan down to find pseudo-operator (.
// There are no | above (.
i := len(p.stack)
for i > 0 && p.stack[i-1].Op < opPseudo {
i--
}
- sub := p.stack[i:]
+ subs := p.stack[i:]
p.stack = p.stack[:i]
- var re *Regexp
- switch len(sub) {
- case 0:
- re = &Regexp{Op: OpNoMatch}
- case 1:
- re = sub[0]
- default:
- re = &Regexp{Op: OpAlternate}
- re.Sub = append(re.Sub0[:0], sub...)
+ // Make sure top class is clean.
+ // All the others already are (see swapVerticalBar).
+ if len(subs) > 0 {
+ cleanAlt(subs[len(subs)-1])
}
- return p.push(re)
+
+ // Empty alternate is special case
+ // (shouldn't happen but easy to handle).
+ if len(subs) == 0 {
+ return p.push(p.newRegexp(OpNoMatch))
+ }
+
+ return p.push(p.collapse(subs, OpAlternate))
}
-func literalRegexp(s string, flags Flags) *Regexp {
- re := &Regexp{
- Op: OpLiteral,
- Flags: flags,
+// cleanAlt cleans re for eventual inclusion in an alternation.
+func cleanAlt(re *Regexp) {
+ switch re.Op {
+ case OpCharClass:
+ re.Rune = cleanClass(&re.Rune)
+ if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune {
+ re.Rune = nil
+ re.Op = OpAnyChar
+ return
+ }
+ if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune {
+ re.Rune = nil
+ re.Op = OpAnyCharNotNL
+ return
+ }
+ if cap(re.Rune)-len(re.Rune) > 100 {
+ // re.Rune will not grow any more.
+ // Make a copy or inline to reclaim storage.
+ re.Rune = append(re.Rune0[:0], re.Rune...)
+ }
+ }
+}
+
+// collapse returns the result of applying op to sub.
+// If sub contains op nodes, they all get hoisted up
+// so that there is never a concat of a concat or an
+// alternate of an alternate.
+func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
+ if len(subs) == 1 {
+ return subs[0]
+ }
+ re := p.newRegexp(op)
+ re.Sub = re.Sub0[:0]
+ for _, sub := range subs {
+ if sub.Op == op {
+ re.Sub = append(re.Sub, sub.Sub...)
+ p.reuse(sub)
+ } else {
+ re.Sub = append(re.Sub, sub)
+ }
+ }
+ if op == OpAlternate {
+ re.Sub = p.factor(re.Sub, re.Flags)
+ if len(re.Sub) == 1 {
+ old := re
+ re = re.Sub[0]
+ p.reuse(old)
+ }
+ }
+ return re
+}
+
+// factor factors common prefixes from the alternation list sub.
+// It returns a replacement list that reuses the same storage and
+// frees (passes to p.reuse) any removed *Regexps.
+//
+// For example,
+// ABC|ABD|AEF|BCX|BCY
+// simplifies by literal prefix extraction to
+// A(B(C|D)|EF)|BC(X|Y)
+// which simplifies by character class introduction to
+// A(B[CD]|EF)|BC[XY]
+//
+func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp {
+ if len(sub) < 2 {
+ return sub
+ }
+
+ // Round 1: Factor out common literal prefixes.
+ var str []int
+ var strflags Flags
+ start := 0
+ out := sub[:0]
+ for i := 0; i <= len(sub); i++ {
+ // Invariant: the Regexps that were in sub[0:start] have been
+ // used or marked for reuse, and the slice space has been reused
+ // for out (len(out) <= start).
+ //
+ // Invariant: sub[start:i] consists of regexps that all begin
+ // with str as modified by strflags.
+ var istr []int
+ var iflags Flags
+ if i < len(sub) {
+ istr, iflags = p.leadingString(sub[i])
+ if iflags == strflags {
+ same := 0
+ for same < len(str) && same < len(istr) && str[same] == istr[same] {
+ same++
+ }
+ if same > 0 {
+ // Matches at least one rune in current range.
+ // Keep going around.
+ str = str[:same]
+ continue
+ }
+ }
+ }
+
+ // Found end of a run with common leading literal string:
+ // sub[start:i] all begin with str[0:len(str)], but sub[i]
+ // does not even begin with str[0].
+ //
+ // Factor out common string and append factored expression to out.
+ if i == start {
+ // Nothing to do - run of length 0.
+ } else if i == start+1 {
+ // Just one: don't bother factoring.
+ out = append(out, sub[start])
+ } else {
+ // Construct factored form: prefix(suffix1|suffix2|...)
+ prefix := p.newRegexp(OpLiteral)
+ prefix.Flags = strflags
+ prefix.Rune = append(prefix.Rune[:0], str...)
+
+ for j := start; j < i; j++ {
+ sub[j] = p.removeLeadingString(sub[j], len(str))
+ }
+ suffix := p.collapse(sub[start:i], OpAlternate) // recurse
+
+ re := p.newRegexp(OpConcat)
+ re.Sub = append(re.Sub[:0], prefix, suffix)
+ out = append(out, re)
+ }
+
+ // Prepare for next iteration.
+ start = i
+ str = istr
+ strflags = iflags
+ }
+ sub = out
+
+ // Round 2: Factor out common complex prefixes,
+ // just the first piece of each concatenation,
+ // whatever it is. This is good enough a lot of the time.
+ start = 0
+ out = sub[:0]
+ var first *Regexp
+ for i := 0; i <= len(sub); i++ {
+ // Invariant: the Regexps that were in sub[0:start] have been
+ // used or marked for reuse, and the slice space has been reused
+ // for out (len(out) <= start).
+ //
+ // Invariant: sub[start:i] consists of regexps that all begin
+ // with str as modified by strflags.
+ var ifirst *Regexp
+ if i < len(sub) {
+ ifirst = p.leadingRegexp(sub[i])
+ if first != nil && first.Equal(ifirst) {
+ continue
+ }
+ }
+
+ // Found end of a run with common leading regexp:
+ // sub[start:i] all begin with first but sub[i] does not.
+ //
+ // Factor out common regexp and append factored expression to out.
+ if i == start {
+ // Nothing to do - run of length 0.
+ } else if i == start+1 {
+ // Just one: don't bother factoring.
+ out = append(out, sub[start])
+ } else {
+ // Construct factored form: prefix(suffix1|suffix2|...)
+ prefix := first
+
+ for j := start; j < i; j++ {
+ reuse := j != start // prefix came from sub[start]
+ sub[j] = p.removeLeadingRegexp(sub[j], reuse)
+ }
+ suffix := p.collapse(sub[start:i], OpAlternate) // recurse
+
+ re := p.newRegexp(OpConcat)
+ re.Sub = append(re.Sub[:0], prefix, suffix)
+ out = append(out, re)
+ }
+
+ // Prepare for next iteration.
+ start = i
+ first = ifirst
+ }
+ sub = out
+
+ // Round 3: Collapse runs of single literals into character classes.
+ start = 0
+ out = sub[:0]
+ for i := 0; i <= len(sub); i++ {
+ // Invariant: the Regexps that were in sub[0:start] have been
+ // used or marked for reuse, and the slice space has been reused
+ // for out (len(out) <= start).
+ //
+ // Invariant: sub[start:i] consists of regexps that are either
+ // literal runes or character classes.
+ if i < len(sub) && isCharClass(sub[i]) {
+ continue
+ }
+
+ // sub[i] is not a char or char class;
+ // emit char class for sub[start:i]...
+ if i == start {
+ // Nothing to do - run of length 0.
+ } else if i == start+1 {
+ out = append(out, sub[start])
+ } else {
+ // Make new char class.
+ // Start with most complex regexp in sub[start].
+ max := start
+ for j := start + 1; j < i; j++ {
+ if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) {
+ max = j
+ }
+ }
+ sub[start], sub[max] = sub[max], sub[start]
+
+ for j := start + 1; j < i; j++ {
+ mergeCharClass(sub[start], sub[j])
+ p.reuse(sub[j])
+ }
+ cleanAlt(sub[start])
+ out = append(out, sub[start])
+ }
+
+ // ... and then emit sub[i].
+ if i < len(sub) {
+ out = append(out, sub[i])
+ }
+ start = i + 1
+ }
+ sub = out
+
+ // Round 4: Collapse runs of empty matches into a single empty match.
+ start = 0
+ out = sub[:0]
+ for i := range sub {
+ if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch {
+ continue
+ }
+ out = append(out, sub[i])
+ }
+ sub = out
+
+ return sub
+}
+
+// leadingString returns the leading literal string that re begins with.
+// The string refers to storage in re or its children.
+func (p *parser) leadingString(re *Regexp) ([]int, Flags) {
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ re = re.Sub[0]
+ }
+ if re.Op != OpLiteral {
+ return nil, 0
+ }
+ return re.Rune, re.Flags & FoldCase
+}
+
+// removeLeadingString removes the first n leading runes
+// from the beginning of re. It returns the replacement for re.
+func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp {
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ // Removing a leading string in a concatenation
+ // might simplify the concatenation.
+ sub := re.Sub[0]
+ sub = p.removeLeadingString(sub, n)
+ re.Sub[0] = sub
+ if sub.Op == OpEmptyMatch {
+ p.reuse(sub)
+ switch len(re.Sub) {
+ case 0, 1:
+ // Impossible but handle.
+ re.Op = OpEmptyMatch
+ re.Sub = nil
+ case 2:
+ old := re
+ re = re.Sub[1]
+ p.reuse(old)
+ default:
+ copy(re.Sub, re.Sub[1:])
+ re.Sub = re.Sub[:len(re.Sub)-1]
+ }
+ }
+ return re
}
+
+ if re.Op == OpLiteral {
+ re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])]
+ if len(re.Rune) == 0 {
+ re.Op = OpEmptyMatch
+ }
+ }
+ return re
+}
+
+// leadingRegexp returns the leading regexp that re begins with.
+// The regexp refers to storage in re or its children.
+func (p *parser) leadingRegexp(re *Regexp) *Regexp {
+ if re.Op == OpEmptyMatch {
+ return nil
+ }
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ sub := re.Sub[0]
+ if sub.Op == OpEmptyMatch {
+ return nil
+ }
+ return sub
+ }
+ return re
+}
+
+// removeLeadingRegexp removes the leading regexp in re.
+// It returns the replacement for re.
+// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse.
+func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp {
+ if re.Op == OpConcat && len(re.Sub) > 0 {
+ if reuse {
+ p.reuse(re.Sub[0])
+ }
+ re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])]
+ switch len(re.Sub) {
+ case 0:
+ re.Op = OpEmptyMatch
+ re.Sub = nil
+ case 1:
+ old := re
+ re = re.Sub[0]
+ p.reuse(old)
+ }
+ return re
+ }
+ re.Op = OpEmptyMatch
+ return re
+}
+
+func literalRegexp(s string, flags Flags) *Regexp {
+ re := &Regexp{Op: OpLiteral}
+ re.Flags = flags
re.Rune = re.Rune0[:0] // use local storage for small strings
for _, c := range s {
if len(re.Rune) >= cap(re.Rune) {
@@ -264,7 +669,6 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
p.op(opLeftParen).Cap = p.numCap
t = t[1:]
case '|':
- p.concat()
if err = p.parseVerticalBar(); err != nil {
return nil, err
}
@@ -360,7 +764,8 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
}
}
- re := &Regexp{Op: OpCharClass, Flags: p.flags}
+ re := p.newRegexp(OpCharClass)
+ re.Flags = p.flags
// Look for Unicode character group like \p{Han}
if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') {
@@ -371,7 +776,6 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
if r != nil {
re.Rune = r
t = rest
- // TODO: Handle FoldCase flag.
p.push(re)
break BigSwitch
}
@@ -381,12 +785,10 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {
if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {
re.Rune = r
t = rest
- // TODO: Handle FoldCase flag.
p.push(re)
break BigSwitch
}
-
- // TODO: Give re back to parser's pool.
+ p.reuse(re)
// Ordinary single-character escape.
if c, t, err = p.parseEscape(t); err != nil {
@@ -592,6 +994,35 @@ func (p *parser) parseInt(s string) (n int, rest string, ok bool) {
return
}
+// can this be represented as a character class?
+// single-rune literal string, char class, ., and .|\n.
+func isCharClass(re *Regexp) bool {
+ return re.Op == OpLiteral && len(re.Rune) == 1 ||
+ re.Op == OpCharClass ||
+ re.Op == OpAnyCharNotNL ||
+ re.Op == OpAnyChar
+}
+
+// does re match r?
+func matchRune(re *Regexp, r int) bool {
+ switch re.Op {
+ case OpLiteral:
+ return len(re.Rune) == 1 && re.Rune[0] == r
+ case OpCharClass:
+ for i := 0; i < len(re.Rune); i += 2 {
+ if re.Rune[i] <= r && r <= re.Rune[i+1] {
+ return true
+ }
+ }
+ return false
+ case OpAnyCharNotNL:
+ return r != '\n'
+ case OpAnyChar:
+ return true
+ }
+ return false
+}
+
// parseVerticalBar handles a | in the input.
func (p *parser) parseVerticalBar() os.Error {
p.concat()
@@ -607,14 +1038,66 @@ func (p *parser) parseVerticalBar() os.Error {
return nil
}
+// mergeCharClass makes dst = dst|src.
+// The caller must ensure that dst.Op >= src.Op,
+// to reduce the amount of copying.
+func mergeCharClass(dst, src *Regexp) {
+ switch dst.Op {
+ case OpAnyChar:
+ // src doesn't add anything.
+ case OpAnyCharNotNL:
+ // src might add \n
+ if matchRune(src, '\n') {
+ dst.Op = OpAnyChar
+ }
+ case OpCharClass:
+ // src is simpler, so either literal or char class
+ if src.Op == OpLiteral {
+ dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0])
+ } else {
+ dst.Rune = appendClass(dst.Rune, src.Rune)
+ }
+ case OpLiteral:
+ // both literal
+ if src.Rune[0] == dst.Rune[0] {
+ break
+ }
+ dst.Op = OpCharClass
+ dst.Rune = append(dst.Rune, dst.Rune[0])
+ dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0])
+ }
+}
+
// If the top of the stack is an element followed by an opVerticalBar
// swapVerticalBar swaps the two and returns true.
// Otherwise it returns false.
func (p *parser) swapVerticalBar() bool {
- if n := len(p.stack); n >= 2 {
+ // If above and below vertical bar are literal or char class,
+ // can merge into a single char class.
+ n := len(p.stack)
+ if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) {
+ re1 := p.stack[n-1]
+ re3 := p.stack[n-3]
+ // Make re3 the more complex of the two.
+ if re1.Op > re3.Op {
+ re1, re3 = re3, re1
+ p.stack[n-3] = re3
+ }
+ mergeCharClass(re3, re1)
+ p.reuse(re1)
+ p.stack = p.stack[:n-1]
+ return true
+ }
+
+ if n >= 2 {
re1 := p.stack[n-1]
re2 := p.stack[n-2]
if re2.Op == opVerticalBar {
+ if n >= 3 {
+ // Now out of reach.
+ // Clean opportunistically.
+ cleanAlt(p.stack[n-3])
+ }
p.stack[n-2] = re1
p.stack[n-1] = re2
return true
@@ -729,6 +1212,7 @@ Switch:
if r > unicode.MaxRune {
break Switch
}
+ nhex++
}
if nhex == 0 {
break Switch
@@ -801,12 +1285,7 @@ func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string
if g.sign == 0 {
return
}
- if g.sign < 0 {
- r = appendNegatedClass(r, g.class)
- } else {
- r = appendClass(r, g.class)
- }
- return r, s[2:]
+ return p.appendGroup(r, g), s[2:]
}
// parseNamedClass parses a leading POSIX named character class like [:alnum:]
@@ -827,23 +1306,40 @@ func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err
if g.sign == 0 {
return nil, "", &Error{ErrInvalidCharRange, name}
}
- if g.sign < 0 {
- r = appendNegatedClass(r, g.class)
+ return p.appendGroup(r, g), s, nil
+}
+
+func (p *parser) appendGroup(r []int, g charGroup) []int {
+ if p.flags&FoldCase == 0 {
+ if g.sign < 0 {
+ r = appendNegatedClass(r, g.class)
+ } else {
+ r = appendClass(r, g.class)
+ }
} else {
- r = appendClass(r, g.class)
+ tmp := p.tmpClass[:0]
+ tmp = appendFoldedClass(tmp, g.class)
+ p.tmpClass = tmp
+ tmp = cleanClass(&p.tmpClass)
+ if g.sign < 0 {
+ r = appendNegatedClass(r, tmp)
+ } else {
+ r = appendClass(r, tmp)
+ }
}
- return r, s, nil
+ return r
}
-// unicodeTable returns the unicode.RangeTable identified by name.
-func unicodeTable(name string) *unicode.RangeTable {
+// unicodeTable returns the unicode.RangeTable identified by name
+// and the table of additional fold-equivalent code points.
+func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {
if t := unicode.Categories[name]; t != nil {
- return t
+ return t, unicode.FoldCategory[name]
}
if t := unicode.Scripts[name]; t != nil {
- return t
+ return t, unicode.FoldScript[name]
}
- return nil
+ return nil, nil
}
// parseUnicodeClass parses a leading Unicode character class like \p{Han}
@@ -891,14 +1387,31 @@ func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, e
name = name[1:]
}
- tab := unicodeTable(name)
+ tab, fold := unicodeTable(name)
if tab == nil {
return nil, "", &Error{ErrInvalidCharRange, seq}
}
- if sign > 0 {
- r = appendTable(r, tab)
+
+ if p.flags&FoldCase == 0 || fold == nil {
+ if sign > 0 {
+ r = appendTable(r, tab)
+ } else {
+ r = appendNegatedTable(r, tab)
+ }
} else {
- r = appendNegatedTable(r, tab)
+ // Merge and clean tab and fold in a temporary buffer.
+ // This is necessary for the negative case and just tidy
+ // for the positive case.
+ tmp := p.tmpClass[:0]
+ tmp = appendTable(tmp, tab)
+ tmp = appendTable(tmp, fold)
+ p.tmpClass = tmp
+ tmp = cleanClass(&p.tmpClass)
+ if sign > 0 {
+ r = appendClass(r, tmp)
+ } else {
+ r = appendNegatedClass(r, tmp)
+ }
}
return r, t, nil
}
@@ -907,7 +1420,8 @@ func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, e
// and pushes it onto the parse stack.
func (p *parser) parseClass(s string) (rest string, err os.Error) {
t := s[1:] // chop [
- re := &Regexp{Op: OpCharClass, Flags: p.flags}
+ re := p.newRegexp(OpCharClass)
+ re.Flags = p.flags
re.Rune = re.Rune0[:0]
sign := +1
@@ -979,12 +1493,14 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {
return "", &Error{Code: ErrInvalidCharRange, Expr: rng}
}
}
- class = appendRange(class, lo, hi)
+ if p.flags&FoldCase == 0 {
+ class = appendRange(class, lo, hi)
+ } else {
+ class = appendFoldedRange(class, lo, hi)
+ }
}
t = t[1:] // chop ]
- // TODO: Handle FoldCase flag.
-
// Use &re.Rune instead of &class to avoid allocation.
re.Rune = class
class = cleanClass(&re.Rune)
@@ -999,10 +1515,15 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {
// cleanClass sorts the ranges (pairs of elements of r),
// merges them, and eliminates duplicates.
func cleanClass(rp *[]int) []int {
+
// Sort by lo increasing, hi decreasing to break ties.
sort.Sort(ranges{rp})
r := *rp
+ if len(r) < 2 {
+ return r
+ }
+
// Merge abutting, overlapping.
w := 2 // write index
for i := 2; i < len(r); i += 2 {
@@ -1025,23 +1546,71 @@ func cleanClass(rp *[]int) []int {
// appendRange returns the result of appending the range lo-hi to the class r.
func appendRange(r []int, lo, hi int) []int {
- // Expand last range if overlaps or abuts.
- if n := len(r); n > 0 {
- rlo, rhi := r[n-2], r[n-1]
- if lo <= rhi+1 && rlo <= hi+1 {
- if lo < rlo {
- r[n-2] = lo
- }
- if hi > rhi {
- r[n-1] = hi
+ // Expand last range or next to last range if it overlaps or abuts.
+ // Checking two ranges helps when appending case-folded
+ // alphabets, so that one range can be expanding A-Z and the
+ // other expanding a-z.
+ n := len(r)
+ for i := 2; i <= 4; i += 2 { // twice, using i=2, i=4
+ if n >= i {
+ rlo, rhi := r[n-i], r[n-i+1]
+ if lo <= rhi+1 && rlo <= hi+1 {
+ if lo < rlo {
+ r[n-i] = lo
+ }
+ if hi > rhi {
+ r[n-i+1] = hi
+ }
+ return r
}
- return r
}
}
return append(r, lo, hi)
}
+const (
+ // minimum and maximum runes involved in folding.
+ // checked during test.
+ minFold = 0x0041
+ maxFold = 0x1044f
+)
+
+// appendFoldedRange returns the result of appending the range lo-hi
+// and its case folding-equivalent runes to the class r.
+func appendFoldedRange(r []int, lo, hi int) []int {
+ // Optimizations.
+ if lo <= minFold && hi >= maxFold {
+ // Range is full: folding can't add more.
+ return appendRange(r, lo, hi)
+ }
+ if hi < minFold || lo > maxFold {
+ // Range is outside folding possibilities.
+ return appendRange(r, lo, hi)
+ }
+ if lo < minFold {
+ // [lo, minFold-1] needs no folding.
+ r = appendRange(r, lo, minFold-1)
+ lo = minFold
+ }
+ if hi > maxFold {
+ // [maxFold+1, hi] needs no folding.
+ r = appendRange(r, maxFold+1, hi)
+ hi = maxFold
+ }
+
+ // Brute force. Depend on appendRange to coalesce ranges on the fly.
+ for c := lo; c <= hi; c++ {
+ r = appendRange(r, c, c)
+ f := unicode.SimpleFold(c)
+ for f != c {
+ r = appendRange(r, f, f)
+ f = unicode.SimpleFold(f)
+ }
+ }
+ return r
+}
+
// appendClass returns the result of appending the class x to the class r.
// It assume x is clean.
func appendClass(r []int, x []int) []int {
@@ -1051,6 +1620,14 @@ func appendClass(r []int, x []int) []int {
return r
}
+// appendFolded returns the result of appending the case folding of the class x to the class r.
+func appendFoldedClass(r []int, x []int) []int {
+ for i := 0; i < len(x); i += 2 {
+ r = appendFoldedRange(r, x[i], x[i+1])
+ }
+ return r
+}
+
// appendNegatedClass returns the result of appending the negation of the class x to the class r.
// It assumes x is clean.
func appendNegatedClass(r []int, x []int) []int {
@@ -1148,10 +1725,11 @@ func negateClass(r []int) []int {
}
nextLo = hi + 1
}
+ r = r[:w]
if nextLo <= unicode.MaxRune {
// It's possible for the negation to have one more
// range - this one - than the original class, so use append.
- r = append(r[:w], nextLo, unicode.MaxRune)
+ r = append(r, nextLo, unicode.MaxRune)
}
return r
}
diff --git a/src/pkg/exp/regexp/syntax/parse_test.go b/src/pkg/exp/regexp/syntax/parse_test.go
index b52cab1a1..779b9afde 100644
--- a/src/pkg/exp/regexp/syntax/parse_test.go
+++ b/src/pkg/exp/regexp/syntax/parse_test.go
@@ -16,141 +16,152 @@ var parseTests = []struct {
Dump string
}{
// Base cases
- {"a", "lit{a}"},
- {"a.", "cat{lit{a}dot{}}"},
- {"a.b", "cat{lit{a}dot{}lit{b}}"},
- // { "ab", "str{ab}" },
- {"ab", "cat{lit{a}lit{b}}"},
- {"a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}"},
- // { "abc", "str{abc}" },
- {"abc", "cat{lit{a}lit{b}lit{c}}"},
- {"a|^", "alt{lit{a}bol{}}"},
- // { "a|b", "cc{0x61-0x62}" },
- {"a|b", "alt{lit{a}lit{b}}"},
- {"(a)", "cap{lit{a}}"},
- {"(a)|b", "alt{cap{lit{a}}lit{b}}"},
- {"a*", "star{lit{a}}"},
- {"a+", "plus{lit{a}}"},
- {"a?", "que{lit{a}}"},
- {"a{2}", "rep{2,2 lit{a}}"},
- {"a{2,3}", "rep{2,3 lit{a}}"},
- {"a{2,}", "rep{2,-1 lit{a}}"},
- {"a*?", "nstar{lit{a}}"},
- {"a+?", "nplus{lit{a}}"},
- {"a??", "nque{lit{a}}"},
- {"a{2}?", "nrep{2,2 lit{a}}"},
- {"a{2,3}?", "nrep{2,3 lit{a}}"},
- {"a{2,}?", "nrep{2,-1 lit{a}}"},
- {"", "emp{}"},
- // { "|", "emp{}" }, // alt{emp{}emp{}} but got factored
- {"|", "alt{emp{}emp{}}"},
- {"|x|", "alt{emp{}lit{x}emp{}}"},
- {".", "dot{}"},
- {"^", "bol{}"},
- {"$", "eol{}"},
- {"\\|", "lit{|}"},
- {"\\(", "lit{(}"},
- {"\\)", "lit{)}"},
- {"\\*", "lit{*}"},
- {"\\+", "lit{+}"},
- {"\\?", "lit{?}"},
- {"{", "lit{{}"},
- {"}", "lit{}}"},
- {"\\.", "lit{.}"},
- {"\\^", "lit{^}"},
- {"\\$", "lit{$}"},
- {"\\\\", "lit{\\}"},
- {"[ace]", "cc{0x61 0x63 0x65}"},
- {"[abc]", "cc{0x61-0x63}"},
- {"[a-z]", "cc{0x61-0x7a}"},
- // { "[a]", "lit{a}" },
- {"[a]", "cc{0x61}"},
- {"\\-", "lit{-}"},
- {"-", "lit{-}"},
- {"\\_", "lit{_}"},
+ {`a`, `lit{a}`},
+ {`a.`, `cat{lit{a}dot{}}`},
+ {`a.b`, `cat{lit{a}dot{}lit{b}}`},
+ {`ab`, `str{ab}`},
+ {`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`},
+ {`abc`, `str{abc}`},
+ {`a|^`, `alt{lit{a}bol{}}`},
+ {`a|b`, `cc{0x61-0x62}`},
+ {`(a)`, `cap{lit{a}}`},
+ {`(a)|b`, `alt{cap{lit{a}}lit{b}}`},
+ {`a*`, `star{lit{a}}`},
+ {`a+`, `plus{lit{a}}`},
+ {`a?`, `que{lit{a}}`},
+ {`a{2}`, `rep{2,2 lit{a}}`},
+ {`a{2,3}`, `rep{2,3 lit{a}}`},
+ {`a{2,}`, `rep{2,-1 lit{a}}`},
+ {`a*?`, `nstar{lit{a}}`},
+ {`a+?`, `nplus{lit{a}}`},
+ {`a??`, `nque{lit{a}}`},
+ {`a{2}?`, `nrep{2,2 lit{a}}`},
+ {`a{2,3}?`, `nrep{2,3 lit{a}}`},
+ {`a{2,}?`, `nrep{2,-1 lit{a}}`},
+ {``, `emp{}`},
+ {`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
+ {`|x|`, `alt{emp{}lit{x}emp{}}`},
+ {`.`, `dot{}`},
+ {`^`, `bol{}`},
+ {`$`, `eol{}`},
+ {`\|`, `lit{|}`},
+ {`\(`, `lit{(}`},
+ {`\)`, `lit{)}`},
+ {`\*`, `lit{*}`},
+ {`\+`, `lit{+}`},
+ {`\?`, `lit{?}`},
+ {`{`, `lit{{}`},
+ {`}`, `lit{}}`},
+ {`\.`, `lit{.}`},
+ {`\^`, `lit{^}`},
+ {`\$`, `lit{$}`},
+ {`\\`, `lit{\}`},
+ {`[ace]`, `cc{0x61 0x63 0x65}`},
+ {`[abc]`, `cc{0x61-0x63}`},
+ {`[a-z]`, `cc{0x61-0x7a}`},
+ {`[a]`, `lit{a}`},
+ {`\-`, `lit{-}`},
+ {`-`, `lit{-}`},
+ {`\_`, `lit{_}`},
+ {`abc`, `str{abc}`},
+ {`abc|def`, `alt{str{abc}str{def}}`},
+ {`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},
// Posix and Perl extensions
- {"[[:lower:]]", "cc{0x61-0x7a}"},
- {"[a-z]", "cc{0x61-0x7a}"},
- {"[^[:lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"},
- {"[[:^lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"},
- // { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
- // { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
- // { "(?i)[^[:lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
- // { "(?i)[[:^lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
- {"\\d", "cc{0x30-0x39}"},
- {"\\D", "cc{0x0-0x2f 0x3a-0x10ffff}"},
- {"\\s", "cc{0x9-0xa 0xc-0xd 0x20}"},
- {"\\S", "cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}"},
- {"\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}"},
- {"\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}"},
- // { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
- // { "(?i)\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
- {"[^\\\\]", "cc{0x0-0x5b 0x5d-0x10ffff}"},
- // { "\\C", "byte{}" },
+ {`[[:lower:]]`, `cc{0x61-0x7a}`},
+ {`[a-z]`, `cc{0x61-0x7a}`},
+ {`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
+ {`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
+ {`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
+ {`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
+ {`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
+ {`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
+ {`\d`, `cc{0x30-0x39}`},
+ {`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`},
+ {`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`},
+ {`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`},
+ {`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`},
+ {`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`},
+ {`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`},
+ {`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
+ {`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`},
+ // { `\C`, `byte{}` }, // probably never
// Unicode, negatives, and a double negative.
- {"\\p{Braille}", "cc{0x2800-0x28ff}"},
- {"\\P{Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
- {"\\p{^Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
- {"\\P{^Braille}", "cc{0x2800-0x28ff}"},
- {"\\pZ", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"},
- {"[\\p{Braille}]", "cc{0x2800-0x28ff}"},
- {"[\\P{Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
- {"[\\p{^Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"},
- {"[\\P{^Braille}]", "cc{0x2800-0x28ff}"},
- {"[\\pZ]", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"},
+ {`\p{Braille}`, `cc{0x2800-0x28ff}`},
+ {`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`\P{^Braille}`, `cc{0x2800-0x28ff}`},
+ {`\pZ`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
+ {`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
+ {`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
+ {`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
+ {`[\pZ]`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
+ {`\p{Lu}`, mkCharClass(unicode.IsUpper)},
+ {`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
+ {`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
+
+ // Hex, octal.
+ {`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
+ {`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},
// More interesting regular expressions.
- // { "a{,2}", "str{a{,2}}" },
- // { "\\.\\^\\$\\\\", "str{.^$\\}" },
- {"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"},
- {"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"},
- {"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8
- {"a*{", "cat{star{lit{a}}lit{{}}"},
+ {`a{,2}`, `str{a{,2}}`},
+ {`\.\^\$\\`, `str{.^$\}`},
+ {`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`},
+ {`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
+ {`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8
+ {`a*{`, `cat{star{lit{a}}lit{{}}`},
// Test precedences
- // { "(?:ab)*", "star{str{ab}}" },
- // { "(ab)*", "star{cap{str{ab}}}" },
- // { "ab|cd", "alt{str{ab}str{cd}}" },
- // { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
- {"(?:ab)*", "star{cat{lit{a}lit{b}}}"},
- {"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"},
- {"ab|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"},
- {"a(b|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"},
+ {`(?:ab)*`, `star{str{ab}}`},
+ {`(ab)*`, `star{cap{str{ab}}}`},
+ {`ab|cd`, `alt{str{ab}str{cd}}`},
+ {`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},
// Test flattening.
- {"(?:a)", "lit{a}"},
- // { "(?:ab)(?:cd)", "str{abcd}" },
- // { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
- // { "a|.", "dot{}" },
- // { ".|a", "dot{}" },
+ {`(?:a)`, `lit{a}`},
+ {`(?:ab)(?:cd)`, `str{abcd}`},
+ {`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
+ {`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
+ {`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`},
+ {`a|.`, `dot{}`},
+ {`.|a`, `dot{}`},
+ {`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`},
+ {`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},
// Test Perl quoted literals
- {"\\Q+|*?{[\\E", "str{+|*?{[}"},
- {"\\Q+\\E+", "plus{lit{+}}"},
- {"\\Q\\\\E", "lit{\\}"},
- {"\\Q\\\\\\E", "str{\\\\}"},
+ {`\Q+|*?{[\E`, `str{+|*?{[}`},
+ {`\Q+\E+`, `plus{lit{+}}`},
+ {`\Q\\E`, `lit{\}`},
+ {`\Q\\\E`, `str{\\}`},
// Test Perl \A and \z
- {"(?m)^", "bol{}"},
- {"(?m)$", "eol{}"},
- {"(?-m)^", "bot{}"},
- {"(?-m)$", "eot{}"},
- {"(?m)\\A", "bot{}"},
- {"(?m)\\z", "eot{\\z}"},
- {"(?-m)\\A", "bot{}"},
- {"(?-m)\\z", "eot{\\z}"},
+ {`(?m)^`, `bol{}`},
+ {`(?m)$`, `eol{}`},
+ {`(?-m)^`, `bot{}`},
+ {`(?-m)$`, `eot{}`},
+ {`(?m)\A`, `bot{}`},
+ {`(?m)\z`, `eot{\z}`},
+ {`(?-m)\A`, `bot{}`},
+ {`(?-m)\z`, `eot{\z}`},
// Test named captures
- {"(?P<name>a)", "cap{name:lit{a}}"},
+ {`(?P<name>a)`, `cap{name:lit{a}}`},
// Case-folded literals
- // { "[Aa]", "litfold{a}" },
+ {`[Aa]`, `litfold{A}`},
+ {`[\x{100}\x{101}]`, `litfold{Ā}`},
+ {`[Δδ]`, `litfold{Δ}`},
// Strings
- // { "abcde", "str{abcde}" },
- // { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
+ {`abcde`, `str{abcde}`},
+ {`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`},
+
+ // Factoring.
+ {`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`},
+ {`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`},
}
const testFlags = MatchNL | PerlX | UnicodeGroups
@@ -223,8 +234,9 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) {
}
if re.Flags&FoldCase != 0 {
for _, r := range re.Rune {
- if unicode.ToUpper(r) != r {
+ if unicode.SimpleFold(r) != r {
b.WriteString("fold")
+ break
}
}
}
@@ -270,3 +282,69 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) {
}
b.WriteByte('}')
}
+
+func mkCharClass(f func(int) bool) string {
+ re := &Regexp{Op: OpCharClass}
+ lo := -1
+ for i := 0; i <= unicode.MaxRune; i++ {
+ if f(i) {
+ if lo < 0 {
+ lo = i
+ }
+ } else {
+ if lo >= 0 {
+ re.Rune = append(re.Rune, lo, i-1)
+ lo = -1
+ }
+ }
+ }
+ if lo >= 0 {
+ re.Rune = append(re.Rune, lo, unicode.MaxRune)
+ }
+ return dump(re)
+}
+
+func isUpperFold(rune int) bool {
+ if unicode.IsUpper(rune) {
+ return true
+ }
+ c := unicode.SimpleFold(rune)
+ for c != rune {
+ if unicode.IsUpper(c) {
+ return true
+ }
+ c = unicode.SimpleFold(c)
+ }
+ return false
+}
+
+func TestFoldConstants(t *testing.T) {
+ last := -1
+ for i := 0; i <= unicode.MaxRune; i++ {
+ if unicode.SimpleFold(i) == i {
+ continue
+ }
+ if last == -1 && minFold != i {
+ t.Errorf("minFold=%#U should be %#U", minFold, i)
+ }
+ last = i
+ }
+ if maxFold != last {
+ t.Errorf("maxFold=%#U should be %#U", maxFold, last)
+ }
+}
+
+func TestAppendRangeCollapse(t *testing.T) {
+ // AppendRange should collapse each of the new ranges
+ // into the earlier ones (it looks back two ranges), so that
+ // the slice never grows very large.
+ // Note that we are not calling cleanClass.
+ var r []int
+ for i := 'A'; i <= 'Z'; i++ {
+ r = appendRange(r, i, i)
+ r = appendRange(r, i+'a'-'A', i+'a'-'A')
+ }
+ if string(r) != "AZaz" {
+ t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
+ }
+}
diff --git a/src/pkg/exp/regexp/syntax/prog.go b/src/pkg/exp/regexp/syntax/prog.go
new file mode 100644
index 000000000..6eeb3da0c
--- /dev/null
+++ b/src/pkg/exp/regexp/syntax/prog.go
@@ -0,0 +1,182 @@
+package syntax
+
+import (
+ "bytes"
+ "strconv"
+)
+
+// Compiled program.
+// May not belong in this package, but convenient for now.
+
+// A Prog is a compiled regular expression program.
+type Prog struct {
+ Inst []Inst
+ Start int // index of start instruction
+}
+
+// An InstOp is an instruction opcode.
+type InstOp uint8
+
+const (
+ InstAlt InstOp = iota
+ InstAltMatch
+ InstCapture
+ InstEmptyWidth
+ InstMatch
+ InstFail
+ InstNop
+ InstRune
+)
+
+// An EmptyOp specifies a kind or mixture of zero-width assertions.
+type EmptyOp uint8
+
+const (
+ EmptyBeginLine EmptyOp = 1 << iota
+ EmptyEndLine
+ EmptyBeginText
+ EmptyEndText
+ EmptyWordBoundary
+ EmptyNoWordBoundary
+)
+
+// An Inst is a single instruction in a regular expression program.
+type Inst struct {
+ Op InstOp
+ Out uint32 // all but InstMatch, InstFail
+ Arg uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth
+ Rune []int
+}
+
+func (p *Prog) String() string {
+ var b bytes.Buffer
+ dumpProg(&b, p)
+ return b.String()
+}
+
+// MatchRune returns true if the instruction matches (and consumes) r.
+// It should only be called when i.Op == InstRune.
+func (i *Inst) MatchRune(r int) bool {
+ rune := i.Rune
+
+ // Special case: single-rune slice is from literal string, not char class.
+ // TODO: Case folding.
+ if len(rune) == 1 {
+ return r == rune[0]
+ }
+
+ // Peek at the first few pairs.
+ // Should handle ASCII well.
+ for j := 0; j < len(rune) && j <= 8; j += 2 {
+ if r < rune[j] {
+ return false
+ }
+ if r <= rune[j+1] {
+ return true
+ }
+ }
+
+ // Otherwise binary search.
+ lo := 0
+ hi := len(rune) / 2
+ for lo < hi {
+ m := lo + (hi-lo)/2
+ if c := rune[2*m]; c <= r {
+ if r <= rune[2*m+1] {
+ return true
+ }
+ lo = m + 1
+ } else {
+ hi = m
+ }
+ }
+ return false
+}
+
+// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char.
+// Since we act on runes, it would be easy to support Unicode here.
+func wordRune(rune int) bool {
+ return rune == '_' ||
+ ('A' <= rune && rune <= 'Z') ||
+ ('a' <= rune && rune <= 'z') ||
+ ('0' <= rune && rune <= '9')
+}
+
+// MatchEmptyWidth returns true if the instruction matches
+// an empty string between the runes before and after.
+// It should only be called when i.Op == InstEmptyWidth.
+func (i *Inst) MatchEmptyWidth(before int, after int) bool {
+ switch EmptyOp(i.Arg) {
+ case EmptyBeginLine:
+ return before == '\n' || before == -1
+ case EmptyEndLine:
+ return after == '\n' || after == -1
+ case EmptyBeginText:
+ return before == -1
+ case EmptyEndText:
+ return after == -1
+ case EmptyWordBoundary:
+ return wordRune(before) != wordRune(after)
+ case EmptyNoWordBoundary:
+ return wordRune(before) == wordRune(after)
+ }
+ panic("unknown empty width arg")
+}
+
+
+func (i *Inst) String() string {
+ var b bytes.Buffer
+ dumpInst(&b, i)
+ return b.String()
+}
+
+func bw(b *bytes.Buffer, args ...string) {
+ for _, s := range args {
+ b.WriteString(s)
+ }
+}
+
+func dumpProg(b *bytes.Buffer, p *Prog) {
+ for j := range p.Inst {
+ i := &p.Inst[j]
+ pc := strconv.Itoa(j)
+ if len(pc) < 3 {
+ b.WriteString(" "[len(pc):])
+ }
+ if j == p.Start {
+ pc += "*"
+ }
+ bw(b, pc, "\t")
+ dumpInst(b, i)
+ bw(b, "\n")
+ }
+}
+
+func u32(i uint32) string {
+ return strconv.Uitoa64(uint64(i))
+}
+
+func dumpInst(b *bytes.Buffer, i *Inst) {
+ switch i.Op {
+ case InstAlt:
+ bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg))
+ case InstAltMatch:
+ bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg))
+ case InstCapture:
+ bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out))
+ case InstEmptyWidth:
+ bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out))
+ case InstMatch:
+ bw(b, "match")
+ case InstFail:
+ bw(b, "fail")
+ case InstNop:
+ bw(b, "nop -> ", u32(i.Out))
+ case InstRune:
+ if i.Rune == nil {
+ // shouldn't happen
+ bw(b, "rune <nil>")
+ }
+ bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out))
+ }
+}
diff --git a/src/pkg/exp/regexp/syntax/prog_test.go b/src/pkg/exp/regexp/syntax/prog_test.go
new file mode 100644
index 000000000..7be4281c2
--- /dev/null
+++ b/src/pkg/exp/regexp/syntax/prog_test.go
@@ -0,0 +1,91 @@
+package syntax
+
+import (
+ "testing"
+)
+
+var compileTests = []struct {
+ Regexp string
+ Prog string
+}{
+ {"a", ` 0 fail
+ 1* rune "a" -> 2
+ 2 match
+`},
+ {"[A-M][n-z]", ` 0 fail
+ 1* rune "AM" -> 2
+ 2 rune "nz" -> 3
+ 3 match
+`},
+ {"", ` 0 fail
+ 1* nop -> 2
+ 2 match
+`},
+ {"a?", ` 0 fail
+ 1 rune "a" -> 3
+ 2* alt -> 1, 3
+ 3 match
+`},
+ {"a??", ` 0 fail
+ 1 rune "a" -> 3
+ 2* alt -> 3, 1
+ 3 match
+`},
+ {"a+", ` 0 fail
+ 1* rune "a" -> 2
+ 2 alt -> 1, 3
+ 3 match
+`},
+ {"a+?", ` 0 fail
+ 1* rune "a" -> 2
+ 2 alt -> 3, 1
+ 3 match
+`},
+ {"a*", ` 0 fail
+ 1 rune "a" -> 2
+ 2* alt -> 1, 3
+ 3 match
+`},
+ {"a*?", ` 0 fail
+ 1 rune "a" -> 2
+ 2* alt -> 3, 1
+ 3 match
+`},
+ {"a+b+", ` 0 fail
+ 1* rune "a" -> 2
+ 2 alt -> 1, 3
+ 3 rune "b" -> 4
+ 4 alt -> 3, 5
+ 5 match
+`},
+ {"(a+)(b+)", ` 0 fail
+ 1* cap 2 -> 2
+ 2 rune "a" -> 3
+ 3 alt -> 2, 4
+ 4 cap 3 -> 5
+ 5 cap 4 -> 6
+ 6 rune "b" -> 7
+ 7 alt -> 6, 8
+ 8 cap 5 -> 9
+ 9 match
+`},
+ {"a+|b+", ` 0 fail
+ 1 rune "a" -> 2
+ 2 alt -> 1, 6
+ 3 rune "b" -> 4
+ 4 alt -> 3, 6
+ 5* alt -> 1, 3
+ 6 match
+`},
+}
+
+func TestCompile(t *testing.T) {
+ for _, tt := range compileTests {
+ re, _ := Parse(tt.Regexp, Perl)
+ p, _ := Compile(re)
+ s := p.String()
+ if s != tt.Prog {
+ t.Errorf("compiled %#q:\n--- have\n%s---\n--- want\n%s---", tt.Regexp, s, tt.Prog)
+ }
+ }
+}
diff --git a/src/pkg/exp/regexp/syntax/regexp.go b/src/pkg/exp/regexp/syntax/regexp.go
index a0c465967..00a4addef 100644
--- a/src/pkg/exp/regexp/syntax/regexp.go
+++ b/src/pkg/exp/regexp/syntax/regexp.go
@@ -33,6 +33,8 @@ type Regexp struct {
type Op uint8
// Operators are listed in precedence order, tightest binding to weakest.
+// Character class operators are listed simplest to most complex
+// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
const (
OpNoMatch Op = 1 + iota // matches no strings
@@ -58,6 +60,59 @@ const (
const opPseudo Op = 128 // where pseudo-ops start
+// Equal returns true if x and y have identical structure.
+func (x *Regexp) Equal(y *Regexp) bool {
+ if x == nil || y == nil {
+ return x == y
+ }
+ if x.Op != y.Op {
+ return false
+ }
+ switch x.Op {
+ case OpEndText:
+ // The parse flags remember whether this is \z or \Z.
+ if x.Flags&WasDollar != y.Flags&WasDollar {
+ return false
+ }
+
+ case OpLiteral, OpCharClass:
+ if len(x.Rune) != len(y.Rune) {
+ return false
+ }
+ for i, r := range x.Rune {
+ if r != y.Rune[i] {
+ return false
+ }
+ }
+
+ case OpAlternate, OpConcat:
+ if len(x.Sub) != len(y.Sub) {
+ return false
+ }
+ for i, sub := range x.Sub {
+ if !sub.Equal(y.Sub[i]) {
+ return false
+ }
+ }
+
+ case OpStar, OpPlus, OpQuest:
+ if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
+ return false
+ }
+
+ case OpRepeat:
+ if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
+ return false
+ }
+
+ case OpCapture:
+ if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
+ return false
+ }
+ }
+ return true
+}
+
// writeRegexp writes the Perl syntax for the regular expression re to b.
func writeRegexp(b *bytes.Buffer, re *Regexp) {
switch re.Op {
@@ -68,16 +123,24 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
case OpEmptyMatch:
b.WriteString(`(?:)`)
case OpLiteral:
+ if re.Flags&FoldCase != 0 {
+ b.WriteString(`(?i:`)
+ }
for _, r := range re.Rune {
escape(b, r, false)
}
+ if re.Flags&FoldCase != 0 {
+ b.WriteString(`)`)
+ }
case OpCharClass:
if len(re.Rune)%2 != 0 {
b.WriteString(`[invalid char class]`)
break
}
b.WriteRune('[')
- if len(re.Rune) > 0 && re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
+ if len(re.Rune) == 0 {
+ b.WriteString(`^\x00-\x{10FFFF}`)
+ } else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {
// Contains 0 and MaxRune. Probably a negated class.
// Print the gaps.
b.WriteRune('^')
@@ -124,7 +187,9 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {
} else {
b.WriteRune('(')
}
- writeRegexp(b, re.Sub[0])
+ if re.Sub[0].Op != OpEmptyMatch {
+ writeRegexp(b, re.Sub[0])
+ }
b.WriteRune(')')
case OpStar, OpPlus, OpQuest, OpRepeat:
if sub := re.Sub[0]; sub.Op > OpCapture {
@@ -203,6 +268,15 @@ func escape(b *bytes.Buffer, r int, force bool) {
case '\v':
b.WriteString(`\v`)
default:
+ if r < 0x100 {
+ b.WriteString(`\x`)
+ s := strconv.Itob(r, 16)
+ if len(s) == 1 {
+ b.WriteRune('0')
+ }
+ b.WriteString(s)
+ break
+ }
b.WriteString(`\x{`)
b.WriteString(strconv.Itob(r, 16))
b.WriteString(`}`)
diff --git a/src/pkg/exp/regexp/syntax/simplify.go b/src/pkg/exp/regexp/syntax/simplify.go
new file mode 100644
index 000000000..72390417b
--- /dev/null
+++ b/src/pkg/exp/regexp/syntax/simplify.go
@@ -0,0 +1,151 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Simplify returns a regexp equivalent to re but without counted repetitions
+// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/.
+// The resulting regexp will execute correctly but its string representation
+// will not produce the same parse tree, because capturing parentheses
+// may have been duplicated or removed. For example, the simplified form
+// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1.
+// The returned regexp may share structure with or be the original.
+func (re *Regexp) Simplify() *Regexp {
+ if re == nil {
+ return nil
+ }
+ switch re.Op {
+ case OpCapture, OpConcat, OpAlternate:
+ // Simplify children, building new Regexp if children change.
+ nre := re
+ for i, sub := range re.Sub {
+ nsub := sub.Simplify()
+ if nre == re && nsub != sub {
+ // Start a copy.
+ nre = new(Regexp)
+ *nre = *re
+ nre.Rune = nil
+ nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...)
+ }
+ if nre != re {
+ nre.Sub = append(nre.Sub, nsub)
+ }
+ }
+ return nre
+
+ case OpStar, OpPlus, OpQuest:
+ sub := re.Sub[0].Simplify()
+ return simplify1(re.Op, re.Flags, sub, re)
+
+ case OpRepeat:
+ // Special special case: x{0} matches the empty string
+ // and doesn't even need to consider x.
+ if re.Min == 0 && re.Max == 0 {
+ return &Regexp{Op: OpEmptyMatch}
+ }
+
+ // The fun begins.
+ sub := re.Sub[0].Simplify()
+
+ // x{n,} means at least n matches of x.
+ if re.Max == -1 {
+ // Special case: x{0,} is x*.
+ if re.Min == 0 {
+ return simplify1(OpStar, re.Flags, sub, nil)
+ }
+
+ // Special case: x{1,} is x+.
+ if re.Min == 1 {
+ return simplify1(OpPlus, re.Flags, sub, nil)
+ }
+
+ // General case: x{4,} is xxxx+.
+ nre := &Regexp{Op: OpConcat}
+ nre.Sub = nre.Sub0[:0]
+ for i := 0; i < re.Min-1; i++ {
+ nre.Sub = append(nre.Sub, sub)
+ }
+ nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil))
+ return nre
+ }
+
+ // Special case x{0} handled above.
+
+ // Special case: x{1} is just x.
+ if re.Min == 1 && re.Max == 1 {
+ return sub
+ }
+
+ // General case: x{n,m} means n copies of x and m copies of x?
+ // The machine will do less work if we nest the final m copies,
+ // so that x{2,5} = xx(x(x(x)?)?)?
+
+ // Build leading prefix: xx.
+ var prefix *Regexp
+ if re.Min > 0 {
+ prefix = &Regexp{Op: OpConcat}
+ prefix.Sub = prefix.Sub0[:0]
+ for i := 0; i < re.Min; i++ {
+ prefix.Sub = append(prefix.Sub, sub)
+ }
+ }
+
+ // Build and attach suffix: (x(x(x)?)?)?
+ if re.Max > re.Min {
+ suffix := simplify1(OpQuest, re.Flags, sub, nil)
+ for i := re.Min + 1; i < re.Max; i++ {
+ nre2 := &Regexp{Op: OpConcat}
+ nre2.Sub = append(nre2.Sub0[:0], sub, suffix)
+ suffix = simplify1(OpQuest, re.Flags, nre2, nil)
+ }
+ if prefix == nil {
+ return suffix
+ }
+ prefix.Sub = append(prefix.Sub, suffix)
+ }
+ if prefix != nil {
+ return prefix
+ }
+
+ // Some degenerate case like min > max or min < max < 0.
+ // Handle as impossible match.
+ return &Regexp{Op: OpNoMatch}
+ }
+
+ return re
+}
+
+// simplify1 implements Simplify for the unary OpStar,
+// OpPlus, and OpQuest operators. It returns the simple regexp
+// equivalent to
+//
+// Regexp{Op: op, Flags: flags, Sub: {sub}}
+//
+// under the assumption that sub is already simple, and
+// without first allocating that structure. If the regexp
+// to be returned turns out to be equivalent to re, simplify1
+// returns re instead.
+//
+// simplify1 is factored out of Simplify because the implementation
+// for other operators generates these unary expressions.
+// Letting them call simplify1 makes sure the expressions they
+// generate are simple.
+func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp {
+ // Special case: repeat the empty string as much as
+ // you want, but it's still the empty string.
+ if sub.Op == OpEmptyMatch {
+ return sub
+ }
+ // The operators are idempotent if the flags match.
+ if op == sub.Op && flags&NonGreedy == sub.Flags&NonGreedy {
+ return sub
+ }
+ if re != nil && re.Op == op && re.Flags&NonGreedy == flags&NonGreedy && sub == re.Sub[0] {
+ return re
+ }
+
+ re = &Regexp{Op: op, Flags: flags}
+ re.Sub = append(re.Sub0[:0], sub)
+ return re
+}
diff --git a/src/pkg/exp/regexp/syntax/simplify_test.go b/src/pkg/exp/regexp/syntax/simplify_test.go
new file mode 100644
index 000000000..c8cec2183
--- /dev/null
+++ b/src/pkg/exp/regexp/syntax/simplify_test.go
@@ -0,0 +1,151 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+import "testing"
+
+var simplifyTests = []struct {
+ Regexp string
+ Simple string
+}{
+ // Already-simple constructs
+ {`a`, `a`},
+ {`ab`, `ab`},
+ {`a|b`, `[a-b]`},
+ {`ab|cd`, `ab|cd`},
+ {`(ab)*`, `(ab)*`},
+ {`(ab)+`, `(ab)+`},
+ {`(ab)?`, `(ab)?`},
+ {`.`, `.`},
+ {`^`, `^`},
+ {`$`, `$`},
+ {`[ac]`, `[ac]`},
+ {`[^ac]`, `[^ac]`},
+
+ // Posix character classes
+ {`[[:alnum:]]`, `[0-9A-Za-z]`},
+ {`[[:alpha:]]`, `[A-Za-z]`},
+ {`[[:blank:]]`, `[\t ]`},
+ {`[[:cntrl:]]`, `[\x00-\x1f\x7f]`},
+ {`[[:digit:]]`, `[0-9]`},
+ {`[[:graph:]]`, `[!-~]`},
+ {`[[:lower:]]`, `[a-z]`},
+ {`[[:print:]]`, `[ -~]`},
+ {`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"},
+ {`[[:space:]]`, `[\t-\r ]`},
+ {`[[:upper:]]`, `[A-Z]`},
+ {`[[:xdigit:]]`, `[0-9A-Fa-f]`},
+
+ // Perl character classes
+ {`\d`, `[0-9]`},
+ {`\s`, `[\t-\n\f-\r ]`},
+ {`\w`, `[0-9A-Z_a-z]`},
+ {`\D`, `[^0-9]`},
+ {`\S`, `[^\t-\n\f-\r ]`},
+ {`\W`, `[^0-9A-Z_a-z]`},
+ {`[\d]`, `[0-9]`},
+ {`[\s]`, `[\t-\n\f-\r ]`},
+ {`[\w]`, `[0-9A-Z_a-z]`},
+ {`[\D]`, `[^0-9]`},
+ {`[\S]`, `[^\t-\n\f-\r ]`},
+ {`[\W]`, `[^0-9A-Z_a-z]`},
+
+ // Posix repetitions
+ {`a{1}`, `a`},
+ {`a{2}`, `aa`},
+ {`a{5}`, `aaaaa`},
+ {`a{0,1}`, `a?`},
+ // The next three are illegible because Simplify inserts (?:)
+ // parens instead of () parens to avoid creating extra
+ // captured subexpressions. The comments show a version with fewer parens.
+ {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)?
+ {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)?
+ {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)?
+ {`a{0,2}`, `(?:aa?)?`}, // (aa?)?
+ {`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)?
+ {`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)?
+ {`a{0,}`, `a*`},
+ {`a{1,}`, `a+`},
+ {`a{2,}`, `aa+`},
+ {`a{5,}`, `aaaaa+`},
+
+ // Test that operators simplify their arguments.
+ {`(?:a{1,}){1,}`, `a+`},
+ {`(a{1,}b{1,})`, `(a+b+)`},
+ {`a{1,}|b{1,}`, `a+|b+`},
+ {`(?:a{1,})*`, `(?:a+)*`},
+ {`(?:a{1,})+`, `a+`},
+ {`(?:a{1,})?`, `(?:a+)?`},
+ {``, `(?:)`},
+ {`a{0}`, `(?:)`},
+
+ // Character class simplification
+ {`[ab]`, `[a-b]`},
+ {`[a-za-za-z]`, `[a-z]`},
+ {`[A-Za-zA-Za-z]`, `[A-Za-z]`},
+ {`[ABCDEFGH]`, `[A-H]`},
+ {`[AB-CD-EF-GH]`, `[A-H]`},
+ {`[W-ZP-XE-R]`, `[E-Z]`},
+ {`[a-ee-gg-m]`, `[a-m]`},
+ {`[a-ea-ha-m]`, `[a-m]`},
+ {`[a-ma-ha-e]`, `[a-m]`},
+ {`[a-zA-Z0-9 -~]`, `[ -~]`},
+
+ // Empty character classes
+ {`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`},
+
+ // Full character classes
+ {`[[:cntrl:][:^cntrl:]]`, `.`},
+
+ // Unicode case folding.
+ {`(?i)A`, `(?i:A)`},
+ {`(?i)a`, `(?i:a)`},
+ {`(?i)[A]`, `(?i:A)`},
+ {`(?i)[a]`, `(?i:A)`},
+ {`(?i)K`, `(?i:K)`},
+ {`(?i)k`, `(?i:k)`},
+ {`(?i)\x{212a}`, "(?i:\u212A)"},
+ {`(?i)[K]`, "[Kk\u212A]"},
+ {`(?i)[k]`, "[Kk\u212A]"},
+ {`(?i)[\x{212a}]`, "[Kk\u212A]"},
+ {`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"},
+ {`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"},
+ {`(?i)[\x00-\x{10FFFF}]`, `.`},
+
+ // Empty string as a regular expression.
+ // The empty string must be preserved inside parens in order
+ // to make submatches work right, so these tests are less
+ // interesting than they might otherwise be. String inserts
+ // explicit (?:) in place of non-parenthesized empty strings,
+ // to make them easier to spot for other parsers.
+ {`(a|b|)`, `([a-b]|(?:))`},
+ {`(|)`, `()`},
+ {`a()`, `a()`},
+ {`(()|())`, `(()|())`},
+ {`(a|)`, `(a|(?:))`},
+ {`ab()cd()`, `ab()cd()`},
+ {`()`, `()`},
+ {`()*`, `()*`},
+ {`()+`, `()+`},
+ {`()?`, `()?`},
+ {`(){0}`, `(?:)`},
+ {`(){1}`, `()`},
+ {`(){1,}`, `()+`},
+ {`(){0,2}`, `(?:()()?)?`},
+}
+
+func TestSimplify(t *testing.T) {
+ for _, tt := range simplifyTests {
+ re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine)
+ if err != nil {
+ t.Errorf("Parse(%#q) = error %v", tt.Regexp, err)
+ continue
+ }
+ s := re.Simplify().String()
+ if s != tt.Simple {
+ t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple)
+ }
+ }
+}
diff --git a/src/pkg/exp/template/Makefile b/src/pkg/exp/template/Makefile
index ab9832f61..8550b0d52 100644
--- a/src/pkg/exp/template/Makefile
+++ b/src/pkg/exp/template/Makefile
@@ -4,9 +4,12 @@
include ../../../Make.inc
-TARG=template
+TARG=exp/template
GOFILES=\
+ exec.go\
+ funcs.go\
lex.go\
parse.go\
+ set.go\
include ../../../Make.pkg
diff --git a/src/pkg/exp/template/exec.go b/src/pkg/exp/template/exec.go
new file mode 100644
index 000000000..fb0a9e621
--- /dev/null
+++ b/src/pkg/exp/template/exec.go
@@ -0,0 +1,508 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "reflect"
+ "strings"
+ "unicode"
+ "utf8"
+)
+
+// state represents the state of an execution. It's not part of the
+// template so that multiple executions of the same template
+// can execute in parallel.
+type state struct {
+ tmpl *Template
+ wr io.Writer
+ set *Set
+ line int // line number for errors
+}
+
+// errorf formats the error and terminates processing.
+func (s *state) errorf(format string, args ...interface{}) {
+ format = fmt.Sprintf("template: %s:%d: %s", s.tmpl.name, s.line, format)
+ panic(fmt.Errorf(format, args...))
+}
+
+// error terminates processing.
+func (s *state) error(err os.Error) {
+ s.errorf("%s", err)
+}
+
+// Execute applies a parsed template to the specified data object,
+// writing the output to wr.
+func (t *Template) Execute(wr io.Writer, data interface{}) os.Error {
+ return t.ExecuteInSet(wr, data, nil)
+}
+
+// ExecuteInSet applies a parsed template to the specified data object,
+// writing the output to wr. Nested template invocations will be resolved
+// from the specified set.
+func (t *Template) ExecuteInSet(wr io.Writer, data interface{}, set *Set) (err os.Error) {
+ defer t.recover(&err)
+ state := &state{
+ tmpl: t,
+ wr: wr,
+ set: set,
+ line: 1,
+ }
+ if t.root == nil {
+ state.errorf("must be parsed before execution")
+ }
+ state.walk(reflect.ValueOf(data), t.root)
+ return
+}
+
+// Walk functions step through the major pieces of the template structure,
+// generating output as they go.
+func (s *state) walk(data reflect.Value, n node) {
+ switch n := n.(type) {
+ case *actionNode:
+ s.line = n.line
+ s.printValue(n, s.evalPipeline(data, n.pipeline))
+ case *listNode:
+ for _, node := range n.nodes {
+ s.walk(data, node)
+ }
+ case *ifNode:
+ s.line = n.line
+ s.walkIfOrWith(nodeIf, data, n.pipeline, n.list, n.elseList)
+ case *rangeNode:
+ s.line = n.line
+ s.walkRange(data, n)
+ case *textNode:
+ if _, err := s.wr.Write(n.text); err != nil {
+ s.error(err)
+ }
+ case *templateNode:
+ s.line = n.line
+ s.walkTemplate(data, n)
+ case *withNode:
+ s.line = n.line
+ s.walkIfOrWith(nodeWith, data, n.pipeline, n.list, n.elseList)
+ default:
+ s.errorf("unknown node: %s", n)
+ }
+}
+
+// walkIfOrWith walks an 'if' or 'with' node. The two control structures
+// are identical in behavior except that 'with' sets dot.
+func (s *state) walkIfOrWith(typ nodeType, data reflect.Value, pipe []*commandNode, list, elseList *listNode) {
+ val := s.evalPipeline(data, pipe)
+ truth, ok := isTrue(val)
+ if !ok {
+ s.errorf("if/with can't use value of type %T", val.Interface())
+ }
+ if truth {
+ if typ == nodeWith {
+ data = val
+ }
+ s.walk(data, list)
+ } else if elseList != nil {
+ s.walk(data, elseList)
+ }
+}
+
+// isTrue returns whether the value is 'true', in the sense of not the zero of its type,
+// and whether the value has a meaningful truth value.
+func isTrue(val reflect.Value) (truth, ok bool) {
+ switch val.Kind() {
+ case reflect.Array, reflect.Map, reflect.Slice, reflect.String:
+ truth = val.Len() > 0
+ case reflect.Bool:
+ truth = val.Bool()
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ truth = val.Int() != 0
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ truth = val.Uint() != 0
+ case reflect.Float32, reflect.Float64:
+ truth = val.Float() != 0
+ case reflect.Complex64, reflect.Complex128:
+ truth = val.Complex() != 0
+ case reflect.Chan, reflect.Func, reflect.Ptr:
+ truth = !val.IsNil()
+ default:
+ return
+ }
+ return truth, true
+}
+
+func (s *state) walkRange(data reflect.Value, r *rangeNode) {
+ val := s.evalPipeline(data, r.pipeline)
+ switch val.Kind() {
+ case reflect.Array, reflect.Slice:
+ if val.Len() == 0 {
+ break
+ }
+ for i := 0; i < val.Len(); i++ {
+ s.walk(val.Index(i), r.list)
+ }
+ return
+ case reflect.Map:
+ if val.Len() == 0 {
+ break
+ }
+ for _, key := range val.MapKeys() {
+ s.walk(val.MapIndex(key), r.list)
+ }
+ return
+ default:
+ s.errorf("range can't iterate over value of type %T", val.Interface())
+ }
+ if r.elseList != nil {
+ s.walk(data, r.elseList)
+ }
+}
+
+func (s *state) walkTemplate(data reflect.Value, t *templateNode) {
+ name := s.evalArg(data, reflect.TypeOf("string"), t.name).String()
+ if s.set == nil {
+ s.errorf("no set defined in which to invoke template named %q", name)
+ }
+ tmpl := s.set.tmpl[name]
+ if tmpl == nil {
+ s.errorf("template %q not in set", name)
+ }
+ data = s.evalPipeline(data, t.pipeline)
+ newState := *s
+ newState.tmpl = tmpl
+ newState.walk(data, tmpl.root)
+}
+
+// Eval functions evaluate pipelines, commands, and their elements and extract
+// values from the data structure by examining fields, calling methods, and so on.
+// The printing of those values happens only through walk functions.
+
+func (s *state) evalPipeline(data reflect.Value, pipe []*commandNode) reflect.Value {
+ value := reflect.Value{}
+ for _, cmd := range pipe {
+ value = s.evalCommand(data, cmd, value) // previous value is this one's final arg.
+ // If the object has type interface{}, dig down one level to the thing inside.
+ if value.Kind() == reflect.Interface && value.Type().NumMethod() == 0 {
+ value = reflect.ValueOf(value.Interface()) // lovely!
+ }
+ }
+ return value
+}
+
+func (s *state) evalCommand(data reflect.Value, cmd *commandNode, final reflect.Value) reflect.Value {
+ firstWord := cmd.args[0]
+ switch n := firstWord.(type) {
+ case *fieldNode:
+ return s.evalFieldNode(data, n, cmd.args, final)
+ case *identifierNode:
+ return s.evalFieldOrCall(data, n.ident, cmd.args, final)
+ }
+ if len(cmd.args) > 1 || final.IsValid() {
+ s.errorf("can't give argument to non-function %s", cmd.args[0])
+ }
+ switch word := cmd.args[0].(type) {
+ case *dotNode:
+ return data
+ case *boolNode:
+ return reflect.ValueOf(word.true)
+ case *numberNode:
+ // These are ideal constants but we don't know the type
+ // and we have no context. (If it was a method argument,
+ // we'd know what we need.) The syntax guides us to some extent.
+ switch {
+ case word.isComplex:
+ return reflect.ValueOf(word.complex128) // incontrovertible.
+ case word.isFloat && strings.IndexAny(word.text, ".eE") >= 0:
+ return reflect.ValueOf(word.float64)
+ case word.isInt:
+ return reflect.ValueOf(word.int64)
+ case word.isUint:
+ return reflect.ValueOf(word.uint64)
+ }
+ case *stringNode:
+ return reflect.ValueOf(word.text)
+ }
+ s.errorf("can't handle command %q", firstWord)
+ panic("not reached")
+}
+
+func (s *state) evalFieldNode(data reflect.Value, field *fieldNode, args []node, final reflect.Value) reflect.Value {
+ // Up to the last entry, it must be a field.
+ n := len(field.ident)
+ for i := 0; i < n-1; i++ {
+ data = s.evalField(data, field.ident[i])
+ }
+ // Now it can be a field or method and if a method, gets arguments.
+ return s.evalFieldOrCall(data, field.ident[n-1], args, final)
+}
+
+// Is this an exported - upper case - name?
+func isExported(name string) bool {
+ rune, _ := utf8.DecodeRuneInString(name)
+ return unicode.IsUpper(rune)
+}
+
+func (s *state) evalField(data reflect.Value, fieldName string) reflect.Value {
+ var isNil bool
+ if data, isNil = indirect(data); isNil {
+ s.errorf("%s is nil pointer", fieldName)
+ }
+ switch data.Kind() {
+ case reflect.Struct:
+ // Is it a field?
+ field := data.FieldByName(fieldName)
+ // TODO: look higher up the tree if we can't find it here. Also unexported fields
+ // might succeed higher up, as map keys.
+ if field.IsValid() && isExported(fieldName) { // valid and exported
+ return field
+ }
+ s.errorf("%s has no exported field %q", data.Type(), fieldName)
+ default:
+ s.errorf("can't evaluate field %s of type %s", fieldName, data.Type())
+ }
+ panic("not reached")
+}
+
+func (s *state) evalFieldOrCall(data reflect.Value, fieldName string, args []node, final reflect.Value) reflect.Value {
+ // Is it a function?
+ if function, ok := findFunction(fieldName, s.tmpl, s.set); ok {
+ return s.evalCall(data, function, fieldName, false, args, final)
+ }
+ ptr := data
+ for data.Kind() == reflect.Ptr && !data.IsNil() {
+ ptr, data = data, reflect.Indirect(data)
+ }
+ // Is it a method? We use the pointer because it has value methods too.
+ if method, ok := methodByName(ptr.Type(), fieldName); ok {
+ return s.evalCall(ptr, method.Func, fieldName, true, args, final)
+ }
+ if len(args) > 1 || final.IsValid() {
+ s.errorf("%s is not a method but has arguments", fieldName)
+ }
+ switch data.Kind() {
+ case reflect.Struct:
+ return s.evalField(data, fieldName)
+ default:
+ s.errorf("can't handle evaluation of field %s of type %s", fieldName, data.Type())
+ }
+ panic("not reached")
+}
+
+// TODO: delete when reflect's own MethodByName is released.
+func methodByName(typ reflect.Type, name string) (reflect.Method, bool) {
+ for i := 0; i < typ.NumMethod(); i++ {
+ if typ.Method(i).Name == name {
+ return typ.Method(i), true
+ }
+ }
+ return reflect.Method{}, false
+}
+
+var (
+ osErrorType = reflect.TypeOf(new(os.Error)).Elem()
+)
+
+func (s *state) evalCall(v, fun reflect.Value, name string, isMethod bool, args []node, final reflect.Value) reflect.Value {
+ typ := fun.Type()
+ if !isMethod && len(args) > 0 { // Args will be nil if it's a niladic call in an argument list
+ args = args[1:] // first arg is name of function; not used in call.
+ }
+ numIn := len(args)
+ if final.IsValid() {
+ numIn++
+ }
+ numFixed := len(args)
+ if typ.IsVariadic() {
+ numFixed = typ.NumIn() - 1 // last arg is the variadic one.
+ if numIn < numFixed {
+ s.errorf("wrong number of args for %s: want at least %d got %d", name, typ.NumIn()-1, len(args))
+ }
+ } else if numIn < typ.NumIn()-1 || !typ.IsVariadic() && numIn != typ.NumIn() {
+ s.errorf("wrong number of args for %s: want %d got %d", name, typ.NumIn(), len(args))
+ }
+ if !goodFunc(typ) {
+ s.errorf("can't handle multiple results from method/function %q", name)
+ }
+ // Build the arg list.
+ argv := make([]reflect.Value, numIn)
+ // First arg is the receiver.
+ i := 0
+ if isMethod {
+ argv[0] = v
+ i++
+ }
+ // Others must be evaluated. Fixed args first.
+ for ; i < numFixed; i++ {
+ argv[i] = s.evalArg(v, typ.In(i), args[i])
+ }
+ // And now the ... args.
+ if typ.IsVariadic() {
+ argType := typ.In(typ.NumIn() - 1).Elem() // Argument is a slice.
+ for ; i < len(args); i++ {
+ argv[i] = s.evalArg(v, argType, args[i])
+ }
+ }
+ // Add final value if necessary.
+ if final.IsValid() {
+ argv[len(args)] = final
+ }
+ result := fun.Call(argv)
+ // If we have an os.Error that is not nil, stop execution and return that error to the caller.
+ if len(result) == 2 && !result[1].IsNil() {
+ s.error(result[1].Interface().(os.Error))
+ }
+ return result[0]
+}
+
+func (s *state) evalArg(data reflect.Value, typ reflect.Type, n node) reflect.Value {
+ if field, ok := n.(*fieldNode); ok {
+ value := s.evalFieldNode(data, field, []node{n}, reflect.Value{})
+ if !value.Type().AssignableTo(typ) {
+ s.errorf("wrong type for value; expected %s; got %s", typ, value.Type())
+ }
+ return value
+ }
+ switch typ.Kind() {
+ case reflect.Bool:
+ return s.evalBool(typ, n)
+ case reflect.String:
+ return s.evalString(typ, n)
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ return s.evalInteger(typ, n)
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ return s.evalUnsignedInteger(typ, n)
+ case reflect.Float32, reflect.Float64:
+ return s.evalFloat(typ, n)
+ case reflect.Complex64, reflect.Complex128:
+ return s.evalComplex(typ, n)
+ case reflect.Interface:
+ if typ.NumMethod() == 0 {
+ return s.evalEmptyInterface(data, typ, n)
+ }
+ }
+ s.errorf("can't handle %s for arg of type %s", n, typ)
+ panic("not reached")
+}
+
+func (s *state) evalBool(typ reflect.Type, n node) reflect.Value {
+ if n, ok := n.(*boolNode); ok {
+ value := reflect.New(typ).Elem()
+ value.SetBool(n.true)
+ return value
+ }
+ s.errorf("expected bool; found %s", n)
+ panic("not reached")
+}
+
+func (s *state) evalString(typ reflect.Type, n node) reflect.Value {
+ if n, ok := n.(*stringNode); ok {
+ value := reflect.New(typ).Elem()
+ value.SetString(n.text)
+ return value
+ }
+ s.errorf("expected string; found %s", n)
+ panic("not reached")
+}
+
+func (s *state) evalInteger(typ reflect.Type, n node) reflect.Value {
+ if n, ok := n.(*numberNode); ok && n.isInt {
+ value := reflect.New(typ).Elem()
+ value.SetInt(n.int64)
+ return value
+ }
+ s.errorf("expected integer; found %s", n)
+ panic("not reached")
+}
+
+func (s *state) evalUnsignedInteger(typ reflect.Type, n node) reflect.Value {
+ if n, ok := n.(*numberNode); ok && n.isUint {
+ value := reflect.New(typ).Elem()
+ value.SetUint(n.uint64)
+ return value
+ }
+ s.errorf("expected unsigned integer; found %s", n)
+ panic("not reached")
+}
+
+func (s *state) evalFloat(typ reflect.Type, n node) reflect.Value {
+ if n, ok := n.(*numberNode); ok && n.isFloat {
+ value := reflect.New(typ).Elem()
+ value.SetFloat(n.float64)
+ return value
+ }
+ s.errorf("expected float; found %s", n)
+ panic("not reached")
+}
+
+func (s *state) evalComplex(typ reflect.Type, n node) reflect.Value {
+ if n, ok := n.(*numberNode); ok && n.isComplex {
+ value := reflect.New(typ).Elem()
+ value.SetComplex(n.complex128)
+ return value
+ }
+ s.errorf("expected complex; found %s", n)
+ panic("not reached")
+}
+
+func (s *state) evalEmptyInterface(data reflect.Value, typ reflect.Type, n node) reflect.Value {
+ switch n := n.(type) {
+ case *boolNode:
+ return reflect.ValueOf(n.true)
+ case *dotNode:
+ return data
+ case *fieldNode:
+ return s.evalFieldNode(data, n, nil, reflect.Value{})
+ case *identifierNode:
+ return s.evalFieldOrCall(data, n.ident, nil, reflect.Value{})
+ case *numberNode:
+ if n.isComplex {
+ return reflect.ValueOf(n.complex128)
+ }
+ if n.isInt {
+ return reflect.ValueOf(n.int64)
+ }
+ if n.isUint {
+ return reflect.ValueOf(n.uint64)
+ }
+ if n.isFloat {
+ return reflect.ValueOf(n.float64)
+ }
+ case *stringNode:
+ return reflect.ValueOf(n.text)
+ }
+ s.errorf("can't handle assignment of %s to empty interface argument", n)
+ panic("not reached")
+}
+
+// indirect returns the item at the end of indirection, and a bool to indicate if it's nil.
+func indirect(v reflect.Value) (rv reflect.Value, isNil bool) {
+ for v.Kind() == reflect.Ptr {
+ if v.IsNil() {
+ return v, true
+ }
+ v = v.Elem()
+ }
+ return v, false
+}
+
+// printValue writes the textual representation of the value to the output of
+// the template.
+func (s *state) printValue(n node, v reflect.Value) {
+ if !v.IsValid() {
+ fmt.Fprint(s.wr, "<no value>")
+ return
+ }
+ switch v.Kind() {
+ case reflect.Ptr:
+ var isNil bool
+ if v, isNil = indirect(v); isNil {
+ fmt.Fprint(s.wr, "<nil>")
+ return
+ }
+ case reflect.Chan, reflect.Func, reflect.Interface:
+ s.errorf("can't print %s of type %s", n, v.Type())
+ }
+ fmt.Fprint(s.wr, v.Interface())
+}
diff --git a/src/pkg/exp/template/exec_test.go b/src/pkg/exp/template/exec_test.go
new file mode 100644
index 000000000..86b958e84
--- /dev/null
+++ b/src/pkg/exp/template/exec_test.go
@@ -0,0 +1,342 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "sort"
+ "strings"
+ "testing"
+)
+
+// T has lots of interesting pieces to use to test execution.
+type T struct {
+ // Basics
+ I int
+ U16 uint16
+ X string
+ FloatZero float64
+ ComplexZero float64
+ // Nested structs.
+ U *U
+ // Slices
+ SI []int
+ SIEmpty []int
+ SB []bool
+ // Maps
+ MSI map[string]int
+ MSIone map[string]int // one element, for deterministic output
+ MSIEmpty map[string]int
+ SMSI []map[string]int
+ // Empty interfaces; used to see if we can dig inside one.
+ Empty0 interface{} // nil
+ Empty1 interface{}
+ Empty2 interface{}
+ Empty3 interface{}
+ Empty4 interface{}
+ // Pointers
+ PI *int
+ PSI *[]int
+ NIL *int
+}
+
+var tVal = &T{
+ I: 17,
+ U16: 16,
+ X: "x",
+ U: &U{"v"},
+ SI: []int{3, 4, 5},
+ SB: []bool{true, false},
+ MSI: map[string]int{"one": 1, "two": 2, "three": 3},
+ MSIone: map[string]int{"one": 1},
+ SMSI: []map[string]int{
+ {"one": 1, "two": 2},
+ {"eleven": 11, "twelve": 12},
+ },
+ Empty1: 3,
+ Empty2: "empty2",
+ Empty3: []int{7, 8},
+ Empty4: &U{"v"},
+ PI: newInt(23),
+ PSI: newIntSlice(21, 22, 23),
+}
+
+// Helpers for creation.
+func newInt(n int) *int {
+ p := new(int)
+ *p = n
+ return p
+}
+
+func newIntSlice(n ...int) *[]int {
+ p := new([]int)
+ *p = make([]int, len(n))
+ copy(*p, n)
+ return p
+}
+
+// Simple methods with and without arguments.
+func (t *T) Method0() string {
+ return "resultOfMethod0"
+}
+
+func (t *T) Method1(a int) int {
+ return a
+}
+
+func (t *T) Method2(a uint16, b string) string {
+ return fmt.Sprintf("Method2: %d %s", a, b)
+}
+
+func (t *T) MAdd(a int, b []int) []int {
+ v := make([]int, len(b))
+ for i, x := range b {
+ v[i] = x + a
+ }
+ return v
+}
+
+// MSort is used to sort map keys for stable output. (Nice trick!)
+func (t *T) MSort(m map[string]int) []string {
+ keys := make([]string, len(m))
+ i := 0
+ for k := range m {
+ keys[i] = k
+ i++
+ }
+ sort.Strings(keys)
+ return keys
+}
+
+// EPERM returns a value and an os.Error according to its argument.
+func (t *T) EPERM(error bool) (bool, os.Error) {
+ if error {
+ return true, os.EPERM
+ }
+ return false, nil
+}
+
+type U struct {
+ V string
+}
+
+type execTest struct {
+ name string
+ input string
+ output string
+ data interface{}
+ ok bool
+}
+
+var execTests = []execTest{
+ // Trivial cases.
+ {"empty", "", "", nil, true},
+ {"text", "some text", "some text", nil, true},
+
+ // Fields of structs.
+ {".X", "-{{.X}}-", "-x-", tVal, true},
+ {".U.V", "-{{.U.V}}-", "-v-", tVal, true},
+
+ // Dots of all kinds to test basic evaluation.
+ {"dot int", "<{{.}}>", "<13>", 13, true},
+ {"dot uint", "<{{.}}>", "<14>", uint(14), true},
+ {"dot float", "<{{.}}>", "<15.1>", 15.1, true},
+ {"dot bool", "<{{.}}>", "<true>", true, true},
+ {"dot complex", "<{{.}}>", "<(16.2-17i)>", 16.2 - 17i, true},
+ {"dot string", "<{{.}}>", "<hello>", "hello", true},
+ {"dot slice", "<{{.}}>", "<[-1 -2 -3]>", []int{-1, -2, -3}, true},
+ {"dot map", "<{{.}}>", "<map[two:22 one:11]>", map[string]int{"one": 11, "two": 22}, true},
+ {"dot struct", "<{{.}}>", "<{7 seven}>", struct {
+ a int
+ b string
+ }{7, "seven"}, true},
+
+ // Pointers.
+ {"*int", "{{.PI}}", "23", tVal, true},
+ {"*[]int", "{{.PSI}}", "[21 22 23]", tVal, true},
+ {"*[]int[1]", "{{index .PSI 1}}", "22", tVal, true},
+ {"NIL", "{{.NIL}}", "<nil>", tVal, true},
+
+ // Emtpy interfaces holding values.
+ {"empty nil", "{{.Empty0}}", "<no value>", tVal, true},
+ {"empty with int", "{{.Empty1}}", "3", tVal, true},
+ {"empty with string", "{{.Empty2}}", "empty2", tVal, true},
+ {"empty with slice", "{{.Empty3}}", "[7 8]", tVal, true},
+ {"empty with struct", "{{.Empty4}}", "{v}", tVal, true},
+
+ // Method calls.
+ {".Method0", "-{{.Method0}}-", "-resultOfMethod0-", tVal, true},
+ {".Method1(1234)", "-{{.Method1 1234}}-", "-1234-", tVal, true},
+ {".Method1(.I)", "-{{.Method1 .I}}-", "-17-", tVal, true},
+ {".Method2(3, .X)", "-{{.Method2 3 .X}}-", "-Method2: 3 x-", tVal, true},
+ {".Method2(.U16, `str`)", "-{{.Method2 .U16 `str`}}-", "-Method2: 16 str-", tVal, true},
+
+ // Pipelines.
+ {"pipeline", "-{{.Method0 | .Method2 .U16}}-", "-Method2: 16 resultOfMethod0-", tVal, true},
+
+ // If.
+ {"if true", "{{if true}}TRUE{{end}}", "TRUE", tVal, true},
+ {"if false", "{{if false}}TRUE{{else}}FALSE{{end}}", "FALSE", tVal, true},
+ {"if 1", "{{if 1}}NON-ZERO{{else}}ZERO{{end}}", "NON-ZERO", tVal, true},
+ {"if 0", "{{if 0}}NON-ZERO{{else}}ZERO{{end}}", "ZERO", tVal, true},
+ {"if 1.5", "{{if 1.5}}NON-ZERO{{else}}ZERO{{end}}", "NON-ZERO", tVal, true},
+ {"if 0.0", "{{if .FloatZero}}NON-ZERO{{else}}ZERO{{end}}", "ZERO", tVal, true},
+ {"if 1.5i", "{{if 1.5i}}NON-ZERO{{else}}ZERO{{end}}", "NON-ZERO", tVal, true},
+ {"if 0.0i", "{{if .ComplexZero}}NON-ZERO{{else}}ZERO{{end}}", "ZERO", tVal, true},
+ {"if emptystring", "{{if ``}}NON-EMPTY{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"if string", "{{if `notempty`}}NON-EMPTY{{else}}EMPTY{{end}}", "NON-EMPTY", tVal, true},
+ {"if emptyslice", "{{if .SIEmpty}}NON-EMPTY{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"if slice", "{{if .SI}}NON-EMPTY{{else}}EMPTY{{end}}", "NON-EMPTY", tVal, true},
+ {"if emptymap", "{{if .MSIEmpty}}NON-EMPTY{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"if map", "{{if .MSI}}NON-EMPTY{{else}}EMPTY{{end}}", "NON-EMPTY", tVal, true},
+
+ // Printf.
+ {"printf", `{{printf "hello, printf"}}`, "hello, printf", tVal, true},
+ {"printf int", `{{printf "%04x" 127}}`, "007f", tVal, true},
+ {"printf float", `{{printf "%g" 3.5}}`, "3.5", tVal, true},
+ {"printf complex", `{{printf "%g" 1+7i}}`, "(1+7i)", tVal, true},
+ {"printf string", `{{printf "%s" "hello"}}`, "hello", tVal, true},
+ {"printf function", `{{printf "%#q" gopher}}`, "`gopher`", tVal, true},
+ {"printf field", `{{printf "%s" .U.V}}`, "v", tVal, true},
+ {"printf method", `{{printf "%s" .Method0}}`, "resultOfMethod0", tVal, true},
+ {"printf lots", `{{printf "%d %s %g %s" 127 "hello" 7-3i .Method0}}`, "127 hello (7-3i) resultOfMethod0", tVal, true},
+
+ // HTML.
+ {"html", `{{html "<script>alert(\"XSS\");</script>"}}`,
+ "&lt;script&gt;alert(&#34;XSS&#34;);&lt;/script&gt;", nil, true},
+ {"html pipeline", `{{printf "<script>alert(\"XSS\");</script>" | html}}`,
+ "&lt;script&gt;alert(&#34;XSS&#34;);&lt;/script&gt;", nil, true},
+
+ // JavaScript.
+ {"js", `{{js .}}`, `It\'d be nice.`, `It'd be nice.`, true},
+
+ // Booleans
+ {"not", "{{not true}} {{not false}}", "false true", nil, true},
+ {"and", "{{and 0 0}} {{and 1 0}} {{and 0 1}} {{and 1 1}}", "false false false true", nil, true},
+ {"or", "{{or 0 0}} {{or 1 0}} {{or 0 1}} {{or 1 1}}", "false true true true", nil, true},
+ {"boolean if", "{{if and true 1 `hi`}}TRUE{{else}}FALSE{{end}}", "TRUE", tVal, true},
+ {"boolean if not", "{{if and true 1 `hi` | not}}TRUE{{else}}FALSE{{end}}", "FALSE", nil, true},
+
+ // Indexing.
+ {"slice[0]", "{{index .SI 0}}", "3", tVal, true},
+ {"slice[1]", "{{index .SI 1}}", "4", tVal, true},
+ {"slice[HUGE]", "{{index .SI 10}}", "", tVal, false},
+ {"slice[WRONG]", "{{index .SI `hello`}}", "", tVal, false},
+ {"map[one]", "{{index .MSI `one`}}", "1", tVal, true},
+ {"map[two]", "{{index .MSI `two`}}", "2", tVal, true},
+ {"map[NO]", "{{index .MSI `XXX`}}", "", tVal, false},
+ {"map[WRONG]", "{{index .MSI 10}}", "", tVal, false},
+ {"double index", "{{index .SMSI 1 `eleven`}}", "11", tVal, true},
+
+ // With.
+ {"with true", "{{with true}}{{.}}{{end}}", "true", tVal, true},
+ {"with false", "{{with false}}{{.}}{{else}}FALSE{{end}}", "FALSE", tVal, true},
+ {"with 1", "{{with 1}}{{.}}{{else}}ZERO{{end}}", "1", tVal, true},
+ {"with 0", "{{with 0}}{{.}}{{else}}ZERO{{end}}", "ZERO", tVal, true},
+ {"with 1.5", "{{with 1.5}}{{.}}{{else}}ZERO{{end}}", "1.5", tVal, true},
+ {"with 0.0", "{{with .FloatZero}}{{.}}{{else}}ZERO{{end}}", "ZERO", tVal, true},
+ {"with 1.5i", "{{with 1.5i}}{{.}}{{else}}ZERO{{end}}", "(0+1.5i)", tVal, true},
+ {"with 0.0i", "{{with .ComplexZero}}{{.}}{{else}}ZERO{{end}}", "ZERO", tVal, true},
+ {"with emptystring", "{{with ``}}{{.}}{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"with string", "{{with `notempty`}}{{.}}{{else}}EMPTY{{end}}", "notempty", tVal, true},
+ {"with emptyslice", "{{with .SIEmpty}}{{.}}{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"with slice", "{{with .SI}}{{.}}{{else}}EMPTY{{end}}", "[3 4 5]", tVal, true},
+ {"with emptymap", "{{with .MSIEmpty}}{{.}}{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"with map", "{{with .MSIone}}{{.}}{{else}}EMPTY{{end}}", "map[one:1]", tVal, true},
+ {"with empty interface, struct field", "{{with .Empty4}}{{.V}}{{end}}", "v", tVal, true},
+
+ // Range.
+ {"range []int", "{{range .SI}}-{{.}}-{{end}}", "-3--4--5-", tVal, true},
+ {"range empty no else", "{{range .SIEmpty}}-{{.}}-{{end}}", "", tVal, true},
+ {"range []int else", "{{range .SI}}-{{.}}-{{else}}EMPTY{{end}}", "-3--4--5-", tVal, true},
+ {"range empty else", "{{range .SIEmpty}}-{{.}}-{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"range []bool", "{{range .SB}}-{{.}}-{{end}}", "-true--false-", tVal, true},
+ {"range []int method", "{{range .SI | .MAdd .I}}-{{.}}-{{end}}", "-20--21--22-", tVal, true},
+ {"range map", "{{range .MSI | .MSort}}-{{.}}-{{end}}", "-one--three--two-", tVal, true},
+ {"range empty map no else", "{{range .MSIEmpty}}-{{.}}-{{end}}", "", tVal, true},
+ {"range map else", "{{range .MSI | .MSort}}-{{.}}-{{else}}EMPTY{{end}}", "-one--three--two-", tVal, true},
+ {"range empty map else", "{{range .MSIEmpty}}-{{.}}-{{else}}EMPTY{{end}}", "EMPTY", tVal, true},
+ {"range empty interface", "{{range .Empty3}}-{{.}}-{{else}}EMPTY{{end}}", "-7--8-", tVal, true},
+
+ // Error handling.
+ {"error method, error", "{{.EPERM true}}", "", tVal, false},
+ {"error method, no error", "{{.EPERM false}}", "false", tVal, true},
+}
+
+func gopher() string {
+ return "gopher"
+}
+
+func testExecute(execTests []execTest, set *Set, t *testing.T) {
+ b := new(bytes.Buffer)
+ funcs := FuncMap{"gopher": gopher}
+ for _, test := range execTests {
+ tmpl := New(test.name).Funcs(funcs)
+ err := tmpl.Parse(test.input)
+ if err != nil {
+ t.Errorf("%s: parse error: %s", test.name, err)
+ continue
+ }
+ b.Reset()
+ err = tmpl.ExecuteInSet(b, test.data, set)
+ switch {
+ case !test.ok && err == nil:
+ t.Errorf("%s: expected error; got none", test.name)
+ continue
+ case test.ok && err != nil:
+ t.Errorf("%s: unexpected execute error: %s", test.name, err)
+ continue
+ case !test.ok && err != nil:
+ // expected error, got one
+ if *debug {
+ fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err)
+ }
+ }
+ result := b.String()
+ if result != test.output {
+ t.Errorf("%s: expected\n\t%q\ngot\n\t%q", test.name, test.output, result)
+ }
+ }
+}
+
+func TestExecute(t *testing.T) {
+ testExecute(execTests, nil, t)
+}
+
+// Check that an error from a method flows back to the top.
+func TestExecuteError(t *testing.T) {
+ b := new(bytes.Buffer)
+ tmpl := New("error")
+ err := tmpl.Parse("{{.EPERM true}}")
+ if err != nil {
+ t.Fatalf("parse error: %s", err)
+ }
+ err = tmpl.Execute(b, tVal)
+ if err == nil {
+ t.Errorf("expected error; got none")
+ } else if !strings.Contains(err.String(), os.EPERM.String()) {
+ t.Errorf("expected os.EPERM; got %s", err)
+ }
+}
+
+func TestJSEscaping(t *testing.T) {
+ testCases := []struct {
+ in, exp string
+ }{
+ {`a`, `a`},
+ {`'foo`, `\'foo`},
+ {`Go "jump" \`, `Go \"jump\" \\`},
+ {`Yukihiro says "今日は世界"`, `Yukihiro says \"今日は世界\"`},
+ {"unprintable \uFDFF", `unprintable \uFDFF`},
+ }
+ for _, tc := range testCases {
+ s := JSEscapeString(tc.in)
+ if s != tc.exp {
+ t.Errorf("JS escaping [%s] got [%s] want [%s]", tc.in, s, tc.exp)
+ }
+ }
+}
diff --git a/src/pkg/exp/template/funcs.go b/src/pkg/exp/template/funcs.go
new file mode 100644
index 000000000..66be40fd4
--- /dev/null
+++ b/src/pkg/exp/template/funcs.go
@@ -0,0 +1,294 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "os"
+ "reflect"
+ "strings"
+ "unicode"
+ "utf8"
+)
+
+// FuncMap is the type of the map defining the mapping from names to functions.
+// Each function must have either a single return value, or two return values of
+// which the second has type os.Error.
+type FuncMap map[string]interface{}
+
+var funcs = map[string]reflect.Value{
+ "and": reflect.ValueOf(and),
+ "html": reflect.ValueOf(HTMLEscaper),
+ "index": reflect.ValueOf(index),
+ "js": reflect.ValueOf(JSEscaper),
+ "not": reflect.ValueOf(not),
+ "or": reflect.ValueOf(or),
+ "printf": reflect.ValueOf(fmt.Sprintf),
+}
+
+// addFuncs adds to values the functions in funcs, converting them to reflect.Values.
+func addFuncs(values map[string]reflect.Value, funcMap FuncMap) {
+ for name, fn := range funcMap {
+ v := reflect.ValueOf(fn)
+ if v.Kind() != reflect.Func {
+ panic("value for " + name + " not a function")
+ }
+ if !goodFunc(v.Type()) {
+ panic(fmt.Errorf("can't handle multiple results from method/function %q", name))
+ }
+ values[name] = v
+ }
+}
+
+// goodFunc checks that the function or method has the right result signature.
+func goodFunc(typ reflect.Type) bool {
+ // We allow functions with 1 result or 2 results where the second is an os.Error.
+ switch {
+ case typ.NumOut() == 1:
+ return true
+ case typ.NumOut() == 2 && typ.Out(1) == osErrorType:
+ return true
+ }
+ return false
+}
+
+// findFunction looks for a function in the template, set, and global map.
+func findFunction(name string, tmpl *Template, set *Set) (reflect.Value, bool) {
+ if tmpl != nil {
+ if fn := tmpl.funcs[name]; fn.IsValid() {
+ return fn, true
+ }
+ }
+ if set != nil {
+ if fn := set.funcs[name]; fn.IsValid() {
+ return fn, true
+ }
+ }
+ if fn := funcs[name]; fn.IsValid() {
+ return fn, true
+ }
+ return reflect.Value{}, false
+}
+
+// Indexing.
+
+// index returns the result of indexing its first argument by the following
+// arguments. Thus "index x 1 2 3" is, in Go syntax, x[1][2][3]. Each
+// indexed item must be a map, slice, or array.
+func index(item interface{}, indices ...interface{}) (interface{}, os.Error) {
+ v := reflect.ValueOf(item)
+ for _, i := range indices {
+ index := reflect.ValueOf(i)
+ var isNil bool
+ if v, isNil = indirect(v); isNil {
+ return nil, fmt.Errorf("index of nil pointer")
+ }
+ switch v.Kind() {
+ case reflect.Array, reflect.Slice:
+ var x int64
+ switch index.Kind() {
+ case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+ x = index.Int()
+ case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+ x = int64(index.Uint())
+ default:
+ return nil, fmt.Errorf("cannot index slice/array with type %s", index.Type())
+ }
+ if x < 0 || x >= int64(v.Len()) {
+ return nil, fmt.Errorf("index out of range: %d", x)
+ }
+ v = v.Index(int(x))
+ case reflect.Map:
+ if !index.Type().AssignableTo(v.Type().Key()) {
+ return nil, fmt.Errorf("%s is not index type for %s", index.Type(), v.Type())
+ }
+ v = v.MapIndex(index)
+ if !v.IsValid() {
+ return nil, fmt.Errorf("index %v not present in map", index.Interface())
+ }
+ default:
+ return nil, fmt.Errorf("can't index item of type %s", index.Type())
+ }
+ }
+ return v.Interface(), nil
+}
+
+// Boolean logic.
+
+// and returns the Boolean AND of its arguments.
+func and(arg0 interface{}, args ...interface{}) (truth bool) {
+ truth, _ = isTrue(reflect.ValueOf(arg0))
+ for i := 0; truth && i < len(args); i++ {
+ truth, _ = isTrue(reflect.ValueOf(args[i]))
+ }
+ return
+}
+
+// or returns the Boolean OR of its arguments.
+func or(arg0 interface{}, args ...interface{}) (truth bool) {
+ truth, _ = isTrue(reflect.ValueOf(arg0))
+ for i := 0; !truth && i < len(args); i++ {
+ truth, _ = isTrue(reflect.ValueOf(args[i]))
+ }
+ return
+}
+
+// not returns the Boolean negation of its argument.
+func not(arg interface{}) (truth bool) {
+ truth, _ = isTrue(reflect.ValueOf(arg))
+ return !truth
+}
+
+// HTML escaping.
+
+var (
+ htmlQuot = []byte("&#34;") // shorter than "&quot;"
+ htmlApos = []byte("&#39;") // shorter than "&apos;"
+ htmlAmp = []byte("&amp;")
+ htmlLt = []byte("&lt;")
+ htmlGt = []byte("&gt;")
+)
+
+// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b.
+func HTMLEscape(w io.Writer, b []byte) {
+ last := 0
+ for i, c := range b {
+ var html []byte
+ switch c {
+ case '"':
+ html = htmlQuot
+ case '\'':
+ html = htmlApos
+ case '&':
+ html = htmlAmp
+ case '<':
+ html = htmlLt
+ case '>':
+ html = htmlGt
+ default:
+ continue
+ }
+ w.Write(b[last:i])
+ w.Write(html)
+ last = i + 1
+ }
+ w.Write(b[last:])
+}
+
+// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s.
+func HTMLEscapeString(s string) string {
+ // Avoid allocation if we can.
+ if strings.IndexAny(s, `'"&<>`) < 0 {
+ return s
+ }
+ var b bytes.Buffer
+ HTMLEscape(&b, []byte(s))
+ return b.String()
+}
+
+// HTMLEscaper returns the escaped HTML equivalent of the textual
+// representation of its arguments.
+func HTMLEscaper(args ...interface{}) string {
+ ok := false
+ var s string
+ if len(args) == 1 {
+ s, ok = args[0].(string)
+ }
+ if !ok {
+ s = fmt.Sprint(args...)
+ }
+ return HTMLEscapeString(s)
+}
+
+// JavaScript escaping.
+
+var (
+ jsLowUni = []byte(`\u00`)
+ hex = []byte("0123456789ABCDEF")
+
+ jsBackslash = []byte(`\\`)
+ jsApos = []byte(`\'`)
+ jsQuot = []byte(`\"`)
+)
+
+
+// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b.
+func JSEscape(w io.Writer, b []byte) {
+ last := 0
+ for i := 0; i < len(b); i++ {
+ c := b[i]
+
+ if ' ' <= c && c < utf8.RuneSelf && c != '\\' && c != '"' && c != '\'' {
+ // fast path: nothing to do
+ continue
+ }
+ w.Write(b[last:i])
+
+ if c < utf8.RuneSelf {
+ // Quotes and slashes get quoted.
+ // Control characters get written as \u00XX.
+ switch c {
+ case '\\':
+ w.Write(jsBackslash)
+ case '\'':
+ w.Write(jsApos)
+ case '"':
+ w.Write(jsQuot)
+ default:
+ w.Write(jsLowUni)
+ t, b := c>>4, c&0x0f
+ w.Write(hex[t : t+1])
+ w.Write(hex[b : b+1])
+ }
+ } else {
+ // Unicode rune.
+ rune, size := utf8.DecodeRune(b[i:])
+ if unicode.IsPrint(rune) {
+ w.Write(b[i : i+size])
+ } else {
+ // TODO(dsymonds): Do this without fmt?
+ fmt.Fprintf(w, "\\u%04X", rune)
+ }
+ i += size - 1
+ }
+ last = i + 1
+ }
+ w.Write(b[last:])
+}
+
+// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s.
+func JSEscapeString(s string) string {
+ // Avoid allocation if we can.
+ if strings.IndexFunc(s, jsIsSpecial) < 0 {
+ return s
+ }
+ var b bytes.Buffer
+ JSEscape(&b, []byte(s))
+ return b.String()
+}
+
+func jsIsSpecial(rune int) bool {
+ switch rune {
+ case '\\', '\'', '"':
+ return true
+ }
+ return rune < ' ' || utf8.RuneSelf <= rune
+}
+
+// JSEscaper returns the escaped JavaScript equivalent of the textual
+// representation of its arguments.
+func JSEscaper(args ...interface{}) string {
+ ok := false
+ var s string
+ if len(args) == 1 {
+ s, ok = args[0].(string)
+ }
+ if !ok {
+ s = fmt.Sprint(args...)
+ }
+ return JSEscapeString(s)
+}
diff --git a/src/pkg/exp/template/lex.go b/src/pkg/exp/template/lex.go
index 826d3eb88..d78152979 100644
--- a/src/pkg/exp/template/lex.go
+++ b/src/pkg/exp/template/lex.go
@@ -18,58 +18,71 @@ type item struct {
}
func (i item) String() string {
- switch i.typ {
- case itemEOF:
+ switch {
+ case i.typ == itemEOF:
return "EOF"
- case itemError:
+ case i.typ == itemError:
return i.val
- }
- if len(i.val) > 10 {
+ case i.typ > itemKeyword:
+ return fmt.Sprintf("<%s>", i.val)
+ case len(i.val) > 10:
return fmt.Sprintf("%.10q...", i.val)
}
return fmt.Sprintf("%q", i.val)
}
-// itemType identifies the type of lex item.
+// itemType identifies the type of lex items.
type itemType int
const (
- itemError itemType = iota // error occurred; value is text of error
- itemDot // the cursor, spelled '.'.
+ itemError itemType = iota // error occurred; value is text of error
+ itemBool // boolean constant
+ itemComplex // complex constant (1+2i); imaginary is just a number
itemEOF
- itemElse // else keyword
- itemEnd // end keyword
- itemField // alphanumeric identifier, starting with '.'.
+ itemField // alphanumeric identifier, starting with '.', possibly chained ('.x.y')
itemIdentifier // alphanumeric identifier
- itemIf // if keyword
- itemLeftMeta // left meta-string
- itemNumber // number
+ itemLeftDelim // left action delimiter
+ itemNumber // simple number, including imaginary
itemPipe // pipe symbol
- itemRange // range keyword
itemRawString // raw quoted string (includes quotes)
- itemRightMeta // right meta-string
+ itemRightDelim // right action delimiter
itemString // quoted string (includes quotes)
itemText // plain text
+ // Keywords appear after all the rest.
+ itemKeyword // used only to delimit the keywords
+ itemDot // the cursor, spelled '.'.
+ itemDefine // define keyword
+ itemElse // else keyword
+ itemEnd // end keyword
+ itemIf // if keyword
+ itemRange // range keyword
+ itemTemplate // template keyword
+ itemWith // with keyword
)
// Make the types prettyprint.
var itemName = map[itemType]string{
itemError: "error",
- itemDot: ".",
+ itemBool: "bool",
+ itemComplex: "complex",
itemEOF: "EOF",
- itemElse: "else",
- itemEnd: "end",
itemField: "field",
itemIdentifier: "identifier",
- itemIf: "if",
- itemLeftMeta: "left meta",
+ itemLeftDelim: "left delim",
itemNumber: "number",
itemPipe: "pipe",
- itemRange: "range",
itemRawString: "raw string",
- itemRightMeta: "rightMeta",
+ itemRightDelim: "right delim",
itemString: "string",
- itemText: "text",
+ // keywords
+ itemDot: ".",
+ itemDefine: "define",
+ itemElse: "else",
+ itemIf: "if",
+ itemEnd: "end",
+ itemRange: "range",
+ itemTemplate: "template",
+ itemWith: "with",
}
func (i itemType) String() string {
@@ -81,11 +94,14 @@ func (i itemType) String() string {
}
var key = map[string]itemType{
- ".": itemDot,
- "else": itemElse,
- "end": itemEnd,
- "if": itemIf,
- "range": itemRange,
+ ".": itemDot,
+ "define": itemDefine,
+ "else": itemElse,
+ "end": itemEnd,
+ "if": itemIf,
+ "range": itemRange,
+ "template": itemTemplate,
+ "with": itemWith,
}
const eof = -1
@@ -97,6 +113,7 @@ type stateFn func(*lexer) stateFn
type lexer struct {
name string // the name of the input; used only for error reports.
input string // the string being scanned.
+ state stateFn // the next lexing function to enter
pos int // current position in the input.
start int // start position of this item.
width int // width of last rune read from input.
@@ -166,38 +183,47 @@ func (l *lexer) errorf(format string, args ...interface{}) stateFn {
return nil
}
-// run lexes the input by executing state functions until nil.
-func (l *lexer) run() {
- for state := lexText; state != nil; {
- state = state(l)
+// nextItem returns the next item from the input.
+func (l *lexer) nextItem() item {
+ for {
+ select {
+ case item := <-l.items:
+ return item
+ default:
+ l.state = l.state(l)
+ }
}
- close(l.items)
+ panic("not reached")
}
-// lex launches a new scanner and returns the channel of items.
-func lex(name, input string) (*lexer, chan item) {
+// lex creates a new scanner for the input string.
+func lex(name, input string) *lexer {
l := &lexer{
name: name,
input: input,
- items: make(chan item),
+ state: lexText,
+ items: make(chan item, 2), // Two items of buffering is sufficient for all state functions
}
- go l.run()
- return l, l.items
+ return l
}
// state functions
-const leftMeta = "{{"
-const rightMeta = "}}"
+const (
+ leftDelim = "{{"
+ rightDelim = "}}"
+ leftComment = "{{/*"
+ rightComment = "*/}}"
+)
-// lexText scans until a metacharacter
+// lexText scans until an opening action delimiter, "{{".
func lexText(l *lexer) stateFn {
for {
- if strings.HasPrefix(l.input[l.pos:], leftMeta) {
+ if strings.HasPrefix(l.input[l.pos:], leftDelim) {
if l.pos > l.start {
l.emit(itemText)
}
- return lexLeftMeta
+ return lexLeftDelim
}
if l.next() == eof {
break
@@ -211,28 +237,42 @@ func lexText(l *lexer) stateFn {
return nil
}
-// leftMeta scans the left "metacharacter", which is known to be present.
-func lexLeftMeta(l *lexer) stateFn {
- l.pos += len(leftMeta)
- l.emit(itemLeftMeta)
+// lexLeftDelim scans the left delimiter, which is known to be present.
+func lexLeftDelim(l *lexer) stateFn {
+ if strings.HasPrefix(l.input[l.pos:], leftComment) {
+ return lexComment
+ }
+ l.pos += len(leftDelim)
+ l.emit(itemLeftDelim)
return lexInsideAction
}
-// rightMeta scans the right "metacharacter", which is known to be present.
-func lexRightMeta(l *lexer) stateFn {
- l.pos += len(rightMeta)
- l.emit(itemRightMeta)
+// lexComment scans a comment. The left comment marker is known to be present.
+func lexComment(l *lexer) stateFn {
+ i := strings.Index(l.input[l.pos:], rightComment)
+ if i < 0 {
+ return l.errorf("unclosed comment")
+ }
+ l.pos += i + len(rightComment)
+ l.ignore()
+ return lexText
+}
+
+// lexRightDelim scans the right delimiter, which is known to be present.
+func lexRightDelim(l *lexer) stateFn {
+ l.pos += len(rightDelim)
+ l.emit(itemRightDelim)
return lexText
}
-// lexInsideAction scans the elements inside "metacharacters".
+// lexInsideAction scans the elements inside action delimiters.
func lexInsideAction(l *lexer) stateFn {
// Either number, quoted string, or identifier.
// Spaces separate and are ignored.
// Pipe symbols separate and are emitted.
for {
- if strings.HasPrefix(l.input[l.pos:], rightMeta) {
- return lexRightMeta
+ if strings.HasPrefix(l.input[l.pos:], rightDelim) {
+ return lexRightDelim
}
switch r := l.next(); {
case r == eof || r == '\n':
@@ -273,15 +313,19 @@ Loop:
for {
switch r := l.next(); {
case isAlphaNumeric(r):
- // absorb
+ // absorb.
+ case r == '.' && l.input[l.start] == '.':
+ // field chaining; absorb into one token.
default:
l.backup()
word := l.input[l.start:l.pos]
switch {
- case key[word] != itemError:
+ case key[word] > itemKeyword:
l.emit(key[word])
case word[0] == '.':
l.emit(itemField)
+ case word == "true", word == "false":
+ l.emit(itemBool)
default:
l.emit(itemIdentifier)
}
@@ -295,8 +339,23 @@ Loop:
// isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
// and "089" - but when it's wrong the input is invalid and the parser (via
// strconv) will notice.
-// TODO: without expressions you can do imaginary but not complex.
func lexNumber(l *lexer) stateFn {
+ if !l.scanNumber() {
+ return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
+ }
+ if sign := l.peek(); sign == '+' || sign == '-' {
+ // Complex: 1+2i. No spaces, must end in 'i'.
+ if !l.scanNumber() || l.input[l.pos-1] != 'i' {
+ return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
+ }
+ l.emit(itemComplex)
+ } else {
+ l.emit(itemNumber)
+ }
+ return lexInsideAction
+}
+
+func (l *lexer) scanNumber() bool {
// Optional leading sign.
l.accept("+-")
// Is it hex?
@@ -317,10 +376,9 @@ func lexNumber(l *lexer) stateFn {
// Next thing mustn't be alphanumeric.
if isAlphaNumeric(l.peek()) {
l.next()
- return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
+ return false
}
- l.emit(itemNumber)
- return lexInsideAction
+ return true
}
// lexQuote scans a quoted string.
diff --git a/src/pkg/exp/template/lex_test.go b/src/pkg/exp/template/lex_test.go
index 184e833ef..256ec04d8 100644
--- a/src/pkg/exp/template/lex_test.go
+++ b/src/pkg/exp/template/lex_test.go
@@ -17,8 +17,8 @@ type lexTest struct {
var (
tEOF = item{itemEOF, ""}
- tLeft = item{itemLeftMeta, "{{"}
- tRight = item{itemRightMeta, "}}"}
+ tLeft = item{itemLeftDelim, "{{"}
+ tRight = item{itemRightDelim, "}}"}
tRange = item{itemRange, "range"}
tPipe = item{itemPipe, "|"}
tFor = item{itemIdentifier, "for"}
@@ -31,11 +31,16 @@ var lexTests = []lexTest{
{"empty", "", []item{tEOF}},
{"spaces", " \t\n", []item{{itemText, " \t\n"}, tEOF}},
{"text", `now is the time`, []item{{itemText, "now is the time"}, tEOF}},
+ {"text with comment", "hello-{{/* this is a comment */}}-world", []item{
+ {itemText, "hello-"},
+ {itemText, "-world"},
+ tEOF,
+ }},
{"empty action", `{{}}`, []item{tLeft, tRight, tEOF}},
{"for", `{{for }}`, []item{tLeft, tFor, tRight, tEOF}},
{"quote", `{{"abc \n\t\" "}}`, []item{tLeft, tQuote, tRight, tEOF}},
{"raw quote", "{{" + raw + "}}", []item{tLeft, tRawQuote, tRight, tEOF}},
- {"numbers", "{{1 02 0x14 -7.2i 1e3 +1.2e-4}}", []item{
+ {"numbers", "{{1 02 0x14 -7.2i 1e3 +1.2e-4 4.2i 1+2i}}", []item{
tLeft,
{itemNumber, "1"},
{itemNumber, "02"},
@@ -43,25 +48,40 @@ var lexTests = []lexTest{
{itemNumber, "-7.2i"},
{itemNumber, "1e3"},
{itemNumber, "+1.2e-4"},
+ {itemNumber, "4.2i"},
+ {itemComplex, "1+2i"},
tRight,
tEOF,
}},
- {"dots", "{{.x . .2 .x.y }}", []item{
+ {"bools", "{{true false}}", []item{
+ tLeft,
+ {itemBool, "true"},
+ {itemBool, "false"},
+ tRight,
+ tEOF,
+ }},
+ {"dot", "{{.}}", []item{
+ tLeft,
+ {itemDot, "."},
+ tRight,
+ tEOF,
+ }},
+ {"dots", "{{.x . .2 .x.y}}", []item{
tLeft,
{itemField, ".x"},
{itemDot, "."},
{itemNumber, ".2"},
- {itemField, ".x"},
- {itemField, ".y"},
+ {itemField, ".x.y"},
tRight,
tEOF,
}},
- {"keywords", "{{range if else end}}", []item{
+ {"keywords", "{{range if else end with}}", []item{
tLeft,
{itemRange, "range"},
{itemIf, "if"},
{itemElse, "else"},
{itemEnd, "end"},
+ {itemWith, "with"},
tRight,
tEOF,
}},
@@ -112,9 +132,13 @@ var lexTests = []lexTest{
// collect gathers the emitted items into a slice.
func collect(t *lexTest) (items []item) {
- _, tokens := lex(t.name, t.input)
- for i := range tokens {
- items = append(items, i)
+ l := lex(t.name, t.input)
+ for {
+ item := l.nextItem()
+ items = append(items, item)
+ if item.typ == itemEOF || item.typ == itemError {
+ break
+ }
}
return
}
diff --git a/src/pkg/exp/template/parse.go b/src/pkg/exp/template/parse.go
index 57ddb0084..8b2d60207 100644
--- a/src/pkg/exp/template/parse.go
+++ b/src/pkg/exp/template/parse.go
@@ -8,17 +8,21 @@ import (
"bytes"
"fmt"
"os"
+ "reflect"
"runtime"
"strconv"
+ "strings"
+ "unicode"
)
// Template is the representation of a parsed template.
type Template struct {
- // TODO: At the moment, these are all internal to parsing.
- name string
- root *listNode
+ name string
+ root *listNode
+ funcs map[string]reflect.Value
+ // Parsing only; cleared after parse.
+ set *Set
lex *lexer
- tokens chan item
token item // token lookahead for parser
havePeek bool
}
@@ -28,7 +32,7 @@ func (t *Template) next() item {
if t.havePeek {
t.havePeek = false
} else {
- t.token = <-t.tokens
+ t.token = t.lex.nextItem()
}
return t.token
}
@@ -43,7 +47,7 @@ func (t *Template) peek() item {
if t.havePeek {
return t.token
}
- t.token = <-t.tokens
+ t.token = t.lex.nextItem()
t.havePeek = true
return t.token
}
@@ -64,14 +68,18 @@ const (
nodeText nodeType = iota
nodeAction
nodeCommand
+ nodeDot
nodeElse
nodeEnd
nodeField
nodeIdentifier
+ nodeIf
nodeList
nodeNumber
nodeRange
nodeString
+ nodeTemplate
+ nodeWith
)
// Nodes.
@@ -103,25 +111,26 @@ func (l *listNode) String() string {
// textNode holds plain text.
type textNode struct {
nodeType
- text string
+ text []byte
}
func newText(text string) *textNode {
- return &textNode{nodeType: nodeText, text: text}
+ return &textNode{nodeType: nodeText, text: []byte(text)}
}
func (t *textNode) String() string {
return fmt.Sprintf("(text: %q)", t.text)
}
-// actionNode holds an action (something bounded by metacharacters).
+// actionNode holds an action (something bounded by delimiters).
type actionNode struct {
nodeType
+ line int
pipeline []*commandNode
}
-func newAction() *actionNode {
- return &actionNode{nodeType: nodeAction}
+func newAction(line int, pipeline []*commandNode) *actionNode {
+ return &actionNode{nodeType: nodeAction, line: line, pipeline: pipeline}
}
func (a *actionNode) append(command *commandNode) {
@@ -164,45 +173,88 @@ func (i *identifierNode) String() string {
return fmt.Sprintf("I=%s", i.ident)
}
-// fieldNode holds a field (identifier starting with '.'). The period is dropped from the ident.
+// dotNode holds the special identifier '.'. It is represented by a nil pointer.
+type dotNode bool
+
+func newDot() *dotNode {
+ return nil
+}
+
+func (d *dotNode) typ() nodeType {
+ return nodeDot
+}
+
+func (d *dotNode) String() string {
+ return "{{<.>}}"
+}
+
+// fieldNode holds a field (identifier starting with '.').
+// The names may be chained ('.x.y').
+// The period is dropped from each ident.
type fieldNode struct {
nodeType
- ident string
+ ident []string
}
func newField(ident string) *fieldNode {
- return &fieldNode{nodeType: nodeField, ident: ident[1:]} //drop period
+ return &fieldNode{nodeType: nodeField, ident: strings.Split(ident[1:], ".")} // [1:] to drop leading period
}
func (f *fieldNode) String() string {
- return fmt.Sprintf("F=.%s", f.ident)
+ return fmt.Sprintf("F=%s", f.ident)
+}
+
+// boolNode holds a boolean constant.
+type boolNode struct {
+ nodeType
+ true bool
+}
+
+func newBool(true bool) *boolNode {
+ return &boolNode{nodeType: nodeString, true: true}
+}
+
+func (b *boolNode) String() string {
+ if b.true {
+ return fmt.Sprintf("B=true")
+ }
+ return fmt.Sprintf("B=false")
}
-// numberNode holds a number, signed or unsigned, integer, floating, or imaginary.
+// numberNode holds a number, signed or unsigned integer, floating, or complex.
// The value is parsed and stored under all the types that can represent the value.
// This simulates in a small amount of code the behavior of Go's ideal constants.
-// TODO: booleans, complex numbers.
type numberNode struct {
nodeType
- isInt bool // number has an integral value
- isUint bool // number has an unsigned integral value
- isFloat bool // number has a floating-point value
- imaginary bool // number is imaginary
- int64 // the signed integer value
- uint64 // the unsigned integer value
- float64 // the positive floating-point value
- text string
-}
-
-func newNumber(text string) (*numberNode, os.Error) {
+ isInt bool // number has an integral value
+ isUint bool // number has an unsigned integral value
+ isFloat bool // number has a floating-point value
+ isComplex bool // number is complex
+ int64 // the signed integer value
+ uint64 // the unsigned integer value
+ float64 // the floating-point value
+ complex128 // the complex value
+ text string
+}
+
+func newNumber(text string, isComplex bool) (*numberNode, os.Error) {
n := &numberNode{nodeType: nodeNumber, text: text}
- // Imaginary constants can only be floating-point.
+ if isComplex {
+ // fmt.Sscan can parse the pair, so let it do the work.
+ if _, err := fmt.Sscan(text, &n.complex128); err != nil {
+ return nil, err
+ }
+ n.isComplex = true
+ n.simplifyComplex()
+ return n, nil
+ }
+ // Imaginary constants can only be complex unless they are zero.
if len(text) > 0 && text[len(text)-1] == 'i' {
f, err := strconv.Atof64(text[:len(text)-1])
if err == nil {
- n.imaginary = true
- n.isFloat = true
- n.float64 = f
+ n.isComplex = true
+ n.complex128 = complex(0, f)
+ n.simplifyComplex()
return n, nil
}
}
@@ -250,6 +302,23 @@ func newNumber(text string) (*numberNode, os.Error) {
return n, nil
}
+// simplifyComplex pulls out any other types that are represented by the complex number.
+// These all require that the imaginary part be zero.
+func (n *numberNode) simplifyComplex() {
+ n.isFloat = imag(n.complex128) == 0
+ if n.isFloat {
+ n.float64 = real(n.complex128)
+ n.isInt = float64(int64(n.float64)) == n.float64
+ if n.isInt {
+ n.int64 = int64(n.float64)
+ }
+ n.isUint = float64(uint64(n.float64)) == n.float64
+ if n.isUint {
+ n.uint64 = uint64(n.float64)
+ }
+ }
+}
+
func (n *numberNode) String() string {
return fmt.Sprintf("N=%s", n.text)
}
@@ -283,11 +352,14 @@ func (e *endNode) String() string {
return "{{end}}"
}
-// elseNode represents an {{else}} action. It is represented by a nil pointer.
-type elseNode bool
+// elseNode represents an {{else}} action.
+type elseNode struct {
+ nodeType
+ line int
+}
-func newElse() *elseNode {
- return nil
+func newElse(line int) *elseNode {
+ return &elseNode{nodeType: nodeElse, line: line}
}
func (e *elseNode) typ() nodeType {
@@ -297,37 +369,106 @@ func (e *elseNode) typ() nodeType {
func (e *elseNode) String() string {
return "{{else}}"
}
+// ifNode represents an {{if}} action and its commands.
+// TODO: what should evaluation look like? is a pipeline enough?
+type ifNode struct {
+ nodeType
+ line int
+ pipeline []*commandNode
+ list *listNode
+ elseList *listNode
+}
+
+func newIf(line int, pipeline []*commandNode, list, elseList *listNode) *ifNode {
+ return &ifNode{nodeType: nodeIf, line: line, pipeline: pipeline, list: list, elseList: elseList}
+}
+
+func (i *ifNode) String() string {
+ if i.elseList != nil {
+ return fmt.Sprintf("({{if %s}} %s {{else}} %s)", i.pipeline, i.list, i.elseList)
+ }
+ return fmt.Sprintf("({{if %s}} %s)", i.pipeline, i.list)
+}
-// rangeNode represents an {{range}} action and its commands.
+// rangeNode represents a {{range}} action and its commands.
type rangeNode struct {
nodeType
- field node
+ line int
+ pipeline []*commandNode
list *listNode
elseList *listNode
}
-func newRange(field node, list *listNode) *rangeNode {
- return &rangeNode{nodeType: nodeRange, field: field, list: list}
+func newRange(line int, pipeline []*commandNode, list, elseList *listNode) *rangeNode {
+ return &rangeNode{nodeType: nodeRange, line: line, pipeline: pipeline, list: list, elseList: elseList}
}
func (r *rangeNode) String() string {
if r.elseList != nil {
- return fmt.Sprintf("({{range %s}} %s {{else}} %s)", r.field, r.list, r.elseList)
+ return fmt.Sprintf("({{range %s}} %s {{else}} %s)", r.pipeline, r.list, r.elseList)
}
- return fmt.Sprintf("({{range %s}} %s)", r.field, r.list)
+ return fmt.Sprintf("({{range %s}} %s)", r.pipeline, r.list)
+}
+
+// templateNode represents a {{template}} action.
+type templateNode struct {
+ nodeType
+ line int
+ name node
+ pipeline []*commandNode
}
+func newTemplate(line int, name node, pipeline []*commandNode) *templateNode {
+ return &templateNode{nodeType: nodeTemplate, line: line, name: name, pipeline: pipeline}
+}
+
+func (t *templateNode) String() string {
+ return fmt.Sprintf("{{template %s %s}}", t.name, t.pipeline)
+}
+
+// withNode represents a {{with}} action and its commands.
+type withNode struct {
+ nodeType
+ line int
+ pipeline []*commandNode
+ list *listNode
+ elseList *listNode
+}
+
+func newWith(line int, pipeline []*commandNode, list, elseList *listNode) *withNode {
+ return &withNode{nodeType: nodeWith, line: line, pipeline: pipeline, list: list, elseList: elseList}
+}
+
+func (w *withNode) String() string {
+ if w.elseList != nil {
+ return fmt.Sprintf("({{with %s}} %s {{else}} %s)", w.pipeline, w.list, w.elseList)
+ }
+ return fmt.Sprintf("({{with %s}} %s)", w.pipeline, w.list)
+}
+
+
// Parsing.
// New allocates a new template with the given name.
func New(name string) *Template {
return &Template{
- name: name,
+ name: name,
+ funcs: make(map[string]reflect.Value),
}
}
+// Funcs adds to the template's function map the elements of the
+// argument map. It panics if a value in the map is not a function
+// with appropriate return type.
+// The return value is the template, so calls can be chained.
+func (t *Template) Funcs(funcMap FuncMap) *Template {
+ addFuncs(t.funcs, funcMap)
+ return t
+}
+
// errorf formats the error and terminates processing.
func (t *Template) errorf(format string, args ...interface{}) {
+ t.root = nil
format = fmt.Sprintf("template: %s:%d: %s", t.name, t.lex.lineNumber(), format)
panic(fmt.Errorf(format, args...))
}
@@ -351,25 +492,80 @@ func (t *Template) unexpected(token item, context string) {
t.errorf("unexpected %s in %s", token, context)
}
-// Parse parses the template definition string and constructs an efficient representation of the template.
-func (t *Template) Parse(s string) (err os.Error) {
- t.lex, t.tokens = lex(t.name, s)
- defer func() {
- e := recover()
- if e != nil {
- if _, ok := e.(runtime.Error); ok {
- panic(e)
+// recover is the handler that turns panics into returns from the top
+// level of Parse or Execute.
+func (t *Template) recover(errp *os.Error) {
+ e := recover()
+ if e != nil {
+ if _, ok := e.(runtime.Error); ok {
+ panic(e)
+ }
+ t.stopParse()
+ *errp = e.(os.Error)
+ }
+ return
+}
+
+// startParse starts the template parsing from the lexer.
+func (t *Template) startParse(set *Set, lex *lexer) {
+ t.root = nil
+ t.set = set
+ t.lex = lex
+}
+
+// stopParse terminates parsing.
+func (t *Template) stopParse() {
+ t.set, t.lex = nil, nil
+}
+
+// atEOF returns true if, possibly after spaces, we're at EOF.
+func (t *Template) atEOF() bool {
+ for {
+ token := t.peek()
+ switch token.typ {
+ case itemEOF:
+ return true
+ case itemText:
+ for _, r := range token.val {
+ if !unicode.IsSpace(r) {
+ return false
+ }
}
- err = e.(os.Error)
+ t.next() // skip spaces.
+ continue
}
- return
- }()
- var next node
+ break
+ }
+ return false
+}
+
+// Parse parses the template definition string to construct an internal representation
+// of the template for execution.
+func (t *Template) Parse(s string) (err os.Error) {
+ t.startParse(nil, lex(t.name, s))
+ defer t.recover(&err)
+ t.parse(true)
+ t.stopParse()
+ return
+}
+
+// ParseInSet parses the template definition string to construct an internal representation
+// of the template for execution. Function bindings are checked against those in the set.
+func (t *Template) ParseInSet(s string, set *Set) (err os.Error) {
+ t.startParse(set, lex(t.name, s))
+ defer t.recover(&err)
+ t.parse(true)
+ t.stopParse()
+ return
+}
+
+// parse is the helper for Parse. It triggers an error if we expect EOF but don't reach it.
+func (t *Template) parse(toEOF bool) (next node) {
t.root, next = t.itemList(true)
- if next != nil {
+ if toEOF && next != nil {
t.errorf("unexpected %s", next)
}
- return nil
+ return next
}
// itemList:
@@ -398,7 +594,7 @@ func (t *Template) textOrAction() node {
switch token := t.next(); token.typ {
case itemText:
return newText(token.val)
- case itemLeftMeta:
+ case itemLeftDelim:
return t.action()
default:
t.unexpected(token, "input")
@@ -409,63 +605,95 @@ func (t *Template) textOrAction() node {
// Action:
// control
// command ("|" command)*
-// Left meta is past. Now get actions.
+// Left delim is past. Now get actions.
+// First word could be a keyword such as range.
func (t *Template) action() (n node) {
- action := newAction()
switch token := t.next(); token.typ {
- case itemRange:
- return t.rangeControl()
case itemElse:
return t.elseControl()
case itemEnd:
return t.endControl()
+ case itemIf:
+ return t.ifControl()
+ case itemRange:
+ return t.rangeControl()
+ case itemTemplate:
+ return t.templateControl()
+ case itemWith:
+ return t.withControl()
}
t.backup()
-Loop:
+ return newAction(t.lex.lineNumber(), t.pipeline("command"))
+}
+
+// Pipeline:
+// field or command
+// pipeline "|" pipeline
+func (t *Template) pipeline(context string) (pipe []*commandNode) {
for {
switch token := t.next(); token.typ {
- case itemRightMeta:
- break Loop
- case itemIdentifier, itemField:
- t.backup()
- cmd, err := t.command()
- if err != nil {
- t.error(err)
+ case itemRightDelim:
+ if len(pipe) == 0 {
+ t.errorf("missing value for %s", context)
}
- action.append(cmd)
+ return
+ case itemBool, itemComplex, itemDot, itemField, itemIdentifier, itemNumber, itemRawString, itemString:
+ t.backup()
+ pipe = append(pipe, t.command())
default:
- t.unexpected(token, "command")
+ t.unexpected(token, context)
}
}
- return action
+ return
}
-// Range:
-// {{range field}} itemList {{end}}
-// {{range field}} itemList {{else}} itemList {{end}}
-// Range keyword is past.
-func (t *Template) rangeControl() node {
- field := t.expect(itemField, "range")
- t.expect(itemRightMeta, "range")
- list, next := t.itemList(false)
- r := newRange(newField(field.val), list)
+func (t *Template) parseControl(context string) (lineNum int, pipe []*commandNode, list, elseList *listNode) {
+ lineNum = t.lex.lineNumber()
+ pipe = t.pipeline(context)
+ var next node
+ list, next = t.itemList(false)
switch next.typ() {
case nodeEnd: //done
case nodeElse:
- elseList, next := t.itemList(false)
+ elseList, next = t.itemList(false)
if next.typ() != nodeEnd {
t.errorf("expected end; found %s", next)
}
- r.elseList = elseList
+ elseList = elseList
}
- return r
+ return lineNum, pipe, list, elseList
+}
+
+// If:
+// {{if pipeline}} itemList {{end}}
+// {{if pipeline}} itemList {{else}} itemList {{end}}
+// If keyword is past.
+func (t *Template) ifControl() node {
+ return newIf(t.parseControl("if"))
+}
+
+// Range:
+// {{range pipeline}} itemList {{end}}
+// {{range pipeline}} itemList {{else}} itemList {{end}}
+// Range keyword is past.
+func (t *Template) rangeControl() node {
+ return newRange(t.parseControl("range"))
}
+// With:
+// {{with pipeline}} itemList {{end}}
+// {{with pipeline}} itemList {{else}} itemList {{end}}
+// If keyword is past.
+func (t *Template) withControl() node {
+ return newWith(t.parseControl("with"))
+}
+
+
// End:
// {{end}}
// End keyword is past.
func (t *Template) endControl() node {
- t.expect(itemRightMeta, "end")
+ t.expect(itemRightDelim, "end")
return newEnd()
}
@@ -473,50 +701,83 @@ func (t *Template) endControl() node {
// {{else}}
// Else keyword is past.
func (t *Template) elseControl() node {
- t.expect(itemRightMeta, "else")
- return newElse()
+ t.expect(itemRightDelim, "else")
+ return newElse(t.lex.lineNumber())
+}
+
+// Template:
+// {{template stringValue pipeline}}
+// Template keyword is past. The name must be something that can evaluate
+// to a string.
+func (t *Template) templateControl() node {
+ var name node
+ switch token := t.next(); token.typ {
+ case itemIdentifier:
+ if _, ok := findFunction(token.val, t, t.set); !ok {
+ t.errorf("function %q not defined", token.val)
+ }
+ name = newIdentifier(token.val)
+ case itemDot:
+ name = newDot()
+ case itemField:
+ name = newField(token.val)
+ case itemString, itemRawString:
+ s, err := strconv.Unquote(token.val)
+ if err != nil {
+ t.error(err)
+ }
+ name = newString(s)
+ default:
+ t.unexpected(token, "template invocation")
+ }
+ pipeline := t.pipeline("template")
+ return newTemplate(t.lex.lineNumber(), name, pipeline)
}
// command:
-// space-separated arguments up to a pipeline character or right metacharacter.
-// we consume the pipe character but leave the right meta to terminate the action.
-func (t *Template) command() (*commandNode, os.Error) {
+// space-separated arguments up to a pipeline character or right delimiter.
+// we consume the pipe character but leave the right delim to terminate the action.
+func (t *Template) command() *commandNode {
cmd := newCommand()
Loop:
for {
switch token := t.next(); token.typ {
- case itemRightMeta:
+ case itemRightDelim:
t.backup()
break Loop
case itemPipe:
break Loop
case itemError:
- return nil, os.NewError(token.val)
+ t.errorf("%s", token.val)
case itemIdentifier:
+ if _, ok := findFunction(token.val, t, t.set); !ok {
+ t.errorf("function %q not defined", token.val)
+ }
cmd.append(newIdentifier(token.val))
+ case itemDot:
+ cmd.append(newDot())
case itemField:
cmd.append(newField(token.val))
- case itemNumber:
- if len(cmd.args) == 0 {
- t.errorf("command cannot be %q", token.val)
- }
- number, err := newNumber(token.val)
+ case itemBool:
+ cmd.append(newBool(token.val == "true"))
+ case itemComplex, itemNumber:
+ number, err := newNumber(token.val, token.typ == itemComplex)
if err != nil {
t.error(err)
}
cmd.append(number)
case itemString, itemRawString:
- if len(cmd.args) == 0 {
- t.errorf("command cannot be %q", token.val)
- }
s, err := strconv.Unquote(token.val)
if err != nil {
- return nil, err
+ t.error(err)
}
cmd.append(newString(s))
default:
t.unexpected(token, "command")
}
}
- return cmd, nil
+ if len(cmd.args) == 0 {
+ t.errorf("empty command")
+ }
+ return cmd
}
diff --git a/src/pkg/exp/template/parse_test.go b/src/pkg/exp/template/parse_test.go
index f89eaa6ce..71580f8b6 100644
--- a/src/pkg/exp/template/parse_test.go
+++ b/src/pkg/exp/template/parse_test.go
@@ -5,52 +5,65 @@
package template
import (
+ "flag"
"fmt"
"testing"
)
-const dumpErrors = true
+var debug = flag.Bool("debug", false, "show the errors produced by the tests")
type numberTest struct {
text string
isInt bool
isUint bool
isFloat bool
- imaginary bool
+ isComplex bool
int64
uint64
float64
+ complex128
}
var numberTests = []numberTest{
// basics
- {"0", true, true, true, false, 0, 0, 0},
- {"-0", true, true, true, false, 0, 0, 0}, // check that -0 is a uint.
- {"73", true, true, true, false, 73, 73, 73},
- {"-73", true, false, true, false, -73, 0, -73},
- {"+73", true, false, true, false, 73, 0, 73},
- {"100", true, true, true, false, 100, 100, 100},
- {"1e9", true, true, true, false, 1e9, 1e9, 1e9},
- {"-1e9", true, false, true, false, -1e9, 0, -1e9},
- {"-1.2", false, false, true, false, 0, 0, -1.2},
- {"1e19", false, true, true, false, 0, 1e19, 1e19},
- {"-1e19", false, false, true, false, 0, 0, -1e19},
- {"4i", false, false, true, true, 0, 0, 4},
+ {"0", true, true, true, false, 0, 0, 0, 0},
+ {"-0", true, true, true, false, 0, 0, 0, 0}, // check that -0 is a uint.
+ {"73", true, true, true, false, 73, 73, 73, 0},
+ {"-73", true, false, true, false, -73, 0, -73, 0},
+ {"+73", true, false, true, false, 73, 0, 73, 0},
+ {"100", true, true, true, false, 100, 100, 100, 0},
+ {"1e9", true, true, true, false, 1e9, 1e9, 1e9, 0},
+ {"-1e9", true, false, true, false, -1e9, 0, -1e9, 0},
+ {"-1.2", false, false, true, false, 0, 0, -1.2, 0},
+ {"1e19", false, true, true, false, 0, 1e19, 1e19, 0},
+ {"-1e19", false, false, true, false, 0, 0, -1e19, 0},
+ {"4i", false, false, false, true, 0, 0, 0, 4i},
+ {"-1.2+4.2i", false, false, false, true, 0, 0, 0, -1.2 + 4.2i},
+ // complex with 0 imaginary are float (and maybe integer)
+ {"0i", true, true, true, true, 0, 0, 0, 0},
+ {"-1.2+0i", false, false, true, true, 0, 0, -1.2, -1.2},
+ {"-12+0i", true, false, true, true, -12, 0, -12, -12},
+ {"13+0i", true, true, true, true, 13, 13, 13, 13},
// funny bases
- {"0123", true, true, true, false, 0123, 0123, 0123},
- {"-0x0", true, true, true, false, 0, 0, 0},
- {"0xdeadbeef", true, true, true, false, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef},
+ {"0123", true, true, true, false, 0123, 0123, 0123, 0},
+ {"-0x0", true, true, true, false, 0, 0, 0, 0},
+ {"0xdeadbeef", true, true, true, false, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0},
// some broken syntax
{text: "+-2"},
{text: "0x123."},
{text: "1e."},
{text: "0xi."},
+ {text: "1+2."},
}
func TestNumberParse(t *testing.T) {
for _, test := range numberTests {
- n, err := newNumber(test.text)
- ok := test.isInt || test.isUint || test.isFloat
+ // If fmt.Sscan thinks it's complex, it's complex. We can't trust the output
+ // because imaginary comes out as a number.
+ var c complex128
+ _, err := fmt.Sscan(test.text, &c)
+ n, err := newNumber(test.text, err == nil)
+ ok := test.isInt || test.isUint || test.isFloat || test.isComplex
if ok && err != nil {
t.Errorf("unexpected error for %q", test.text)
continue
@@ -62,8 +75,8 @@ func TestNumberParse(t *testing.T) {
if !ok {
continue
}
- if n.imaginary != test.imaginary {
- t.Errorf("imaginary incorrect for %q; should be %t", test.text, test.imaginary)
+ if n.isComplex != test.isComplex {
+ t.Errorf("complex incorrect for %q; should be %t", test.text, test.isComplex)
}
if test.isInt {
if !n.isInt {
@@ -95,17 +108,19 @@ func TestNumberParse(t *testing.T) {
} else if n.isFloat {
t.Errorf("did not expect float for %q", test.text)
}
+ if test.isComplex {
+ if !n.isComplex {
+ t.Errorf("expected complex for %q", test.text)
+ }
+ if n.complex128 != test.complex128 {
+ t.Errorf("complex128 for %q should be %g is %g", test.text, test.complex128, n.complex128)
+ }
+ } else if n.isComplex {
+ t.Errorf("did not expect complex for %q", test.text)
+ }
}
}
-func num(s string) *numberNode {
- n, err := newNumber(s)
- if err != nil {
- panic(err)
- }
- return n
-}
-
type parseTest struct {
name string
input string
@@ -125,29 +140,45 @@ var parseTests = []parseTest{
`[(text: " \t\n")]`},
{"text", "some text", noError,
`[(text: "some text")]`},
- {"emptyMeta", "{{}}", noError,
+ {"emptyAction", "{{}}", hasError,
`[(action: [])]`},
- {"simple command", "{{hello}}", noError,
- `[(action: [(command: [I=hello])])]`},
- {"multi-word command", "{{hello world}}", noError,
- `[(action: [(command: [I=hello I=world])])]`},
- {"multi-word command with number", "{{hello 80}}", noError,
- `[(action: [(command: [I=hello N=80])])]`},
- {"multi-word command with string", "{{hello `quoted text`}}", noError,
- "[(action: [(command: [I=hello S=`quoted text`])])]"},
- {"pipeline", "{{hello|world}}", noError,
- `[(action: [(command: [I=hello]) (command: [I=world])])]`},
- {"simple range", "{{range .x}}hello{{end}}", noError,
- `[({{range F=.x}} [(text: "hello")])]`},
- {"nested range", "{{range .x}}hello{{range .y}}goodbye{{end}}{{end}}", noError,
- `[({{range F=.x}} [(text: "hello")({{range F=.y}} [(text: "goodbye")])])]`},
- {"range with else", "{{range .x}}true{{else}}false{{end}}", noError,
- `[({{range F=.x}} [(text: "true")] {{else}} [(text: "false")])]`},
+ {"field", "{{.X}}", noError,
+ `[(action: [(command: [F=[X]])])]`},
+ {"simple command", "{{printf}}", noError,
+ `[(action: [(command: [I=printf])])]`},
+ {"multi-word command", "{{printf `%d` 23}}", noError,
+ "[(action: [(command: [I=printf S=`%d` N=23])])]"},
+ {"pipeline", "{{.X|.Y}}", noError,
+ `[(action: [(command: [F=[X]]) (command: [F=[Y]])])]`},
+ {"simple if", "{{if .X}}hello{{end}}", noError,
+ `[({{if [(command: [F=[X]])]}} [(text: "hello")])]`},
+ {"if with else", "{{if .X}}true{{else}}false{{end}}", noError,
+ `[({{if [(command: [F=[X]])]}} [(text: "true")] {{else}} [(text: "false")])]`},
+ {"simple range", "{{range .X}}hello{{end}}", noError,
+ `[({{range [(command: [F=[X]])]}} [(text: "hello")])]`},
+ {"chained field range", "{{range .X.Y.Z}}hello{{end}}", noError,
+ `[({{range [(command: [F=[X Y Z]])]}} [(text: "hello")])]`},
+ {"nested range", "{{range .X}}hello{{range .Y}}goodbye{{end}}{{end}}", noError,
+ `[({{range [(command: [F=[X]])]}} [(text: "hello")({{range [(command: [F=[Y]])]}} [(text: "goodbye")])])]`},
+ {"range with else", "{{range .X}}true{{else}}false{{end}}", noError,
+ `[({{range [(command: [F=[X]])]}} [(text: "true")] {{else}} [(text: "false")])]`},
+ {"range over pipeline", "{{range .X|.M}}true{{else}}false{{end}}", noError,
+ `[({{range [(command: [F=[X]]) (command: [F=[M]])]}} [(text: "true")] {{else}} [(text: "false")])]`},
+ {"range []int", "{{range .SI}}{{.}}{{end}}", noError,
+ `[({{range [(command: [F=[SI]])]}} [(action: [(command: [{{<.>}}])])])]`},
+ {"constants", "{{range .SI 1 -3.2i true false }}{{end}}", noError,
+ `[({{range [(command: [F=[SI] N=1 N=-3.2i B=true B=false])]}} [])]`},
+ {"template", "{{template `x` .Y}}", noError,
+ "[{{template S=`x` [(command: [F=[Y]])]}}]"},
+ {"with", "{{with .X}}hello{{end}}", noError,
+ `[({{with [(command: [F=[X]])]}} [(text: "hello")])]`},
+ {"with with else", "{{with .X}}hello{{else}}goodbye{{end}}", noError,
+ `[({{with [(command: [F=[X]])]}} [(text: "hello")] {{else}} [(text: "goodbye")])]`},
// Errors.
{"unclosed action", "hello{{range", hasError, ""},
- {"not a field", "hello{{range x}}{{end}}", hasError, ""},
{"missing end", "hello{{range .x}}", hasError, ""},
{"missing end after else", "hello{{range .x}}{{else}}", hasError, ""},
+ {"undefined function", "hello{{undefined}}", hasError, ""},
}
func TestParse(t *testing.T) {
@@ -163,7 +194,7 @@ func TestParse(t *testing.T) {
continue
case err != nil && !test.ok:
// expected error, got one
- if dumpErrors {
+ if *debug {
fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err)
}
continue
diff --git a/src/pkg/exp/template/set.go b/src/pkg/exp/template/set.go
new file mode 100644
index 000000000..492e270e1
--- /dev/null
+++ b/src/pkg/exp/template/set.go
@@ -0,0 +1,115 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+ "io"
+ "os"
+ "reflect"
+ "runtime"
+ "strconv"
+)
+
+// Set holds a set of related templates that can refer to one another by name.
+// A template may be a member of multiple sets.
+type Set struct {
+ tmpl map[string]*Template
+ funcs map[string]reflect.Value
+}
+
+// NewSet allocates a new, empty template set.
+func NewSet() *Set {
+ return &Set{
+ tmpl: make(map[string]*Template),
+ funcs: make(map[string]reflect.Value),
+ }
+}
+
+// Funcs adds to the set's function map the elements of the
+// argument map. It panics if a value in the map is not a function
+// with appropriate return type.
+// The return value is the set, so calls can be chained.
+func (s *Set) Funcs(funcMap FuncMap) *Set {
+ addFuncs(s.funcs, funcMap)
+ return s
+}
+
+// Add adds the argument templates to the set. It panics if the call
+// attempts to reuse a name defined in the template.
+// The return value is the set, so calls can be chained.
+func (s *Set) Add(templates ...*Template) *Set {
+ for _, t := range templates {
+ if _, ok := s.tmpl[t.name]; ok {
+ panic(fmt.Errorf("template: %q already defined in set", t.name))
+ }
+ s.tmpl[t.name] = t
+ }
+ return s
+}
+
+// Template returns the template with the given name in the set,
+// or nil if there is no such template.
+func (s *Set) Template(name string) *Template {
+ return s.tmpl[name]
+}
+
+// Execute looks for the named template in the set and then applies that
+// template to the specified data object, writing the output to wr. Nested
+// template invocations will be resolved from the set.
+func (s *Set) Execute(name string, wr io.Writer, data interface{}) os.Error {
+ tmpl := s.tmpl[name]
+ if tmpl == nil {
+ return fmt.Errorf("template: no template %q in set", name)
+ }
+ return tmpl.ExecuteInSet(wr, data, s)
+}
+
+// recover is the handler that turns panics into returns from the top
+// level of Parse.
+func (s *Set) recover(errp *os.Error) {
+ e := recover()
+ if e != nil {
+ if _, ok := e.(runtime.Error); ok {
+ panic(e)
+ }
+ s.tmpl = nil
+ *errp = e.(os.Error)
+ }
+ return
+}
+
+// Parse parses the file into a set of named templates.
+func (s *Set) Parse(text string) (err os.Error) {
+ defer s.recover(&err)
+ lex := lex("set", text)
+ const context = "define clause"
+ for {
+ t := New("set") // name will be updated once we know it.
+ t.startParse(s, lex)
+ // Expect EOF or "{{ define name }}".
+ if t.atEOF() {
+ return
+ }
+ t.expect(itemLeftDelim, context)
+ t.expect(itemDefine, context)
+ name := t.expect(itemString, context)
+ t.name, err = strconv.Unquote(name.val)
+ if err != nil {
+ t.error(err)
+ }
+ t.expect(itemRightDelim, context)
+ end := t.parse(false)
+ if end == nil {
+ t.errorf("unexpected EOF in %s", context)
+ }
+ if end.typ() != nodeEnd {
+ t.errorf("unexpected %s in %s", end, context)
+ }
+ t.stopParse()
+ s.tmpl[t.name] = t
+ }
+ return nil
+}
diff --git a/src/pkg/exp/template/set_test.go b/src/pkg/exp/template/set_test.go
new file mode 100644
index 000000000..c0115ec0a
--- /dev/null
+++ b/src/pkg/exp/template/set_test.go
@@ -0,0 +1,101 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "fmt"
+ "testing"
+)
+
+type setParseTest struct {
+ name string
+ input string
+ ok bool
+ names []string
+ results []string
+}
+
+var setParseTests = []setParseTest{
+ {"empty", "", noError,
+ nil,
+ nil},
+ {"one", `{{define "foo"}} FOO {{end}}`, noError,
+ []string{"foo"},
+ []string{`[(text: " FOO ")]`}},
+ {"two", `{{define "foo"}} FOO {{end}}{{define "bar"}} BAR {{end}}`, noError,
+ []string{"foo", "bar"},
+ []string{`[(text: " FOO ")]`, `[(text: " BAR ")]`}},
+ // errors
+ {"missing end", `{{define "foo"}} FOO `, hasError,
+ nil,
+ nil},
+ {"malformed name", `{{define "foo}} FOO `, hasError,
+ nil,
+ nil},
+}
+
+func TestSetParse(t *testing.T) {
+ for _, test := range setParseTests {
+ set := NewSet()
+ err := set.Parse(test.input)
+ switch {
+ case err == nil && !test.ok:
+ t.Errorf("%q: expected error; got none", test.name)
+ continue
+ case err != nil && test.ok:
+ t.Errorf("%q: unexpected error: %v", test.name, err)
+ continue
+ case err != nil && !test.ok:
+ // expected error, got one
+ if *debug {
+ fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err)
+ }
+ continue
+ }
+ if len(set.tmpl) != len(test.names) {
+ t.Errorf("%s: wrong number of templates; wanted %d got %d", test.name, len(test.names), len(set.tmpl))
+ continue
+ }
+ for i, name := range test.names {
+ tmpl, ok := set.tmpl[name]
+ if !ok {
+ t.Errorf("%s: can't find template %q", test.name, name)
+ continue
+ }
+ result := tmpl.root.String()
+ if result != test.results[i] {
+ t.Errorf("%s=(%q): got\n\t%v\nexpected\n\t%v", test.name, test.input, result, test.results[i])
+ }
+ }
+ }
+}
+
+
+var setExecTests = []execTest{
+ {"empty", "", "", nil, true},
+ {"text", "some text", "some text", nil, true},
+ {"invoke text", `{{template "text" .SI}}`, "TEXT", tVal, true},
+ {"invoke dot int", `{{template "dot" .I}}`, "17", tVal, true},
+ {"invoke dot []int", `{{template "dot" .SI}}`, "[3 4 5]", tVal, true},
+ {"invoke dotV", `{{template "dotV" .U}}`, "v", tVal, true},
+ {"invoke nested int", `{{template "nested" .I}}`, "17", tVal, true},
+}
+
+const setText = `
+ {{define "text"}}TEXT{{end}}
+ {{define "dotV"}}{{.V}}{{end}}
+ {{define "dot"}}{{.}}{{end}}
+ {{define "nested"}}{{template "dot" .}}{{end}}
+`
+
+func TestSetExecute(t *testing.T) {
+ // Declare a set with a couple of templates first.
+ set := NewSet()
+ err := set.Parse(setText)
+ if err != nil {
+ t.Fatalf("error parsing set: %s", err)
+ }
+ testExecute(setExecTests, set, t)
+}