diff options
Diffstat (limited to 'src/pkg/exp')
| -rw-r--r-- | src/pkg/exp/ogle/cmd.go | 2 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/Makefile | 3 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/compile.go | 264 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/parse.go | 754 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/parse_test.go | 310 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/prog.go | 182 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/prog_test.go | 91 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/regexp.go | 78 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/simplify.go | 151 | ||||
| -rw-r--r-- | src/pkg/exp/regexp/syntax/simplify_test.go | 151 | ||||
| -rw-r--r-- | src/pkg/exp/template/Makefile | 5 | ||||
| -rw-r--r-- | src/pkg/exp/template/exec.go | 508 | ||||
| -rw-r--r-- | src/pkg/exp/template/exec_test.go | 342 | ||||
| -rw-r--r-- | src/pkg/exp/template/funcs.go | 294 | ||||
| -rw-r--r-- | src/pkg/exp/template/lex.go | 180 | ||||
| -rw-r--r-- | src/pkg/exp/template/lex_test.go | 44 | ||||
| -rw-r--r-- | src/pkg/exp/template/parse.go | 467 | ||||
| -rw-r--r-- | src/pkg/exp/template/parse_test.go | 127 | ||||
| -rw-r--r-- | src/pkg/exp/template/set.go | 115 | ||||
| -rw-r--r-- | src/pkg/exp/template/set_test.go | 101 | 
20 files changed, 3739 insertions, 430 deletions
| diff --git a/src/pkg/exp/ogle/cmd.go b/src/pkg/exp/ogle/cmd.go index a8db523ea..ff0d24c69 100644 --- a/src/pkg/exp/ogle/cmd.go +++ b/src/pkg/exp/ogle/cmd.go @@ -154,7 +154,7 @@ func cmdLoad(args []byte) os.Error {  		}  		println("Attached to", pid)  	} else { -		parts := strings.Split(path, " ", -1) +		parts := strings.Split(path, " ")  		if len(parts) == 0 {  			fname = ""  		} else { diff --git a/src/pkg/exp/regexp/syntax/Makefile b/src/pkg/exp/regexp/syntax/Makefile index 8e0b4c1e6..97d4ad6ca 100644 --- a/src/pkg/exp/regexp/syntax/Makefile +++ b/src/pkg/exp/regexp/syntax/Makefile @@ -6,8 +6,11 @@ include ../../../../Make.inc  TARG=exp/regexp/syntax  GOFILES=\ +	compile.go\  	parse.go\  	perl_groups.go\ +	prog.go\  	regexp.go\ +	simplify.go\  include ../../../../Make.pkg diff --git a/src/pkg/exp/regexp/syntax/compile.go b/src/pkg/exp/regexp/syntax/compile.go new file mode 100644 index 000000000..ec9556fde --- /dev/null +++ b/src/pkg/exp/regexp/syntax/compile.go @@ -0,0 +1,264 @@ +package syntax + +import ( +	"os" +	"unicode" +) + +// A patchList is a list of instruction pointers that need to be filled in (patched). +// Because the pointers haven't been filled in yet, we can reuse their storage +// to hold the list.  It's kind of sleazy, but works well in practice. +// See http://swtch.com/~rsc/regexp/regexp1.html for inspiration. +//  +// These aren't really pointers: they're integers, so we can reinterpret them +// this way without using package unsafe.  A value l denotes +// p.inst[l>>1].Out (l&1==0) or .Arg (l&1==1).  +// l == 0 denotes the empty list, okay because we start every program +// with a fail instruction, so we'll never want to point at its output link. +type patchList uint32 + +func (l patchList) next(p *Prog) patchList { +	i := &p.Inst[l>>1] +	if l&1 == 0 { +		return patchList(i.Out) +	} +	return patchList(i.Arg) +} + +func (l patchList) patch(p *Prog, val uint32) { +	for l != 0 { +		i := &p.Inst[l>>1] +		if l&1 == 0 { +			l = patchList(i.Out) +			i.Out = val +		} else { +			l = patchList(i.Arg) +			i.Arg = val +		} +	} +} + +func (l1 patchList) append(p *Prog, l2 patchList) patchList { +	if l1 == 0 { +		return l2 +	} +	if l2 == 0 { +		return l1 +	} + +	last := l1 +	for { +		next := last.next(p) +		if next == 0 { +			break +		} +		last = next +	} + +	i := &p.Inst[last>>1] +	if last&1 == 0 { +		i.Out = uint32(l2) +	} else { +		i.Arg = uint32(l2) +	} +	return l1 +} + +// A frag represents a compiled program fragment. +type frag struct { +	i   uint32    // index of first instruction +	out patchList // where to record end instruction +} + +type compiler struct { +	p *Prog +} + +// Compile compiles the regexp into a program to be executed. +func Compile(re *Regexp) (*Prog, os.Error) { +	var c compiler +	c.init() +	f := c.compile(re) +	f.out.patch(c.p, c.inst(InstMatch).i) +	c.p.Start = int(f.i) +	return c.p, nil +} + +func (c *compiler) init() { +	c.p = new(Prog) +	c.inst(InstFail) +} + +var anyRuneNotNL = []int{0, '\n' - 1, '\n' - 1, unicode.MaxRune} +var anyRune = []int{0, unicode.MaxRune} + +func (c *compiler) compile(re *Regexp) frag { +	switch re.Op { +	case OpNoMatch: +		return c.fail() +	case OpEmptyMatch: +		return c.nop() +	case OpLiteral: +		if len(re.Rune) == 0 { +			return c.nop() +		} +		var f frag +		for j := range re.Rune { +			f1 := c.rune(re.Rune[j : j+1]) +			if j == 0 { +				f = f1 +			} else { +				f = c.cat(f, f1) +			} +		} +		return f +	case OpCharClass: +		return c.rune(re.Rune) +	case OpAnyCharNotNL: +		return c.rune(anyRuneNotNL) +	case OpAnyChar: +		return c.rune(anyRune) +	case OpBeginLine: +		return c.empty(EmptyBeginLine) +	case OpEndLine: +		return c.empty(EmptyEndLine) +	case OpBeginText: +		return c.empty(EmptyBeginText) +	case OpEndText: +		return c.empty(EmptyEndText) +	case OpWordBoundary: +		return c.empty(EmptyWordBoundary) +	case OpNoWordBoundary: +		return c.empty(EmptyNoWordBoundary) +	case OpCapture: +		bra := c.cap(uint32(re.Cap << 1)) +		sub := c.compile(re.Sub[0]) +		ket := c.cap(uint32(re.Cap<<1 | 1)) +		return c.cat(c.cat(bra, sub), ket) +	case OpStar: +		return c.star(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) +	case OpPlus: +		return c.plus(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) +	case OpQuest: +		return c.quest(c.compile(re.Sub[0]), re.Flags&NonGreedy != 0) +	case OpConcat: +		if len(re.Sub) == 0 { +			return c.nop() +		} +		var f frag +		for i, sub := range re.Sub { +			if i == 0 { +				f = c.compile(sub) +			} else { +				f = c.cat(f, c.compile(sub)) +			} +		} +		return f +	case OpAlternate: +		var f frag +		for _, sub := range re.Sub { +			f = c.alt(f, c.compile(sub)) +		} +		return f +	} +	panic("regexp: unhandled case in compile") +} + +func (c *compiler) inst(op InstOp) frag { +	// TODO: impose length limit +	f := frag{i: uint32(len(c.p.Inst))} +	c.p.Inst = append(c.p.Inst, Inst{Op: op}) +	return f +} + +func (c *compiler) nop() frag { +	f := c.inst(InstNop) +	f.out = patchList(f.i << 1) +	return f +} + +func (c *compiler) fail() frag { +	return frag{} +} + +func (c *compiler) cap(arg uint32) frag { +	f := c.inst(InstCapture) +	f.out = patchList(f.i << 1) +	c.p.Inst[f.i].Arg = arg +	return f +} + +func (c *compiler) cat(f1, f2 frag) frag { +	// concat of failure is failure +	if f1.i == 0 || f2.i == 0 { +		return frag{} +	} + +	// TODO: elide nop + +	f1.out.patch(c.p, f2.i) +	return frag{f1.i, f2.out} +} + +func (c *compiler) alt(f1, f2 frag) frag { +	// alt of failure is other +	if f1.i == 0 { +		return f2 +	} +	if f2.i == 0 { +		return f1 +	} + +	f := c.inst(InstAlt) +	i := &c.p.Inst[f.i] +	i.Out = f1.i +	i.Arg = f2.i +	f.out = f1.out.append(c.p, f2.out) +	return f +} + +func (c *compiler) quest(f1 frag, nongreedy bool) frag { +	f := c.inst(InstAlt) +	i := &c.p.Inst[f.i] +	if nongreedy { +		i.Arg = f1.i +		f.out = patchList(f.i << 1) +	} else { +		i.Out = f1.i +		f.out = patchList(f.i<<1 | 1) +	} +	f.out = f.out.append(c.p, f1.out) +	return f +} + +func (c *compiler) star(f1 frag, nongreedy bool) frag { +	f := c.inst(InstAlt) +	i := &c.p.Inst[f.i] +	if nongreedy { +		i.Arg = f1.i +		f.out = patchList(f.i << 1) +	} else { +		i.Out = f1.i +		f.out = patchList(f.i<<1 | 1) +	} +	f1.out.patch(c.p, f.i) +	return f +} + +func (c *compiler) plus(f1 frag, nongreedy bool) frag { +	return frag{f1.i, c.star(f1, nongreedy).out} +} + +func (c *compiler) empty(op EmptyOp) frag { +	f := c.inst(InstEmptyWidth) +	c.p.Inst[f.i].Arg = uint32(op) +	f.out = patchList(f.i << 1) +	return f +} + +func (c *compiler) rune(rune []int) frag { +	f := c.inst(InstRune) +	c.p.Inst[f.i].Rune = rune +	f.out = patchList(f.i << 1) +	return f +} diff --git a/src/pkg/exp/regexp/syntax/parse.go b/src/pkg/exp/regexp/syntax/parse.go index d04f25097..b6c91f7e1 100644 --- a/src/pkg/exp/regexp/syntax/parse.go +++ b/src/pkg/exp/regexp/syntax/parse.go @@ -79,28 +79,108 @@ const (  type parser struct {  	flags       Flags     // parse mode flags  	stack       []*Regexp // stack of parsed expressions -	numCap      int       // number of capturing groups seen +	free        *Regexp +	numCap      int // number of capturing groups seen  	wholeRegexp string +	tmpClass    []int // temporary char class work space +} + +func (p *parser) newRegexp(op Op) *Regexp { +	re := p.free +	if re != nil { +		p.free = re.Sub0[0] +		*re = Regexp{} +	} else { +		re = new(Regexp) +	} +	re.Op = op +	return re +} + +func (p *parser) reuse(re *Regexp) { +	re.Sub0[0] = p.free +	p.free = re  }  // Parse stack manipulation.  // push pushes the regexp re onto the parse stack and returns the regexp.  func (p *parser) push(re *Regexp) *Regexp { -	// TODO: automatic concatenation -	// TODO: turn character class into literal -	// TODO: compute simple +	if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] { +		// Single rune. +		if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) { +			return nil +		} +		re.Op = OpLiteral +		re.Rune = re.Rune[:1] +		re.Flags = p.flags &^ FoldCase +	} else if re.Op == OpCharClass && len(re.Rune) == 4 && +		re.Rune[0] == re.Rune[1] && re.Rune[2] == re.Rune[3] && +		unicode.SimpleFold(re.Rune[0]) == re.Rune[2] && +		unicode.SimpleFold(re.Rune[2]) == re.Rune[0] || +		re.Op == OpCharClass && len(re.Rune) == 2 && +			re.Rune[0]+1 == re.Rune[1] && +			unicode.SimpleFold(re.Rune[0]) == re.Rune[1] && +			unicode.SimpleFold(re.Rune[1]) == re.Rune[0] { +		// Case-insensitive rune like [Aa] or [Δδ]. +		if p.maybeConcat(re.Rune[0], p.flags|FoldCase) { +			return nil +		} + +		// Rewrite as (case-insensitive) literal. +		re.Op = OpLiteral +		re.Rune = re.Rune[:1] +		re.Flags = p.flags | FoldCase +	} else { +		// Incremental concatenation. +		p.maybeConcat(-1, 0) +	}  	p.stack = append(p.stack, re)  	return re  } -// newLiteral returns a new OpLiteral Regexp with the given flags -func newLiteral(r int, flags Flags) *Regexp { -	re := &Regexp{ -		Op:    OpLiteral, -		Flags: flags, +// maybeConcat implements incremental concatenation +// of literal runes into string nodes.  The parser calls this +// before each push, so only the top fragment of the stack +// might need processing.  Since this is called before a push, +// the topmost literal is no longer subject to operators like * +// (Otherwise ab* would turn into (ab)*.) +// If r >= 0 and there's a node left over, maybeConcat uses it +// to push r with the given flags. +// maybeConcat reports whether r was pushed. +func (p *parser) maybeConcat(r int, flags Flags) bool { +	n := len(p.stack) +	if n < 2 { +		return false  	} + +	re1 := p.stack[n-1] +	re2 := p.stack[n-2] +	if re1.Op != OpLiteral || re2.Op != OpLiteral || re1.Flags&FoldCase != re2.Flags&FoldCase { +		return false +	} + +	// Push re1 into re2. +	re2.Rune = append(re2.Rune, re1.Rune...) + +	// Reuse re1 if possible. +	if r >= 0 { +		re1.Rune = re1.Rune0[:1] +		re1.Rune[0] = r +		re1.Flags = flags +		return true +	} + +	p.stack = p.stack[:n-1] +	p.reuse(re1) +	return false // did not push r +} + +// newLiteral returns a new OpLiteral Regexp with the given flags +func (p *parser) newLiteral(r int, flags Flags) *Regexp { +	re := p.newRegexp(OpLiteral) +	re.Flags = flags  	re.Rune0[0] = r  	re.Rune = re.Rune0[:1]  	return re @@ -108,14 +188,16 @@ func newLiteral(r int, flags Flags) *Regexp {  // literal pushes a literal regexp for the rune r on the stack  // and returns that regexp. -func (p *parser) literal(r int) *Regexp { -	return p.push(newLiteral(r, p.flags)) +func (p *parser) literal(r int) { +	p.push(p.newLiteral(r, p.flags))  }  // op pushes a regexp with the given op onto the stack  // and returns that regexp.  func (p *parser) op(op Op) *Regexp { -	return p.push(&Regexp{Op: op, Flags: p.flags}) +	re := p.newRegexp(op) +	re.Flags = p.flags +	return p.push(re)  }  // repeat replaces the top stack element with itself repeated @@ -139,12 +221,10 @@ func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (strin  		return "", &Error{ErrMissingRepeatArgument, opstr}  	}  	sub := p.stack[n-1] -	re := &Regexp{ -		Op:    op, -		Min:   min, -		Max:   max, -		Flags: flags, -	} +	re := p.newRegexp(op) +	re.Min = min +	re.Max = max +	re.Flags = flags  	re.Sub = re.Sub0[:1]  	re.Sub[0] = sub  	p.stack[n-1] = re @@ -153,60 +233,385 @@ func (p *parser) repeat(op Op, min, max int, opstr, t, lastRepeat string) (strin  // concat replaces the top of the stack (above the topmost '|' or '(') with its concatenation.  func (p *parser) concat() *Regexp { -	// TODO: Flatten concats. +	p.maybeConcat(-1, 0)  	// Scan down to find pseudo-operator | or (.  	i := len(p.stack)  	for i > 0 && p.stack[i-1].Op < opPseudo {  		i--  	} -	sub := p.stack[i:] +	subs := p.stack[i:]  	p.stack = p.stack[:i] -	var re *Regexp -	switch len(sub) { -	case 0: -		re = &Regexp{Op: OpEmptyMatch} -	case 1: -		re = sub[0] -	default: -		re = &Regexp{Op: OpConcat} -		re.Sub = append(re.Sub0[:0], sub...) +	// Empty concatenation is special case. +	if len(subs) == 0 { +		return p.push(p.newRegexp(OpEmptyMatch))  	} -	return p.push(re) + +	return p.push(p.collapse(subs, OpConcat))  }  // alternate replaces the top of the stack (above the topmost '(') with its alternation.  func (p *parser) alternate() *Regexp { -	// TODO: Flatten alternates. -  	// Scan down to find pseudo-operator (.  	// There are no | above (.  	i := len(p.stack)  	for i > 0 && p.stack[i-1].Op < opPseudo {  		i--  	} -	sub := p.stack[i:] +	subs := p.stack[i:]  	p.stack = p.stack[:i] -	var re *Regexp -	switch len(sub) { -	case 0: -		re = &Regexp{Op: OpNoMatch} -	case 1: -		re = sub[0] -	default: -		re = &Regexp{Op: OpAlternate} -		re.Sub = append(re.Sub0[:0], sub...) +	// Make sure top class is clean. +	// All the others already are (see swapVerticalBar). +	if len(subs) > 0 { +		cleanAlt(subs[len(subs)-1])  	} -	return p.push(re) + +	// Empty alternate is special case +	// (shouldn't happen but easy to handle). +	if len(subs) == 0 { +		return p.push(p.newRegexp(OpNoMatch)) +	} + +	return p.push(p.collapse(subs, OpAlternate))  } -func literalRegexp(s string, flags Flags) *Regexp { -	re := &Regexp{ -		Op:    OpLiteral, -		Flags: flags, +// cleanAlt cleans re for eventual inclusion in an alternation. +func cleanAlt(re *Regexp) { +	switch re.Op { +	case OpCharClass: +		re.Rune = cleanClass(&re.Rune) +		if len(re.Rune) == 2 && re.Rune[0] == 0 && re.Rune[1] == unicode.MaxRune { +			re.Rune = nil +			re.Op = OpAnyChar +			return +		} +		if len(re.Rune) == 4 && re.Rune[0] == 0 && re.Rune[1] == '\n'-1 && re.Rune[2] == '\n'+1 && re.Rune[3] == unicode.MaxRune { +			re.Rune = nil +			re.Op = OpAnyCharNotNL +			return +		} +		if cap(re.Rune)-len(re.Rune) > 100 { +			// re.Rune will not grow any more. +			// Make a copy or inline to reclaim storage. +			re.Rune = append(re.Rune0[:0], re.Rune...) +		} +	} +} + +// collapse returns the result of applying op to sub. +// If sub contains op nodes, they all get hoisted up +// so that there is never a concat of a concat or an +// alternate of an alternate. +func (p *parser) collapse(subs []*Regexp, op Op) *Regexp { +	if len(subs) == 1 { +		return subs[0] +	} +	re := p.newRegexp(op) +	re.Sub = re.Sub0[:0] +	for _, sub := range subs { +		if sub.Op == op { +			re.Sub = append(re.Sub, sub.Sub...) +			p.reuse(sub) +		} else { +			re.Sub = append(re.Sub, sub) +		} +	} +	if op == OpAlternate { +		re.Sub = p.factor(re.Sub, re.Flags) +		if len(re.Sub) == 1 { +			old := re +			re = re.Sub[0] +			p.reuse(old) +		} +	} +	return re +} + +// factor factors common prefixes from the alternation list sub. +// It returns a replacement list that reuses the same storage and +// frees (passes to p.reuse) any removed *Regexps. +// +// For example, +//     ABC|ABD|AEF|BCX|BCY +// simplifies by literal prefix extraction to +//     A(B(C|D)|EF)|BC(X|Y) +// which simplifies by character class introduction to +//     A(B[CD]|EF)|BC[XY] +// +func (p *parser) factor(sub []*Regexp, flags Flags) []*Regexp { +	if len(sub) < 2 { +		return sub +	} + +	// Round 1: Factor out common literal prefixes. +	var str []int +	var strflags Flags +	start := 0 +	out := sub[:0] +	for i := 0; i <= len(sub); i++ { +		// Invariant: the Regexps that were in sub[0:start] have been +		// used or marked for reuse, and the slice space has been reused +		// for out (len(out) <= start). +		// +		// Invariant: sub[start:i] consists of regexps that all begin +		// with str as modified by strflags. +		var istr []int +		var iflags Flags +		if i < len(sub) { +			istr, iflags = p.leadingString(sub[i]) +			if iflags == strflags { +				same := 0 +				for same < len(str) && same < len(istr) && str[same] == istr[same] { +					same++ +				} +				if same > 0 { +					// Matches at least one rune in current range. +					// Keep going around. +					str = str[:same] +					continue +				} +			} +		} + +		// Found end of a run with common leading literal string: +		// sub[start:i] all begin with str[0:len(str)], but sub[i] +		// does not even begin with str[0]. +		// +		// Factor out common string and append factored expression to out. +		if i == start { +			// Nothing to do - run of length 0. +		} else if i == start+1 { +			// Just one: don't bother factoring. +			out = append(out, sub[start]) +		} else { +			// Construct factored form: prefix(suffix1|suffix2|...) +			prefix := p.newRegexp(OpLiteral) +			prefix.Flags = strflags +			prefix.Rune = append(prefix.Rune[:0], str...) + +			for j := start; j < i; j++ { +				sub[j] = p.removeLeadingString(sub[j], len(str)) +			} +			suffix := p.collapse(sub[start:i], OpAlternate) // recurse + +			re := p.newRegexp(OpConcat) +			re.Sub = append(re.Sub[:0], prefix, suffix) +			out = append(out, re) +		} + +		// Prepare for next iteration. +		start = i +		str = istr +		strflags = iflags +	} +	sub = out + +	// Round 2: Factor out common complex prefixes, +	// just the first piece of each concatenation, +	// whatever it is.  This is good enough a lot of the time. +	start = 0 +	out = sub[:0] +	var first *Regexp +	for i := 0; i <= len(sub); i++ { +		// Invariant: the Regexps that were in sub[0:start] have been +		// used or marked for reuse, and the slice space has been reused +		// for out (len(out) <= start). +		// +		// Invariant: sub[start:i] consists of regexps that all begin +		// with str as modified by strflags. +		var ifirst *Regexp +		if i < len(sub) { +			ifirst = p.leadingRegexp(sub[i]) +			if first != nil && first.Equal(ifirst) { +				continue +			} +		} + +		// Found end of a run with common leading regexp: +		// sub[start:i] all begin with first but sub[i] does not. +		// +		// Factor out common regexp and append factored expression to out. +		if i == start { +			// Nothing to do - run of length 0. +		} else if i == start+1 { +			// Just one: don't bother factoring. +			out = append(out, sub[start]) +		} else { +			// Construct factored form: prefix(suffix1|suffix2|...) +			prefix := first + +			for j := start; j < i; j++ { +				reuse := j != start // prefix came from sub[start]  +				sub[j] = p.removeLeadingRegexp(sub[j], reuse) +			} +			suffix := p.collapse(sub[start:i], OpAlternate) // recurse + +			re := p.newRegexp(OpConcat) +			re.Sub = append(re.Sub[:0], prefix, suffix) +			out = append(out, re) +		} + +		// Prepare for next iteration. +		start = i +		first = ifirst +	} +	sub = out + +	// Round 3: Collapse runs of single literals into character classes. +	start = 0 +	out = sub[:0] +	for i := 0; i <= len(sub); i++ { +		// Invariant: the Regexps that were in sub[0:start] have been +		// used or marked for reuse, and the slice space has been reused +		// for out (len(out) <= start). +		// +		// Invariant: sub[start:i] consists of regexps that are either +		// literal runes or character classes. +		if i < len(sub) && isCharClass(sub[i]) { +			continue +		} + +		// sub[i] is not a char or char class; +		// emit char class for sub[start:i]... +		if i == start { +			// Nothing to do - run of length 0. +		} else if i == start+1 { +			out = append(out, sub[start]) +		} else { +			// Make new char class. +			// Start with most complex regexp in sub[start]. +			max := start +			for j := start + 1; j < i; j++ { +				if sub[max].Op < sub[j].Op || sub[max].Op == sub[j].Op && len(sub[max].Rune) < len(sub[j].Rune) { +					max = j +				} +			} +			sub[start], sub[max] = sub[max], sub[start] + +			for j := start + 1; j < i; j++ { +				mergeCharClass(sub[start], sub[j]) +				p.reuse(sub[j]) +			} +			cleanAlt(sub[start]) +			out = append(out, sub[start]) +		} + +		// ... and then emit sub[i]. +		if i < len(sub) { +			out = append(out, sub[i]) +		} +		start = i + 1 +	} +	sub = out + +	// Round 4: Collapse runs of empty matches into a single empty match. +	start = 0 +	out = sub[:0] +	for i := range sub { +		if i+1 < len(sub) && sub[i].Op == OpEmptyMatch && sub[i+1].Op == OpEmptyMatch { +			continue +		} +		out = append(out, sub[i]) +	} +	sub = out + +	return sub +} + +// leadingString returns the leading literal string that re begins with. +// The string refers to storage in re or its children. +func (p *parser) leadingString(re *Regexp) ([]int, Flags) { +	if re.Op == OpConcat && len(re.Sub) > 0 { +		re = re.Sub[0] +	} +	if re.Op != OpLiteral { +		return nil, 0 +	} +	return re.Rune, re.Flags & FoldCase +} + +// removeLeadingString removes the first n leading runes +// from the beginning of re.  It returns the replacement for re. +func (p *parser) removeLeadingString(re *Regexp, n int) *Regexp { +	if re.Op == OpConcat && len(re.Sub) > 0 { +		// Removing a leading string in a concatenation +		// might simplify the concatenation. +		sub := re.Sub[0] +		sub = p.removeLeadingString(sub, n) +		re.Sub[0] = sub +		if sub.Op == OpEmptyMatch { +			p.reuse(sub) +			switch len(re.Sub) { +			case 0, 1: +				// Impossible but handle. +				re.Op = OpEmptyMatch +				re.Sub = nil +			case 2: +				old := re +				re = re.Sub[1] +				p.reuse(old) +			default: +				copy(re.Sub, re.Sub[1:]) +				re.Sub = re.Sub[:len(re.Sub)-1] +			} +		} +		return re  	} + +	if re.Op == OpLiteral { +		re.Rune = re.Rune[:copy(re.Rune, re.Rune[n:])] +		if len(re.Rune) == 0 { +			re.Op = OpEmptyMatch +		} +	} +	return re +} + +// leadingRegexp returns the leading regexp that re begins with. +// The regexp refers to storage in re or its children. +func (p *parser) leadingRegexp(re *Regexp) *Regexp { +	if re.Op == OpEmptyMatch { +		return nil +	} +	if re.Op == OpConcat && len(re.Sub) > 0 { +		sub := re.Sub[0] +		if sub.Op == OpEmptyMatch { +			return nil +		} +		return sub +	} +	return re +} + +// removeLeadingRegexp removes the leading regexp in re. +// It returns the replacement for re. +// If reuse is true, it passes the removed regexp (if no longer needed) to p.reuse. +func (p *parser) removeLeadingRegexp(re *Regexp, reuse bool) *Regexp { +	if re.Op == OpConcat && len(re.Sub) > 0 { +		if reuse { +			p.reuse(re.Sub[0]) +		} +		re.Sub = re.Sub[:copy(re.Sub, re.Sub[1:])] +		switch len(re.Sub) { +		case 0: +			re.Op = OpEmptyMatch +			re.Sub = nil +		case 1: +			old := re +			re = re.Sub[0] +			p.reuse(old) +		} +		return re +	} +	re.Op = OpEmptyMatch +	return re +} + +func literalRegexp(s string, flags Flags) *Regexp { +	re := &Regexp{Op: OpLiteral} +	re.Flags = flags  	re.Rune = re.Rune0[:0] // use local storage for small strings  	for _, c := range s {  		if len(re.Rune) >= cap(re.Rune) { @@ -264,7 +669,6 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {  			p.op(opLeftParen).Cap = p.numCap  			t = t[1:]  		case '|': -			p.concat()  			if err = p.parseVerticalBar(); err != nil {  				return nil, err  			} @@ -360,7 +764,8 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {  				}  			} -			re := &Regexp{Op: OpCharClass, Flags: p.flags} +			re := p.newRegexp(OpCharClass) +			re.Flags = p.flags  			// Look for Unicode character group like \p{Han}  			if len(t) >= 2 && (t[1] == 'p' || t[1] == 'P') { @@ -371,7 +776,6 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {  				if r != nil {  					re.Rune = r  					t = rest -					// TODO: Handle FoldCase flag.  					p.push(re)  					break BigSwitch  				} @@ -381,12 +785,10 @@ func Parse(s string, flags Flags) (*Regexp, os.Error) {  			if r, rest := p.parsePerlClassEscape(t, re.Rune0[:0]); r != nil {  				re.Rune = r  				t = rest -				// TODO: Handle FoldCase flag.  				p.push(re)  				break BigSwitch  			} - -			// TODO: Give re back to parser's pool. +			p.reuse(re)  			// Ordinary single-character escape.  			if c, t, err = p.parseEscape(t); err != nil { @@ -592,6 +994,35 @@ func (p *parser) parseInt(s string) (n int, rest string, ok bool) {  	return  } +// can this be represented as a character class? +// single-rune literal string, char class, ., and .|\n. +func isCharClass(re *Regexp) bool { +	return re.Op == OpLiteral && len(re.Rune) == 1 || +		re.Op == OpCharClass || +		re.Op == OpAnyCharNotNL || +		re.Op == OpAnyChar +} + +// does re match r? +func matchRune(re *Regexp, r int) bool { +	switch re.Op { +	case OpLiteral: +		return len(re.Rune) == 1 && re.Rune[0] == r +	case OpCharClass: +		for i := 0; i < len(re.Rune); i += 2 { +			if re.Rune[i] <= r && r <= re.Rune[i+1] { +				return true +			} +		} +		return false +	case OpAnyCharNotNL: +		return r != '\n' +	case OpAnyChar: +		return true +	} +	return false +} +  // parseVerticalBar handles a | in the input.  func (p *parser) parseVerticalBar() os.Error {  	p.concat() @@ -607,14 +1038,66 @@ func (p *parser) parseVerticalBar() os.Error {  	return nil  } +// mergeCharClass makes dst = dst|src. +// The caller must ensure that dst.Op >= src.Op, +// to reduce the amount of copying. +func mergeCharClass(dst, src *Regexp) { +	switch dst.Op { +	case OpAnyChar: +		// src doesn't add anything. +	case OpAnyCharNotNL: +		// src might add \n +		if matchRune(src, '\n') { +			dst.Op = OpAnyChar +		} +	case OpCharClass: +		// src is simpler, so either literal or char class +		if src.Op == OpLiteral { +			dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0]) +		} else { +			dst.Rune = appendClass(dst.Rune, src.Rune) +		} +	case OpLiteral: +		// both literal +		if src.Rune[0] == dst.Rune[0] { +			break +		} +		dst.Op = OpCharClass +		dst.Rune = append(dst.Rune, dst.Rune[0]) +		dst.Rune = appendRange(dst.Rune, src.Rune[0], src.Rune[0]) +	} +} +  // If the top of the stack is an element followed by an opVerticalBar  // swapVerticalBar swaps the two and returns true.  // Otherwise it returns false.  func (p *parser) swapVerticalBar() bool { -	if n := len(p.stack); n >= 2 { +	// If above and below vertical bar are literal or char class, +	// can merge into a single char class. +	n := len(p.stack) +	if n >= 3 && p.stack[n-2].Op == opVerticalBar && isCharClass(p.stack[n-1]) && isCharClass(p.stack[n-3]) { +		re1 := p.stack[n-1] +		re3 := p.stack[n-3] +		// Make re3 the more complex of the two. +		if re1.Op > re3.Op { +			re1, re3 = re3, re1 +			p.stack[n-3] = re3 +		} +		mergeCharClass(re3, re1) +		p.reuse(re1) +		p.stack = p.stack[:n-1] +		return true +	} + +	if n >= 2 {  		re1 := p.stack[n-1]  		re2 := p.stack[n-2]  		if re2.Op == opVerticalBar { +			if n >= 3 { +				// Now out of reach. +				// Clean opportunistically. +				cleanAlt(p.stack[n-3]) +			}  			p.stack[n-2] = re1  			p.stack[n-1] = re2  			return true @@ -729,6 +1212,7 @@ Switch:  				if r > unicode.MaxRune {  					break Switch  				} +				nhex++  			}  			if nhex == 0 {  				break Switch @@ -801,12 +1285,7 @@ func (p *parser) parsePerlClassEscape(s string, r []int) (out []int, rest string  	if g.sign == 0 {  		return  	} -	if g.sign < 0 { -		r = appendNegatedClass(r, g.class) -	} else { -		r = appendClass(r, g.class) -	} -	return r, s[2:] +	return p.appendGroup(r, g), s[2:]  }  // parseNamedClass parses a leading POSIX named character class like [:alnum:] @@ -827,23 +1306,40 @@ func (p *parser) parseNamedClass(s string, r []int) (out []int, rest string, err  	if g.sign == 0 {  		return nil, "", &Error{ErrInvalidCharRange, name}  	} -	if g.sign < 0 { -		r = appendNegatedClass(r, g.class) +	return p.appendGroup(r, g), s, nil +} + +func (p *parser) appendGroup(r []int, g charGroup) []int { +	if p.flags&FoldCase == 0 { +		if g.sign < 0 { +			r = appendNegatedClass(r, g.class) +		} else { +			r = appendClass(r, g.class) +		}  	} else { -		r = appendClass(r, g.class) +		tmp := p.tmpClass[:0] +		tmp = appendFoldedClass(tmp, g.class) +		p.tmpClass = tmp +		tmp = cleanClass(&p.tmpClass) +		if g.sign < 0 { +			r = appendNegatedClass(r, tmp) +		} else { +			r = appendClass(r, tmp) +		}  	} -	return r, s, nil +	return r  } -// unicodeTable returns the unicode.RangeTable identified by name. -func unicodeTable(name string) *unicode.RangeTable { +// unicodeTable returns the unicode.RangeTable identified by name +// and the table of additional fold-equivalent code points. +func unicodeTable(name string) (*unicode.RangeTable, *unicode.RangeTable) {  	if t := unicode.Categories[name]; t != nil { -		return t +		return t, unicode.FoldCategory[name]  	}  	if t := unicode.Scripts[name]; t != nil { -		return t +		return t, unicode.FoldScript[name]  	} -	return nil +	return nil, nil  }  // parseUnicodeClass parses a leading Unicode character class like \p{Han} @@ -891,14 +1387,31 @@ func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, e  		name = name[1:]  	} -	tab := unicodeTable(name) +	tab, fold := unicodeTable(name)  	if tab == nil {  		return nil, "", &Error{ErrInvalidCharRange, seq}  	} -	if sign > 0 { -		r = appendTable(r, tab) + +	if p.flags&FoldCase == 0 || fold == nil { +		if sign > 0 { +			r = appendTable(r, tab) +		} else { +			r = appendNegatedTable(r, tab) +		}  	} else { -		r = appendNegatedTable(r, tab) +		// Merge and clean tab and fold in a temporary buffer. +		// This is necessary for the negative case and just tidy +		// for the positive case. +		tmp := p.tmpClass[:0] +		tmp = appendTable(tmp, tab) +		tmp = appendTable(tmp, fold) +		p.tmpClass = tmp +		tmp = cleanClass(&p.tmpClass) +		if sign > 0 { +			r = appendClass(r, tmp) +		} else { +			r = appendNegatedClass(r, tmp) +		}  	}  	return r, t, nil  } @@ -907,7 +1420,8 @@ func (p *parser) parseUnicodeClass(s string, r []int) (out []int, rest string, e  // and pushes it onto the parse stack.  func (p *parser) parseClass(s string) (rest string, err os.Error) {  	t := s[1:] // chop [ -	re := &Regexp{Op: OpCharClass, Flags: p.flags} +	re := p.newRegexp(OpCharClass) +	re.Flags = p.flags  	re.Rune = re.Rune0[:0]  	sign := +1 @@ -979,12 +1493,14 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {  				return "", &Error{Code: ErrInvalidCharRange, Expr: rng}  			}  		} -		class = appendRange(class, lo, hi) +		if p.flags&FoldCase == 0 { +			class = appendRange(class, lo, hi) +		} else { +			class = appendFoldedRange(class, lo, hi) +		}  	}  	t = t[1:] // chop ] -	// TODO: Handle FoldCase flag. -  	// Use &re.Rune instead of &class to avoid allocation.  	re.Rune = class  	class = cleanClass(&re.Rune) @@ -999,10 +1515,15 @@ func (p *parser) parseClass(s string) (rest string, err os.Error) {  // cleanClass sorts the ranges (pairs of elements of r),  // merges them, and eliminates duplicates.  func cleanClass(rp *[]int) []int { +  	// Sort by lo increasing, hi decreasing to break ties.  	sort.Sort(ranges{rp})  	r := *rp +	if len(r) < 2 { +		return r +	} +  	// Merge abutting, overlapping.  	w := 2 // write index  	for i := 2; i < len(r); i += 2 { @@ -1025,23 +1546,71 @@ func cleanClass(rp *[]int) []int {  // appendRange returns the result of appending the range lo-hi to the class r.  func appendRange(r []int, lo, hi int) []int { -	// Expand last range if overlaps or abuts. -	if n := len(r); n > 0 { -		rlo, rhi := r[n-2], r[n-1] -		if lo <= rhi+1 && rlo <= hi+1 { -			if lo < rlo { -				r[n-2] = lo -			} -			if hi > rhi { -				r[n-1] = hi +	// Expand last range or next to last range if it overlaps or abuts. +	// Checking two ranges helps when appending case-folded +	// alphabets, so that one range can be expanding A-Z and the +	// other expanding a-z. +	n := len(r) +	for i := 2; i <= 4; i += 2 { // twice, using i=2, i=4 +		if n >= i { +			rlo, rhi := r[n-i], r[n-i+1] +			if lo <= rhi+1 && rlo <= hi+1 { +				if lo < rlo { +					r[n-i] = lo +				} +				if hi > rhi { +					r[n-i+1] = hi +				} +				return r  			} -			return r  		}  	}  	return append(r, lo, hi)  } +const ( +	// minimum and maximum runes involved in folding. +	// checked during test. +	minFold = 0x0041 +	maxFold = 0x1044f +) + +// appendFoldedRange returns the result of appending the range lo-hi +// and its case folding-equivalent runes to the class r. +func appendFoldedRange(r []int, lo, hi int) []int { +	// Optimizations. +	if lo <= minFold && hi >= maxFold { +		// Range is full: folding can't add more. +		return appendRange(r, lo, hi) +	} +	if hi < minFold || lo > maxFold { +		// Range is outside folding possibilities. +		return appendRange(r, lo, hi) +	} +	if lo < minFold { +		// [lo, minFold-1] needs no folding. +		r = appendRange(r, lo, minFold-1) +		lo = minFold +	} +	if hi > maxFold { +		// [maxFold+1, hi] needs no folding. +		r = appendRange(r, maxFold+1, hi) +		hi = maxFold +	} + +	// Brute force.  Depend on appendRange to coalesce ranges on the fly. +	for c := lo; c <= hi; c++ { +		r = appendRange(r, c, c) +		f := unicode.SimpleFold(c) +		for f != c { +			r = appendRange(r, f, f) +			f = unicode.SimpleFold(f) +		} +	} +	return r +} +  // appendClass returns the result of appending the class x to the class r.  // It assume x is clean.  func appendClass(r []int, x []int) []int { @@ -1051,6 +1620,14 @@ func appendClass(r []int, x []int) []int {  	return r  } +// appendFolded returns the result of appending the case folding of the class x to the class r. +func appendFoldedClass(r []int, x []int) []int { +	for i := 0; i < len(x); i += 2 { +		r = appendFoldedRange(r, x[i], x[i+1]) +	} +	return r +} +  // appendNegatedClass returns the result of appending the negation of the class x to the class r.  // It assumes x is clean.  func appendNegatedClass(r []int, x []int) []int { @@ -1148,10 +1725,11 @@ func negateClass(r []int) []int {  		}  		nextLo = hi + 1  	} +	r = r[:w]  	if nextLo <= unicode.MaxRune {  		// It's possible for the negation to have one more  		// range - this one - than the original class, so use append. -		r = append(r[:w], nextLo, unicode.MaxRune) +		r = append(r, nextLo, unicode.MaxRune)  	}  	return r  } diff --git a/src/pkg/exp/regexp/syntax/parse_test.go b/src/pkg/exp/regexp/syntax/parse_test.go index b52cab1a1..779b9afde 100644 --- a/src/pkg/exp/regexp/syntax/parse_test.go +++ b/src/pkg/exp/regexp/syntax/parse_test.go @@ -16,141 +16,152 @@ var parseTests = []struct {  	Dump   string  }{  	// Base cases -	{"a", "lit{a}"}, -	{"a.", "cat{lit{a}dot{}}"}, -	{"a.b", "cat{lit{a}dot{}lit{b}}"}, -	//	{ "ab", "str{ab}" }, -	{"ab", "cat{lit{a}lit{b}}"}, -	{"a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}"}, -	//	{ "abc", "str{abc}" }, -	{"abc", "cat{lit{a}lit{b}lit{c}}"}, -	{"a|^", "alt{lit{a}bol{}}"}, -	//	{ "a|b", "cc{0x61-0x62}" }, -	{"a|b", "alt{lit{a}lit{b}}"}, -	{"(a)", "cap{lit{a}}"}, -	{"(a)|b", "alt{cap{lit{a}}lit{b}}"}, -	{"a*", "star{lit{a}}"}, -	{"a+", "plus{lit{a}}"}, -	{"a?", "que{lit{a}}"}, -	{"a{2}", "rep{2,2 lit{a}}"}, -	{"a{2,3}", "rep{2,3 lit{a}}"}, -	{"a{2,}", "rep{2,-1 lit{a}}"}, -	{"a*?", "nstar{lit{a}}"}, -	{"a+?", "nplus{lit{a}}"}, -	{"a??", "nque{lit{a}}"}, -	{"a{2}?", "nrep{2,2 lit{a}}"}, -	{"a{2,3}?", "nrep{2,3 lit{a}}"}, -	{"a{2,}?", "nrep{2,-1 lit{a}}"}, -	{"", "emp{}"}, -	//	{ "|", "emp{}" },  // alt{emp{}emp{}} but got factored -	{"|", "alt{emp{}emp{}}"}, -	{"|x|", "alt{emp{}lit{x}emp{}}"}, -	{".", "dot{}"}, -	{"^", "bol{}"}, -	{"$", "eol{}"}, -	{"\\|", "lit{|}"}, -	{"\\(", "lit{(}"}, -	{"\\)", "lit{)}"}, -	{"\\*", "lit{*}"}, -	{"\\+", "lit{+}"}, -	{"\\?", "lit{?}"}, -	{"{", "lit{{}"}, -	{"}", "lit{}}"}, -	{"\\.", "lit{.}"}, -	{"\\^", "lit{^}"}, -	{"\\$", "lit{$}"}, -	{"\\\\", "lit{\\}"}, -	{"[ace]", "cc{0x61 0x63 0x65}"}, -	{"[abc]", "cc{0x61-0x63}"}, -	{"[a-z]", "cc{0x61-0x7a}"}, -	//	{ "[a]", "lit{a}" }, -	{"[a]", "cc{0x61}"}, -	{"\\-", "lit{-}"}, -	{"-", "lit{-}"}, -	{"\\_", "lit{_}"}, +	{`a`, `lit{a}`}, +	{`a.`, `cat{lit{a}dot{}}`}, +	{`a.b`, `cat{lit{a}dot{}lit{b}}`}, +	{`ab`, `str{ab}`}, +	{`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`}, +	{`abc`, `str{abc}`}, +	{`a|^`, `alt{lit{a}bol{}}`}, +	{`a|b`, `cc{0x61-0x62}`}, +	{`(a)`, `cap{lit{a}}`}, +	{`(a)|b`, `alt{cap{lit{a}}lit{b}}`}, +	{`a*`, `star{lit{a}}`}, +	{`a+`, `plus{lit{a}}`}, +	{`a?`, `que{lit{a}}`}, +	{`a{2}`, `rep{2,2 lit{a}}`}, +	{`a{2,3}`, `rep{2,3 lit{a}}`}, +	{`a{2,}`, `rep{2,-1 lit{a}}`}, +	{`a*?`, `nstar{lit{a}}`}, +	{`a+?`, `nplus{lit{a}}`}, +	{`a??`, `nque{lit{a}}`}, +	{`a{2}?`, `nrep{2,2 lit{a}}`}, +	{`a{2,3}?`, `nrep{2,3 lit{a}}`}, +	{`a{2,}?`, `nrep{2,-1 lit{a}}`}, +	{``, `emp{}`}, +	{`|`, `emp{}`}, // alt{emp{}emp{}} but got factored +	{`|x|`, `alt{emp{}lit{x}emp{}}`}, +	{`.`, `dot{}`}, +	{`^`, `bol{}`}, +	{`$`, `eol{}`}, +	{`\|`, `lit{|}`}, +	{`\(`, `lit{(}`}, +	{`\)`, `lit{)}`}, +	{`\*`, `lit{*}`}, +	{`\+`, `lit{+}`}, +	{`\?`, `lit{?}`}, +	{`{`, `lit{{}`}, +	{`}`, `lit{}}`}, +	{`\.`, `lit{.}`}, +	{`\^`, `lit{^}`}, +	{`\$`, `lit{$}`}, +	{`\\`, `lit{\}`}, +	{`[ace]`, `cc{0x61 0x63 0x65}`}, +	{`[abc]`, `cc{0x61-0x63}`}, +	{`[a-z]`, `cc{0x61-0x7a}`}, +	{`[a]`, `lit{a}`}, +	{`\-`, `lit{-}`}, +	{`-`, `lit{-}`}, +	{`\_`, `lit{_}`}, +	{`abc`, `str{abc}`}, +	{`abc|def`, `alt{str{abc}str{def}}`}, +	{`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},  	// Posix and Perl extensions -	{"[[:lower:]]", "cc{0x61-0x7a}"}, -	{"[a-z]", "cc{0x61-0x7a}"}, -	{"[^[:lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"}, -	{"[[:^lower:]]", "cc{0x0-0x60 0x7b-0x10ffff}"}, -	//	{ "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, -	//	{ "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" }, -	//	{ "(?i)[^[:lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, -	//	{ "(?i)[[:^lower:]]", "cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, -	{"\\d", "cc{0x30-0x39}"}, -	{"\\D", "cc{0x0-0x2f 0x3a-0x10ffff}"}, -	{"\\s", "cc{0x9-0xa 0xc-0xd 0x20}"}, -	{"\\S", "cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}"}, -	{"\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}"}, -	{"\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}"}, -	//	{ "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" }, -	//	{ "(?i)\\W", "cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" }, -	{"[^\\\\]", "cc{0x0-0x5b 0x5d-0x10ffff}"}, -	//	{ "\\C", "byte{}" }, +	{`[[:lower:]]`, `cc{0x61-0x7a}`}, +	{`[a-z]`, `cc{0x61-0x7a}`}, +	{`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`}, +	{`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`}, +	{`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`}, +	{`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`}, +	{`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`}, +	{`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`}, +	{`\d`, `cc{0x30-0x39}`}, +	{`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`}, +	{`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`}, +	{`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`}, +	{`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`}, +	{`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`}, +	{`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`}, +	{`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`}, +	{`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`}, +	//	{ `\C`, `byte{}` },  // probably never  	// Unicode, negatives, and a double negative. -	{"\\p{Braille}", "cc{0x2800-0x28ff}"}, -	{"\\P{Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, -	{"\\p{^Braille}", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, -	{"\\P{^Braille}", "cc{0x2800-0x28ff}"}, -	{"\\pZ", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"}, -	{"[\\p{Braille}]", "cc{0x2800-0x28ff}"}, -	{"[\\P{Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, -	{"[\\p{^Braille}]", "cc{0x0-0x27ff 0x2900-0x10ffff}"}, -	{"[\\P{^Braille}]", "cc{0x2800-0x28ff}"}, -	{"[\\pZ]", "cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}"}, +	{`\p{Braille}`, `cc{0x2800-0x28ff}`}, +	{`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, +	{`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, +	{`\P{^Braille}`, `cc{0x2800-0x28ff}`}, +	{`\pZ`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, +	{`[\p{Braille}]`, `cc{0x2800-0x28ff}`}, +	{`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, +	{`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`}, +	{`[\P{^Braille}]`, `cc{0x2800-0x28ff}`}, +	{`[\pZ]`, `cc{0x20 0xa0 0x1680 0x180e 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`}, +	{`\p{Lu}`, mkCharClass(unicode.IsUpper)}, +	{`[\p{Lu}]`, mkCharClass(unicode.IsUpper)}, +	{`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)}, + +	// Hex, octal. +	{`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`}, +	{`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},  	// More interesting regular expressions. -	//	{ "a{,2}", "str{a{,2}}" }, -	//	{ "\\.\\^\\$\\\\", "str{.^$\\}" }, -	{"[a-zABC]", "cc{0x41-0x43 0x61-0x7a}"}, -	{"[^a]", "cc{0x0-0x60 0x62-0x10ffff}"}, -	{"[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}"}, // utf-8 -	{"a*{", "cat{star{lit{a}}lit{{}}"}, +	{`a{,2}`, `str{a{,2}}`}, +	{`\.\^\$\\`, `str{.^$\}`}, +	{`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`}, +	{`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`}, +	{`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8 +	{`a*{`, `cat{star{lit{a}}lit{{}}`},  	// Test precedences -	//	{ "(?:ab)*", "star{str{ab}}" }, -	//	{ "(ab)*", "star{cap{str{ab}}}" }, -	//	{ "ab|cd", "alt{str{ab}str{cd}}" }, -	//	{ "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" }, -	{"(?:ab)*", "star{cat{lit{a}lit{b}}}"}, -	{"(ab)*", "star{cap{cat{lit{a}lit{b}}}}"}, -	{"ab|cd", "alt{cat{lit{a}lit{b}}cat{lit{c}lit{d}}}"}, -	{"a(b|c)d", "cat{lit{a}cap{alt{lit{b}lit{c}}}lit{d}}"}, +	{`(?:ab)*`, `star{str{ab}}`}, +	{`(ab)*`, `star{cap{str{ab}}}`}, +	{`ab|cd`, `alt{str{ab}str{cd}}`}, +	{`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},  	// Test flattening. -	{"(?:a)", "lit{a}"}, -	//	{ "(?:ab)(?:cd)", "str{abcd}" }, -	//	{ "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" }, -	//	{ "a|.", "dot{}" }, -	//	{ ".|a", "dot{}" }, +	{`(?:a)`, `lit{a}`}, +	{`(?:ab)(?:cd)`, `str{abcd}`}, +	{`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`}, +	{`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`}, +	{`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`}, +	{`a|.`, `dot{}`}, +	{`.|a`, `dot{}`}, +	{`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`}, +	{`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},  	// Test Perl quoted literals -	{"\\Q+|*?{[\\E", "str{+|*?{[}"}, -	{"\\Q+\\E+", "plus{lit{+}}"}, -	{"\\Q\\\\E", "lit{\\}"}, -	{"\\Q\\\\\\E", "str{\\\\}"}, +	{`\Q+|*?{[\E`, `str{+|*?{[}`}, +	{`\Q+\E+`, `plus{lit{+}}`}, +	{`\Q\\E`, `lit{\}`}, +	{`\Q\\\E`, `str{\\}`},  	// Test Perl \A and \z -	{"(?m)^", "bol{}"}, -	{"(?m)$", "eol{}"}, -	{"(?-m)^", "bot{}"}, -	{"(?-m)$", "eot{}"}, -	{"(?m)\\A", "bot{}"}, -	{"(?m)\\z", "eot{\\z}"}, -	{"(?-m)\\A", "bot{}"}, -	{"(?-m)\\z", "eot{\\z}"}, +	{`(?m)^`, `bol{}`}, +	{`(?m)$`, `eol{}`}, +	{`(?-m)^`, `bot{}`}, +	{`(?-m)$`, `eot{}`}, +	{`(?m)\A`, `bot{}`}, +	{`(?m)\z`, `eot{\z}`}, +	{`(?-m)\A`, `bot{}`}, +	{`(?-m)\z`, `eot{\z}`},  	// Test named captures -	{"(?P<name>a)", "cap{name:lit{a}}"}, +	{`(?P<name>a)`, `cap{name:lit{a}}`},  	// Case-folded literals -	//	{ "[Aa]", "litfold{a}" }, +	{`[Aa]`, `litfold{A}`}, +	{`[\x{100}\x{101}]`, `litfold{Ā}`}, +	{`[Δδ]`, `litfold{Δ}`},  	// Strings -	//	{ "abcde", "str{abcde}" }, -	//	{ "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" }, +	{`abcde`, `str{abcde}`}, +	{`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`}, + +	// Factoring. +	{`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`}, +	{`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`},  }  const testFlags = MatchNL | PerlX | UnicodeGroups @@ -223,8 +234,9 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) {  			}  			if re.Flags&FoldCase != 0 {  				for _, r := range re.Rune { -					if unicode.ToUpper(r) != r { +					if unicode.SimpleFold(r) != r {  						b.WriteString("fold") +						break  					}  				}  			} @@ -270,3 +282,69 @@ func dumpRegexp(b *bytes.Buffer, re *Regexp) {  	}  	b.WriteByte('}')  } + +func mkCharClass(f func(int) bool) string { +	re := &Regexp{Op: OpCharClass} +	lo := -1 +	for i := 0; i <= unicode.MaxRune; i++ { +		if f(i) { +			if lo < 0 { +				lo = i +			} +		} else { +			if lo >= 0 { +				re.Rune = append(re.Rune, lo, i-1) +				lo = -1 +			} +		} +	} +	if lo >= 0 { +		re.Rune = append(re.Rune, lo, unicode.MaxRune) +	} +	return dump(re) +} + +func isUpperFold(rune int) bool { +	if unicode.IsUpper(rune) { +		return true +	} +	c := unicode.SimpleFold(rune) +	for c != rune { +		if unicode.IsUpper(c) { +			return true +		} +		c = unicode.SimpleFold(c) +	} +	return false +} + +func TestFoldConstants(t *testing.T) { +	last := -1 +	for i := 0; i <= unicode.MaxRune; i++ { +		if unicode.SimpleFold(i) == i { +			continue +		} +		if last == -1 && minFold != i { +			t.Errorf("minFold=%#U should be %#U", minFold, i) +		} +		last = i +	} +	if maxFold != last { +		t.Errorf("maxFold=%#U should be %#U", maxFold, last) +	} +} + +func TestAppendRangeCollapse(t *testing.T) { +	// AppendRange should collapse each of the new ranges +	// into the earlier ones (it looks back two ranges), so that +	// the slice never grows very large. +	// Note that we are not calling cleanClass. +	var r []int +	for i := 'A'; i <= 'Z'; i++ { +		r = appendRange(r, i, i) +		r = appendRange(r, i+'a'-'A', i+'a'-'A') +	} +	if string(r) != "AZaz" { +		t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r)) +	} +} diff --git a/src/pkg/exp/regexp/syntax/prog.go b/src/pkg/exp/regexp/syntax/prog.go new file mode 100644 index 000000000..6eeb3da0c --- /dev/null +++ b/src/pkg/exp/regexp/syntax/prog.go @@ -0,0 +1,182 @@ +package syntax + +import ( +	"bytes" +	"strconv" +) + +// Compiled program. +// May not belong in this package, but convenient for now. + +// A Prog is a compiled regular expression program. +type Prog struct { +	Inst  []Inst +	Start int // index of start instruction +} + +// An InstOp is an instruction opcode. +type InstOp uint8 + +const ( +	InstAlt InstOp = iota +	InstAltMatch +	InstCapture +	InstEmptyWidth +	InstMatch +	InstFail +	InstNop +	InstRune +) + +// An EmptyOp specifies a kind or mixture of zero-width assertions. +type EmptyOp uint8 + +const ( +	EmptyBeginLine EmptyOp = 1 << iota +	EmptyEndLine +	EmptyBeginText +	EmptyEndText +	EmptyWordBoundary +	EmptyNoWordBoundary +) + +// An Inst is a single instruction in a regular expression program. +type Inst struct { +	Op   InstOp +	Out  uint32 // all but InstMatch, InstFail +	Arg  uint32 // InstAlt, InstAltMatch, InstCapture, InstEmptyWidth +	Rune []int +} + +func (p *Prog) String() string { +	var b bytes.Buffer +	dumpProg(&b, p) +	return b.String() +} + +// MatchRune returns true if the instruction matches (and consumes) r. +// It should only be called when i.Op == InstRune. +func (i *Inst) MatchRune(r int) bool { +	rune := i.Rune + +	// Special case: single-rune slice is from literal string, not char class. +	// TODO: Case folding. +	if len(rune) == 1 { +		return r == rune[0] +	} + +	// Peek at the first few pairs. +	// Should handle ASCII well. +	for j := 0; j < len(rune) && j <= 8; j += 2 { +		if r < rune[j] { +			return false +		} +		if r <= rune[j+1] { +			return true +		} +	} + +	// Otherwise binary search. +	lo := 0 +	hi := len(rune) / 2 +	for lo < hi { +		m := lo + (hi-lo)/2 +		if c := rune[2*m]; c <= r { +			if r <= rune[2*m+1] { +				return true +			} +			lo = m + 1 +		} else { +			hi = m +		} +	} +	return false +} + +// As per re2's Prog::IsWordChar. Determines whether rune is an ASCII word char. +// Since we act on runes, it would be easy to support Unicode here. +func wordRune(rune int) bool { +	return rune == '_' || +		('A' <= rune && rune <= 'Z') || +		('a' <= rune && rune <= 'z') || +		('0' <= rune && rune <= '9') +} + +// MatchEmptyWidth returns true if the instruction matches +// an empty string between the runes before and after. +// It should only be called when i.Op == InstEmptyWidth. +func (i *Inst) MatchEmptyWidth(before int, after int) bool { +	switch EmptyOp(i.Arg) { +	case EmptyBeginLine: +		return before == '\n' || before == -1 +	case EmptyEndLine: +		return after == '\n' || after == -1 +	case EmptyBeginText: +		return before == -1 +	case EmptyEndText: +		return after == -1 +	case EmptyWordBoundary: +		return wordRune(before) != wordRune(after) +	case EmptyNoWordBoundary: +		return wordRune(before) == wordRune(after) +	} +	panic("unknown empty width arg") +} + + +func (i *Inst) String() string { +	var b bytes.Buffer +	dumpInst(&b, i) +	return b.String() +} + +func bw(b *bytes.Buffer, args ...string) { +	for _, s := range args { +		b.WriteString(s) +	} +} + +func dumpProg(b *bytes.Buffer, p *Prog) { +	for j := range p.Inst { +		i := &p.Inst[j] +		pc := strconv.Itoa(j) +		if len(pc) < 3 { +			b.WriteString("   "[len(pc):]) +		} +		if j == p.Start { +			pc += "*" +		} +		bw(b, pc, "\t") +		dumpInst(b, i) +		bw(b, "\n") +	} +} + +func u32(i uint32) string { +	return strconv.Uitoa64(uint64(i)) +} + +func dumpInst(b *bytes.Buffer, i *Inst) { +	switch i.Op { +	case InstAlt: +		bw(b, "alt -> ", u32(i.Out), ", ", u32(i.Arg)) +	case InstAltMatch: +		bw(b, "altmatch -> ", u32(i.Out), ", ", u32(i.Arg)) +	case InstCapture: +		bw(b, "cap ", u32(i.Arg), " -> ", u32(i.Out)) +	case InstEmptyWidth: +		bw(b, "empty ", u32(i.Arg), " -> ", u32(i.Out)) +	case InstMatch: +		bw(b, "match") +	case InstFail: +		bw(b, "fail") +	case InstNop: +		bw(b, "nop -> ", u32(i.Out)) +	case InstRune: +		if i.Rune == nil { +			// shouldn't happen +			bw(b, "rune <nil>") +		} +		bw(b, "rune ", strconv.QuoteToASCII(string(i.Rune)), " -> ", u32(i.Out)) +	} +} diff --git a/src/pkg/exp/regexp/syntax/prog_test.go b/src/pkg/exp/regexp/syntax/prog_test.go new file mode 100644 index 000000000..7be4281c2 --- /dev/null +++ b/src/pkg/exp/regexp/syntax/prog_test.go @@ -0,0 +1,91 @@ +package syntax + +import ( +	"testing" +) + +var compileTests = []struct { +	Regexp string +	Prog   string +}{ +	{"a", `  0	fail +  1*	rune "a" -> 2 +  2	match +`}, +	{"[A-M][n-z]", `  0	fail +  1*	rune "AM" -> 2 +  2	rune "nz" -> 3 +  3	match +`}, +	{"", `  0	fail +  1*	nop -> 2 +  2	match +`}, +	{"a?", `  0	fail +  1	rune "a" -> 3 +  2*	alt -> 1, 3 +  3	match +`}, +	{"a??", `  0	fail +  1	rune "a" -> 3 +  2*	alt -> 3, 1 +  3	match +`}, +	{"a+", `  0	fail +  1*	rune "a" -> 2 +  2	alt -> 1, 3 +  3	match +`}, +	{"a+?", `  0	fail +  1*	rune "a" -> 2 +  2	alt -> 3, 1 +  3	match +`}, +	{"a*", `  0	fail +  1	rune "a" -> 2 +  2*	alt -> 1, 3 +  3	match +`}, +	{"a*?", `  0	fail +  1	rune "a" -> 2 +  2*	alt -> 3, 1 +  3	match +`}, +	{"a+b+", `  0	fail +  1*	rune "a" -> 2 +  2	alt -> 1, 3 +  3	rune "b" -> 4 +  4	alt -> 3, 5 +  5	match +`}, +	{"(a+)(b+)", `  0	fail +  1*	cap 2 -> 2 +  2	rune "a" -> 3 +  3	alt -> 2, 4 +  4	cap 3 -> 5 +  5	cap 4 -> 6 +  6	rune "b" -> 7 +  7	alt -> 6, 8 +  8	cap 5 -> 9 +  9	match +`}, +	{"a+|b+", `  0	fail +  1	rune "a" -> 2 +  2	alt -> 1, 6 +  3	rune "b" -> 4 +  4	alt -> 3, 6 +  5*	alt -> 1, 3 +  6	match +`}, +} + +func TestCompile(t *testing.T) { +	for _, tt := range compileTests { +		re, _ := Parse(tt.Regexp, Perl) +		p, _ := Compile(re) +		s := p.String() +		if s != tt.Prog { +			t.Errorf("compiled %#q:\n--- have\n%s---\n--- want\n%s---", tt.Regexp, s, tt.Prog) +		} +	} +} diff --git a/src/pkg/exp/regexp/syntax/regexp.go b/src/pkg/exp/regexp/syntax/regexp.go index a0c465967..00a4addef 100644 --- a/src/pkg/exp/regexp/syntax/regexp.go +++ b/src/pkg/exp/regexp/syntax/regexp.go @@ -33,6 +33,8 @@ type Regexp struct {  type Op uint8  // Operators are listed in precedence order, tightest binding to weakest. +// Character class operators are listed simplest to most complex +// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).  const (  	OpNoMatch        Op = 1 + iota // matches no strings @@ -58,6 +60,59 @@ const (  const opPseudo Op = 128 // where pseudo-ops start +// Equal returns true if x and y have identical structure. +func (x *Regexp) Equal(y *Regexp) bool { +	if x == nil || y == nil { +		return x == y +	} +	if x.Op != y.Op { +		return false +	} +	switch x.Op { +	case OpEndText: +		// The parse flags remember whether this is \z or \Z. +		if x.Flags&WasDollar != y.Flags&WasDollar { +			return false +		} + +	case OpLiteral, OpCharClass: +		if len(x.Rune) != len(y.Rune) { +			return false +		} +		for i, r := range x.Rune { +			if r != y.Rune[i] { +				return false +			} +		} + +	case OpAlternate, OpConcat: +		if len(x.Sub) != len(y.Sub) { +			return false +		} +		for i, sub := range x.Sub { +			if !sub.Equal(y.Sub[i]) { +				return false +			} +		} + +	case OpStar, OpPlus, OpQuest: +		if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) { +			return false +		} + +	case OpRepeat: +		if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) { +			return false +		} + +	case OpCapture: +		if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) { +			return false +		} +	} +	return true +} +  // writeRegexp writes the Perl syntax for the regular expression re to b.  func writeRegexp(b *bytes.Buffer, re *Regexp) {  	switch re.Op { @@ -68,16 +123,24 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {  	case OpEmptyMatch:  		b.WriteString(`(?:)`)  	case OpLiteral: +		if re.Flags&FoldCase != 0 { +			b.WriteString(`(?i:`) +		}  		for _, r := range re.Rune {  			escape(b, r, false)  		} +		if re.Flags&FoldCase != 0 { +			b.WriteString(`)`) +		}  	case OpCharClass:  		if len(re.Rune)%2 != 0 {  			b.WriteString(`[invalid char class]`)  			break  		}  		b.WriteRune('[') -		if len(re.Rune) > 0 && re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune { +		if len(re.Rune) == 0 { +			b.WriteString(`^\x00-\x{10FFFF}`) +		} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune {  			// Contains 0 and MaxRune.  Probably a negated class.  			// Print the gaps.  			b.WriteRune('^') @@ -124,7 +187,9 @@ func writeRegexp(b *bytes.Buffer, re *Regexp) {  		} else {  			b.WriteRune('(')  		} -		writeRegexp(b, re.Sub[0]) +		if re.Sub[0].Op != OpEmptyMatch { +			writeRegexp(b, re.Sub[0]) +		}  		b.WriteRune(')')  	case OpStar, OpPlus, OpQuest, OpRepeat:  		if sub := re.Sub[0]; sub.Op > OpCapture { @@ -203,6 +268,15 @@ func escape(b *bytes.Buffer, r int, force bool) {  	case '\v':  		b.WriteString(`\v`)  	default: +		if r < 0x100 { +			b.WriteString(`\x`) +			s := strconv.Itob(r, 16) +			if len(s) == 1 { +				b.WriteRune('0') +			} +			b.WriteString(s) +			break +		}  		b.WriteString(`\x{`)  		b.WriteString(strconv.Itob(r, 16))  		b.WriteString(`}`) diff --git a/src/pkg/exp/regexp/syntax/simplify.go b/src/pkg/exp/regexp/syntax/simplify.go new file mode 100644 index 000000000..72390417b --- /dev/null +++ b/src/pkg/exp/regexp/syntax/simplify.go @@ -0,0 +1,151 @@ +// Copyright 2011 The Go Authors.  All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +// Simplify returns a regexp equivalent to re but without counted repetitions +// and with various other simplifications, such as rewriting /(?:a+)+/ to /a+/. +// The resulting regexp will execute correctly but its string representation +// will not produce the same parse tree, because capturing parentheses +// may have been duplicated or removed.  For example, the simplified form +// for /(x){1,2}/ is /(x)(x)?/ but both parentheses capture as $1. +// The returned regexp may share structure with or be the original. +func (re *Regexp) Simplify() *Regexp { +	if re == nil { +		return nil +	} +	switch re.Op { +	case OpCapture, OpConcat, OpAlternate: +		// Simplify children, building new Regexp if children change. +		nre := re +		for i, sub := range re.Sub { +			nsub := sub.Simplify() +			if nre == re && nsub != sub { +				// Start a copy. +				nre = new(Regexp) +				*nre = *re +				nre.Rune = nil +				nre.Sub = append(nre.Sub0[:0], re.Sub[:i]...) +			} +			if nre != re { +				nre.Sub = append(nre.Sub, nsub) +			} +		} +		return nre + +	case OpStar, OpPlus, OpQuest: +		sub := re.Sub[0].Simplify() +		return simplify1(re.Op, re.Flags, sub, re) + +	case OpRepeat: +		// Special special case: x{0} matches the empty string +		// and doesn't even need to consider x. +		if re.Min == 0 && re.Max == 0 { +			return &Regexp{Op: OpEmptyMatch} +		} + +		// The fun begins. +		sub := re.Sub[0].Simplify() + +		// x{n,} means at least n matches of x. +		if re.Max == -1 { +			// Special case: x{0,} is x*. +			if re.Min == 0 { +				return simplify1(OpStar, re.Flags, sub, nil) +			} + +			// Special case: x{1,} is x+. +			if re.Min == 1 { +				return simplify1(OpPlus, re.Flags, sub, nil) +			} + +			// General case: x{4,} is xxxx+. +			nre := &Regexp{Op: OpConcat} +			nre.Sub = nre.Sub0[:0] +			for i := 0; i < re.Min-1; i++ { +				nre.Sub = append(nre.Sub, sub) +			} +			nre.Sub = append(nre.Sub, simplify1(OpPlus, re.Flags, sub, nil)) +			return nre +		} + +		// Special case x{0} handled above. + +		// Special case: x{1} is just x. +		if re.Min == 1 && re.Max == 1 { +			return sub +		} + +		// General case: x{n,m} means n copies of x and m copies of x? +		// The machine will do less work if we nest the final m copies, +		// so that x{2,5} = xx(x(x(x)?)?)? + +		// Build leading prefix: xx. +		var prefix *Regexp +		if re.Min > 0 { +			prefix = &Regexp{Op: OpConcat} +			prefix.Sub = prefix.Sub0[:0] +			for i := 0; i < re.Min; i++ { +				prefix.Sub = append(prefix.Sub, sub) +			} +		} + +		// Build and attach suffix: (x(x(x)?)?)? +		if re.Max > re.Min { +			suffix := simplify1(OpQuest, re.Flags, sub, nil) +			for i := re.Min + 1; i < re.Max; i++ { +				nre2 := &Regexp{Op: OpConcat} +				nre2.Sub = append(nre2.Sub0[:0], sub, suffix) +				suffix = simplify1(OpQuest, re.Flags, nre2, nil) +			} +			if prefix == nil { +				return suffix +			} +			prefix.Sub = append(prefix.Sub, suffix) +		} +		if prefix != nil { +			return prefix +		} + +		// Some degenerate case like min > max or min < max < 0. +		// Handle as impossible match. +		return &Regexp{Op: OpNoMatch} +	} + +	return re +} + +// simplify1 implements Simplify for the unary OpStar, +// OpPlus, and OpQuest operators.  It returns the simple regexp +// equivalent to +// +//	Regexp{Op: op, Flags: flags, Sub: {sub}} +// +// under the assumption that sub is already simple, and +// without first allocating that structure.  If the regexp +// to be returned turns out to be equivalent to re, simplify1 +// returns re instead. +// +// simplify1 is factored out of Simplify because the implementation +// for other operators generates these unary expressions. +// Letting them call simplify1 makes sure the expressions they +// generate are simple. +func simplify1(op Op, flags Flags, sub, re *Regexp) *Regexp { +	// Special case: repeat the empty string as much as +	// you want, but it's still the empty string. +	if sub.Op == OpEmptyMatch { +		return sub +	} +	// The operators are idempotent if the flags match. +	if op == sub.Op && flags&NonGreedy == sub.Flags&NonGreedy { +		return sub +	} +	if re != nil && re.Op == op && re.Flags&NonGreedy == flags&NonGreedy && sub == re.Sub[0] { +		return re +	} + +	re = &Regexp{Op: op, Flags: flags} +	re.Sub = append(re.Sub0[:0], sub) +	return re +} diff --git a/src/pkg/exp/regexp/syntax/simplify_test.go b/src/pkg/exp/regexp/syntax/simplify_test.go new file mode 100644 index 000000000..c8cec2183 --- /dev/null +++ b/src/pkg/exp/regexp/syntax/simplify_test.go @@ -0,0 +1,151 @@ +// Copyright 2011 The Go Authors.  All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syntax + +import "testing" + +var simplifyTests = []struct { +	Regexp string +	Simple string +}{ +	// Already-simple constructs +	{`a`, `a`}, +	{`ab`, `ab`}, +	{`a|b`, `[a-b]`}, +	{`ab|cd`, `ab|cd`}, +	{`(ab)*`, `(ab)*`}, +	{`(ab)+`, `(ab)+`}, +	{`(ab)?`, `(ab)?`}, +	{`.`, `.`}, +	{`^`, `^`}, +	{`$`, `$`}, +	{`[ac]`, `[ac]`}, +	{`[^ac]`, `[^ac]`}, + +	// Posix character classes +	{`[[:alnum:]]`, `[0-9A-Za-z]`}, +	{`[[:alpha:]]`, `[A-Za-z]`}, +	{`[[:blank:]]`, `[\t ]`}, +	{`[[:cntrl:]]`, `[\x00-\x1f\x7f]`}, +	{`[[:digit:]]`, `[0-9]`}, +	{`[[:graph:]]`, `[!-~]`}, +	{`[[:lower:]]`, `[a-z]`}, +	{`[[:print:]]`, `[ -~]`}, +	{`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"}, +	{`[[:space:]]`, `[\t-\r ]`}, +	{`[[:upper:]]`, `[A-Z]`}, +	{`[[:xdigit:]]`, `[0-9A-Fa-f]`}, + +	// Perl character classes +	{`\d`, `[0-9]`}, +	{`\s`, `[\t-\n\f-\r ]`}, +	{`\w`, `[0-9A-Z_a-z]`}, +	{`\D`, `[^0-9]`}, +	{`\S`, `[^\t-\n\f-\r ]`}, +	{`\W`, `[^0-9A-Z_a-z]`}, +	{`[\d]`, `[0-9]`}, +	{`[\s]`, `[\t-\n\f-\r ]`}, +	{`[\w]`, `[0-9A-Z_a-z]`}, +	{`[\D]`, `[^0-9]`}, +	{`[\S]`, `[^\t-\n\f-\r ]`}, +	{`[\W]`, `[^0-9A-Z_a-z]`}, + +	// Posix repetitions +	{`a{1}`, `a`}, +	{`a{2}`, `aa`}, +	{`a{5}`, `aaaaa`}, +	{`a{0,1}`, `a?`}, +	// The next three are illegible because Simplify inserts (?:) +	// parens instead of () parens to avoid creating extra +	// captured subexpressions.  The comments show a version with fewer parens. +	{`(a){0,2}`, `(?:(a)(a)?)?`},                       //       (aa?)? +	{`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`},       //   (a(a(aa?)?)?)? +	{`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)? +	{`a{0,2}`, `(?:aa?)?`},                             //       (aa?)? +	{`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`},                 //   (a(a(aa?)?)?)? +	{`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`},               // aa(a(a(aa?)?)?)? +	{`a{0,}`, `a*`}, +	{`a{1,}`, `a+`}, +	{`a{2,}`, `aa+`}, +	{`a{5,}`, `aaaaa+`}, + +	// Test that operators simplify their arguments. +	{`(?:a{1,}){1,}`, `a+`}, +	{`(a{1,}b{1,})`, `(a+b+)`}, +	{`a{1,}|b{1,}`, `a+|b+`}, +	{`(?:a{1,})*`, `(?:a+)*`}, +	{`(?:a{1,})+`, `a+`}, +	{`(?:a{1,})?`, `(?:a+)?`}, +	{``, `(?:)`}, +	{`a{0}`, `(?:)`}, + +	// Character class simplification +	{`[ab]`, `[a-b]`}, +	{`[a-za-za-z]`, `[a-z]`}, +	{`[A-Za-zA-Za-z]`, `[A-Za-z]`}, +	{`[ABCDEFGH]`, `[A-H]`}, +	{`[AB-CD-EF-GH]`, `[A-H]`}, +	{`[W-ZP-XE-R]`, `[E-Z]`}, +	{`[a-ee-gg-m]`, `[a-m]`}, +	{`[a-ea-ha-m]`, `[a-m]`}, +	{`[a-ma-ha-e]`, `[a-m]`}, +	{`[a-zA-Z0-9 -~]`, `[ -~]`}, + +	// Empty character classes +	{`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`}, + +	// Full character classes +	{`[[:cntrl:][:^cntrl:]]`, `.`}, + +	// Unicode case folding. +	{`(?i)A`, `(?i:A)`}, +	{`(?i)a`, `(?i:a)`}, +	{`(?i)[A]`, `(?i:A)`}, +	{`(?i)[a]`, `(?i:A)`}, +	{`(?i)K`, `(?i:K)`}, +	{`(?i)k`, `(?i:k)`}, +	{`(?i)\x{212a}`, "(?i:\u212A)"}, +	{`(?i)[K]`, "[Kk\u212A]"}, +	{`(?i)[k]`, "[Kk\u212A]"}, +	{`(?i)[\x{212a}]`, "[Kk\u212A]"}, +	{`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"}, +	{`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"}, +	{`(?i)[\x00-\x{10FFFF}]`, `.`}, + +	// Empty string as a regular expression. +	// The empty string must be preserved inside parens in order +	// to make submatches work right, so these tests are less +	// interesting than they might otherwise be.  String inserts +	// explicit (?:) in place of non-parenthesized empty strings, +	// to make them easier to spot for other parsers. +	{`(a|b|)`, `([a-b]|(?:))`}, +	{`(|)`, `()`}, +	{`a()`, `a()`}, +	{`(()|())`, `(()|())`}, +	{`(a|)`, `(a|(?:))`}, +	{`ab()cd()`, `ab()cd()`}, +	{`()`, `()`}, +	{`()*`, `()*`}, +	{`()+`, `()+`}, +	{`()?`, `()?`}, +	{`(){0}`, `(?:)`}, +	{`(){1}`, `()`}, +	{`(){1,}`, `()+`}, +	{`(){0,2}`, `(?:()()?)?`}, +} + +func TestSimplify(t *testing.T) { +	for _, tt := range simplifyTests { +		re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine) +		if err != nil { +			t.Errorf("Parse(%#q) = error %v", tt.Regexp, err) +			continue +		} +		s := re.Simplify().String() +		if s != tt.Simple { +			t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple) +		} +	} +} diff --git a/src/pkg/exp/template/Makefile b/src/pkg/exp/template/Makefile index ab9832f61..8550b0d52 100644 --- a/src/pkg/exp/template/Makefile +++ b/src/pkg/exp/template/Makefile @@ -4,9 +4,12 @@  include ../../../Make.inc -TARG=template +TARG=exp/template  GOFILES=\ +	exec.go\ +	funcs.go\  	lex.go\  	parse.go\ +	set.go\  include ../../../Make.pkg diff --git a/src/pkg/exp/template/exec.go b/src/pkg/exp/template/exec.go new file mode 100644 index 000000000..fb0a9e621 --- /dev/null +++ b/src/pkg/exp/template/exec.go @@ -0,0 +1,508 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( +	"fmt" +	"io" +	"os" +	"reflect" +	"strings" +	"unicode" +	"utf8" +) + +// state represents the state of an execution. It's not part of the +// template so that multiple executions of the same template +// can execute in parallel. +type state struct { +	tmpl *Template +	wr   io.Writer +	set  *Set +	line int // line number for errors +} + +// errorf formats the error and terminates processing. +func (s *state) errorf(format string, args ...interface{}) { +	format = fmt.Sprintf("template: %s:%d: %s", s.tmpl.name, s.line, format) +	panic(fmt.Errorf(format, args...)) +} + +// error terminates processing. +func (s *state) error(err os.Error) { +	s.errorf("%s", err) +} + +// Execute applies a parsed template to the specified data object, +// writing the output to wr. +func (t *Template) Execute(wr io.Writer, data interface{}) os.Error { +	return t.ExecuteInSet(wr, data, nil) +} + +// ExecuteInSet applies a parsed template to the specified data object, +// writing the output to wr. Nested template invocations will be resolved +// from the specified set. +func (t *Template) ExecuteInSet(wr io.Writer, data interface{}, set *Set) (err os.Error) { +	defer t.recover(&err) +	state := &state{ +		tmpl: t, +		wr:   wr, +		set:  set, +		line: 1, +	} +	if t.root == nil { +		state.errorf("must be parsed before execution") +	} +	state.walk(reflect.ValueOf(data), t.root) +	return +} + +// Walk functions step through the major pieces of the template structure, +// generating output as they go. +func (s *state) walk(data reflect.Value, n node) { +	switch n := n.(type) { +	case *actionNode: +		s.line = n.line +		s.printValue(n, s.evalPipeline(data, n.pipeline)) +	case *listNode: +		for _, node := range n.nodes { +			s.walk(data, node) +		} +	case *ifNode: +		s.line = n.line +		s.walkIfOrWith(nodeIf, data, n.pipeline, n.list, n.elseList) +	case *rangeNode: +		s.line = n.line +		s.walkRange(data, n) +	case *textNode: +		if _, err := s.wr.Write(n.text); err != nil { +			s.error(err) +		} +	case *templateNode: +		s.line = n.line +		s.walkTemplate(data, n) +	case *withNode: +		s.line = n.line +		s.walkIfOrWith(nodeWith, data, n.pipeline, n.list, n.elseList) +	default: +		s.errorf("unknown node: %s", n) +	} +} + +// walkIfOrWith walks an 'if' or 'with' node. The two control structures +// are identical in behavior except that 'with' sets dot. +func (s *state) walkIfOrWith(typ nodeType, data reflect.Value, pipe []*commandNode, list, elseList *listNode) { +	val := s.evalPipeline(data, pipe) +	truth, ok := isTrue(val) +	if !ok { +		s.errorf("if/with can't use value of type %T", val.Interface()) +	} +	if truth { +		if typ == nodeWith { +			data = val +		} +		s.walk(data, list) +	} else if elseList != nil { +		s.walk(data, elseList) +	} +} + +// isTrue returns whether the value is 'true', in the sense of not the zero of its type, +// and whether the value has a meaningful truth value. +func isTrue(val reflect.Value) (truth, ok bool) { +	switch val.Kind() { +	case reflect.Array, reflect.Map, reflect.Slice, reflect.String: +		truth = val.Len() > 0 +	case reflect.Bool: +		truth = val.Bool() +	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: +		truth = val.Int() != 0 +	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: +		truth = val.Uint() != 0 +	case reflect.Float32, reflect.Float64: +		truth = val.Float() != 0 +	case reflect.Complex64, reflect.Complex128: +		truth = val.Complex() != 0 +	case reflect.Chan, reflect.Func, reflect.Ptr: +		truth = !val.IsNil() +	default: +		return +	} +	return truth, true +} + +func (s *state) walkRange(data reflect.Value, r *rangeNode) { +	val := s.evalPipeline(data, r.pipeline) +	switch val.Kind() { +	case reflect.Array, reflect.Slice: +		if val.Len() == 0 { +			break +		} +		for i := 0; i < val.Len(); i++ { +			s.walk(val.Index(i), r.list) +		} +		return +	case reflect.Map: +		if val.Len() == 0 { +			break +		} +		for _, key := range val.MapKeys() { +			s.walk(val.MapIndex(key), r.list) +		} +		return +	default: +		s.errorf("range can't iterate over value of type %T", val.Interface()) +	} +	if r.elseList != nil { +		s.walk(data, r.elseList) +	} +} + +func (s *state) walkTemplate(data reflect.Value, t *templateNode) { +	name := s.evalArg(data, reflect.TypeOf("string"), t.name).String() +	if s.set == nil { +		s.errorf("no set defined in which to invoke template named %q", name) +	} +	tmpl := s.set.tmpl[name] +	if tmpl == nil { +		s.errorf("template %q not in set", name) +	} +	data = s.evalPipeline(data, t.pipeline) +	newState := *s +	newState.tmpl = tmpl +	newState.walk(data, tmpl.root) +} + +// Eval functions evaluate pipelines, commands, and their elements and extract +// values from the data structure by examining fields, calling methods, and so on. +// The printing of those values happens only through walk functions. + +func (s *state) evalPipeline(data reflect.Value, pipe []*commandNode) reflect.Value { +	value := reflect.Value{} +	for _, cmd := range pipe { +		value = s.evalCommand(data, cmd, value) // previous value is this one's final arg. +		// If the object has type interface{}, dig down one level to the thing inside. +		if value.Kind() == reflect.Interface && value.Type().NumMethod() == 0 { +			value = reflect.ValueOf(value.Interface()) // lovely! +		} +	} +	return value +} + +func (s *state) evalCommand(data reflect.Value, cmd *commandNode, final reflect.Value) reflect.Value { +	firstWord := cmd.args[0] +	switch n := firstWord.(type) { +	case *fieldNode: +		return s.evalFieldNode(data, n, cmd.args, final) +	case *identifierNode: +		return s.evalFieldOrCall(data, n.ident, cmd.args, final) +	} +	if len(cmd.args) > 1 || final.IsValid() { +		s.errorf("can't give argument to non-function %s", cmd.args[0]) +	} +	switch word := cmd.args[0].(type) { +	case *dotNode: +		return data +	case *boolNode: +		return reflect.ValueOf(word.true) +	case *numberNode: +		// These are ideal constants but we don't know the type +		// and we have no context.  (If it was a method argument, +		// we'd know what we need.) The syntax guides us to some extent. +		switch { +		case word.isComplex: +			return reflect.ValueOf(word.complex128) // incontrovertible. +		case word.isFloat && strings.IndexAny(word.text, ".eE") >= 0: +			return reflect.ValueOf(word.float64) +		case word.isInt: +			return reflect.ValueOf(word.int64) +		case word.isUint: +			return reflect.ValueOf(word.uint64) +		} +	case *stringNode: +		return reflect.ValueOf(word.text) +	} +	s.errorf("can't handle command %q", firstWord) +	panic("not reached") +} + +func (s *state) evalFieldNode(data reflect.Value, field *fieldNode, args []node, final reflect.Value) reflect.Value { +	// Up to the last entry, it must be a field. +	n := len(field.ident) +	for i := 0; i < n-1; i++ { +		data = s.evalField(data, field.ident[i]) +	} +	// Now it can be a field or method and if a method, gets arguments. +	return s.evalFieldOrCall(data, field.ident[n-1], args, final) +} + +// Is this an exported - upper case - name? +func isExported(name string) bool { +	rune, _ := utf8.DecodeRuneInString(name) +	return unicode.IsUpper(rune) +} + +func (s *state) evalField(data reflect.Value, fieldName string) reflect.Value { +	var isNil bool +	if data, isNil = indirect(data); isNil { +		s.errorf("%s is nil pointer", fieldName) +	} +	switch data.Kind() { +	case reflect.Struct: +		// Is it a field? +		field := data.FieldByName(fieldName) +		// TODO: look higher up the tree if we can't find it here. Also unexported fields +		// might succeed higher up, as map keys. +		if field.IsValid() && isExported(fieldName) { // valid and exported +			return field +		} +		s.errorf("%s has no exported field %q", data.Type(), fieldName) +	default: +		s.errorf("can't evaluate field %s of type %s", fieldName, data.Type()) +	} +	panic("not reached") +} + +func (s *state) evalFieldOrCall(data reflect.Value, fieldName string, args []node, final reflect.Value) reflect.Value { +	// Is it a function? +	if function, ok := findFunction(fieldName, s.tmpl, s.set); ok { +		return s.evalCall(data, function, fieldName, false, args, final) +	} +	ptr := data +	for data.Kind() == reflect.Ptr && !data.IsNil() { +		ptr, data = data, reflect.Indirect(data) +	} +	// Is it a method? We use the pointer because it has value methods too. +	if method, ok := methodByName(ptr.Type(), fieldName); ok { +		return s.evalCall(ptr, method.Func, fieldName, true, args, final) +	} +	if len(args) > 1 || final.IsValid() { +		s.errorf("%s is not a method but has arguments", fieldName) +	} +	switch data.Kind() { +	case reflect.Struct: +		return s.evalField(data, fieldName) +	default: +		s.errorf("can't handle evaluation of field %s of type %s", fieldName, data.Type()) +	} +	panic("not reached") +} + +// TODO: delete when reflect's own MethodByName is released. +func methodByName(typ reflect.Type, name string) (reflect.Method, bool) { +	for i := 0; i < typ.NumMethod(); i++ { +		if typ.Method(i).Name == name { +			return typ.Method(i), true +		} +	} +	return reflect.Method{}, false +} + +var ( +	osErrorType = reflect.TypeOf(new(os.Error)).Elem() +) + +func (s *state) evalCall(v, fun reflect.Value, name string, isMethod bool, args []node, final reflect.Value) reflect.Value { +	typ := fun.Type() +	if !isMethod && len(args) > 0 { // Args will be nil if it's a niladic call in an argument list +		args = args[1:] // first arg is name of function; not used in call. +	} +	numIn := len(args) +	if final.IsValid() { +		numIn++ +	} +	numFixed := len(args) +	if typ.IsVariadic() { +		numFixed = typ.NumIn() - 1 // last arg is the variadic one. +		if numIn < numFixed { +			s.errorf("wrong number of args for %s: want at least %d got %d", name, typ.NumIn()-1, len(args)) +		} +	} else if numIn < typ.NumIn()-1 || !typ.IsVariadic() && numIn != typ.NumIn() { +		s.errorf("wrong number of args for %s: want %d got %d", name, typ.NumIn(), len(args)) +	} +	if !goodFunc(typ) { +		s.errorf("can't handle multiple results from method/function %q", name) +	} +	// Build the arg list. +	argv := make([]reflect.Value, numIn) +	// First arg is the receiver. +	i := 0 +	if isMethod { +		argv[0] = v +		i++ +	} +	// Others must be evaluated. Fixed args first. +	for ; i < numFixed; i++ { +		argv[i] = s.evalArg(v, typ.In(i), args[i]) +	} +	// And now the ... args. +	if typ.IsVariadic() { +		argType := typ.In(typ.NumIn() - 1).Elem() // Argument is a slice. +		for ; i < len(args); i++ { +			argv[i] = s.evalArg(v, argType, args[i]) +		} +	} +	// Add final value if necessary. +	if final.IsValid() { +		argv[len(args)] = final +	} +	result := fun.Call(argv) +	// If we have an os.Error that is not nil, stop execution and return that error to the caller. +	if len(result) == 2 && !result[1].IsNil() { +		s.error(result[1].Interface().(os.Error)) +	} +	return result[0] +} + +func (s *state) evalArg(data reflect.Value, typ reflect.Type, n node) reflect.Value { +	if field, ok := n.(*fieldNode); ok { +		value := s.evalFieldNode(data, field, []node{n}, reflect.Value{}) +		if !value.Type().AssignableTo(typ) { +			s.errorf("wrong type for value; expected %s; got %s", typ, value.Type()) +		} +		return value +	} +	switch typ.Kind() { +	case reflect.Bool: +		return s.evalBool(typ, n) +	case reflect.String: +		return s.evalString(typ, n) +	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: +		return s.evalInteger(typ, n) +	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: +		return s.evalUnsignedInteger(typ, n) +	case reflect.Float32, reflect.Float64: +		return s.evalFloat(typ, n) +	case reflect.Complex64, reflect.Complex128: +		return s.evalComplex(typ, n) +	case reflect.Interface: +		if typ.NumMethod() == 0 { +			return s.evalEmptyInterface(data, typ, n) +		} +	} +	s.errorf("can't handle %s for arg of type %s", n, typ) +	panic("not reached") +} + +func (s *state) evalBool(typ reflect.Type, n node) reflect.Value { +	if n, ok := n.(*boolNode); ok { +		value := reflect.New(typ).Elem() +		value.SetBool(n.true) +		return value +	} +	s.errorf("expected bool; found %s", n) +	panic("not reached") +} + +func (s *state) evalString(typ reflect.Type, n node) reflect.Value { +	if n, ok := n.(*stringNode); ok { +		value := reflect.New(typ).Elem() +		value.SetString(n.text) +		return value +	} +	s.errorf("expected string; found %s", n) +	panic("not reached") +} + +func (s *state) evalInteger(typ reflect.Type, n node) reflect.Value { +	if n, ok := n.(*numberNode); ok && n.isInt { +		value := reflect.New(typ).Elem() +		value.SetInt(n.int64) +		return value +	} +	s.errorf("expected integer; found %s", n) +	panic("not reached") +} + +func (s *state) evalUnsignedInteger(typ reflect.Type, n node) reflect.Value { +	if n, ok := n.(*numberNode); ok && n.isUint { +		value := reflect.New(typ).Elem() +		value.SetUint(n.uint64) +		return value +	} +	s.errorf("expected unsigned integer; found %s", n) +	panic("not reached") +} + +func (s *state) evalFloat(typ reflect.Type, n node) reflect.Value { +	if n, ok := n.(*numberNode); ok && n.isFloat { +		value := reflect.New(typ).Elem() +		value.SetFloat(n.float64) +		return value +	} +	s.errorf("expected float; found %s", n) +	panic("not reached") +} + +func (s *state) evalComplex(typ reflect.Type, n node) reflect.Value { +	if n, ok := n.(*numberNode); ok && n.isComplex { +		value := reflect.New(typ).Elem() +		value.SetComplex(n.complex128) +		return value +	} +	s.errorf("expected complex; found %s", n) +	panic("not reached") +} + +func (s *state) evalEmptyInterface(data reflect.Value, typ reflect.Type, n node) reflect.Value { +	switch n := n.(type) { +	case *boolNode: +		return reflect.ValueOf(n.true) +	case *dotNode: +		return data +	case *fieldNode: +		return s.evalFieldNode(data, n, nil, reflect.Value{}) +	case *identifierNode: +		return s.evalFieldOrCall(data, n.ident, nil, reflect.Value{}) +	case *numberNode: +		if n.isComplex { +			return reflect.ValueOf(n.complex128) +		} +		if n.isInt { +			return reflect.ValueOf(n.int64) +		} +		if n.isUint { +			return reflect.ValueOf(n.uint64) +		} +		if n.isFloat { +			return reflect.ValueOf(n.float64) +		} +	case *stringNode: +		return reflect.ValueOf(n.text) +	} +	s.errorf("can't handle assignment of %s to empty interface argument", n) +	panic("not reached") +} + +// indirect returns the item at the end of indirection, and a bool to indicate if it's nil. +func indirect(v reflect.Value) (rv reflect.Value, isNil bool) { +	for v.Kind() == reflect.Ptr { +		if v.IsNil() { +			return v, true +		} +		v = v.Elem() +	} +	return v, false +} + +// printValue writes the textual representation of the value to the output of +// the template. +func (s *state) printValue(n node, v reflect.Value) { +	if !v.IsValid() { +		fmt.Fprint(s.wr, "<no value>") +		return +	} +	switch v.Kind() { +	case reflect.Ptr: +		var isNil bool +		if v, isNil = indirect(v); isNil { +			fmt.Fprint(s.wr, "<nil>") +			return +		} +	case reflect.Chan, reflect.Func, reflect.Interface: +		s.errorf("can't print %s of type %s", n, v.Type()) +	} +	fmt.Fprint(s.wr, v.Interface()) +} diff --git a/src/pkg/exp/template/exec_test.go b/src/pkg/exp/template/exec_test.go new file mode 100644 index 000000000..86b958e84 --- /dev/null +++ b/src/pkg/exp/template/exec_test.go @@ -0,0 +1,342 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( +	"bytes" +	"fmt" +	"os" +	"sort" +	"strings" +	"testing" +) + +// T has lots of interesting pieces to use to test execution. +type T struct { +	// Basics +	I           int +	U16         uint16 +	X           string +	FloatZero   float64 +	ComplexZero float64 +	// Nested structs. +	U *U +	// Slices +	SI      []int +	SIEmpty []int +	SB      []bool +	// Maps +	MSI      map[string]int +	MSIone   map[string]int // one element, for deterministic output +	MSIEmpty map[string]int +	SMSI     []map[string]int +	// Empty interfaces; used to see if we can dig inside one. +	Empty0 interface{} // nil +	Empty1 interface{} +	Empty2 interface{} +	Empty3 interface{} +	Empty4 interface{} +	// Pointers +	PI  *int +	PSI *[]int +	NIL *int +} + +var tVal = &T{ +	I:      17, +	U16:    16, +	X:      "x", +	U:      &U{"v"}, +	SI:     []int{3, 4, 5}, +	SB:     []bool{true, false}, +	MSI:    map[string]int{"one": 1, "two": 2, "three": 3}, +	MSIone: map[string]int{"one": 1}, +	SMSI: []map[string]int{ +		{"one": 1, "two": 2}, +		{"eleven": 11, "twelve": 12}, +	}, +	Empty1: 3, +	Empty2: "empty2", +	Empty3: []int{7, 8}, +	Empty4: &U{"v"}, +	PI:     newInt(23), +	PSI:    newIntSlice(21, 22, 23), +} + +// Helpers for creation. +func newInt(n int) *int { +	p := new(int) +	*p = n +	return p +} + +func newIntSlice(n ...int) *[]int { +	p := new([]int) +	*p = make([]int, len(n)) +	copy(*p, n) +	return p +} + +// Simple methods with and without arguments. +func (t *T) Method0() string { +	return "resultOfMethod0" +} + +func (t *T) Method1(a int) int { +	return a +} + +func (t *T) Method2(a uint16, b string) string { +	return fmt.Sprintf("Method2: %d %s", a, b) +} + +func (t *T) MAdd(a int, b []int) []int { +	v := make([]int, len(b)) +	for i, x := range b { +		v[i] = x + a +	} +	return v +} + +// MSort is used to sort map keys for stable output. (Nice trick!) +func (t *T) MSort(m map[string]int) []string { +	keys := make([]string, len(m)) +	i := 0 +	for k := range m { +		keys[i] = k +		i++ +	} +	sort.Strings(keys) +	return keys +} + +// EPERM returns a value and an os.Error according to its argument. +func (t *T) EPERM(error bool) (bool, os.Error) { +	if error { +		return true, os.EPERM +	} +	return false, nil +} + +type U struct { +	V string +} + +type execTest struct { +	name   string +	input  string +	output string +	data   interface{} +	ok     bool +} + +var execTests = []execTest{ +	// Trivial cases. +	{"empty", "", "", nil, true}, +	{"text", "some text", "some text", nil, true}, + +	// Fields of structs. +	{".X", "-{{.X}}-", "-x-", tVal, true}, +	{".U.V", "-{{.U.V}}-", "-v-", tVal, true}, + +	// Dots of all kinds to test basic evaluation. +	{"dot int", "<{{.}}>", "<13>", 13, true}, +	{"dot uint", "<{{.}}>", "<14>", uint(14), true}, +	{"dot float", "<{{.}}>", "<15.1>", 15.1, true}, +	{"dot bool", "<{{.}}>", "<true>", true, true}, +	{"dot complex", "<{{.}}>", "<(16.2-17i)>", 16.2 - 17i, true}, +	{"dot string", "<{{.}}>", "<hello>", "hello", true}, +	{"dot slice", "<{{.}}>", "<[-1 -2 -3]>", []int{-1, -2, -3}, true}, +	{"dot map", "<{{.}}>", "<map[two:22 one:11]>", map[string]int{"one": 11, "two": 22}, true}, +	{"dot struct", "<{{.}}>", "<{7 seven}>", struct { +		a int +		b string +	}{7, "seven"}, true}, + +	// Pointers. +	{"*int", "{{.PI}}", "23", tVal, true}, +	{"*[]int", "{{.PSI}}", "[21 22 23]", tVal, true}, +	{"*[]int[1]", "{{index .PSI 1}}", "22", tVal, true}, +	{"NIL", "{{.NIL}}", "<nil>", tVal, true}, + +	// Emtpy interfaces holding values. +	{"empty nil", "{{.Empty0}}", "<no value>", tVal, true}, +	{"empty with int", "{{.Empty1}}", "3", tVal, true}, +	{"empty with string", "{{.Empty2}}", "empty2", tVal, true}, +	{"empty with slice", "{{.Empty3}}", "[7 8]", tVal, true}, +	{"empty with struct", "{{.Empty4}}", "{v}", tVal, true}, + +	// Method calls. +	{".Method0", "-{{.Method0}}-", "-resultOfMethod0-", tVal, true}, +	{".Method1(1234)", "-{{.Method1 1234}}-", "-1234-", tVal, true}, +	{".Method1(.I)", "-{{.Method1 .I}}-", "-17-", tVal, true}, +	{".Method2(3, .X)", "-{{.Method2 3 .X}}-", "-Method2: 3 x-", tVal, true}, +	{".Method2(.U16, `str`)", "-{{.Method2 .U16 `str`}}-", "-Method2: 16 str-", tVal, true}, + +	// Pipelines. +	{"pipeline", "-{{.Method0 | .Method2 .U16}}-", "-Method2: 16 resultOfMethod0-", tVal, true}, + +	// If. +	{"if true", "{{if true}}TRUE{{end}}", "TRUE", tVal, true}, +	{"if false", "{{if false}}TRUE{{else}}FALSE{{end}}", "FALSE", tVal, true}, +	{"if 1", "{{if 1}}NON-ZERO{{else}}ZERO{{end}}", "NON-ZERO", tVal, true}, +	{"if 0", "{{if 0}}NON-ZERO{{else}}ZERO{{end}}", "ZERO", tVal, true}, +	{"if 1.5", "{{if 1.5}}NON-ZERO{{else}}ZERO{{end}}", "NON-ZERO", tVal, true}, +	{"if 0.0", "{{if .FloatZero}}NON-ZERO{{else}}ZERO{{end}}", "ZERO", tVal, true}, +	{"if 1.5i", "{{if 1.5i}}NON-ZERO{{else}}ZERO{{end}}", "NON-ZERO", tVal, true}, +	{"if 0.0i", "{{if .ComplexZero}}NON-ZERO{{else}}ZERO{{end}}", "ZERO", tVal, true}, +	{"if emptystring", "{{if ``}}NON-EMPTY{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"if string", "{{if `notempty`}}NON-EMPTY{{else}}EMPTY{{end}}", "NON-EMPTY", tVal, true}, +	{"if emptyslice", "{{if .SIEmpty}}NON-EMPTY{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"if slice", "{{if .SI}}NON-EMPTY{{else}}EMPTY{{end}}", "NON-EMPTY", tVal, true}, +	{"if emptymap", "{{if .MSIEmpty}}NON-EMPTY{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"if map", "{{if .MSI}}NON-EMPTY{{else}}EMPTY{{end}}", "NON-EMPTY", tVal, true}, + +	// Printf. +	{"printf", `{{printf "hello, printf"}}`, "hello, printf", tVal, true}, +	{"printf int", `{{printf "%04x" 127}}`, "007f", tVal, true}, +	{"printf float", `{{printf "%g" 3.5}}`, "3.5", tVal, true}, +	{"printf complex", `{{printf "%g" 1+7i}}`, "(1+7i)", tVal, true}, +	{"printf string", `{{printf "%s" "hello"}}`, "hello", tVal, true}, +	{"printf function", `{{printf "%#q" gopher}}`, "`gopher`", tVal, true}, +	{"printf field", `{{printf "%s" .U.V}}`, "v", tVal, true}, +	{"printf method", `{{printf "%s" .Method0}}`, "resultOfMethod0", tVal, true}, +	{"printf lots", `{{printf "%d %s %g %s" 127 "hello" 7-3i .Method0}}`, "127 hello (7-3i) resultOfMethod0", tVal, true}, + +	// HTML. +	{"html", `{{html "<script>alert(\"XSS\");</script>"}}`, +		"<script>alert("XSS");</script>", nil, true}, +	{"html pipeline", `{{printf "<script>alert(\"XSS\");</script>" | html}}`, +		"<script>alert("XSS");</script>", nil, true}, + +	// JavaScript. +	{"js", `{{js .}}`, `It\'d be nice.`, `It'd be nice.`, true}, + +	// Booleans +	{"not", "{{not true}} {{not false}}", "false true", nil, true}, +	{"and", "{{and 0 0}} {{and 1 0}} {{and 0 1}} {{and 1 1}}", "false false false true", nil, true}, +	{"or", "{{or 0 0}} {{or 1 0}} {{or 0 1}} {{or 1 1}}", "false true true true", nil, true}, +	{"boolean if", "{{if and true 1 `hi`}}TRUE{{else}}FALSE{{end}}", "TRUE", tVal, true}, +	{"boolean if not", "{{if and true 1 `hi` | not}}TRUE{{else}}FALSE{{end}}", "FALSE", nil, true}, + +	// Indexing. +	{"slice[0]", "{{index .SI 0}}", "3", tVal, true}, +	{"slice[1]", "{{index .SI 1}}", "4", tVal, true}, +	{"slice[HUGE]", "{{index .SI 10}}", "", tVal, false}, +	{"slice[WRONG]", "{{index .SI `hello`}}", "", tVal, false}, +	{"map[one]", "{{index .MSI `one`}}", "1", tVal, true}, +	{"map[two]", "{{index .MSI `two`}}", "2", tVal, true}, +	{"map[NO]", "{{index .MSI `XXX`}}", "", tVal, false}, +	{"map[WRONG]", "{{index .MSI 10}}", "", tVal, false}, +	{"double index", "{{index .SMSI 1 `eleven`}}", "11", tVal, true}, + +	// With. +	{"with true", "{{with true}}{{.}}{{end}}", "true", tVal, true}, +	{"with false", "{{with false}}{{.}}{{else}}FALSE{{end}}", "FALSE", tVal, true}, +	{"with 1", "{{with 1}}{{.}}{{else}}ZERO{{end}}", "1", tVal, true}, +	{"with 0", "{{with 0}}{{.}}{{else}}ZERO{{end}}", "ZERO", tVal, true}, +	{"with 1.5", "{{with 1.5}}{{.}}{{else}}ZERO{{end}}", "1.5", tVal, true}, +	{"with 0.0", "{{with .FloatZero}}{{.}}{{else}}ZERO{{end}}", "ZERO", tVal, true}, +	{"with 1.5i", "{{with 1.5i}}{{.}}{{else}}ZERO{{end}}", "(0+1.5i)", tVal, true}, +	{"with 0.0i", "{{with .ComplexZero}}{{.}}{{else}}ZERO{{end}}", "ZERO", tVal, true}, +	{"with emptystring", "{{with ``}}{{.}}{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"with string", "{{with `notempty`}}{{.}}{{else}}EMPTY{{end}}", "notempty", tVal, true}, +	{"with emptyslice", "{{with .SIEmpty}}{{.}}{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"with slice", "{{with .SI}}{{.}}{{else}}EMPTY{{end}}", "[3 4 5]", tVal, true}, +	{"with emptymap", "{{with .MSIEmpty}}{{.}}{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"with map", "{{with .MSIone}}{{.}}{{else}}EMPTY{{end}}", "map[one:1]", tVal, true}, +	{"with empty interface, struct field", "{{with .Empty4}}{{.V}}{{end}}", "v", tVal, true}, + +	// Range. +	{"range []int", "{{range .SI}}-{{.}}-{{end}}", "-3--4--5-", tVal, true}, +	{"range empty no else", "{{range .SIEmpty}}-{{.}}-{{end}}", "", tVal, true}, +	{"range []int else", "{{range .SI}}-{{.}}-{{else}}EMPTY{{end}}", "-3--4--5-", tVal, true}, +	{"range empty else", "{{range .SIEmpty}}-{{.}}-{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"range []bool", "{{range .SB}}-{{.}}-{{end}}", "-true--false-", tVal, true}, +	{"range []int method", "{{range .SI | .MAdd .I}}-{{.}}-{{end}}", "-20--21--22-", tVal, true}, +	{"range map", "{{range .MSI | .MSort}}-{{.}}-{{end}}", "-one--three--two-", tVal, true}, +	{"range empty map no else", "{{range .MSIEmpty}}-{{.}}-{{end}}", "", tVal, true}, +	{"range map else", "{{range .MSI | .MSort}}-{{.}}-{{else}}EMPTY{{end}}", "-one--three--two-", tVal, true}, +	{"range empty map else", "{{range .MSIEmpty}}-{{.}}-{{else}}EMPTY{{end}}", "EMPTY", tVal, true}, +	{"range empty interface", "{{range .Empty3}}-{{.}}-{{else}}EMPTY{{end}}", "-7--8-", tVal, true}, + +	// Error handling. +	{"error method, error", "{{.EPERM true}}", "", tVal, false}, +	{"error method, no error", "{{.EPERM false}}", "false", tVal, true}, +} + +func gopher() string { +	return "gopher" +} + +func testExecute(execTests []execTest, set *Set, t *testing.T) { +	b := new(bytes.Buffer) +	funcs := FuncMap{"gopher": gopher} +	for _, test := range execTests { +		tmpl := New(test.name).Funcs(funcs) +		err := tmpl.Parse(test.input) +		if err != nil { +			t.Errorf("%s: parse error: %s", test.name, err) +			continue +		} +		b.Reset() +		err = tmpl.ExecuteInSet(b, test.data, set) +		switch { +		case !test.ok && err == nil: +			t.Errorf("%s: expected error; got none", test.name) +			continue +		case test.ok && err != nil: +			t.Errorf("%s: unexpected execute error: %s", test.name, err) +			continue +		case !test.ok && err != nil: +			// expected error, got one +			if *debug { +				fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err) +			} +		} +		result := b.String() +		if result != test.output { +			t.Errorf("%s: expected\n\t%q\ngot\n\t%q", test.name, test.output, result) +		} +	} +} + +func TestExecute(t *testing.T) { +	testExecute(execTests, nil, t) +} + +// Check that an error from a method flows back to the top. +func TestExecuteError(t *testing.T) { +	b := new(bytes.Buffer) +	tmpl := New("error") +	err := tmpl.Parse("{{.EPERM true}}") +	if err != nil { +		t.Fatalf("parse error: %s", err) +	} +	err = tmpl.Execute(b, tVal) +	if err == nil { +		t.Errorf("expected error; got none") +	} else if !strings.Contains(err.String(), os.EPERM.String()) { +		t.Errorf("expected os.EPERM; got %s", err) +	} +} + +func TestJSEscaping(t *testing.T) { +	testCases := []struct { +		in, exp string +	}{ +		{`a`, `a`}, +		{`'foo`, `\'foo`}, +		{`Go "jump" \`, `Go \"jump\" \\`}, +		{`Yukihiro says "今日は世界"`, `Yukihiro says \"今日は世界\"`}, +		{"unprintable \uFDFF", `unprintable \uFDFF`}, +	} +	for _, tc := range testCases { +		s := JSEscapeString(tc.in) +		if s != tc.exp { +			t.Errorf("JS escaping [%s] got [%s] want [%s]", tc.in, s, tc.exp) +		} +	} +} diff --git a/src/pkg/exp/template/funcs.go b/src/pkg/exp/template/funcs.go new file mode 100644 index 000000000..66be40fd4 --- /dev/null +++ b/src/pkg/exp/template/funcs.go @@ -0,0 +1,294 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( +	"bytes" +	"fmt" +	"io" +	"os" +	"reflect" +	"strings" +	"unicode" +	"utf8" +) + +// FuncMap is the type of the map defining the mapping from names to functions. +// Each function must have either a single return value, or two return values of +// which the second has type os.Error. +type FuncMap map[string]interface{} + +var funcs = map[string]reflect.Value{ +	"and":    reflect.ValueOf(and), +	"html":   reflect.ValueOf(HTMLEscaper), +	"index":  reflect.ValueOf(index), +	"js":     reflect.ValueOf(JSEscaper), +	"not":    reflect.ValueOf(not), +	"or":     reflect.ValueOf(or), +	"printf": reflect.ValueOf(fmt.Sprintf), +} + +// addFuncs adds to values the functions in funcs, converting them to reflect.Values. +func addFuncs(values map[string]reflect.Value, funcMap FuncMap) { +	for name, fn := range funcMap { +		v := reflect.ValueOf(fn) +		if v.Kind() != reflect.Func { +			panic("value for " + name + " not a function") +		} +		if !goodFunc(v.Type()) { +			panic(fmt.Errorf("can't handle multiple results from method/function %q", name)) +		} +		values[name] = v +	} +} + +// goodFunc checks that the function or method has the right result signature. +func goodFunc(typ reflect.Type) bool { +	// We allow functions with 1 result or 2 results where the second is an os.Error. +	switch { +	case typ.NumOut() == 1: +		return true +	case typ.NumOut() == 2 && typ.Out(1) == osErrorType: +		return true +	} +	return false +} + +// findFunction looks for a function in the template, set, and global map. +func findFunction(name string, tmpl *Template, set *Set) (reflect.Value, bool) { +	if tmpl != nil { +		if fn := tmpl.funcs[name]; fn.IsValid() { +			return fn, true +		} +	} +	if set != nil { +		if fn := set.funcs[name]; fn.IsValid() { +			return fn, true +		} +	} +	if fn := funcs[name]; fn.IsValid() { +		return fn, true +	} +	return reflect.Value{}, false +} + +// Indexing. + +// index returns the result of indexing its first argument by the following +// arguments.  Thus "index x 1 2 3" is, in Go syntax, x[1][2][3]. Each +// indexed item must be a map, slice, or array. +func index(item interface{}, indices ...interface{}) (interface{}, os.Error) { +	v := reflect.ValueOf(item) +	for _, i := range indices { +		index := reflect.ValueOf(i) +		var isNil bool +		if v, isNil = indirect(v); isNil { +			return nil, fmt.Errorf("index of nil pointer") +		} +		switch v.Kind() { +		case reflect.Array, reflect.Slice: +			var x int64 +			switch index.Kind() { +			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: +				x = index.Int() +			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: +				x = int64(index.Uint()) +			default: +				return nil, fmt.Errorf("cannot index slice/array with type %s", index.Type()) +			} +			if x < 0 || x >= int64(v.Len()) { +				return nil, fmt.Errorf("index out of range: %d", x) +			} +			v = v.Index(int(x)) +		case reflect.Map: +			if !index.Type().AssignableTo(v.Type().Key()) { +				return nil, fmt.Errorf("%s is not index type for %s", index.Type(), v.Type()) +			} +			v = v.MapIndex(index) +			if !v.IsValid() { +				return nil, fmt.Errorf("index %v not present in map", index.Interface()) +			} +		default: +			return nil, fmt.Errorf("can't index item of type %s", index.Type()) +		} +	} +	return v.Interface(), nil +} + +// Boolean logic. + +// and returns the Boolean AND of its arguments. +func and(arg0 interface{}, args ...interface{}) (truth bool) { +	truth, _ = isTrue(reflect.ValueOf(arg0)) +	for i := 0; truth && i < len(args); i++ { +		truth, _ = isTrue(reflect.ValueOf(args[i])) +	} +	return +} + +// or returns the Boolean OR of its arguments. +func or(arg0 interface{}, args ...interface{}) (truth bool) { +	truth, _ = isTrue(reflect.ValueOf(arg0)) +	for i := 0; !truth && i < len(args); i++ { +		truth, _ = isTrue(reflect.ValueOf(args[i])) +	} +	return +} + +// not returns the Boolean negation of its argument. +func not(arg interface{}) (truth bool) { +	truth, _ = isTrue(reflect.ValueOf(arg)) +	return !truth +} + +// HTML escaping. + +var ( +	htmlQuot = []byte(""") // shorter than """ +	htmlApos = []byte("'") // shorter than "'" +	htmlAmp  = []byte("&") +	htmlLt   = []byte("<") +	htmlGt   = []byte(">") +) + +// HTMLEscape writes to w the escaped HTML equivalent of the plain text data b. +func HTMLEscape(w io.Writer, b []byte) { +	last := 0 +	for i, c := range b { +		var html []byte +		switch c { +		case '"': +			html = htmlQuot +		case '\'': +			html = htmlApos +		case '&': +			html = htmlAmp +		case '<': +			html = htmlLt +		case '>': +			html = htmlGt +		default: +			continue +		} +		w.Write(b[last:i]) +		w.Write(html) +		last = i + 1 +	} +	w.Write(b[last:]) +} + +// HTMLEscapeString returns the escaped HTML equivalent of the plain text data s. +func HTMLEscapeString(s string) string { +	// Avoid allocation if we can. +	if strings.IndexAny(s, `'"&<>`) < 0 { +		return s +	} +	var b bytes.Buffer +	HTMLEscape(&b, []byte(s)) +	return b.String() +} + +// HTMLEscaper returns the escaped HTML equivalent of the textual +// representation of its arguments. +func HTMLEscaper(args ...interface{}) string { +	ok := false +	var s string +	if len(args) == 1 { +		s, ok = args[0].(string) +	} +	if !ok { +		s = fmt.Sprint(args...) +	} +	return HTMLEscapeString(s) +} + +// JavaScript escaping. + +var ( +	jsLowUni = []byte(`\u00`) +	hex      = []byte("0123456789ABCDEF") + +	jsBackslash = []byte(`\\`) +	jsApos      = []byte(`\'`) +	jsQuot      = []byte(`\"`) +) + + +// JSEscape writes to w the escaped JavaScript equivalent of the plain text data b. +func JSEscape(w io.Writer, b []byte) { +	last := 0 +	for i := 0; i < len(b); i++ { +		c := b[i] + +		if ' ' <= c && c < utf8.RuneSelf && c != '\\' && c != '"' && c != '\'' { +			// fast path: nothing to do +			continue +		} +		w.Write(b[last:i]) + +		if c < utf8.RuneSelf { +			// Quotes and slashes get quoted. +			// Control characters get written as \u00XX. +			switch c { +			case '\\': +				w.Write(jsBackslash) +			case '\'': +				w.Write(jsApos) +			case '"': +				w.Write(jsQuot) +			default: +				w.Write(jsLowUni) +				t, b := c>>4, c&0x0f +				w.Write(hex[t : t+1]) +				w.Write(hex[b : b+1]) +			} +		} else { +			// Unicode rune. +			rune, size := utf8.DecodeRune(b[i:]) +			if unicode.IsPrint(rune) { +				w.Write(b[i : i+size]) +			} else { +				// TODO(dsymonds): Do this without fmt? +				fmt.Fprintf(w, "\\u%04X", rune) +			} +			i += size - 1 +		} +		last = i + 1 +	} +	w.Write(b[last:]) +} + +// JSEscapeString returns the escaped JavaScript equivalent of the plain text data s. +func JSEscapeString(s string) string { +	// Avoid allocation if we can. +	if strings.IndexFunc(s, jsIsSpecial) < 0 { +		return s +	} +	var b bytes.Buffer +	JSEscape(&b, []byte(s)) +	return b.String() +} + +func jsIsSpecial(rune int) bool { +	switch rune { +	case '\\', '\'', '"': +		return true +	} +	return rune < ' ' || utf8.RuneSelf <= rune +} + +// JSEscaper returns the escaped JavaScript equivalent of the textual +// representation of its arguments. +func JSEscaper(args ...interface{}) string { +	ok := false +	var s string +	if len(args) == 1 { +		s, ok = args[0].(string) +	} +	if !ok { +		s = fmt.Sprint(args...) +	} +	return JSEscapeString(s) +} diff --git a/src/pkg/exp/template/lex.go b/src/pkg/exp/template/lex.go index 826d3eb88..d78152979 100644 --- a/src/pkg/exp/template/lex.go +++ b/src/pkg/exp/template/lex.go @@ -18,58 +18,71 @@ type item struct {  }  func (i item) String() string { -	switch i.typ { -	case itemEOF: +	switch { +	case i.typ == itemEOF:  		return "EOF" -	case itemError: +	case i.typ == itemError:  		return i.val -	} -	if len(i.val) > 10 { +	case i.typ > itemKeyword: +		return fmt.Sprintf("<%s>", i.val) +	case len(i.val) > 10:  		return fmt.Sprintf("%.10q...", i.val)  	}  	return fmt.Sprintf("%q", i.val)  } -// itemType identifies the type of lex item. +// itemType identifies the type of lex items.  type itemType int  const ( -	itemError itemType = iota // error occurred; value is text of error -	itemDot                   // the cursor, spelled '.'. +	itemError   itemType = iota // error occurred; value is text of error +	itemBool                    // boolean constant +	itemComplex                 // complex constant (1+2i); imaginary is just a number  	itemEOF -	itemElse       // else keyword -	itemEnd        // end keyword -	itemField      // alphanumeric identifier, starting with '.'. +	itemField      // alphanumeric identifier, starting with '.', possibly chained ('.x.y')  	itemIdentifier // alphanumeric identifier -	itemIf         // if keyword -	itemLeftMeta   // left meta-string -	itemNumber     // number +	itemLeftDelim  // left action delimiter +	itemNumber     // simple number, including imaginary  	itemPipe       // pipe symbol -	itemRange      // range keyword  	itemRawString  // raw quoted string (includes quotes) -	itemRightMeta  // right meta-string +	itemRightDelim // right action delimiter  	itemString     // quoted string (includes quotes)  	itemText       // plain text +	// Keywords appear after all the rest. +	itemKeyword  // used only to delimit the keywords +	itemDot      // the cursor, spelled '.'. +	itemDefine   // define keyword +	itemElse     // else keyword +	itemEnd      // end keyword +	itemIf       // if keyword +	itemRange    // range keyword +	itemTemplate // template keyword +	itemWith     // with keyword  )  // Make the types prettyprint.  var itemName = map[itemType]string{  	itemError:      "error", -	itemDot:        ".", +	itemBool:       "bool", +	itemComplex:    "complex",  	itemEOF:        "EOF", -	itemElse:       "else", -	itemEnd:        "end",  	itemField:      "field",  	itemIdentifier: "identifier", -	itemIf:         "if", -	itemLeftMeta:   "left meta", +	itemLeftDelim:  "left delim",  	itemNumber:     "number",  	itemPipe:       "pipe", -	itemRange:      "range",  	itemRawString:  "raw string", -	itemRightMeta:  "rightMeta", +	itemRightDelim: "right delim",  	itemString:     "string", -	itemText:       "text", +	// keywords +	itemDot:      ".", +	itemDefine:   "define", +	itemElse:     "else", +	itemIf:       "if", +	itemEnd:      "end", +	itemRange:    "range", +	itemTemplate: "template", +	itemWith:     "with",  }  func (i itemType) String() string { @@ -81,11 +94,14 @@ func (i itemType) String() string {  }  var key = map[string]itemType{ -	".":     itemDot, -	"else":  itemElse, -	"end":   itemEnd, -	"if":    itemIf, -	"range": itemRange, +	".":        itemDot, +	"define":   itemDefine, +	"else":     itemElse, +	"end":      itemEnd, +	"if":       itemIf, +	"range":    itemRange, +	"template": itemTemplate, +	"with":     itemWith,  }  const eof = -1 @@ -97,6 +113,7 @@ type stateFn func(*lexer) stateFn  type lexer struct {  	name  string    // the name of the input; used only for error reports.  	input string    // the string being scanned. +	state stateFn   // the next lexing function to enter  	pos   int       // current position in the input.  	start int       // start position of this item.  	width int       // width of last rune read from input. @@ -166,38 +183,47 @@ func (l *lexer) errorf(format string, args ...interface{}) stateFn {  	return nil  } -// run lexes the input by executing state functions until nil. -func (l *lexer) run() { -	for state := lexText; state != nil; { -		state = state(l) +// nextItem returns the next item from the input. +func (l *lexer) nextItem() item { +	for { +		select { +		case item := <-l.items: +			return item +		default: +			l.state = l.state(l) +		}  	} -	close(l.items) +	panic("not reached")  } -// lex launches a new scanner and returns the channel of items. -func lex(name, input string) (*lexer, chan item) { +// lex creates a new scanner for the input string. +func lex(name, input string) *lexer {  	l := &lexer{  		name:  name,  		input: input, -		items: make(chan item), +		state: lexText, +		items: make(chan item, 2), // Two items of buffering is sufficient for all state functions  	} -	go l.run() -	return l, l.items +	return l  }  // state functions -const leftMeta = "{{" -const rightMeta = "}}" +const ( +	leftDelim    = "{{" +	rightDelim   = "}}" +	leftComment  = "{{/*" +	rightComment = "*/}}" +) -// lexText scans until a metacharacter +// lexText scans until an opening action delimiter, "{{".  func lexText(l *lexer) stateFn {  	for { -		if strings.HasPrefix(l.input[l.pos:], leftMeta) { +		if strings.HasPrefix(l.input[l.pos:], leftDelim) {  			if l.pos > l.start {  				l.emit(itemText)  			} -			return lexLeftMeta +			return lexLeftDelim  		}  		if l.next() == eof {  			break @@ -211,28 +237,42 @@ func lexText(l *lexer) stateFn {  	return nil  } -// leftMeta scans the left "metacharacter", which is known to be present. -func lexLeftMeta(l *lexer) stateFn { -	l.pos += len(leftMeta) -	l.emit(itemLeftMeta) +// lexLeftDelim scans the left delimiter, which is known to be present. +func lexLeftDelim(l *lexer) stateFn { +	if strings.HasPrefix(l.input[l.pos:], leftComment) { +		return lexComment +	} +	l.pos += len(leftDelim) +	l.emit(itemLeftDelim)  	return lexInsideAction  } -// rightMeta scans the right "metacharacter", which is known to be present. -func lexRightMeta(l *lexer) stateFn { -	l.pos += len(rightMeta) -	l.emit(itemRightMeta) +// lexComment scans a comment. The left comment marker is known to be present. +func lexComment(l *lexer) stateFn { +	i := strings.Index(l.input[l.pos:], rightComment) +	if i < 0 { +		return l.errorf("unclosed comment") +	} +	l.pos += i + len(rightComment) +	l.ignore() +	return lexText +} + +// lexRightDelim scans the right delimiter, which is known to be present. +func lexRightDelim(l *lexer) stateFn { +	l.pos += len(rightDelim) +	l.emit(itemRightDelim)  	return lexText  } -// lexInsideAction scans the elements inside "metacharacters". +// lexInsideAction scans the elements inside action delimiters.  func lexInsideAction(l *lexer) stateFn {  	// Either number, quoted string, or identifier.  	// Spaces separate and are ignored.  	// Pipe symbols separate and are emitted.  	for { -		if strings.HasPrefix(l.input[l.pos:], rightMeta) { -			return lexRightMeta +		if strings.HasPrefix(l.input[l.pos:], rightDelim) { +			return lexRightDelim  		}  		switch r := l.next(); {  		case r == eof || r == '\n': @@ -273,15 +313,19 @@ Loop:  	for {  		switch r := l.next(); {  		case isAlphaNumeric(r): -			// absorb +			// absorb. +		case r == '.' && l.input[l.start] == '.': +			// field chaining; absorb into one token.  		default:  			l.backup()  			word := l.input[l.start:l.pos]  			switch { -			case key[word] != itemError: +			case key[word] > itemKeyword:  				l.emit(key[word])  			case word[0] == '.':  				l.emit(itemField) +			case word == "true", word == "false": +				l.emit(itemBool)  			default:  				l.emit(itemIdentifier)  			} @@ -295,8 +339,23 @@ Loop:  // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"  // and "089" - but when it's wrong the input is invalid and the parser (via  // strconv) will notice. -// TODO: without expressions you can do imaginary but not complex.  func lexNumber(l *lexer) stateFn { +	if !l.scanNumber() { +		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) +	} +	if sign := l.peek(); sign == '+' || sign == '-' { +		// Complex: 1+2i.  No spaces, must end in 'i'. +		if !l.scanNumber() || l.input[l.pos-1] != 'i' { +			return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) +		} +		l.emit(itemComplex) +	} else { +		l.emit(itemNumber) +	} +	return lexInsideAction +} + +func (l *lexer) scanNumber() bool {  	// Optional leading sign.  	l.accept("+-")  	// Is it hex? @@ -317,10 +376,9 @@ func lexNumber(l *lexer) stateFn {  	// Next thing mustn't be alphanumeric.  	if isAlphaNumeric(l.peek()) {  		l.next() -		return l.errorf("bad number syntax: %q", l.input[l.start:l.pos]) +		return false  	} -	l.emit(itemNumber) -	return lexInsideAction +	return true  }  // lexQuote scans a quoted string. diff --git a/src/pkg/exp/template/lex_test.go b/src/pkg/exp/template/lex_test.go index 184e833ef..256ec04d8 100644 --- a/src/pkg/exp/template/lex_test.go +++ b/src/pkg/exp/template/lex_test.go @@ -17,8 +17,8 @@ type lexTest struct {  var (  	tEOF      = item{itemEOF, ""} -	tLeft     = item{itemLeftMeta, "{{"} -	tRight    = item{itemRightMeta, "}}"} +	tLeft     = item{itemLeftDelim, "{{"} +	tRight    = item{itemRightDelim, "}}"}  	tRange    = item{itemRange, "range"}  	tPipe     = item{itemPipe, "|"}  	tFor      = item{itemIdentifier, "for"} @@ -31,11 +31,16 @@ var lexTests = []lexTest{  	{"empty", "", []item{tEOF}},  	{"spaces", " \t\n", []item{{itemText, " \t\n"}, tEOF}},  	{"text", `now is the time`, []item{{itemText, "now is the time"}, tEOF}}, +	{"text with comment", "hello-{{/* this is a comment */}}-world", []item{ +		{itemText, "hello-"}, +		{itemText, "-world"}, +		tEOF, +	}},  	{"empty action", `{{}}`, []item{tLeft, tRight, tEOF}},  	{"for", `{{for }}`, []item{tLeft, tFor, tRight, tEOF}},  	{"quote", `{{"abc \n\t\" "}}`, []item{tLeft, tQuote, tRight, tEOF}},  	{"raw quote", "{{" + raw + "}}", []item{tLeft, tRawQuote, tRight, tEOF}}, -	{"numbers", "{{1 02 0x14 -7.2i 1e3 +1.2e-4}}", []item{ +	{"numbers", "{{1 02 0x14 -7.2i 1e3 +1.2e-4 4.2i 1+2i}}", []item{  		tLeft,  		{itemNumber, "1"},  		{itemNumber, "02"}, @@ -43,25 +48,40 @@ var lexTests = []lexTest{  		{itemNumber, "-7.2i"},  		{itemNumber, "1e3"},  		{itemNumber, "+1.2e-4"}, +		{itemNumber, "4.2i"}, +		{itemComplex, "1+2i"},  		tRight,  		tEOF,  	}}, -	{"dots", "{{.x . .2 .x.y }}", []item{ +	{"bools", "{{true false}}", []item{ +		tLeft, +		{itemBool, "true"}, +		{itemBool, "false"}, +		tRight, +		tEOF, +	}}, +	{"dot", "{{.}}", []item{ +		tLeft, +		{itemDot, "."}, +		tRight, +		tEOF, +	}}, +	{"dots", "{{.x . .2 .x.y}}", []item{  		tLeft,  		{itemField, ".x"},  		{itemDot, "."},  		{itemNumber, ".2"}, -		{itemField, ".x"}, -		{itemField, ".y"}, +		{itemField, ".x.y"},  		tRight,  		tEOF,  	}}, -	{"keywords", "{{range if else end}}", []item{ +	{"keywords", "{{range if else end with}}", []item{  		tLeft,  		{itemRange, "range"},  		{itemIf, "if"},  		{itemElse, "else"},  		{itemEnd, "end"}, +		{itemWith, "with"},  		tRight,  		tEOF,  	}}, @@ -112,9 +132,13 @@ var lexTests = []lexTest{  // collect gathers the emitted items into a slice.  func collect(t *lexTest) (items []item) { -	_, tokens := lex(t.name, t.input) -	for i := range tokens { -		items = append(items, i) +	l := lex(t.name, t.input) +	for { +		item := l.nextItem() +		items = append(items, item) +		if item.typ == itemEOF || item.typ == itemError { +			break +		}  	}  	return  } diff --git a/src/pkg/exp/template/parse.go b/src/pkg/exp/template/parse.go index 57ddb0084..8b2d60207 100644 --- a/src/pkg/exp/template/parse.go +++ b/src/pkg/exp/template/parse.go @@ -8,17 +8,21 @@ import (  	"bytes"  	"fmt"  	"os" +	"reflect"  	"runtime"  	"strconv" +	"strings" +	"unicode"  )  // Template is the representation of a parsed template.  type Template struct { -	// TODO: At the moment, these are all internal to parsing. -	name     string -	root     *listNode +	name  string +	root  *listNode +	funcs map[string]reflect.Value +	// Parsing only; cleared after parse. +	set      *Set  	lex      *lexer -	tokens   chan item  	token    item // token lookahead for parser  	havePeek bool  } @@ -28,7 +32,7 @@ func (t *Template) next() item {  	if t.havePeek {  		t.havePeek = false  	} else { -		t.token = <-t.tokens +		t.token = t.lex.nextItem()  	}  	return t.token  } @@ -43,7 +47,7 @@ func (t *Template) peek() item {  	if t.havePeek {  		return t.token  	} -	t.token = <-t.tokens +	t.token = t.lex.nextItem()  	t.havePeek = true  	return t.token  } @@ -64,14 +68,18 @@ const (  	nodeText nodeType = iota  	nodeAction  	nodeCommand +	nodeDot  	nodeElse  	nodeEnd  	nodeField  	nodeIdentifier +	nodeIf  	nodeList  	nodeNumber  	nodeRange  	nodeString +	nodeTemplate +	nodeWith  )  // Nodes. @@ -103,25 +111,26 @@ func (l *listNode) String() string {  // textNode holds plain text.  type textNode struct {  	nodeType -	text string +	text []byte  }  func newText(text string) *textNode { -	return &textNode{nodeType: nodeText, text: text} +	return &textNode{nodeType: nodeText, text: []byte(text)}  }  func (t *textNode) String() string {  	return fmt.Sprintf("(text: %q)", t.text)  } -// actionNode holds an action (something bounded by metacharacters). +// actionNode holds an action (something bounded by delimiters).  type actionNode struct {  	nodeType +	line     int  	pipeline []*commandNode  } -func newAction() *actionNode { -	return &actionNode{nodeType: nodeAction} +func newAction(line int, pipeline []*commandNode) *actionNode { +	return &actionNode{nodeType: nodeAction, line: line, pipeline: pipeline}  }  func (a *actionNode) append(command *commandNode) { @@ -164,45 +173,88 @@ func (i *identifierNode) String() string {  	return fmt.Sprintf("I=%s", i.ident)  } -// fieldNode holds a field (identifier starting with '.'). The period is dropped from the ident. +// dotNode holds the special identifier '.'. It is represented by a nil pointer. +type dotNode bool + +func newDot() *dotNode { +	return nil +} + +func (d *dotNode) typ() nodeType { +	return nodeDot +} + +func (d *dotNode) String() string { +	return "{{<.>}}" +} + +// fieldNode holds a field (identifier starting with '.'). +// The names may be chained ('.x.y'). +// The period is dropped from each ident.  type fieldNode struct {  	nodeType -	ident string +	ident []string  }  func newField(ident string) *fieldNode { -	return &fieldNode{nodeType: nodeField, ident: ident[1:]} //drop period +	return &fieldNode{nodeType: nodeField, ident: strings.Split(ident[1:], ".")} // [1:] to drop leading period  }  func (f *fieldNode) String() string { -	return fmt.Sprintf("F=.%s", f.ident) +	return fmt.Sprintf("F=%s", f.ident) +} + +// boolNode holds a boolean constant. +type boolNode struct { +	nodeType +	true bool +} + +func newBool(true bool) *boolNode { +	return &boolNode{nodeType: nodeString, true: true} +} + +func (b *boolNode) String() string { +	if b.true { +		return fmt.Sprintf("B=true") +	} +	return fmt.Sprintf("B=false")  } -// numberNode holds a number, signed or unsigned, integer, floating, or imaginary. +// numberNode holds a number, signed or unsigned integer, floating, or complex.  // The value is parsed and stored under all the types that can represent the value.  // This simulates in a small amount of code the behavior of Go's ideal constants. -// TODO: booleans, complex numbers.  type numberNode struct {  	nodeType -	isInt     bool // number has an integral value -	isUint    bool // number has an unsigned integral value -	isFloat   bool // number has a floating-point value -	imaginary bool // number is imaginary -	int64          // the signed integer value -	uint64         // the unsigned integer value -	float64        // the positive floating-point value -	text      string -} - -func newNumber(text string) (*numberNode, os.Error) { +	isInt      bool // number has an integral value +	isUint     bool // number has an unsigned integral value +	isFloat    bool // number has a floating-point value +	isComplex  bool // number is complex +	int64           // the signed integer value +	uint64          // the unsigned integer value +	float64         // the floating-point value +	complex128      // the complex value +	text       string +} + +func newNumber(text string, isComplex bool) (*numberNode, os.Error) {  	n := &numberNode{nodeType: nodeNumber, text: text} -	// Imaginary constants can only be floating-point. +	if isComplex { +		// fmt.Sscan can parse the pair, so let it do the work. +		if _, err := fmt.Sscan(text, &n.complex128); err != nil { +			return nil, err +		} +		n.isComplex = true +		n.simplifyComplex() +		return n, nil +	} +	// Imaginary constants can only be complex unless they are zero.  	if len(text) > 0 && text[len(text)-1] == 'i' {  		f, err := strconv.Atof64(text[:len(text)-1])  		if err == nil { -			n.imaginary = true -			n.isFloat = true -			n.float64 = f +			n.isComplex = true +			n.complex128 = complex(0, f) +			n.simplifyComplex()  			return n, nil  		}  	} @@ -250,6 +302,23 @@ func newNumber(text string) (*numberNode, os.Error) {  	return n, nil  } +// simplifyComplex pulls out any other types that are represented by the complex number. +// These all require that the imaginary part be zero. +func (n *numberNode) simplifyComplex() { +	n.isFloat = imag(n.complex128) == 0 +	if n.isFloat { +		n.float64 = real(n.complex128) +		n.isInt = float64(int64(n.float64)) == n.float64 +		if n.isInt { +			n.int64 = int64(n.float64) +		} +		n.isUint = float64(uint64(n.float64)) == n.float64 +		if n.isUint { +			n.uint64 = uint64(n.float64) +		} +	} +} +  func (n *numberNode) String() string {  	return fmt.Sprintf("N=%s", n.text)  } @@ -283,11 +352,14 @@ func (e *endNode) String() string {  	return "{{end}}"  } -// elseNode represents an {{else}} action. It is represented by a nil pointer. -type elseNode bool +// elseNode represents an {{else}} action. +type elseNode struct { +	nodeType +	line int +} -func newElse() *elseNode { -	return nil +func newElse(line int) *elseNode { +	return &elseNode{nodeType: nodeElse, line: line}  }  func (e *elseNode) typ() nodeType { @@ -297,37 +369,106 @@ func (e *elseNode) typ() nodeType {  func (e *elseNode) String() string {  	return "{{else}}"  } +// ifNode represents an {{if}} action and its commands. +// TODO: what should evaluation look like? is a pipeline enough? +type ifNode struct { +	nodeType +	line     int +	pipeline []*commandNode +	list     *listNode +	elseList *listNode +} + +func newIf(line int, pipeline []*commandNode, list, elseList *listNode) *ifNode { +	return &ifNode{nodeType: nodeIf, line: line, pipeline: pipeline, list: list, elseList: elseList} +} + +func (i *ifNode) String() string { +	if i.elseList != nil { +		return fmt.Sprintf("({{if %s}} %s {{else}} %s)", i.pipeline, i.list, i.elseList) +	} +	return fmt.Sprintf("({{if %s}} %s)", i.pipeline, i.list) +} -// rangeNode represents an {{range}} action and its commands. +// rangeNode represents a {{range}} action and its commands.  type rangeNode struct {  	nodeType -	field    node +	line     int +	pipeline []*commandNode  	list     *listNode  	elseList *listNode  } -func newRange(field node, list *listNode) *rangeNode { -	return &rangeNode{nodeType: nodeRange, field: field, list: list} +func newRange(line int, pipeline []*commandNode, list, elseList *listNode) *rangeNode { +	return &rangeNode{nodeType: nodeRange, line: line, pipeline: pipeline, list: list, elseList: elseList}  }  func (r *rangeNode) String() string {  	if r.elseList != nil { -		return fmt.Sprintf("({{range %s}} %s {{else}} %s)", r.field, r.list, r.elseList) +		return fmt.Sprintf("({{range %s}} %s {{else}} %s)", r.pipeline, r.list, r.elseList)  	} -	return fmt.Sprintf("({{range %s}} %s)", r.field, r.list) +	return fmt.Sprintf("({{range %s}} %s)", r.pipeline, r.list) +} + +// templateNode represents a {{template}} action. +type templateNode struct { +	nodeType +	line     int +	name     node +	pipeline []*commandNode  } +func newTemplate(line int, name node, pipeline []*commandNode) *templateNode { +	return &templateNode{nodeType: nodeTemplate, line: line, name: name, pipeline: pipeline} +} + +func (t *templateNode) String() string { +	return fmt.Sprintf("{{template %s %s}}", t.name, t.pipeline) +} + +// withNode represents a {{with}} action and its commands. +type withNode struct { +	nodeType +	line     int +	pipeline []*commandNode +	list     *listNode +	elseList *listNode +} + +func newWith(line int, pipeline []*commandNode, list, elseList *listNode) *withNode { +	return &withNode{nodeType: nodeWith, line: line, pipeline: pipeline, list: list, elseList: elseList} +} + +func (w *withNode) String() string { +	if w.elseList != nil { +		return fmt.Sprintf("({{with %s}} %s {{else}} %s)", w.pipeline, w.list, w.elseList) +	} +	return fmt.Sprintf("({{with %s}} %s)", w.pipeline, w.list) +} + +  // Parsing.  // New allocates a new template with the given name.  func New(name string) *Template {  	return &Template{ -		name: name, +		name:  name, +		funcs: make(map[string]reflect.Value),  	}  } +// Funcs adds to the template's function map the elements of the +// argument map.   It panics if a value in the map is not a function +// with appropriate return type. +// The return value is the template, so calls can be chained. +func (t *Template) Funcs(funcMap FuncMap) *Template { +	addFuncs(t.funcs, funcMap) +	return t +} +  // errorf formats the error and terminates processing.  func (t *Template) errorf(format string, args ...interface{}) { +	t.root = nil  	format = fmt.Sprintf("template: %s:%d: %s", t.name, t.lex.lineNumber(), format)  	panic(fmt.Errorf(format, args...))  } @@ -351,25 +492,80 @@ func (t *Template) unexpected(token item, context string) {  	t.errorf("unexpected %s in %s", token, context)  } -// Parse parses the template definition string and constructs an efficient representation of the template. -func (t *Template) Parse(s string) (err os.Error) { -	t.lex, t.tokens = lex(t.name, s) -	defer func() { -		e := recover() -		if e != nil { -			if _, ok := e.(runtime.Error); ok { -				panic(e) +// recover is the handler that turns panics into returns from the top +// level of Parse or Execute. +func (t *Template) recover(errp *os.Error) { +	e := recover() +	if e != nil { +		if _, ok := e.(runtime.Error); ok { +			panic(e) +		} +		t.stopParse() +		*errp = e.(os.Error) +	} +	return +} + +// startParse starts the template parsing from the lexer. +func (t *Template) startParse(set *Set, lex *lexer) { +	t.root = nil +	t.set = set +	t.lex = lex +} + +// stopParse terminates parsing. +func (t *Template) stopParse() { +	t.set, t.lex = nil, nil +} + +// atEOF returns true if, possibly after spaces, we're at EOF. +func (t *Template) atEOF() bool { +	for { +		token := t.peek() +		switch token.typ { +		case itemEOF: +			return true +		case itemText: +			for _, r := range token.val { +				if !unicode.IsSpace(r) { +					return false +				}  			} -			err = e.(os.Error) +			t.next() // skip spaces. +			continue  		} -		return -	}() -	var next node +		break +	} +	return false +} + +// Parse parses the template definition string to construct an internal representation +// of the template for execution. +func (t *Template) Parse(s string) (err os.Error) { +	t.startParse(nil, lex(t.name, s)) +	defer t.recover(&err) +	t.parse(true) +	t.stopParse() +	return +} + +// ParseInSet parses the template definition string to construct an internal representation +// of the template for execution. Function bindings are checked against those in the set. +func (t *Template) ParseInSet(s string, set *Set) (err os.Error) { +	t.startParse(set, lex(t.name, s)) +	defer t.recover(&err) +	t.parse(true) +	t.stopParse() +	return +} + +// parse is the helper for Parse. It triggers an error if we expect EOF but don't reach it. +func (t *Template) parse(toEOF bool) (next node) {  	t.root, next = t.itemList(true) -	if next != nil { +	if toEOF && next != nil {  		t.errorf("unexpected %s", next)  	} -	return nil +	return next  }  // itemList: @@ -398,7 +594,7 @@ func (t *Template) textOrAction() node {  	switch token := t.next(); token.typ {  	case itemText:  		return newText(token.val) -	case itemLeftMeta: +	case itemLeftDelim:  		return t.action()  	default:  		t.unexpected(token, "input") @@ -409,63 +605,95 @@ func (t *Template) textOrAction() node {  // Action:  //	control  //	command ("|" command)* -// Left meta is past. Now get actions. +// Left delim is past. Now get actions. +// First word could be a keyword such as range.  func (t *Template) action() (n node) { -	action := newAction()  	switch token := t.next(); token.typ { -	case itemRange: -		return t.rangeControl()  	case itemElse:  		return t.elseControl()  	case itemEnd:  		return t.endControl() +	case itemIf: +		return t.ifControl() +	case itemRange: +		return t.rangeControl() +	case itemTemplate: +		return t.templateControl() +	case itemWith: +		return t.withControl()  	}  	t.backup() -Loop: +	return newAction(t.lex.lineNumber(), t.pipeline("command")) +} + +// Pipeline: +//	field or command +//	pipeline "|" pipeline +func (t *Template) pipeline(context string) (pipe []*commandNode) {  	for {  		switch token := t.next(); token.typ { -		case itemRightMeta: -			break Loop -		case itemIdentifier, itemField: -			t.backup() -			cmd, err := t.command() -			if err != nil { -				t.error(err) +		case itemRightDelim: +			if len(pipe) == 0 { +				t.errorf("missing value for %s", context)  			} -			action.append(cmd) +			return +		case itemBool, itemComplex, itemDot, itemField, itemIdentifier, itemNumber, itemRawString, itemString: +			t.backup() +			pipe = append(pipe, t.command())  		default: -			t.unexpected(token, "command") +			t.unexpected(token, context)  		}  	} -	return action +	return  } -// Range: -//	{{range field}} itemList {{end}} -//	{{range field}} itemList {{else}} itemList {{end}} -// Range keyword is past. -func (t *Template) rangeControl() node { -	field := t.expect(itemField, "range") -	t.expect(itemRightMeta, "range") -	list, next := t.itemList(false) -	r := newRange(newField(field.val), list) +func (t *Template) parseControl(context string) (lineNum int, pipe []*commandNode, list, elseList *listNode) { +	lineNum = t.lex.lineNumber() +	pipe = t.pipeline(context) +	var next node +	list, next = t.itemList(false)  	switch next.typ() {  	case nodeEnd: //done  	case nodeElse: -		elseList, next := t.itemList(false) +		elseList, next = t.itemList(false)  		if next.typ() != nodeEnd {  			t.errorf("expected end; found %s", next)  		} -		r.elseList = elseList +		elseList = elseList  	} -	return r +	return lineNum, pipe, list, elseList +} + +// If: +//	{{if pipeline}} itemList {{end}} +//	{{if pipeline}} itemList {{else}} itemList {{end}} +// If keyword is past. +func (t *Template) ifControl() node { +	return newIf(t.parseControl("if")) +} + +// Range: +//	{{range pipeline}} itemList {{end}} +//	{{range pipeline}} itemList {{else}} itemList {{end}} +// Range keyword is past. +func (t *Template) rangeControl() node { +	return newRange(t.parseControl("range"))  } +// With: +//	{{with pipeline}} itemList {{end}} +//	{{with pipeline}} itemList {{else}} itemList {{end}} +// If keyword is past. +func (t *Template) withControl() node { +	return newWith(t.parseControl("with")) +} + +  // End:  //	{{end}}  // End keyword is past.  func (t *Template) endControl() node { -	t.expect(itemRightMeta, "end") +	t.expect(itemRightDelim, "end")  	return newEnd()  } @@ -473,50 +701,83 @@ func (t *Template) endControl() node {  //	{{else}}  // Else keyword is past.  func (t *Template) elseControl() node { -	t.expect(itemRightMeta, "else") -	return newElse() +	t.expect(itemRightDelim, "else") +	return newElse(t.lex.lineNumber()) +} + +// Template: +//	{{template stringValue pipeline}} +// Template keyword is past.  The name must be something that can evaluate +// to a string. +func (t *Template) templateControl() node { +	var name node +	switch token := t.next(); token.typ { +	case itemIdentifier: +		if _, ok := findFunction(token.val, t, t.set); !ok { +			t.errorf("function %q not defined", token.val) +		} +		name = newIdentifier(token.val) +	case itemDot: +		name = newDot() +	case itemField: +		name = newField(token.val) +	case itemString, itemRawString: +		s, err := strconv.Unquote(token.val) +		if err != nil { +			t.error(err) +		} +		name = newString(s) +	default: +		t.unexpected(token, "template invocation") +	} +	pipeline := t.pipeline("template") +	return newTemplate(t.lex.lineNumber(), name, pipeline)  }  // command: -// space-separated arguments up to a pipeline character or right metacharacter. -// we consume the pipe character but leave the right meta to terminate the action. -func (t *Template) command() (*commandNode, os.Error) { +// space-separated arguments up to a pipeline character or right delimiter. +// we consume the pipe character but leave the right delim to terminate the action. +func (t *Template) command() *commandNode {  	cmd := newCommand()  Loop:  	for {  		switch token := t.next(); token.typ { -		case itemRightMeta: +		case itemRightDelim:  			t.backup()  			break Loop  		case itemPipe:  			break Loop  		case itemError: -			return nil, os.NewError(token.val) +			t.errorf("%s", token.val)  		case itemIdentifier: +			if _, ok := findFunction(token.val, t, t.set); !ok { +				t.errorf("function %q not defined", token.val) +			}  			cmd.append(newIdentifier(token.val)) +		case itemDot: +			cmd.append(newDot())  		case itemField:  			cmd.append(newField(token.val)) -		case itemNumber: -			if len(cmd.args) == 0 { -				t.errorf("command cannot be %q", token.val) -			} -			number, err := newNumber(token.val) +		case itemBool: +			cmd.append(newBool(token.val == "true")) +		case itemComplex, itemNumber: +			number, err := newNumber(token.val, token.typ == itemComplex)  			if err != nil {  				t.error(err)  			}  			cmd.append(number)  		case itemString, itemRawString: -			if len(cmd.args) == 0 { -				t.errorf("command cannot be %q", token.val) -			}  			s, err := strconv.Unquote(token.val)  			if err != nil { -				return nil, err +				t.error(err)  			}  			cmd.append(newString(s))  		default:  			t.unexpected(token, "command")  		}  	} -	return cmd, nil +	if len(cmd.args) == 0 { +		t.errorf("empty command") +	} +	return cmd  } diff --git a/src/pkg/exp/template/parse_test.go b/src/pkg/exp/template/parse_test.go index f89eaa6ce..71580f8b6 100644 --- a/src/pkg/exp/template/parse_test.go +++ b/src/pkg/exp/template/parse_test.go @@ -5,52 +5,65 @@  package template  import ( +	"flag"  	"fmt"  	"testing"  ) -const dumpErrors = true +var debug = flag.Bool("debug", false, "show the errors produced by the tests")  type numberTest struct {  	text      string  	isInt     bool  	isUint    bool  	isFloat   bool -	imaginary bool +	isComplex bool  	int64  	uint64  	float64 +	complex128  }  var numberTests = []numberTest{  	// basics -	{"0", true, true, true, false, 0, 0, 0}, -	{"-0", true, true, true, false, 0, 0, 0}, // check that -0 is a uint. -	{"73", true, true, true, false, 73, 73, 73}, -	{"-73", true, false, true, false, -73, 0, -73}, -	{"+73", true, false, true, false, 73, 0, 73}, -	{"100", true, true, true, false, 100, 100, 100}, -	{"1e9", true, true, true, false, 1e9, 1e9, 1e9}, -	{"-1e9", true, false, true, false, -1e9, 0, -1e9}, -	{"-1.2", false, false, true, false, 0, 0, -1.2}, -	{"1e19", false, true, true, false, 0, 1e19, 1e19}, -	{"-1e19", false, false, true, false, 0, 0, -1e19}, -	{"4i", false, false, true, true, 0, 0, 4}, +	{"0", true, true, true, false, 0, 0, 0, 0}, +	{"-0", true, true, true, false, 0, 0, 0, 0}, // check that -0 is a uint. +	{"73", true, true, true, false, 73, 73, 73, 0}, +	{"-73", true, false, true, false, -73, 0, -73, 0}, +	{"+73", true, false, true, false, 73, 0, 73, 0}, +	{"100", true, true, true, false, 100, 100, 100, 0}, +	{"1e9", true, true, true, false, 1e9, 1e9, 1e9, 0}, +	{"-1e9", true, false, true, false, -1e9, 0, -1e9, 0}, +	{"-1.2", false, false, true, false, 0, 0, -1.2, 0}, +	{"1e19", false, true, true, false, 0, 1e19, 1e19, 0}, +	{"-1e19", false, false, true, false, 0, 0, -1e19, 0}, +	{"4i", false, false, false, true, 0, 0, 0, 4i}, +	{"-1.2+4.2i", false, false, false, true, 0, 0, 0, -1.2 + 4.2i}, +	// complex with 0 imaginary are float (and maybe integer) +	{"0i", true, true, true, true, 0, 0, 0, 0}, +	{"-1.2+0i", false, false, true, true, 0, 0, -1.2, -1.2}, +	{"-12+0i", true, false, true, true, -12, 0, -12, -12}, +	{"13+0i", true, true, true, true, 13, 13, 13, 13},  	// funny bases -	{"0123", true, true, true, false, 0123, 0123, 0123}, -	{"-0x0", true, true, true, false, 0, 0, 0}, -	{"0xdeadbeef", true, true, true, false, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef}, +	{"0123", true, true, true, false, 0123, 0123, 0123, 0}, +	{"-0x0", true, true, true, false, 0, 0, 0, 0}, +	{"0xdeadbeef", true, true, true, false, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0},  	// some broken syntax  	{text: "+-2"},  	{text: "0x123."},  	{text: "1e."},  	{text: "0xi."}, +	{text: "1+2."},  }  func TestNumberParse(t *testing.T) {  	for _, test := range numberTests { -		n, err := newNumber(test.text) -		ok := test.isInt || test.isUint || test.isFloat +		// If fmt.Sscan thinks it's complex, it's complex.  We can't trust the output +		// because imaginary comes out as a number. +		var c complex128 +		_, err := fmt.Sscan(test.text, &c) +		n, err := newNumber(test.text, err == nil) +		ok := test.isInt || test.isUint || test.isFloat || test.isComplex  		if ok && err != nil {  			t.Errorf("unexpected error for %q", test.text)  			continue @@ -62,8 +75,8 @@ func TestNumberParse(t *testing.T) {  		if !ok {  			continue  		} -		if n.imaginary != test.imaginary { -			t.Errorf("imaginary incorrect for %q; should be %t", test.text, test.imaginary) +		if n.isComplex != test.isComplex { +			t.Errorf("complex incorrect for %q; should be %t", test.text, test.isComplex)  		}  		if test.isInt {  			if !n.isInt { @@ -95,17 +108,19 @@ func TestNumberParse(t *testing.T) {  		} else if n.isFloat {  			t.Errorf("did not expect float for %q", test.text)  		} +		if test.isComplex { +			if !n.isComplex { +				t.Errorf("expected complex for %q", test.text) +			} +			if n.complex128 != test.complex128 { +				t.Errorf("complex128 for %q should be %g is %g", test.text, test.complex128, n.complex128) +			} +		} else if n.isComplex { +			t.Errorf("did not expect complex for %q", test.text) +		}  	}  } -func num(s string) *numberNode { -	n, err := newNumber(s) -	if err != nil { -		panic(err) -	} -	return n -} -  type parseTest struct {  	name   string  	input  string @@ -125,29 +140,45 @@ var parseTests = []parseTest{  		`[(text: " \t\n")]`},  	{"text", "some text", noError,  		`[(text: "some text")]`}, -	{"emptyMeta", "{{}}", noError, +	{"emptyAction", "{{}}", hasError,  		`[(action: [])]`}, -	{"simple command", "{{hello}}", noError, -		`[(action: [(command: [I=hello])])]`}, -	{"multi-word command", "{{hello world}}", noError, -		`[(action: [(command: [I=hello I=world])])]`}, -	{"multi-word command with number", "{{hello 80}}", noError, -		`[(action: [(command: [I=hello N=80])])]`}, -	{"multi-word command with string", "{{hello `quoted text`}}", noError, -		"[(action: [(command: [I=hello S=`quoted text`])])]"}, -	{"pipeline", "{{hello|world}}", noError, -		`[(action: [(command: [I=hello]) (command: [I=world])])]`}, -	{"simple range", "{{range .x}}hello{{end}}", noError, -		`[({{range F=.x}} [(text: "hello")])]`}, -	{"nested range", "{{range .x}}hello{{range .y}}goodbye{{end}}{{end}}", noError, -		`[({{range F=.x}} [(text: "hello")({{range F=.y}} [(text: "goodbye")])])]`}, -	{"range with else", "{{range .x}}true{{else}}false{{end}}", noError, -		`[({{range F=.x}} [(text: "true")] {{else}} [(text: "false")])]`}, +	{"field", "{{.X}}", noError, +		`[(action: [(command: [F=[X]])])]`}, +	{"simple command", "{{printf}}", noError, +		`[(action: [(command: [I=printf])])]`}, +	{"multi-word command", "{{printf `%d` 23}}", noError, +		"[(action: [(command: [I=printf S=`%d` N=23])])]"}, +	{"pipeline", "{{.X|.Y}}", noError, +		`[(action: [(command: [F=[X]]) (command: [F=[Y]])])]`}, +	{"simple if", "{{if .X}}hello{{end}}", noError, +		`[({{if [(command: [F=[X]])]}} [(text: "hello")])]`}, +	{"if with else", "{{if .X}}true{{else}}false{{end}}", noError, +		`[({{if [(command: [F=[X]])]}} [(text: "true")] {{else}} [(text: "false")])]`}, +	{"simple range", "{{range .X}}hello{{end}}", noError, +		`[({{range [(command: [F=[X]])]}} [(text: "hello")])]`}, +	{"chained field range", "{{range .X.Y.Z}}hello{{end}}", noError, +		`[({{range [(command: [F=[X Y Z]])]}} [(text: "hello")])]`}, +	{"nested range", "{{range .X}}hello{{range .Y}}goodbye{{end}}{{end}}", noError, +		`[({{range [(command: [F=[X]])]}} [(text: "hello")({{range [(command: [F=[Y]])]}} [(text: "goodbye")])])]`}, +	{"range with else", "{{range .X}}true{{else}}false{{end}}", noError, +		`[({{range [(command: [F=[X]])]}} [(text: "true")] {{else}} [(text: "false")])]`}, +	{"range over pipeline", "{{range .X|.M}}true{{else}}false{{end}}", noError, +		`[({{range [(command: [F=[X]]) (command: [F=[M]])]}} [(text: "true")] {{else}} [(text: "false")])]`}, +	{"range []int", "{{range .SI}}{{.}}{{end}}", noError, +		`[({{range [(command: [F=[SI]])]}} [(action: [(command: [{{<.>}}])])])]`}, +	{"constants", "{{range .SI 1 -3.2i true false }}{{end}}", noError, +		`[({{range [(command: [F=[SI] N=1 N=-3.2i B=true B=false])]}} [])]`}, +	{"template", "{{template `x` .Y}}", noError, +		"[{{template S=`x` [(command: [F=[Y]])]}}]"}, +	{"with", "{{with .X}}hello{{end}}", noError, +		`[({{with [(command: [F=[X]])]}} [(text: "hello")])]`}, +	{"with with else", "{{with .X}}hello{{else}}goodbye{{end}}", noError, +		`[({{with [(command: [F=[X]])]}} [(text: "hello")] {{else}} [(text: "goodbye")])]`},  	// Errors.  	{"unclosed action", "hello{{range", hasError, ""}, -	{"not a field", "hello{{range x}}{{end}}", hasError, ""},  	{"missing end", "hello{{range .x}}", hasError, ""},  	{"missing end after else", "hello{{range .x}}{{else}}", hasError, ""}, +	{"undefined function", "hello{{undefined}}", hasError, ""},  }  func TestParse(t *testing.T) { @@ -163,7 +194,7 @@ func TestParse(t *testing.T) {  			continue  		case err != nil && !test.ok:  			// expected error, got one -			if dumpErrors { +			if *debug {  				fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err)  			}  			continue diff --git a/src/pkg/exp/template/set.go b/src/pkg/exp/template/set.go new file mode 100644 index 000000000..492e270e1 --- /dev/null +++ b/src/pkg/exp/template/set.go @@ -0,0 +1,115 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( +	"fmt" +	"io" +	"os" +	"reflect" +	"runtime" +	"strconv" +) + +// Set holds a set of related templates that can refer to one another by name. +// A template may be a member of multiple sets. +type Set struct { +	tmpl  map[string]*Template +	funcs map[string]reflect.Value +} + +// NewSet allocates a new, empty template set. +func NewSet() *Set { +	return &Set{ +		tmpl:  make(map[string]*Template), +		funcs: make(map[string]reflect.Value), +	} +} + +// Funcs adds to the set's function map the elements of the +// argument map.   It panics if a value in the map is not a function +// with appropriate return type. +// The return value is the set, so calls can be chained. +func (s *Set) Funcs(funcMap FuncMap) *Set { +	addFuncs(s.funcs, funcMap) +	return s +} + +// Add adds the argument templates to the set. It panics if the call +// attempts to reuse a name defined in the template. +// The return value is the set, so calls can be chained. +func (s *Set) Add(templates ...*Template) *Set { +	for _, t := range templates { +		if _, ok := s.tmpl[t.name]; ok { +			panic(fmt.Errorf("template: %q already defined in set", t.name)) +		} +		s.tmpl[t.name] = t +	} +	return s +} + +// Template returns the template with the given name in the set, +// or nil if there is no such template. +func (s *Set) Template(name string) *Template { +	return s.tmpl[name] +} + +// Execute looks for the named template in the set and then applies that +// template to the specified data object, writing the output to wr.  Nested +// template invocations will be resolved from the set. +func (s *Set) Execute(name string, wr io.Writer, data interface{}) os.Error { +	tmpl := s.tmpl[name] +	if tmpl == nil { +		return fmt.Errorf("template: no template %q in set", name) +	} +	return tmpl.ExecuteInSet(wr, data, s) +} + +// recover is the handler that turns panics into returns from the top +// level of Parse. +func (s *Set) recover(errp *os.Error) { +	e := recover() +	if e != nil { +		if _, ok := e.(runtime.Error); ok { +			panic(e) +		} +		s.tmpl = nil +		*errp = e.(os.Error) +	} +	return +} + +// Parse parses the file into a set of named templates. +func (s *Set) Parse(text string) (err os.Error) { +	defer s.recover(&err) +	lex := lex("set", text) +	const context = "define clause" +	for { +		t := New("set") // name will be updated once we know it. +		t.startParse(s, lex) +		// Expect EOF or "{{ define name }}". +		if t.atEOF() { +			return +		} +		t.expect(itemLeftDelim, context) +		t.expect(itemDefine, context) +		name := t.expect(itemString, context) +		t.name, err = strconv.Unquote(name.val) +		if err != nil { +			t.error(err) +		} +		t.expect(itemRightDelim, context) +		end := t.parse(false) +		if end == nil { +			t.errorf("unexpected EOF in %s", context) +		} +		if end.typ() != nodeEnd { +			t.errorf("unexpected %s in %s", end, context) +		} +		t.stopParse() +		s.tmpl[t.name] = t +	} +	return nil +} diff --git a/src/pkg/exp/template/set_test.go b/src/pkg/exp/template/set_test.go new file mode 100644 index 000000000..c0115ec0a --- /dev/null +++ b/src/pkg/exp/template/set_test.go @@ -0,0 +1,101 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( +	"fmt" +	"testing" +) + +type setParseTest struct { +	name    string +	input   string +	ok      bool +	names   []string +	results []string +} + +var setParseTests = []setParseTest{ +	{"empty", "", noError, +		nil, +		nil}, +	{"one", `{{define "foo"}} FOO {{end}}`, noError, +		[]string{"foo"}, +		[]string{`[(text: " FOO ")]`}}, +	{"two", `{{define "foo"}} FOO {{end}}{{define "bar"}} BAR {{end}}`, noError, +		[]string{"foo", "bar"}, +		[]string{`[(text: " FOO ")]`, `[(text: " BAR ")]`}}, +	// errors +	{"missing end", `{{define "foo"}} FOO `, hasError, +		nil, +		nil}, +	{"malformed name", `{{define "foo}} FOO `, hasError, +		nil, +		nil}, +} + +func TestSetParse(t *testing.T) { +	for _, test := range setParseTests { +		set := NewSet() +		err := set.Parse(test.input) +		switch { +		case err == nil && !test.ok: +			t.Errorf("%q: expected error; got none", test.name) +			continue +		case err != nil && test.ok: +			t.Errorf("%q: unexpected error: %v", test.name, err) +			continue +		case err != nil && !test.ok: +			// expected error, got one +			if *debug { +				fmt.Printf("%s: %s\n\t%s\n", test.name, test.input, err) +			} +			continue +		} +		if len(set.tmpl) != len(test.names) { +			t.Errorf("%s: wrong number of templates; wanted %d got %d", test.name, len(test.names), len(set.tmpl)) +			continue +		} +		for i, name := range test.names { +			tmpl, ok := set.tmpl[name] +			if !ok { +				t.Errorf("%s: can't find template %q", test.name, name) +				continue +			} +			result := tmpl.root.String() +			if result != test.results[i] { +				t.Errorf("%s=(%q): got\n\t%v\nexpected\n\t%v", test.name, test.input, result, test.results[i]) +			} +		} +	} +} + + +var setExecTests = []execTest{ +	{"empty", "", "", nil, true}, +	{"text", "some text", "some text", nil, true}, +	{"invoke text", `{{template "text" .SI}}`, "TEXT", tVal, true}, +	{"invoke dot int", `{{template "dot" .I}}`, "17", tVal, true}, +	{"invoke dot []int", `{{template "dot" .SI}}`, "[3 4 5]", tVal, true}, +	{"invoke dotV", `{{template "dotV" .U}}`, "v", tVal, true}, +	{"invoke nested int", `{{template "nested" .I}}`, "17", tVal, true}, +} + +const setText = ` +	{{define "text"}}TEXT{{end}} +	{{define "dotV"}}{{.V}}{{end}} +	{{define "dot"}}{{.}}{{end}} +	{{define "nested"}}{{template "dot" .}}{{end}} +` + +func TestSetExecute(t *testing.T) { +	// Declare a set with a couple of templates first. +	set := NewSet() +	err := set.Parse(setText) +	if err != nil { +		t.Fatalf("error parsing set: %s", err) +	} +	testExecute(setExecTests, set, t) +} | 
