1 files changed, 550 insertions, 0 deletions
diff --git a/src/html/template/transition.go b/src/html/template/transition.go
new file mode 100644
index 000000000..b486fcd28
--- /dev/null
+++ b/src/html/template/transition.go
@@ -0,0 +1,550 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+	"bytes"
+	"strings"
+)
+
+// transitionFunc is the array of context transition functions for text nodes.
+// A transition function takes a context and template text input, and returns
+// the updated context and the number of bytes consumed from the front of the
+// input.
+var transitionFunc = [...]func(context, []byte) (context, int){
+	stateText:        tText,
+	stateTag:         tTag,
+	stateAttrName:    tAttrName,
+	stateAfterName:   tAfterName,
+	stateBeforeValue: tBeforeValue,
+	stateHTMLCmt:     tHTMLCmt,
+	stateRCDATA:      tSpecialTagEnd,
+	stateAttr:        tAttr,
+	stateURL:         tURL,
+	stateJS:          tJS,
+	stateJSDqStr:     tJSDelimited,
+	stateJSSqStr:     tJSDelimited,
+	stateJSRegexp:    tJSDelimited,
+	stateJSBlockCmt:  tBlockCmt,
+	stateJSLineCmt:   tLineCmt,
+	stateCSS:         tCSS,
+	stateCSSDqStr:    tCSSStr,
+	stateCSSSqStr:    tCSSStr,
+	stateCSSDqURL:    tCSSStr,
+	stateCSSSqURL:    tCSSStr,
+	stateCSSURL:      tCSSStr,
+	stateCSSBlockCmt: tBlockCmt,
+	stateCSSLineCmt:  tLineCmt,
+	stateError:       tError,
+}
+
+var commentStart = []byte("<!--")
+var commentEnd = []byte("-->")
+
+// tText is the context transition function for the text state.
+func tText(c context, s []byte) (context, int) {
+	k := 0
+	for {
+		i := k + bytes.IndexByte(s[k:], '<')
+		if i < k || i+1 == len(s) {
+			return c, len(s)
+		} else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+			return context{state: stateHTMLCmt}, i + 4
+		}
+		i++
+		end := false
+		if s[i] == '/' {
+			if i+1 == len(s) {
+				return c, len(s)
+			}
+			end, i = true, i+1
+		}
+		j, e := eatTagName(s, i)
+		if j != i {
+			if end {
+				e = elementNone
+			}
+			// We've found an HTML tag.
+			return context{state: stateTag, element: e}, j
+		}
+		k = j
+	}
+}
+
+var elementContentType = [...]state{
+	elementNone:     stateText,
+	elementScript:   stateJS,
+	elementStyle:    stateCSS,
+	elementTextarea: stateRCDATA,
+	elementTitle:    stateRCDATA,
+}
+
+// tTag is the context transition function for the tag state.
+func tTag(c context, s []byte) (context, int) {
+	// Find the attribute name.
+	i := eatWhiteSpace(s, 0)
+	if i == len(s) {
+		return c, len(s)
+	}
+	if s[i] == '>' {
+		return context{
+			state:   elementContentType[c.element],
+			element: c.element,
+		}, i + 1
+	}
+	j, err := eatAttrName(s, i)
+	if err != nil {
+		return context{state: stateError, err: err}, len(s)
+	}
+	state, attr := stateTag, attrNone
+	if i == j {
+		return context{
+			state: stateError,
+			err:   errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
+		}, len(s)
+	}
+	switch attrType(string(s[i:j])) {
+	case contentTypeURL:
+		attr = attrURL
+	case contentTypeCSS:
+		attr = attrStyle
+	case contentTypeJS:
+		attr = attrScript
+	}
+	if j == len(s) {
+		state = stateAttrName
+	} else {
+		state = stateAfterName
+	}
+	return context{state: state, element: c.element, attr: attr}, j
+}
+
+// tAttrName is the context transition function for stateAttrName.
+func tAttrName(c context, s []byte) (context, int) {
+	i, err := eatAttrName(s, 0)
+	if err != nil {
+		return context{state: stateError, err: err}, len(s)
+	} else if i != len(s) {
+		c.state = stateAfterName
+	}
+	return c, i
+}
+
+// tAfterName is the context transition function for stateAfterName.
+func tAfterName(c context, s []byte) (context, int) {
+	// Look for the start of the value.
+	i := eatWhiteSpace(s, 0)
+	if i == len(s) {
+		return c, len(s)
+	} else if s[i] != '=' {
+		// Occurs due to tag ending '>', and valueless attribute.
+		c.state = stateTag
+		return c, i
+	}
+	c.state = stateBeforeValue
+	// Consume the "=".
+	return c, i + 1
+}
+
+var attrStartStates = [...]state{
+	attrNone:   stateAttr,
+	attrScript: stateJS,
+	attrStyle:  stateCSS,
+	attrURL:    stateURL,
+}
+
+// tBeforeValue is the context transition function for stateBeforeValue.
+func tBeforeValue(c context, s []byte) (context, int) {
+	i := eatWhiteSpace(s, 0)
+	if i == len(s) {
+		return c, len(s)
+	}
+	// Find the attribute delimiter.
+	delim := delimSpaceOrTagEnd
+	switch s[i] {
+	case '\'':
+		delim, i = delimSingleQuote, i+1
+	case '"':
+		delim, i = delimDoubleQuote, i+1
+	}
+	c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone
+	return c, i
+}
+
+// tHTMLCmt is the context transition function for stateHTMLCmt.
+func tHTMLCmt(c context, s []byte) (context, int) {
+	if i := bytes.Index(s, commentEnd); i != -1 {
+		return context{}, i + 3
+	}
+	return c, len(s)
+}
+
+// specialTagEndMarkers maps element types to the character sequence that
+// case-insensitively signals the end of the special tag body.
+var specialTagEndMarkers = [...]string{
+	elementScript:   "</script",
+	elementStyle:    "</style",
+	elementTextarea: "</textarea",
+	elementTitle:    "</title",
+}
+
+// tSpecialTagEnd is the context transition function for raw text and RCDATA
+// element states.
+func tSpecialTagEnd(c context, s []byte) (context, int) {
+	if c.element != elementNone {
+		if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 {
+			return context{}, i
+		}
+	}
+	return c, len(s)
+}
+
+// tAttr is the context transition function for the attribute state.
+func tAttr(c context, s []byte) (context, int) {
+	return c, len(s)
+}
+
+// tURL is the context transition function for the URL state.
+func tURL(c context, s []byte) (context, int) {
+	if bytes.IndexAny(s, "#?") >= 0 {
+		c.urlPart = urlPartQueryOrFrag
+	} else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
+		// HTML5 uses "Valid URL potentially surrounded by spaces" for
+		// attrs: http://www.w3.org/TR/html5/index.html#attributes-1
+		c.urlPart = urlPartPreQuery
+	}
+	return c, len(s)
+}
+
+// tJS is the context transition function for the JS state.
+func tJS(c context, s []byte) (context, int) {
+	i := bytes.IndexAny(s, `"'/`)
+	if i == -1 {
+		// Entire input is non string, comment, regexp tokens.
+		c.jsCtx = nextJSCtx(s, c.jsCtx)
+		return c, len(s)
+	}
+	c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+	switch s[i] {
+	case '"':
+		c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+	case '\'':
+		c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+	case '/':
+		switch {
+		case i+1 < len(s) && s[i+1] == '/':
+			c.state, i = stateJSLineCmt, i+1
+		case i+1 < len(s) && s[i+1] == '*':
+			c.state, i = stateJSBlockCmt, i+1
+		case c.jsCtx == jsCtxRegexp:
+			c.state = stateJSRegexp
+		case c.jsCtx == jsCtxDivOp:
+			c.jsCtx = jsCtxRegexp
+		default:
+			return context{
+				state: stateError,
+				err:   errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
+			}, len(s)
+		}
+	default:
+		panic("unreachable")
+	}
+	return c, i + 1
+}
+
+// tJSDelimited is the context transition function for the JS string and regexp
+// states.
+func tJSDelimited(c context, s []byte) (context, int) {
+	specials := `\"`
+	switch c.state {
+	case stateJSSqStr:
+		specials = `\'`
+	case stateJSRegexp:
+		specials = `\/[]`
+	}
+
+	k, inCharset := 0, false
+	for {
+		i := k + bytes.IndexAny(s[k:], specials)
+		if i < k {
+			break
+		}
+		switch s[i] {
+		case '\\':
+			i++
+			if i == len(s) {
+				return context{
+					state: stateError,
+					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
+				}, len(s)
+			}
+		case '[':
+			inCharset = true
+		case ']':
+			inCharset = false
+		default:
+			// end delimiter
+			if !inCharset {
+				c.state, c.jsCtx = stateJS, jsCtxDivOp
+				return c, i + 1
+			}
+		}
+		k = i + 1
+	}
+
+	if inCharset {
+		// This can be fixed by making context richer if interpolation
+		// into charsets is desired.
+		return context{
+			state: stateError,
+			err:   errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
+		}, len(s)
+	}
+
+	return c, len(s)
+}
+
+var blockCommentEnd = []byte("*/")
+
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, int) {
+	i := bytes.Index(s, blockCommentEnd)
+	if i == -1 {
+		return c, len(s)
+	}
+	switch c.state {
+	case stateJSBlockCmt:
+		c.state = stateJS
+	case stateCSSBlockCmt:
+		c.state = stateCSS
+	default:
+		panic(c.state.String())
+	}
+	return c, i + 2
+}
+
+// tLineCmt is the context transition function for //comment states.
+func tLineCmt(c context, s []byte) (context, int) {
+	var lineTerminators string
+	var endState state
+	switch c.state {
+	case stateJSLineCmt:
+		lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+	case stateCSSLineCmt:
+		lineTerminators, endState = "\n\f\r", stateCSS
+		// Line comments are not part of any published CSS standard but
+		// are supported by the 4 major browsers.
+		// This defines line comments as
+		//     LINECOMMENT ::= "//" [^\n\f\d]*
+		// since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+		// newlines:
+		//     nl ::= #xA | #xD #xA | #xD | #xC
+	default:
+		panic(c.state.String())
+	}
+
+	i := bytes.IndexAny(s, lineTerminators)
+	if i == -1 {
+		return c, len(s)
+	}
+	c.state = endState
+	// Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4
+	// "However, the LineTerminator at the end of the line is not
+	// considered to be part of the single-line comment; it is
+	// recognized separately by the lexical grammar and becomes part
+	// of the stream of input elements for the syntactic grammar."
+	return c, i
+}
+
+// tCSS is the context transition function for the CSS state.
+func tCSS(c context, s []byte) (context, int) {
+	// CSS quoted strings are almost never used except for:
+	// (1) URLs as in background: "/foo.png"
+	// (2) Multiword font-names as in font-family: "Times New Roman"
+	// (3) List separators in content values as in inline-lists:
+	//    <style>
+	//    ul.inlineList { list-style: none; padding:0 }
+	//    ul.inlineList > li { display: inline }
+	//    ul.inlineList > li:before { content: ", " }
+	//    ul.inlineList > li:first-child:before { content: "" }
+	//    </style>
+	//    <ul class=inlineList><li>One<li>Two<li>Three</ul>
+	// (4) Attribute value selectors as in a[href="http://example.com/"]
+	//
+	// We conservatively treat all strings as URLs, but make some
+	// allowances to avoid confusion.
+	//
+	// In (1), our conservative assumption is justified.
+	// In (2), valid font names do not contain ':', '?', or '#', so our
+	// conservative assumption is fine since we will never transition past
+	// urlPartPreQuery.
+	// In (3), our protocol heuristic should not be tripped, and there
+	// should not be non-space content after a '?' or '#', so as long as
+	// we only %-encode RFC 3986 reserved characters we are ok.
+	// In (4), we should URL escape for URL attributes, and for others we
+	// have the attribute name available if our conservative assumption
+	// proves problematic for real code.
+
+	k := 0
+	for {
+		i := k + bytes.IndexAny(s[k:], `("'/`)
+		if i < k {
+			return c, len(s)
+		}
+		switch s[i] {
+		case '(':
+			// Look for url to the left.
+			p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+			if endsWithCSSKeyword(p, "url") {
+				j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
+				switch {
+				case j != len(s) && s[j] == '"':
+					c.state, j = stateCSSDqURL, j+1
+				case j != len(s) && s[j] == '\'':
+					c.state, j = stateCSSSqURL, j+1
+				default:
+					c.state = stateCSSURL
+				}
+				return c, j
+			}
+		case '/':
+			if i+1 < len(s) {
+				switch s[i+1] {
+				case '/':
+					c.state = stateCSSLineCmt
+					return c, i + 2
+				case '*':
+					c.state = stateCSSBlockCmt
+					return c, i + 2
+				}
+			}
+		case '"':
+			c.state = stateCSSDqStr
+			return c, i + 1
+		case '\'':
+			c.state = stateCSSSqStr
+			return c, i + 1
+		}
+		k = i + 1
+	}
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, int) {
+	var endAndEsc string
+	switch c.state {
+	case stateCSSDqStr, stateCSSDqURL:
+		endAndEsc = `\"`
+	case stateCSSSqStr, stateCSSSqURL:
+		endAndEsc = `\'`
+	case stateCSSURL:
+		// Unquoted URLs end with a newline or close parenthesis.
+		// The below includes the wc (whitespace character) and nl.
+		endAndEsc = "\\\t\n\f\r )"
+	default:
+		panic(c.state.String())
+	}
+
+	k := 0
+	for {
+		i := k + bytes.IndexAny(s[k:], endAndEsc)
+		if i < k {
+			c, nread := tURL(c, decodeCSS(s[k:]))
+			return c, k + nread
+		}
+		if s[i] == '\\' {
+			i++
+			if i == len(s) {
+				return context{
+					state: stateError,
+					err:   errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
+				}, len(s)
+			}
+		} else {
+			c.state = stateCSS
+			return c, i + 1
+		}
+		c, _ = tURL(c, decodeCSS(s[:i+1]))
+		k = i + 1
+	}
+}
+
+// tError is the context transition function for the error state.
+func tError(c context, s []byte) (context, int) {
+	return c, len(s)
+}
+
+// eatAttrName returns the largest j such that s[i:j] is an attribute name.
+// It returns an error if s[i:] does not look like it begins with an
+// attribute name, such as encountering a quote mark without a preceding
+// equals sign.
+func eatAttrName(s []byte, i int) (int, *Error) {
+	for j := i; j < len(s); j++ {
+		switch s[j] {
+		case ' ', '\t', '\n', '\f', '\r', '=', '>':
+			return j, nil
+		case '\'', '"', '<':
+			// These result in a parse warning in HTML5 and are
+			// indicative of serious problems if seen in an attr
+			// name in a template.
+			return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
+		default:
+			// No-op.
+		}
+	}
+	return len(s), nil
+}
+
+var elementNameMap = map[string]element{
+	"script":   elementScript,
+	"style":    elementStyle,
+	"textarea": elementTextarea,
+	"title":    elementTitle,
+}
+
+// asciiAlpha reports whether c is an ASCII letter.
+func asciiAlpha(c byte) bool {
+	return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
+}
+
+// asciiAlphaNum reports whether c is an ASCII letter or digit.
+func asciiAlphaNum(c byte) bool {
+	return asciiAlpha(c) || '0' <= c && c <= '9'
+}
+
+// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
+func eatTagName(s []byte, i int) (int, element) {
+	if i == len(s) || !asciiAlpha(s[i]) {
+		return i, elementNone
+	}
+	j := i + 1
+	for j < len(s) {
+		x := s[j]
+		if asciiAlphaNum(x) {
+			j++
+			continue
+		}
+		// Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
+		if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
+			j += 2
+			continue
+		}
+		break
+	}
+	return j, elementNameMap[strings.ToLower(string(s[i:j]))]
+}
+
+// eatWhiteSpace returns the largest j such that s[i:j] is white space.
+func eatWhiteSpace(s []byte, i int) int {
+	for j := i; j < len(s); j++ {
+		switch s[j] {
+		case ' ', '\t', '\n', '\f', '\r':
+			// No-op.
+		default:
+			return j
+		}
+	}
+	return len(s)
+}