diff options
Diffstat (limited to 'src/pkg/html/template/transition.go')
-rw-r--r-- | src/pkg/html/template/transition.go | 550 |
1 files changed, 0 insertions, 550 deletions
diff --git a/src/pkg/html/template/transition.go b/src/pkg/html/template/transition.go deleted file mode 100644 index 7f30a7ab8..000000000 --- a/src/pkg/html/template/transition.go +++ /dev/null @@ -1,550 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package template - -import ( - "bytes" - "strings" -) - -// transitionFunc is the array of context transition functions for text nodes. -// A transition function takes a context and template text input, and returns -// the updated context and the number of bytes consumed from the front of the -// input. -var transitionFunc = [...]func(context, []byte) (context, int){ - stateText: tText, - stateTag: tTag, - stateAttrName: tAttrName, - stateAfterName: tAfterName, - stateBeforeValue: tBeforeValue, - stateHTMLCmt: tHTMLCmt, - stateRCDATA: tSpecialTagEnd, - stateAttr: tAttr, - stateURL: tURL, - stateJS: tJS, - stateJSDqStr: tJSDelimited, - stateJSSqStr: tJSDelimited, - stateJSRegexp: tJSDelimited, - stateJSBlockCmt: tBlockCmt, - stateJSLineCmt: tLineCmt, - stateCSS: tCSS, - stateCSSDqStr: tCSSStr, - stateCSSSqStr: tCSSStr, - stateCSSDqURL: tCSSStr, - stateCSSSqURL: tCSSStr, - stateCSSURL: tCSSStr, - stateCSSBlockCmt: tBlockCmt, - stateCSSLineCmt: tLineCmt, - stateError: tError, -} - -var commentStart = []byte("<!--") -var commentEnd = []byte("-->") - -// tText is the context transition function for the text state. -func tText(c context, s []byte) (context, int) { - k := 0 - for { - i := k + bytes.IndexByte(s[k:], '<') - if i < k || i+1 == len(s) { - return c, len(s) - } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) { - return context{state: stateHTMLCmt}, i + 4 - } - i++ - end := false - if s[i] == '/' { - if i+1 == len(s) { - return c, len(s) - } - end, i = true, i+1 - } - j, e := eatTagName(s, i) - if j != i { - if end { - e = elementNone - } - // We've found an HTML tag. - return context{state: stateTag, element: e}, j - } - k = j - } -} - -var elementContentType = [...]state{ - elementNone: stateText, - elementScript: stateJS, - elementStyle: stateCSS, - elementTextarea: stateRCDATA, - elementTitle: stateRCDATA, -} - -// tTag is the context transition function for the tag state. -func tTag(c context, s []byte) (context, int) { - // Find the attribute name. - i := eatWhiteSpace(s, 0) - if i == len(s) { - return c, len(s) - } - if s[i] == '>' { - return context{ - state: elementContentType[c.element], - element: c.element, - }, i + 1 - } - j, err := eatAttrName(s, i) - if err != nil { - return context{state: stateError, err: err}, len(s) - } - state, attr := stateTag, attrNone - if i == j { - return context{ - state: stateError, - err: errorf(ErrBadHTML, 0, "expected space, attr name, or end of tag, but got %q", s[i:]), - }, len(s) - } - switch attrType(string(s[i:j])) { - case contentTypeURL: - attr = attrURL - case contentTypeCSS: - attr = attrStyle - case contentTypeJS: - attr = attrScript - } - if j == len(s) { - state = stateAttrName - } else { - state = stateAfterName - } - return context{state: state, element: c.element, attr: attr}, j -} - -// tAttrName is the context transition function for stateAttrName. -func tAttrName(c context, s []byte) (context, int) { - i, err := eatAttrName(s, 0) - if err != nil { - return context{state: stateError, err: err}, len(s) - } else if i != len(s) { - c.state = stateAfterName - } - return c, i -} - -// tAfterName is the context transition function for stateAfterName. -func tAfterName(c context, s []byte) (context, int) { - // Look for the start of the value. - i := eatWhiteSpace(s, 0) - if i == len(s) { - return c, len(s) - } else if s[i] != '=' { - // Occurs due to tag ending '>', and valueless attribute. - c.state = stateTag - return c, i - } - c.state = stateBeforeValue - // Consume the "=". - return c, i + 1 -} - -var attrStartStates = [...]state{ - attrNone: stateAttr, - attrScript: stateJS, - attrStyle: stateCSS, - attrURL: stateURL, -} - -// tBeforeValue is the context transition function for stateBeforeValue. -func tBeforeValue(c context, s []byte) (context, int) { - i := eatWhiteSpace(s, 0) - if i == len(s) { - return c, len(s) - } - // Find the attribute delimiter. - delim := delimSpaceOrTagEnd - switch s[i] { - case '\'': - delim, i = delimSingleQuote, i+1 - case '"': - delim, i = delimDoubleQuote, i+1 - } - c.state, c.delim, c.attr = attrStartStates[c.attr], delim, attrNone - return c, i -} - -// tHTMLCmt is the context transition function for stateHTMLCmt. -func tHTMLCmt(c context, s []byte) (context, int) { - if i := bytes.Index(s, commentEnd); i != -1 { - return context{}, i + 3 - } - return c, len(s) -} - -// specialTagEndMarkers maps element types to the character sequence that -// case-insensitively signals the end of the special tag body. -var specialTagEndMarkers = [...]string{ - elementScript: "</script", - elementStyle: "</style", - elementTextarea: "</textarea", - elementTitle: "</title", -} - -// tSpecialTagEnd is the context transition function for raw text and RCDATA -// element states. -func tSpecialTagEnd(c context, s []byte) (context, int) { - if c.element != elementNone { - if i := strings.Index(strings.ToLower(string(s)), specialTagEndMarkers[c.element]); i != -1 { - return context{}, i - } - } - return c, len(s) -} - -// tAttr is the context transition function for the attribute state. -func tAttr(c context, s []byte) (context, int) { - return c, len(s) -} - -// tURL is the context transition function for the URL state. -func tURL(c context, s []byte) (context, int) { - if bytes.IndexAny(s, "#?") >= 0 { - c.urlPart = urlPartQueryOrFrag - } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone { - // HTML5 uses "Valid URL potentially surrounded by spaces" for - // attrs: http://www.w3.org/TR/html5/index.html#attributes-1 - c.urlPart = urlPartPreQuery - } - return c, len(s) -} - -// tJS is the context transition function for the JS state. -func tJS(c context, s []byte) (context, int) { - i := bytes.IndexAny(s, `"'/`) - if i == -1 { - // Entire input is non string, comment, regexp tokens. - c.jsCtx = nextJSCtx(s, c.jsCtx) - return c, len(s) - } - c.jsCtx = nextJSCtx(s[:i], c.jsCtx) - switch s[i] { - case '"': - c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp - case '\'': - c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp - case '/': - switch { - case i+1 < len(s) && s[i+1] == '/': - c.state, i = stateJSLineCmt, i+1 - case i+1 < len(s) && s[i+1] == '*': - c.state, i = stateJSBlockCmt, i+1 - case c.jsCtx == jsCtxRegexp: - c.state = stateJSRegexp - case c.jsCtx == jsCtxDivOp: - c.jsCtx = jsCtxRegexp - default: - return context{ - state: stateError, - err: errorf(ErrSlashAmbig, 0, "'/' could start a division or regexp: %.32q", s[i:]), - }, len(s) - } - default: - panic("unreachable") - } - return c, i + 1 -} - -// tJSDelimited is the context transition function for the JS string and regexp -// states. -func tJSDelimited(c context, s []byte) (context, int) { - specials := `\"` - switch c.state { - case stateJSSqStr: - specials = `\'` - case stateJSRegexp: - specials = `\/[]` - } - - k, inCharset := 0, false - for { - i := k + bytes.IndexAny(s[k:], specials) - if i < k { - break - } - switch s[i] { - case '\\': - i++ - if i == len(s) { - return context{ - state: stateError, - err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in JS string: %q", s), - }, len(s) - } - case '[': - inCharset = true - case ']': - inCharset = false - default: - // end delimiter - if !inCharset { - c.state, c.jsCtx = stateJS, jsCtxDivOp - return c, i + 1 - } - } - k = i + 1 - } - - if inCharset { - // This can be fixed by making context richer if interpolation - // into charsets is desired. - return context{ - state: stateError, - err: errorf(ErrPartialCharset, 0, "unfinished JS regexp charset: %q", s), - }, len(s) - } - - return c, len(s) -} - -var blockCommentEnd = []byte("*/") - -// tBlockCmt is the context transition function for /*comment*/ states. -func tBlockCmt(c context, s []byte) (context, int) { - i := bytes.Index(s, blockCommentEnd) - if i == -1 { - return c, len(s) - } - switch c.state { - case stateJSBlockCmt: - c.state = stateJS - case stateCSSBlockCmt: - c.state = stateCSS - default: - panic(c.state.String()) - } - return c, i + 2 -} - -// tLineCmt is the context transition function for //comment states. -func tLineCmt(c context, s []byte) (context, int) { - var lineTerminators string - var endState state - switch c.state { - case stateJSLineCmt: - lineTerminators, endState = "\n\r\u2028\u2029", stateJS - case stateCSSLineCmt: - lineTerminators, endState = "\n\f\r", stateCSS - // Line comments are not part of any published CSS standard but - // are supported by the 4 major browsers. - // This defines line comments as - // LINECOMMENT ::= "//" [^\n\f\d]* - // since http://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines - // newlines: - // nl ::= #xA | #xD #xA | #xD | #xC - default: - panic(c.state.String()) - } - - i := bytes.IndexAny(s, lineTerminators) - if i == -1 { - return c, len(s) - } - c.state = endState - // Per section 7.4 of EcmaScript 5 : http://es5.github.com/#x7.4 - // "However, the LineTerminator at the end of the line is not - // considered to be part of the single-line comment; it is - // recognized separately by the lexical grammar and becomes part - // of the stream of input elements for the syntactic grammar." - return c, i -} - -// tCSS is the context transition function for the CSS state. -func tCSS(c context, s []byte) (context, int) { - // CSS quoted strings are almost never used except for: - // (1) URLs as in background: "/foo.png" - // (2) Multiword font-names as in font-family: "Times New Roman" - // (3) List separators in content values as in inline-lists: - // <style> - // ul.inlineList { list-style: none; padding:0 } - // ul.inlineList > li { display: inline } - // ul.inlineList > li:before { content: ", " } - // ul.inlineList > li:first-child:before { content: "" } - // </style> - // <ul class=inlineList><li>One<li>Two<li>Three</ul> - // (4) Attribute value selectors as in a[href="http://example.com/"] - // - // We conservatively treat all strings as URLs, but make some - // allowances to avoid confusion. - // - // In (1), our conservative assumption is justified. - // In (2), valid font names do not contain ':', '?', or '#', so our - // conservative assumption is fine since we will never transition past - // urlPartPreQuery. - // In (3), our protocol heuristic should not be tripped, and there - // should not be non-space content after a '?' or '#', so as long as - // we only %-encode RFC 3986 reserved characters we are ok. - // In (4), we should URL escape for URL attributes, and for others we - // have the attribute name available if our conservative assumption - // proves problematic for real code. - - k := 0 - for { - i := k + bytes.IndexAny(s[k:], `("'/`) - if i < k { - return c, len(s) - } - switch s[i] { - case '(': - // Look for url to the left. - p := bytes.TrimRight(s[:i], "\t\n\f\r ") - if endsWithCSSKeyword(p, "url") { - j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r ")) - switch { - case j != len(s) && s[j] == '"': - c.state, j = stateCSSDqURL, j+1 - case j != len(s) && s[j] == '\'': - c.state, j = stateCSSSqURL, j+1 - default: - c.state = stateCSSURL - } - return c, j - } - case '/': - if i+1 < len(s) { - switch s[i+1] { - case '/': - c.state = stateCSSLineCmt - return c, i + 2 - case '*': - c.state = stateCSSBlockCmt - return c, i + 2 - } - } - case '"': - c.state = stateCSSDqStr - return c, i + 1 - case '\'': - c.state = stateCSSSqStr - return c, i + 1 - } - k = i + 1 - } -} - -// tCSSStr is the context transition function for the CSS string and URL states. -func tCSSStr(c context, s []byte) (context, int) { - var endAndEsc string - switch c.state { - case stateCSSDqStr, stateCSSDqURL: - endAndEsc = `\"` - case stateCSSSqStr, stateCSSSqURL: - endAndEsc = `\'` - case stateCSSURL: - // Unquoted URLs end with a newline or close parenthesis. - // The below includes the wc (whitespace character) and nl. - endAndEsc = "\\\t\n\f\r )" - default: - panic(c.state.String()) - } - - k := 0 - for { - i := k + bytes.IndexAny(s[k:], endAndEsc) - if i < k { - c, nread := tURL(c, decodeCSS(s[k:])) - return c, k + nread - } - if s[i] == '\\' { - i++ - if i == len(s) { - return context{ - state: stateError, - err: errorf(ErrPartialEscape, 0, "unfinished escape sequence in CSS string: %q", s), - }, len(s) - } - } else { - c.state = stateCSS - return c, i + 1 - } - c, _ = tURL(c, decodeCSS(s[:i+1])) - k = i + 1 - } -} - -// tError is the context transition function for the error state. -func tError(c context, s []byte) (context, int) { - return c, len(s) -} - -// eatAttrName returns the largest j such that s[i:j] is an attribute name. -// It returns an error if s[i:] does not look like it begins with an -// attribute name, such as encountering a quote mark without a preceding -// equals sign. -func eatAttrName(s []byte, i int) (int, *Error) { - for j := i; j < len(s); j++ { - switch s[j] { - case ' ', '\t', '\n', '\f', '\r', '=', '>': - return j, nil - case '\'', '"', '<': - // These result in a parse warning in HTML5 and are - // indicative of serious problems if seen in an attr - // name in a template. - return -1, errorf(ErrBadHTML, 0, "%q in attribute name: %.32q", s[j:j+1], s) - default: - // No-op. - } - } - return len(s), nil -} - -var elementNameMap = map[string]element{ - "script": elementScript, - "style": elementStyle, - "textarea": elementTextarea, - "title": elementTitle, -} - -// asciiAlpha reports whether c is an ASCII letter. -func asciiAlpha(c byte) bool { - return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' -} - -// asciiAlphaNum reports whether c is an ASCII letter or digit. -func asciiAlphaNum(c byte) bool { - return asciiAlpha(c) || '0' <= c && c <= '9' -} - -// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type. -func eatTagName(s []byte, i int) (int, element) { - if i == len(s) || !asciiAlpha(s[i]) { - return i, elementNone - } - j := i + 1 - for j < len(s) { - x := s[j] - if asciiAlphaNum(x) { - j++ - continue - } - // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y". - if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) { - j += 2 - continue - } - break - } - return j, elementNameMap[strings.ToLower(string(s[i:j]))] -} - -// eatWhiteSpace returns the largest j such that s[i:j] is white space. -func eatWhiteSpace(s []byte, i int) int { - for j := i; j < len(s); j++ { - switch s[j] { - case ' ', '\t', '\n', '\f', '\r': - // No-op. - default: - return j - } - } - return len(s) -} |