// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. package html import ( "bytes" "fmt" "html" "os" "template" "template/parse" ) // Escape rewrites each action in the template to guarantee that the output is // properly escaped. func Escape(t *template.Template) (*template.Template, os.Error) { var s template.Set s.Add(t) if _, err := EscapeSet(&s, t.Name()); err != nil { return nil, err } // TODO: if s contains cloned dependencies due to self-recursion // cross-context, error out. return t, nil } // EscapeSet rewrites the template set to guarantee that the output of any of // the named templates is properly escaped. // Names should include the names of all templates that might be Executed but // need not include helper templates. // If no error is returned, then the named templates have been modified. // Otherwise the named templates have been rendered unusable. func EscapeSet(s *template.Set, names ...string) (*template.Set, os.Error) { if len(names) == 0 { // TODO: Maybe add a method to Set to enumerate template names // and use those instead. return nil, &Error{ErrNoNames, "", 0, "must specify names of top level templates"} } e := newEscaper(s) for _, name := range names { c, _ := e.escapeTree(context{}, name, 0) var err os.Error if c.err != nil { err, c.err.Name = c.err, name } else if c.state != stateText { err = &Error{ErrEndContext, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} } if err != nil { // Prevent execution of unsafe templates. for _, name := range names { if t := s.Template(name); t != nil { t.Tree = nil } } return nil, err } } e.commit() return s, nil } // funcMap maps command names to functions that render their inputs safe. 
var funcMap = template.FuncMap{ "exp_template_html_attrescaper": attrEscaper, "exp_template_html_commentescaper": commentEscaper, "exp_template_html_cssescaper": cssEscaper, "exp_template_html_cssvaluefilter": cssValueFilter, "exp_template_html_htmlnamefilter": htmlNameFilter, "exp_template_html_htmlescaper": htmlEscaper, "exp_template_html_jsregexpescaper": jsRegexpEscaper, "exp_template_html_jsstrescaper": jsStrEscaper, "exp_template_html_jsvalescaper": jsValEscaper, "exp_template_html_nospaceescaper": htmlNospaceEscaper, "exp_template_html_rcdataescaper": rcdataEscaper, "exp_template_html_urlescaper": urlEscaper, "exp_template_html_urlfilter": urlFilter, "exp_template_html_urlnormalizer": urlNormalizer, } // equivEscapers matches contextual escapers to equivalent template builtins. var equivEscapers = map[string]string{ "exp_template_html_attrescaper": "html", "exp_template_html_htmlescaper": "html", "exp_template_html_nospaceescaper": "html", "exp_template_html_rcdataescaper": "html", "exp_template_html_urlescaper": "urlquery", "exp_template_html_urlnormalizer": "urlquery", } // escaper collects type inferences about templates and changes needed to make // templates injection safe. type escaper struct { // set is the template set being escaped. set *template.Set // output[templateName] is the output context for a templateName that // has been mangled to include its input context. output map[string]context // derived[c.mangle(name)] maps to a template derived from the template // named name templateName for the start context c. derived map[string]*template.Template // called[templateName] is a set of called mangled template names. called map[string]bool // xxxNodeEdits are the accumulated edits to apply during commit. // Such edits are not applied immediately in case a template set // executes a given template in different escaping contexts. 
actionNodeEdits map[*parse.ActionNode][]string templateNodeEdits map[*parse.TemplateNode]string textNodeEdits map[*parse.TextNode][]byte } // newEscaper creates a blank escaper for the given set. func newEscaper(s *template.Set) *escaper { return &escaper{ s, map[string]context{}, map[string]*template.Template{}, map[string]bool{}, map[*parse.ActionNode][]string{}, map[*parse.TemplateNode]string{}, map[*parse.TextNode][]byte{}, } } // filterFailsafe is an innocuous word that is emitted in place of unsafe values // by sanitizer functions. It is not a keyword in any programming language, // contains no special characters, is not empty, and when it appears in output // it is distinct enough that a developer can find the source of the problem // via a search engine. const filterFailsafe = "ZgotmplZ" // escape escapes a template node. func (e *escaper) escape(c context, n parse.Node) context { switch n := n.(type) { case *parse.ActionNode: return e.escapeAction(c, n) case *parse.IfNode: return e.escapeBranch(c, &n.BranchNode, "if") case *parse.ListNode: return e.escapeList(c, n) case *parse.RangeNode: return e.escapeBranch(c, &n.BranchNode, "range") case *parse.TemplateNode: return e.escapeTemplate(c, n) case *parse.TextNode: return e.escapeText(c, n) case *parse.WithNode: return e.escapeBranch(c, &n.BranchNode, "with") } panic("escaping " + n.String() + " is unimplemented") } // escapeAction escapes an action template node. 
func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { c = nudge(c) s := make([]string, 0, 3) switch c.state { case stateError: return c case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: switch c.urlPart { case urlPartNone: s = append(s, "exp_template_html_urlfilter") fallthrough case urlPartPreQuery: switch c.state { case stateCSSDqStr, stateCSSSqStr: s = append(s, "exp_template_html_cssescaper") case stateCSSDqURL, stateCSSSqURL, stateCSSURL: s = append(s, "exp_template_html_urlnormalizer") } case urlPartQueryOrFrag: s = append(s, "exp_template_html_urlescaper") case urlPartUnknown: return context{ state: stateError, err: errorf(ErrAmbigContext, n.Line, "%s appears in an ambiguous URL context", n), } default: panic(c.urlPart.String()) } case stateJS: s = append(s, "exp_template_html_jsvalescaper") // A slash after a value starts a div operator. c.jsCtx = jsCtxDivOp case stateJSDqStr, stateJSSqStr: s = append(s, "exp_template_html_jsstrescaper") case stateJSRegexp: s = append(s, "exp_template_html_jsregexpescaper") case stateCSS: s = append(s, "exp_template_html_cssvaluefilter") case stateText: s = append(s, "exp_template_html_htmlescaper") case stateRCDATA: s = append(s, "exp_template_html_rcdataescaper") case stateAttr: // Handled below in delim check. case stateAttrName, stateTag: c.state = stateAttrName s = append(s, "exp_template_html_htmlnamefilter") default: if isComment(c.state) { s = append(s, "exp_template_html_commentescaper") } else { panic("unexpected state " + c.state.String()) } } switch c.delim { case delimNone: // No extra-escaping needed for raw text content. case delimSpaceOrTagEnd: s = append(s, "exp_template_html_nospaceescaper") default: s = append(s, "exp_template_html_attrescaper") } e.editActionNode(n, s) return c } // ensurePipelineContains ensures that the pipeline has commands with // the identifiers in s in order. 
// If the pipeline already has some of the sanitizers, do not interfere. // For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it // has one matching, "html", and one to insert, "escapeJSVal", to produce // (.X | escapeJSVal | html). func ensurePipelineContains(p *parse.PipeNode, s []string) { if len(s) == 0 { return } n := len(p.Cmds) // Find the identifiers at the end of the command chain. idents := p.Cmds for i := n - 1; i >= 0; i-- { if cmd := p.Cmds[i]; len(cmd.Args) != 0 { if id, ok := cmd.Args[0].(*parse.IdentifierNode); ok { if id.Ident == "noescape" { return } continue } } idents = p.Cmds[i+1:] } dups := 0 for _, id := range idents { if escFnsEq(s[dups], (id.Args[0].(*parse.IdentifierNode)).Ident) { dups++ if dups == len(s) { return } } } newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups) copy(newCmds, p.Cmds) // Merge existing identifier commands with the sanitizers needed. for _, id := range idents { i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s, escFnsEq) if i != -1 { for _, name := range s[:i] { newCmds = append(newCmds, newIdentCmd(name)) } s = s[i+1:] } newCmds = append(newCmds, id) } // Create any remaining sanitizers. for _, name := range s { newCmds = append(newCmds, newIdentCmd(name)) } p.Cmds = newCmds } // indexOfStr is the first i such that eq(s, strs[i]) or -1 if s was not found. func indexOfStr(s string, strs []string, eq func(a, b string) bool) int { for i, t := range strs { if eq(s, t) { return i } } return -1 } // escFnsEq returns whether the two escaping functions are equivalent. func escFnsEq(a, b string) bool { if e := equivEscapers[a]; e != "" { a = e } if e := equivEscapers[b]; e != "" { b = e } return a == b } // newIdentCmd produces a command containing a single identifier node. 
func newIdentCmd(identifier string) *parse.CommandNode {
	// The command's only argument is the identifier node itself.
	cmd := new(parse.CommandNode)
	cmd.NodeType = parse.NodeCommand
	cmd.Args = []parse.Node{parse.NewIdentifier(identifier)}
	return cmd
}

// nudge returns the context that would result from following empty string // transitions from the input context. // For example, parsing: // `90% of the time. e.output[t.Name()] = c return e.escapeListConditionally(c, t.Tree.Root, filter) } // delimEnds maps each delim to a string of characters that terminate it. var delimEnds = [...]string{ delimDoubleQuote: `"`, delimSingleQuote: "'", // Determined empirically by running the below in various browsers. // var div = document.createElement("DIV"); // for (var i = 0; i < 0x10000; ++i) { // div.innerHTML = ""; // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) // document.write("

U+" + i.toString(16)); // } delimSpaceOrTagEnd: " \t\n\f\r>", } // escapeText escapes a text template node. func (e *escaper) escapeText(c context, n *parse.TextNode) context { s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) for i != len(s) { c1, nread := contextAfterText(c, s[i:]) i1 := i + nread if c.state == stateText || c.state == stateRCDATA { end := i1 if c1.state != c.state { for j := end - 1; j >= i; j-- { if s[j] == '<' { end = j break } } } for j := i; j < end; j++ { if s[j] == '<' { b.Write(s[written:j]) b.WriteString("<") written = j + 1 } } } else if isComment(c.state) && c.delim == delimNone { switch c.state { case stateJSBlockCmt: // http://es5.github.com/#x7.4: // "Comments behave like white space and are // discarded except that, if a MultiLineComment // contains a line terminator character, then // the entire comment is considered to be a // LineTerminator for purposes of parsing by // the syntactic grammar." if bytes.IndexAny(s[written:i1], "\n\r\u2028\u2029") != -1 { b.WriteByte('\n') } else { b.WriteByte(' ') } case stateCSSBlockCmt: b.WriteByte(' ') } written = i1 } if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { // Preserve the portion between written and the comment start. cs := i1 - 2 if c1.state == stateHTMLCmt { // "