// Copyright 2011 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package html is a specialization of template that automates the // construction of safe HTML output. // INCOMPLETE. package html import ( "bytes" "fmt" "html" "os" "strings" "template" "template/parse" ) // Escape rewrites each action in the template to guarantee that the output is // HTML-escaped. func Escape(t *template.Template) (*template.Template, os.Error) { c := escapeList(context{}, t.Tree.Root) if c.errStr != "" { return nil, fmt.Errorf("%s:%d: %s", t.Name(), c.errLine, c.errStr) } if c.state != stateText { return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c) } t.Funcs(funcMap) return t, nil } // funcMap maps command names to functions that render their inputs safe. var funcMap = template.FuncMap{ "exp_template_html_urlfilter": urlFilter, "exp_template_html_jsvalescaper": jsValEscaper, "exp_template_html_jsstrescaper": jsStrEscaper, "exp_template_html_jsregexpescaper": jsRegexpEscaper, } // escape escapes a template node. func escape(c context, n parse.Node) context { switch n := n.(type) { case *parse.ActionNode: return escapeAction(c, n) case *parse.IfNode: return escapeBranch(c, &n.BranchNode, "if") case *parse.ListNode: return escapeList(c, n) case *parse.RangeNode: return escapeBranch(c, &n.BranchNode, "range") case *parse.TextNode: return escapeText(c, n.Text) case *parse.WithNode: return escapeBranch(c, &n.BranchNode, "with") } // TODO: handle a *parse.TemplateNode. Should Escape take a *template.Set? panic("escaping " + n.String() + " is unimplemented") } // escapeAction escapes an action template node. func escapeAction(c context, n *parse.ActionNode) context { s := make([]string, 0, 2) switch c.state { case stateURL: switch c.urlPart { case urlPartNone: s = append(s, "exp_template_html_urlfilter") case urlPartQueryOrFrag: s = append(s, "urlquery") case urlPartPreQuery: s = append(s, "html") case urlPartUnknown: return context{ state: stateError, errLine: n.Line, errStr: fmt.Sprintf("%s appears in an ambiguous URL context", n), } default: panic(c.urlPart.String()) } case stateJS: s = append(s, "exp_template_html_jsvalescaper") if c.delim != delimNone { s = append(s, "html") } case stateJSDqStr, stateJSSqStr: s = append(s, "exp_template_html_jsstrescaper") case stateJSRegexp: s = append(s, "exp_template_html_jsregexpescaper") case stateJSBlockCmt, stateJSLineCmt: return context{ state: stateError, errLine: n.Line, errStr: fmt.Sprintf("%s appears inside a comment", n), } default: s = append(s, "html") } ensurePipelineContains(n.Pipe, s) return c } // ensurePipelineContains ensures that the pipeline has commands with // the identifiers in s in order. // If the pipeline already has some of the sanitizers, do not interfere. // For example, if p is (.X | html) and s is ["escapeJSVal", "html"] then it // has one matching, "html", and one to insert, "escapeJSVal", to produce // (.X | escapeJSVal | html). func ensurePipelineContains(p *parse.PipeNode, s []string) { if len(s) == 0 { return } n := len(p.Cmds) // Find the identifiers at the end of the command chain. idents := p.Cmds for i := n - 1; i >= 0; i-- { if cmd := p.Cmds[i]; len(cmd.Args) != 0 { if _, ok := cmd.Args[0].(*parse.IdentifierNode); ok { continue } } idents = p.Cmds[i+1:] } dups := 0 for _, id := range idents { if s[dups] == (id.Args[0].(*parse.IdentifierNode)).Ident { dups++ if dups == len(s) { return } } } newCmds := make([]*parse.CommandNode, n-len(idents), n+len(s)-dups) copy(newCmds, p.Cmds) // Merge existing identifier commands with the sanitizers needed. for _, id := range idents { i := indexOfStr((id.Args[0].(*parse.IdentifierNode)).Ident, s) if i != -1 { for _, name := range s[:i] { newCmds = append(newCmds, newIdentCmd(name)) } s = s[i+1:] } newCmds = append(newCmds, id) } // Create any remaining sanitizers. for _, name := range s { newCmds = append(newCmds, newIdentCmd(name)) } p.Cmds = newCmds } // indexOfStr is the least i such that strs[i] == s or -1 if s is not in strs. func indexOfStr(s string, strs []string) int { for i, t := range strs { if s == t { return i } } return -1 } // newIdentCmd produces a command containing a single identifier node. func newIdentCmd(identifier string) *parse.CommandNode { return &parse.CommandNode{ NodeType: parse.NodeCommand, Args: []parse.Node{parse.NewIdentifier(identifier)}, } } // join joins the two contexts of a branch template node. The result is an // error context if either of the input contexts are error contexts, or if the // the input contexts differ. func join(a, b context, line int, nodeName string) context { if a.state == stateError { return a } if b.state == stateError { return b } if a.eq(b) { return a } c := a c.urlPart = b.urlPart if c.eq(b) { // The contexts differ only by urlPart. c.urlPart = urlPartUnknown return c } return context{ state: stateError, errLine: line, errStr: fmt.Sprintf("{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), } } // escapeBranch escapes a branch template node: "if", "range" and "with". func escapeBranch(c context, n *parse.BranchNode, nodeName string) context { c0 := escapeList(c, n.List) if nodeName == "range" && c0.state != stateError { // The "true" branch of a "range" node can execute multiple times. // We check that executing n.List once results in the same context // as executing n.List twice. c0 = join(c0, escapeList(c0, n.List), n.Line, nodeName) if c0.state == stateError { // Make clear that this is a problem on loop re-entry // since developers tend to overlook that branch when // debugging templates. c0.errLine = n.Line c0.errStr = "on range loop re-entry: " + c0.errStr return c0 } } c1 := escapeList(c, n.ElseList) return join(c0, c1, n.Line, nodeName) } // escapeList escapes a list template node. func escapeList(c context, n *parse.ListNode) context { if n == nil { return c } for _, m := range n.Nodes { c = escape(c, m) } return c } // delimEnds maps each delim to a string of characters that terminate it. var delimEnds = [...]string{ delimDoubleQuote: `"`, delimSingleQuote: "'", // Determined empirically by running the below in various browsers. // var div = document.createElement("DIV"); // for (var i = 0; i < 0x10000; ++i) { // div.innerHTML = ""; // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) // document.write("

U+" + i.toString(16)); // } delimSpaceOrTagEnd: " \t\n\f\r>", } // escapeText escapes a text template node. func escapeText(c context, s []byte) context { for len(s) > 0 { if c.delim == delimNone { c, s = transitionFunc[c.state](c, s) continue } i := bytes.IndexAny(s, delimEnds[c.delim]) if i == -1 { // Remain inside the attribute. // Decode the value so non-HTML rules can easily handle //