| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package html is a specialization of template that automates the |
| // construction of safe HTML output. |
| // INCOMPLETE. |
| package html |
| |
| import ( |
| "bytes" |
| "fmt" |
| "html" |
| "os" |
| "strings" |
| "template" |
| "template/parse" |
| ) |
| |
| // Escape rewrites each action in the template to guarantee that the output is |
| // HTML-escaped. |
| func Escape(t *template.Template) (*template.Template, os.Error) { |
| c := escapeList(context{}, t.Tree.Root) |
| if c.errStr != "" { |
| return nil, fmt.Errorf("%s:%d: %s", t.Name(), c.errLine, c.errStr) |
| } |
| if c.state != stateText { |
| return nil, fmt.Errorf("%s ends in a non-text context: %v", t.Name(), c) |
| } |
| t.Funcs(funcMap) |
| return t, nil |
| } |
| |
| // funcMap maps command names to functions that render their inputs safe. |
| var funcMap = template.FuncMap{ |
| "exp_template_html_urlfilter": urlFilter, |
| } |
| |
| // escape escapes a template node. |
| func escape(c context, n parse.Node) context { |
| switch n := n.(type) { |
| case *parse.ActionNode: |
| return escapeAction(c, n) |
| case *parse.IfNode: |
| return escapeBranch(c, &n.BranchNode, "if") |
| case *parse.ListNode: |
| return escapeList(c, n) |
| case *parse.RangeNode: |
| return escapeBranch(c, &n.BranchNode, "range") |
| case *parse.TextNode: |
| return escapeText(c, n.Text) |
| case *parse.WithNode: |
| return escapeBranch(c, &n.BranchNode, "with") |
| } |
| // TODO: handle a *parse.TemplateNode. Should Escape take a *template.Set? |
| panic("escaping " + n.String() + " is unimplemented") |
| } |
| |
| // escapeAction escapes an action template node. |
| func escapeAction(c context, n *parse.ActionNode) context { |
| sanitizer := "html" |
| if c.state == stateURL { |
| switch c.urlPart { |
| case urlPartNone: |
| sanitizer = "exp_template_html_urlfilter" |
| case urlPartQueryOrFrag: |
| sanitizer = "urlquery" |
| case urlPartPreQuery: |
| // The default "html" works here. |
| case urlPartUnknown: |
| return context{ |
| state: stateError, |
| errLine: n.Line, |
| errStr: fmt.Sprintf("%s appears in an ambiguous URL context", n), |
| } |
| default: |
| panic(c.urlPart.String()) |
| } |
| } |
| // If the pipe already ends with the sanitizer, do not interfere. |
| if m := len(n.Pipe.Cmds); m != 0 { |
| if last := n.Pipe.Cmds[m-1]; len(last.Args) != 0 { |
| if i, ok := last.Args[0].(*parse.IdentifierNode); ok && i.Ident == sanitizer { |
| return c |
| } |
| } |
| } |
| // Otherwise, append the sanitizer. |
| n.Pipe.Cmds = append(n.Pipe.Cmds, &parse.CommandNode{ |
| NodeType: parse.NodeCommand, |
| Args: []parse.Node{parse.NewIdentifier(sanitizer)}, |
| }) |
| return c |
| } |
| |
| // join joins the two contexts of a branch template node. The result is an |
| // error context if either of the input contexts are error contexts, or if the |
| // the input contexts differ. |
| func join(a, b context, line int, nodeName string) context { |
| if a.state == stateError { |
| return a |
| } |
| if b.state == stateError { |
| return b |
| } |
| if a.eq(b) { |
| return a |
| } |
| |
| c := a |
| c.urlPart = b.urlPart |
| if c.eq(b) { |
| // The contexts differ only by urlPart. |
| c.urlPart = urlPartUnknown |
| return c |
| } |
| |
| return context{ |
| state: stateError, |
| errLine: line, |
| errStr: fmt.Sprintf("{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), |
| } |
| } |
| |
| // escapeBranch escapes a branch template node: "if", "range" and "with". |
| func escapeBranch(c context, n *parse.BranchNode, nodeName string) context { |
| c0 := escapeList(c, n.List) |
| if nodeName == "range" && c0.state != stateError { |
| // The "true" branch of a "range" node can execute multiple times. |
| // We check that executing n.List once results in the same context |
| // as executing n.List twice. |
| c0 = join(c0, escapeList(c0, n.List), n.Line, nodeName) |
| if c0.state == stateError { |
| // Make clear that this is a problem on loop re-entry |
| // since developers tend to overlook that branch when |
| // debugging templates. |
| c0.errLine = n.Line |
| c0.errStr = "on range loop re-entry: " + c0.errStr |
| return c0 |
| } |
| } |
| c1 := escapeList(c, n.ElseList) |
| return join(c0, c1, n.Line, nodeName) |
| } |
| |
| // escapeList escapes a list template node. |
| func escapeList(c context, n *parse.ListNode) context { |
| if n == nil { |
| return c |
| } |
| for _, m := range n.Nodes { |
| c = escape(c, m) |
| } |
| return c |
| } |
| |
| // delimEnds maps each delim to a string of characters that terminate it. |
| var delimEnds = [...]string{ |
| delimDoubleQuote: `"`, |
| delimSingleQuote: "'", |
| // Determined empirically by running the below in various browsers. |
| // var div = document.createElement("DIV"); |
| // for (var i = 0; i < 0x10000; ++i) { |
| // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>"; |
| // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) |
| // document.write("<p>U+" + i.toString(16)); |
| // } |
| delimSpaceOrTagEnd: " \t\n\f\r>", |
| } |
| |
| // escapeText escapes a text template node. |
| func escapeText(c context, s []byte) context { |
| for len(s) > 0 { |
| if c.delim == delimNone { |
| c, s = transitionFunc[c.state](c, s) |
| continue |
| } |
| |
| i := bytes.IndexAny(s, delimEnds[c.delim]) |
| if i == -1 { |
| // Remain inside the attribute. |
| // Decode the value so non-HTML rules can easily handle |
| // <button onclick="alert("Hi!")"> |
| // without having to entity decode token boundaries. |
| d := c.delim |
| c.delim = delimNone |
| c = escapeText(c, []byte(html.UnescapeString(string(s)))) |
| if c.state != stateError { |
| c.delim = d |
| } |
| return c |
| } |
| if c.delim != delimSpaceOrTagEnd { |
| // Consume any quote. |
| i++ |
| } |
| c, s = context{state: stateTag}, s[i:] |
| } |
| return c |
| } |
| |
| // transitionFunc is the array of context transition functions for text nodes. |
| // A transition function takes a context and template text input, and returns |
| // the updated context and any unconsumed text. |
| var transitionFunc = [...]func(context, []byte) (context, []byte){ |
| stateText: tText, |
| stateTag: tTag, |
| stateURL: tURL, |
| stateAttr: tAttr, |
| stateError: tError, |
| } |
| |
| // tText is the context transition function for the text state. |
| func tText(c context, s []byte) (context, []byte) { |
| for { |
| i := bytes.IndexByte(s, '<') |
| if i == -1 || i+1 == len(s) { |
| return c, nil |
| } |
| i++ |
| if s[i] == '/' { |
| if i+1 == len(s) { |
| return c, nil |
| } |
| i++ |
| } |
| j := eatTagName(s, i) |
| if j != i { |
| // We've found an HTML tag. |
| return context{state: stateTag}, s[j:] |
| } |
| s = s[j:] |
| } |
| panic("unreachable") |
| } |
| |
| // tTag is the context transition function for the tag state. |
| func tTag(c context, s []byte) (context, []byte) { |
| // Find the attribute name. |
| attrStart := eatWhiteSpace(s, 0) |
| i, err := eatAttrName(s, attrStart) |
| if err != nil { |
| return context{ |
| state: stateError, |
| errStr: err.String(), |
| }, nil |
| } |
| if i == len(s) { |
| return context{state: stateTag}, nil |
| } |
| state := stateAttr |
| if urlAttr[strings.ToLower(string(s[attrStart:i]))] { |
| state = stateURL |
| } |
| |
| // Look for the start of the value. |
| i = eatWhiteSpace(s, i) |
| if i == len(s) { |
| return context{state: stateTag}, s[i:] |
| } |
| if s[i] == '>' { |
| return context{state: stateText}, s[i+1:] |
| } else if s[i] != '=' { |
| // Possible due to a valueless attribute or '/' in "<input />". |
| return context{state: stateTag}, s[i:] |
| } |
| // Consume the "=". |
| i = eatWhiteSpace(s, i+1) |
| |
| // Find the attribute delimiter. |
| if i < len(s) { |
| switch s[i] { |
| case '\'': |
| return context{state: state, delim: delimSingleQuote}, s[i+1:] |
| case '"': |
| return context{state: state, delim: delimDoubleQuote}, s[i+1:] |
| } |
| } |
| |
| return context{state: state, delim: delimSpaceOrTagEnd}, s[i:] |
| } |
| |
| // tAttr is the context transition function for the attribute state. |
| func tAttr(c context, s []byte) (context, []byte) { |
| return c, nil |
| } |
| |
| // tURL is the context transition function for the URL state. |
| func tURL(c context, s []byte) (context, []byte) { |
| if bytes.IndexAny(s, "#?") >= 0 { |
| c.urlPart = urlPartQueryOrFrag |
| } else if c.urlPart == urlPartNone { |
| c.urlPart = urlPartPreQuery |
| } |
| return c, nil |
| } |
| |
| // tError is the context transition function for the error state. |
| func tError(c context, s []byte) (context, []byte) { |
| return c, nil |
| } |
| |
| // eatAttrName returns the largest j such that s[i:j] is an attribute name. |
| // It returns an error if s[i:] does not look like it begins with an |
| // attribute name, such as encountering a quote mark without a preceding |
| // equals sign. |
| func eatAttrName(s []byte, i int) (int, os.Error) { |
| for j := i; j < len(s); j++ { |
| switch s[j] { |
| case ' ', '\t', '\n', '\f', '\r', '=', '>': |
| return j, nil |
| case '\'', '"', '<': |
| // These result in a parse warning in HTML5 and are |
| // indicative of serious problems if seen in an attr |
| // name in a template. |
| return 0, fmt.Errorf("%q in attribute name: %.32q", s[j:j+1], s) |
| default: |
| // No-op. |
| } |
| } |
| return len(s), nil |
| } |
| |
| // eatTagName returns the largest j such that s[i:j] is a tag name. |
| func eatTagName(s []byte, i int) int { |
| for j := i; j < len(s); j++ { |
| x := s[j] |
| switch { |
| case 'a' <= x && x <= 'z': |
| // No-op. |
| case 'A' <= x && x <= 'Z': |
| // No-op. |
| case '0' <= x && x <= '9' && i != j: |
| // No-op. |
| default: |
| return j |
| } |
| } |
| return len(s) |
| } |
| |
| // eatWhiteSpace returns the largest j such that s[i:j] is white space. |
| func eatWhiteSpace(s []byte, i int) int { |
| for j := i; j < len(s); j++ { |
| switch s[j] { |
| case ' ', '\t', '\n', '\f', '\r': |
| // No-op. |
| default: |
| return j |
| } |
| } |
| return len(s) |
| } |
| |
| // urlAttr is the set of attribute names whose values are URLs. |
| // It consists of all "%URI"-typed attributes from |
| // http://www.w3.org/TR/html4/index/attributes.html |
| // as well as those attributes defined at |
| // http://dev.w3.org/html5/spec/index.html#attributes-1 |
| // whose Value column in that table matches |
| // "Valid [non-empty] URL potentially surrounded by spaces". |
| var urlAttr = map[string]bool{ |
| "action": true, |
| "archive": true, |
| "background": true, |
| "cite": true, |
| "classid": true, |
| "codebase": true, |
| "data": true, |
| "formaction": true, |
| "href": true, |
| "icon": true, |
| "longdesc": true, |
| "manifest": true, |
| "poster": true, |
| "profile": true, |
| "src": true, |
| "usemap": true, |
| } |
| |
| // urlFilter returns the HTML equivalent of its input unless it contains an |
| // unsafe protocol in which case it defangs the entire URL. |
| func urlFilter(args ...interface{}) string { |
| ok := false |
| var s string |
| if len(args) == 1 { |
| s, ok = args[0].(string) |
| } |
| if !ok { |
| s = fmt.Sprint(args...) |
| } |
| i := strings.IndexRune(s, ':') |
| if i >= 0 && strings.IndexRune(s[:i], '/') < 0 { |
| protocol := strings.ToLower(s[:i]) |
| if protocol != "http" && protocol != "https" && protocol != "mailto" { |
| // Return a value that someone investigating a bug |
| // report can put into a search engine. |
| return "#ZgotmplZ" |
| } |
| } |
| // TODO: Once we handle <style>#id { background: url({{.Img}}) }</style> |
| // we will need to stop this from HTML escaping and pipeline sanitizers. |
| return template.HTMLEscapeString(s) |
| } |