| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package html is a specialization of template that automates the |
| // construction of safe HTML output. |
| // INCOMPLETE. |
| package html |
| |
| import ( |
| "bytes" |
| "fmt" |
| "os" |
| "strings" |
| "template" |
| "template/parse" |
| ) |
| |
| // Escape rewrites each action in the template to guarantee that the output is |
| // HTML-escaped. |
| func Escape(t *template.Template) (*template.Template, os.Error) { |
| c := escapeList(context{}, t.Tree.Root) |
| if c.errStr != "" { |
| return nil, fmt.Errorf("%s:%d: %s", t.Name(), c.errLine, c.errStr) |
| } |
| return t, nil |
| } |
| |
| // escape escapes a template node. |
| func escape(c context, n parse.Node) context { |
| switch n := n.(type) { |
| case *parse.ActionNode: |
| return escapeAction(c, n) |
| case *parse.IfNode: |
| return escapeBranch(c, &n.BranchNode, "if") |
| case *parse.ListNode: |
| return escapeList(c, n) |
| case *parse.RangeNode: |
| return escapeBranch(c, &n.BranchNode, "range") |
| case *parse.TextNode: |
| return escapeText(c, n) |
| case *parse.WithNode: |
| return escapeBranch(c, &n.BranchNode, "with") |
| } |
| // TODO: handle a *parse.TemplateNode. Should Escape take a *template.Set? |
| panic("escaping " + n.String() + " is unimplemented") |
| } |
| |
| // escapeAction escapes an action template node. |
| func escapeAction(c context, n *parse.ActionNode) context { |
| sanitizer := "html" |
| if c.state == stateURL { |
| sanitizer = "urlquery" |
| } |
| // If the pipe already ends with the sanitizer, do not interfere. |
| if m := len(n.Pipe.Cmds); m != 0 { |
| if last := n.Pipe.Cmds[m-1]; len(last.Args) != 0 { |
| if i, ok := last.Args[0].(*parse.IdentifierNode); ok && i.Ident == sanitizer { |
| return c |
| } |
| } |
| } |
| // Otherwise, append the sanitizer. |
| n.Pipe.Cmds = append(n.Pipe.Cmds, &parse.CommandNode{ |
| NodeType: parse.NodeCommand, |
| Args: []parse.Node{parse.NewIdentifier(sanitizer)}, |
| }) |
| return c |
| } |
| |
| // join joins the two contexts of a branch template node. The result is an |
| // error context if either of the input contexts are error contexts, or if the |
| // the input contexts differ. |
| func join(a, b context, line int, nodeName string) context { |
| if a.state == stateError { |
| return a |
| } |
| if b.state == stateError { |
| return b |
| } |
| if a.eq(b) { |
| return a |
| } |
| return context{ |
| state: stateError, |
| errLine: line, |
| errStr: fmt.Sprintf("{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), |
| } |
| } |
| |
| // escapeBranch escapes a branch template node: "if", "range" and "with". |
| func escapeBranch(c context, n *parse.BranchNode, nodeName string) context { |
| c0 := escapeList(c, n.List) |
| if nodeName == "range" { |
| // The "true" branch of a "range" node can execute multiple times. |
| // We check that executing n.List once results in the same context |
| // as executing n.List twice. |
| c0 = join(c0, escapeList(c0, n.List), n.Line, nodeName) |
| } |
| c1 := escapeList(c, n.ElseList) |
| return join(c0, c1, n.Line, nodeName) |
| } |
| |
| // escapeList escapes a list template node. |
| func escapeList(c context, n *parse.ListNode) context { |
| if n == nil { |
| return c |
| } |
| for _, m := range n.Nodes { |
| c = escape(c, m) |
| } |
| return c |
| } |
| |
| // escapeText escapes a text template node. |
| func escapeText(c context, n *parse.TextNode) context { |
| for s := n.Text; len(s) > 0; { |
| c, s = transitionFunc[c.state](c, s) |
| } |
| return c |
| } |
| |
| // transitionFunc is the array of context transition functions for text nodes. |
| // A transition function takes a context and template text input, and returns |
| // the updated context and any unconsumed text. |
| var transitionFunc = [...]func(context, []byte) (context, []byte){ |
| stateText: tText, |
| stateTag: tTag, |
| stateURL: tURL, |
| stateAttr: tAttr, |
| stateError: tError, |
| } |
| |
| // tText is the context transition function for the text state. |
| func tText(c context, s []byte) (context, []byte) { |
| for { |
| i := bytes.IndexByte(s, '<') |
| if i == -1 || i+1 == len(s) { |
| return c, nil |
| } |
| i++ |
| if s[i] == '/' { |
| if i+1 == len(s) { |
| return c, nil |
| } |
| i++ |
| } |
| j := eatTagName(s, i) |
| if j != i { |
| // We've found an HTML tag. |
| return context{state: stateTag}, s[j:] |
| } |
| s = s[j:] |
| } |
| panic("unreachable") |
| } |
| |
| // tTag is the context transition function for the tag state. |
| func tTag(c context, s []byte) (context, []byte) { |
| // Skip to the end tag, if there is one. |
| i := bytes.IndexByte(s, '>') |
| if i != -1 { |
| return context{state: stateText}, s[i+1:] |
| } |
| |
| // Otherwise, find the attribute name. |
| i = eatWhiteSpace(s, 0) |
| attrStart, i := i, eatAttrName(s, i) |
| if i == len(s) { |
| return context{state: stateTag}, nil |
| } |
| state := stateAttr |
| if urlAttr[strings.ToLower(string(s[attrStart:i]))] { |
| state = stateURL |
| } |
| |
| // Consume the "=". |
| i = eatWhiteSpace(s, i) |
| if i == len(s) || s[i] != '=' { |
| return context{state: stateTag}, s[i:] |
| } |
| i = eatWhiteSpace(s, i+1) |
| |
| // Find the delimiter. |
| if i == len(s) { |
| return context{state: state, delim: delimSpaceOrTagEnd}, nil |
| } |
| switch s[i] { |
| case '\'': |
| return context{state: state, delim: delimSingleQuote}, s[i+1:] |
| case '"': |
| return context{state: state, delim: delimDoubleQuote}, s[i+1:] |
| } |
| |
| // TODO: This shouldn't be an error: `<a b=1 c={{.X}}` should be valid. |
| return context{state: stateError}, nil |
| } |
| |
| // tAttr is the context transition function for the attribute state. |
| func tAttr(c context, s []byte) (context, []byte) { |
| // TODO: look for the delimiter. |
| return c, nil |
| } |
| |
| // tURL is the context transition function for the URL state. |
| func tURL(c context, s []byte) (context, []byte) { |
| // TODO: look for the delimiter. |
| return c, nil |
| } |
| |
| // tError is the context transition function for the error state. |
| func tError(c context, s []byte) (context, []byte) { |
| return c, nil |
| } |
| |
| // eatAttrName returns the largest j such that s[i:j] is an attribute name. |
| func eatAttrName(s []byte, i int) int { |
| for j := i; j < len(s); j++ { |
| switch s[j] { |
| case ' ', '\n', '\r', '\t', '=': |
| return j |
| default: |
| // No-op. |
| } |
| } |
| return len(s) |
| } |
| |
| // eatTagName returns the largest j such that s[i:j] is a tag name. |
| func eatTagName(s []byte, i int) int { |
| for j := i; j < len(s); j++ { |
| x := s[j] |
| switch { |
| case 'a' <= x && x <= 'z': |
| // No-op. |
| case 'A' <= x && x <= 'Z': |
| // No-op. |
| case '0' <= x && x <= '9' && i != j: |
| // No-op. |
| default: |
| return j |
| } |
| } |
| return len(s) |
| } |
| |
| // eatWhiteSpace returns the largest j such that s[i:j] is white space. |
| func eatWhiteSpace(s []byte, i int) int { |
| for j := i; j < len(s); j++ { |
| switch s[j] { |
| case ' ', '\n', '\r', '\t': |
| // No-op. |
| default: |
| return j |
| } |
| } |
| return len(s) |
| } |
| |
| // urlAttr is the set of attribute names whose values are URLs. |
| // It consists of all "%URI"-typed attributes from |
| // http://www.w3.org/TR/html4/index/attributes.html |
| // as well as those attributes defined at |
| // http://dev.w3.org/html5/spec/index.html#attributes-1 |
| // whose Value column in that table matches |
| // "Valid [non-empty] URL potentially surrounded by spaces". |
| var urlAttr = map[string]bool{ |
| "action": true, |
| "archive": true, |
| "background": true, |
| "cite": true, |
| "classid": true, |
| "codebase": true, |
| "data": true, |
| "formaction": true, |
| "href": true, |
| "icon": true, |
| "longdesc": true, |
| "manifest": true, |
| "poster": true, |
| "profile": true, |
| "src": true, |
| "usemap": true, |
| } |