| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package template |
| |
| import ( |
| "bytes" |
| "fmt" |
| "html" |
| "io" |
| "text/template" |
| "text/template/parse" |
| ) |
| |
| // escapeTemplate rewrites the named template, which must be |
| // associated with t, to guarantee that the output of any of the named |
| // templates is properly escaped. If no error is returned, then the named templates have |
| // been modified. Otherwise the named templates have been rendered |
| // unusable. |
| func escapeTemplate(tmpl *Template, node parse.Node, name string) error { |
| c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) |
| var err error |
| if c.err != nil { |
| err, c.err.Name = c.err, name |
| } else if c.state != stateText { |
| err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} |
| } |
| if err != nil { |
| // Prevent execution of unsafe templates. |
| if t := tmpl.set[name]; t != nil { |
| t.escapeErr = err |
| t.text.Tree = nil |
| t.Tree = nil |
| } |
| return err |
| } |
| tmpl.esc.commit() |
| if t := tmpl.set[name]; t != nil { |
| t.escapeErr = escapeOK |
| t.Tree = t.text.Tree |
| } |
| return nil |
| } |
| |
| // evalArgs formats the list of arguments into a string. It is equivalent to |
| // fmt.Sprint(args...), except that it deferences all pointers. |
| func evalArgs(args ...interface{}) string { |
| // Optimization for simple common case of a single string argument. |
| if len(args) == 1 { |
| if s, ok := args[0].(string); ok { |
| return s |
| } |
| } |
| for i, arg := range args { |
| args[i] = indirectToStringerOrError(arg) |
| } |
| return fmt.Sprint(args...) |
| } |
| |
| // funcMap maps command names to functions that render their inputs safe. |
| var funcMap = template.FuncMap{ |
| "_html_template_attrescaper": attrEscaper, |
| "_html_template_commentescaper": commentEscaper, |
| "_html_template_cssescaper": cssEscaper, |
| "_html_template_cssvaluefilter": cssValueFilter, |
| "_html_template_htmlnamefilter": htmlNameFilter, |
| "_html_template_htmlescaper": htmlEscaper, |
| "_html_template_jsregexpescaper": jsRegexpEscaper, |
| "_html_template_jsstrescaper": jsStrEscaper, |
| "_html_template_jsvalescaper": jsValEscaper, |
| "_html_template_nospaceescaper": htmlNospaceEscaper, |
| "_html_template_rcdataescaper": rcdataEscaper, |
| "_html_template_srcsetescaper": srcsetFilterAndEscaper, |
| "_html_template_urlescaper": urlEscaper, |
| "_html_template_urlfilter": urlFilter, |
| "_html_template_urlnormalizer": urlNormalizer, |
| "_eval_args_": evalArgs, |
| } |
| |
| // escaper collects type inferences about templates and changes needed to make |
| // templates injection safe. |
| type escaper struct { |
| // ns is the nameSpace that this escaper is associated with. |
| ns *nameSpace |
| // output[templateName] is the output context for a templateName that |
| // has been mangled to include its input context. |
| output map[string]context |
| // derived[c.mangle(name)] maps to a template derived from the template |
| // named name templateName for the start context c. |
| derived map[string]*template.Template |
| // called[templateName] is a set of called mangled template names. |
| called map[string]bool |
| // xxxNodeEdits are the accumulated edits to apply during commit. |
| // Such edits are not applied immediately in case a template set |
| // executes a given template in different escaping contexts. |
| actionNodeEdits map[*parse.ActionNode][]string |
| templateNodeEdits map[*parse.TemplateNode]string |
| textNodeEdits map[*parse.TextNode][]byte |
| } |
| |
| // makeEscaper creates a blank escaper for the given set. |
| func makeEscaper(n *nameSpace) escaper { |
| return escaper{ |
| n, |
| map[string]context{}, |
| map[string]*template.Template{}, |
| map[string]bool{}, |
| map[*parse.ActionNode][]string{}, |
| map[*parse.TemplateNode]string{}, |
| map[*parse.TextNode][]byte{}, |
| } |
| } |
| |
| // filterFailsafe is an innocuous word that is emitted in place of unsafe values |
| // by sanitizer functions. It is not a keyword in any programming language, |
| // contains no special characters, is not empty, and when it appears in output |
| // it is distinct enough that a developer can find the source of the problem |
| // via a search engine. |
| const filterFailsafe = "ZgotmplZ" |
| |
| // escape escapes a template node. |
| func (e *escaper) escape(c context, n parse.Node) context { |
| switch n := n.(type) { |
| case *parse.ActionNode: |
| return e.escapeAction(c, n) |
| case *parse.IfNode: |
| return e.escapeBranch(c, &n.BranchNode, "if") |
| case *parse.ListNode: |
| return e.escapeList(c, n) |
| case *parse.RangeNode: |
| return e.escapeBranch(c, &n.BranchNode, "range") |
| case *parse.TemplateNode: |
| return e.escapeTemplate(c, n) |
| case *parse.TextNode: |
| return e.escapeText(c, n) |
| case *parse.WithNode: |
| return e.escapeBranch(c, &n.BranchNode, "with") |
| } |
| panic("escaping " + n.String() + " is unimplemented") |
| } |
| |
| // escapeAction escapes an action template node. |
| func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { |
| if len(n.Pipe.Decl) != 0 { |
| // A local variable assignment, not an interpolation. |
| return c |
| } |
| c = nudge(c) |
| // Check for disallowed use of predefined escapers in the pipeline. |
| for pos, idNode := range n.Pipe.Cmds { |
| node, ok := idNode.Args[0].(*parse.IdentifierNode) |
| if !ok { |
| // A predefined escaper "esc" will never be found as an identifier in a |
| // Chain or Field node, since: |
| // - "esc.x ..." is invalid, since predefined escapers return strings, and |
| // strings do not have methods, keys or fields. |
| // - "... .esc" is invalid, since predefined escapers are global functions, |
| // not methods or fields of any types. |
| // Therefore, it is safe to ignore these two node types. |
| continue |
| } |
| ident := node.Ident |
| if _, ok := predefinedEscapers[ident]; ok { |
| if pos < len(n.Pipe.Cmds)-1 || |
| c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { |
| return context{ |
| state: stateError, |
| err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), |
| } |
| } |
| } |
| } |
| s := make([]string, 0, 3) |
| switch c.state { |
| case stateError: |
| return c |
| case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: |
| switch c.urlPart { |
| case urlPartNone: |
| s = append(s, "_html_template_urlfilter") |
| fallthrough |
| case urlPartPreQuery: |
| switch c.state { |
| case stateCSSDqStr, stateCSSSqStr: |
| s = append(s, "_html_template_cssescaper") |
| default: |
| s = append(s, "_html_template_urlnormalizer") |
| } |
| case urlPartQueryOrFrag: |
| s = append(s, "_html_template_urlescaper") |
| case urlPartUnknown: |
| return context{ |
| state: stateError, |
| err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), |
| } |
| default: |
| panic(c.urlPart.String()) |
| } |
| case stateJS: |
| s = append(s, "_html_template_jsvalescaper") |
| // A slash after a value starts a div operator. |
| c.jsCtx = jsCtxDivOp |
| case stateJSDqStr, stateJSSqStr: |
| s = append(s, "_html_template_jsstrescaper") |
| case stateJSRegexp: |
| s = append(s, "_html_template_jsregexpescaper") |
| case stateCSS: |
| s = append(s, "_html_template_cssvaluefilter") |
| case stateText: |
| s = append(s, "_html_template_htmlescaper") |
| case stateRCDATA: |
| s = append(s, "_html_template_rcdataescaper") |
| case stateAttr: |
| // Handled below in delim check. |
| case stateAttrName, stateTag: |
| c.state = stateAttrName |
| s = append(s, "_html_template_htmlnamefilter") |
| case stateSrcset: |
| s = append(s, "_html_template_srcsetescaper") |
| default: |
| if isComment(c.state) { |
| s = append(s, "_html_template_commentescaper") |
| } else { |
| panic("unexpected state " + c.state.String()) |
| } |
| } |
| switch c.delim { |
| case delimNone: |
| // No extra-escaping needed for raw text content. |
| case delimSpaceOrTagEnd: |
| s = append(s, "_html_template_nospaceescaper") |
| default: |
| s = append(s, "_html_template_attrescaper") |
| } |
| e.editActionNode(n, s) |
| return c |
| } |
| |
| // ensurePipelineContains ensures that the pipeline ends with the commands with |
| // the identifiers in s in order. If the pipeline ends with a predefined escaper |
| // (i.e. "html" or "urlquery"), merge it with the identifiers in s. |
| func ensurePipelineContains(p *parse.PipeNode, s []string) { |
| if len(s) == 0 { |
| // Do not rewrite pipeline if we have no escapers to insert. |
| return |
| } |
| // Precondition: p.Cmds contains at most one predefined escaper and the |
| // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is |
| // always true because of the checks in escapeAction. |
| pipelineLen := len(p.Cmds) |
| if pipelineLen > 0 { |
| lastCmd := p.Cmds[pipelineLen-1] |
| if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { |
| if esc := idNode.Ident; predefinedEscapers[esc] { |
| // Pipeline ends with a predefined escaper. |
| if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { |
| // Special case: pipeline is of the form {{ esc arg1 arg2 ... argN }}, |
| // where esc is the predefined escaper, and arg1...argN are its arguments. |
| // Convert this into the equivalent form |
| // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily |
| // merged with the escapers in s. |
| lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) |
| p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) |
| pipelineLen++ |
| } |
| // If any of the commands in s that we are about to insert is equivalent |
| // to the predefined escaper, use the predefined escaper instead. |
| dup := false |
| for i, escaper := range s { |
| if escFnsEq(esc, escaper) { |
| s[i] = idNode.Ident |
| dup = true |
| } |
| } |
| if dup { |
| // The predefined escaper will already be inserted along with the |
| // escapers in s, so do not copy it to the rewritten pipeline. |
| pipelineLen-- |
| } |
| } |
| } |
| } |
| // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. |
| newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) |
| insertedIdents := make(map[string]bool) |
| for i := 0; i < pipelineLen; i++ { |
| cmd := p.Cmds[i] |
| newCmds[i] = cmd |
| if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok { |
| insertedIdents[normalizeEscFn(idNode.Ident)] = true |
| } |
| } |
| for _, name := range s { |
| if !insertedIdents[normalizeEscFn(name)] { |
| // When two templates share an underlying parse tree via the use of |
| // AddParseTree and one template is executed after the other, this check |
| // ensures that escapers that were already inserted into the pipeline on |
| // the first escaping pass do not get inserted again. |
| newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) |
| } |
| } |
| p.Cmds = newCmds |
| } |
| |
| // predefinedEscapers contains template predefined escapers that are equivalent |
| // to some contextual escapers. Keep in sync with equivEscapers. |
| var predefinedEscapers = map[string]bool{ |
| "html": true, |
| "urlquery": true, |
| } |
| |
| // equivEscapers matches contextual escapers to equivalent predefined |
| // template escapers. |
| var equivEscapers = map[string]string{ |
| // The following pairs of HTML escapers provide equivalent security |
| // guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'. |
| "_html_template_attrescaper": "html", |
| "_html_template_htmlescaper": "html", |
| "_html_template_rcdataescaper": "html", |
| // These two URL escapers produce URLs safe for embedding in a URL query by |
| // percent-encoding all the reserved characters specified in RFC 3986 Section |
| // 2.2 |
| "_html_template_urlescaper": "urlquery", |
| // These two functions are not actually equivalent; urlquery is stricter as it |
| // escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer |
| // does not. It is therefore only safe to replace _html_template_urlnormalizer |
| // with urlquery (this happens in ensurePipelineContains), but not the otherI've |
| // way around. We keep this entry around to preserve the behavior of templates |
| // written before Go 1.9, which might depend on this substitution taking place. |
| "_html_template_urlnormalizer": "urlquery", |
| } |
| |
| // escFnsEq reports whether the two escaping functions are equivalent. |
| func escFnsEq(a, b string) bool { |
| return normalizeEscFn(a) == normalizeEscFn(b) |
| } |
| |
| // normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of |
| // escaper functions a and b that are equivalent. |
| func normalizeEscFn(e string) string { |
| if norm := equivEscapers[e]; norm != "" { |
| return norm |
| } |
| return e |
| } |
| |
| // redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x) |
| // for all x. |
| var redundantFuncs = map[string]map[string]bool{ |
| "_html_template_commentescaper": { |
| "_html_template_attrescaper": true, |
| "_html_template_nospaceescaper": true, |
| "_html_template_htmlescaper": true, |
| }, |
| "_html_template_cssescaper": { |
| "_html_template_attrescaper": true, |
| }, |
| "_html_template_jsregexpescaper": { |
| "_html_template_attrescaper": true, |
| }, |
| "_html_template_jsstrescaper": { |
| "_html_template_attrescaper": true, |
| }, |
| "_html_template_urlescaper": { |
| "_html_template_urlnormalizer": true, |
| }, |
| } |
| |
| // appendCmd appends the given command to the end of the command pipeline |
| // unless it is redundant with the last command. |
| func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode { |
| if n := len(cmds); n != 0 { |
| last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode) |
| next, okNext := cmd.Args[0].(*parse.IdentifierNode) |
| if okLast && okNext && redundantFuncs[last.Ident][next.Ident] { |
| return cmds |
| } |
| } |
| return append(cmds, cmd) |
| } |
| |
| // newIdentCmd produces a command containing a single identifier node. |
| func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode { |
| return &parse.CommandNode{ |
| NodeType: parse.NodeCommand, |
| Args: []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree. |
| } |
| } |
| |
| // nudge returns the context that would result from following empty string |
| // transitions from the input context. |
| // For example, parsing: |
| // `<a href=` |
| // will end in context{stateBeforeValue, attrURL}, but parsing one extra rune: |
| // `<a href=x` |
| // will end in context{stateURL, delimSpaceOrTagEnd, ...}. |
| // There are two transitions that happen when the 'x' is seen: |
| // (1) Transition from a before-value state to a start-of-value state without |
| // consuming any character. |
| // (2) Consume 'x' and transition past the first value character. |
| // In this case, nudging produces the context after (1) happens. |
| func nudge(c context) context { |
| switch c.state { |
| case stateTag: |
| // In `<foo {{.}}`, the action should emit an attribute. |
| c.state = stateAttrName |
| case stateBeforeValue: |
| // In `<foo bar={{.}}`, the action is an undelimited value. |
| c.state, c.delim, c.attr = attrStartStates[c.attr], delimSpaceOrTagEnd, attrNone |
| case stateAfterName: |
| // In `<foo bar {{.}}`, the action is an attribute name. |
| c.state, c.attr = stateAttrName, attrNone |
| } |
| return c |
| } |
| |
| // join joins the two contexts of a branch template node. The result is an |
| // error context if either of the input contexts are error contexts, or if the |
| // input contexts differ. |
| func join(a, b context, node parse.Node, nodeName string) context { |
| if a.state == stateError { |
| return a |
| } |
| if b.state == stateError { |
| return b |
| } |
| if a.eq(b) { |
| return a |
| } |
| |
| c := a |
| c.urlPart = b.urlPart |
| if c.eq(b) { |
| // The contexts differ only by urlPart. |
| c.urlPart = urlPartUnknown |
| return c |
| } |
| |
| c = a |
| c.jsCtx = b.jsCtx |
| if c.eq(b) { |
| // The contexts differ only by jsCtx. |
| c.jsCtx = jsCtxUnknown |
| return c |
| } |
| |
| // Allow a nudged context to join with an unnudged one. |
| // This means that |
| // <p title={{if .C}}{{.}}{{end}} |
| // ends in an unquoted value state even though the else branch |
| // ends in stateBeforeValue. |
| if c, d := nudge(a), nudge(b); !(c.eq(a) && d.eq(b)) { |
| if e := join(c, d, node, nodeName); e.state != stateError { |
| return e |
| } |
| } |
| |
| return context{ |
| state: stateError, |
| err: errorf(ErrBranchEnd, node, 0, "{{%s}} branches end in different contexts: %v, %v", nodeName, a, b), |
| } |
| } |
| |
| // escapeBranch escapes a branch template node: "if", "range" and "with". |
| func (e *escaper) escapeBranch(c context, n *parse.BranchNode, nodeName string) context { |
| c0 := e.escapeList(c, n.List) |
| if nodeName == "range" && c0.state != stateError { |
| // The "true" branch of a "range" node can execute multiple times. |
| // We check that executing n.List once results in the same context |
| // as executing n.List twice. |
| c1, _ := e.escapeListConditionally(c0, n.List, nil) |
| c0 = join(c0, c1, n, nodeName) |
| if c0.state == stateError { |
| // Make clear that this is a problem on loop re-entry |
| // since developers tend to overlook that branch when |
| // debugging templates. |
| c0.err.Line = n.Line |
| c0.err.Description = "on range loop re-entry: " + c0.err.Description |
| return c0 |
| } |
| } |
| c1 := e.escapeList(c, n.ElseList) |
| return join(c0, c1, n, nodeName) |
| } |
| |
| // escapeList escapes a list template node. |
| func (e *escaper) escapeList(c context, n *parse.ListNode) context { |
| if n == nil { |
| return c |
| } |
| for _, m := range n.Nodes { |
| c = e.escape(c, m) |
| } |
| return c |
| } |
| |
| // escapeListConditionally escapes a list node but only preserves edits and |
| // inferences in e if the inferences and output context satisfy filter. |
| // It returns the best guess at an output context, and the result of the filter |
| // which is the same as whether e was updated. |
| func (e *escaper) escapeListConditionally(c context, n *parse.ListNode, filter func(*escaper, context) bool) (context, bool) { |
| e1 := makeEscaper(e.ns) |
| // Make type inferences available to f. |
| for k, v := range e.output { |
| e1.output[k] = v |
| } |
| c = e1.escapeList(c, n) |
| ok := filter != nil && filter(&e1, c) |
| if ok { |
| // Copy inferences and edits from e1 back into e. |
| for k, v := range e1.output { |
| e.output[k] = v |
| } |
| for k, v := range e1.derived { |
| e.derived[k] = v |
| } |
| for k, v := range e1.called { |
| e.called[k] = v |
| } |
| for k, v := range e1.actionNodeEdits { |
| e.editActionNode(k, v) |
| } |
| for k, v := range e1.templateNodeEdits { |
| e.editTemplateNode(k, v) |
| } |
| for k, v := range e1.textNodeEdits { |
| e.editTextNode(k, v) |
| } |
| } |
| return c, ok |
| } |
| |
| // escapeTemplate escapes a {{template}} call node. |
| func (e *escaper) escapeTemplate(c context, n *parse.TemplateNode) context { |
| c, name := e.escapeTree(c, n, n.Name, n.Line) |
| if name != n.Name { |
| e.editTemplateNode(n, name) |
| } |
| return c |
| } |
| |
| // escapeTree escapes the named template starting in the given context as |
| // necessary and returns its output context. |
| func (e *escaper) escapeTree(c context, node parse.Node, name string, line int) (context, string) { |
| // Mangle the template name with the input context to produce a reliable |
| // identifier. |
| dname := c.mangle(name) |
| e.called[dname] = true |
| if out, ok := e.output[dname]; ok { |
| // Already escaped. |
| return out, dname |
| } |
| t := e.template(name) |
| if t == nil { |
| // Two cases: The template exists but is empty, or has never been mentioned at |
| // all. Distinguish the cases in the error messages. |
| if e.ns.set[name] != nil { |
| return context{ |
| state: stateError, |
| err: errorf(ErrNoSuchTemplate, node, line, "%q is an incomplete or empty template", name), |
| }, dname |
| } |
| return context{ |
| state: stateError, |
| err: errorf(ErrNoSuchTemplate, node, line, "no such template %q", name), |
| }, dname |
| } |
| if dname != name { |
| // Use any template derived during an earlier call to escapeTemplate |
| // with different top level templates, or clone if necessary. |
| dt := e.template(dname) |
| if dt == nil { |
| dt = template.New(dname) |
| dt.Tree = &parse.Tree{Name: dname, Root: t.Root.CopyList()} |
| e.derived[dname] = dt |
| } |
| t = dt |
| } |
| return e.computeOutCtx(c, t), dname |
| } |
| |
| // computeOutCtx takes a template and its start context and computes the output |
| // context while storing any inferences in e. |
| func (e *escaper) computeOutCtx(c context, t *template.Template) context { |
| // Propagate context over the body. |
| c1, ok := e.escapeTemplateBody(c, t) |
| if !ok { |
| // Look for a fixed point by assuming c1 as the output context. |
| if c2, ok2 := e.escapeTemplateBody(c1, t); ok2 { |
| c1, ok = c2, true |
| } |
| // Use c1 as the error context if neither assumption worked. |
| } |
| if !ok && c1.state != stateError { |
| return context{ |
| state: stateError, |
| err: errorf(ErrOutputContext, t.Tree.Root, 0, "cannot compute output context for template %s", t.Name()), |
| } |
| } |
| return c1 |
| } |
| |
| // escapeTemplateBody escapes the given template assuming the given output |
| // context, and returns the best guess at the output context and whether the |
| // assumption was correct. |
| func (e *escaper) escapeTemplateBody(c context, t *template.Template) (context, bool) { |
| filter := func(e1 *escaper, c1 context) bool { |
| if c1.state == stateError { |
| // Do not update the input escaper, e. |
| return false |
| } |
| if !e1.called[t.Name()] { |
| // If t is not recursively called, then c1 is an |
| // accurate output context. |
| return true |
| } |
| // c1 is accurate if it matches our assumed output context. |
| return c.eq(c1) |
| } |
| // We need to assume an output context so that recursive template calls |
| // take the fast path out of escapeTree instead of infinitely recursing. |
| // Naively assuming that the input context is the same as the output |
| // works >90% of the time. |
| e.output[t.Name()] = c |
| return e.escapeListConditionally(c, t.Tree.Root, filter) |
| } |
| |
| // delimEnds maps each delim to a string of characters that terminate it. |
| var delimEnds = [...]string{ |
| delimDoubleQuote: `"`, |
| delimSingleQuote: "'", |
| // Determined empirically by running the below in various browsers. |
| // var div = document.createElement("DIV"); |
| // for (var i = 0; i < 0x10000; ++i) { |
| // div.innerHTML = "<span title=x" + String.fromCharCode(i) + "-bar>"; |
| // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) |
| // document.write("<p>U+" + i.toString(16)); |
| // } |
| delimSpaceOrTagEnd: " \t\n\f\r>", |
| } |
| |
| var doctypeBytes = []byte("<!DOCTYPE") |
| |
| // escapeText escapes a text template node. |
| func (e *escaper) escapeText(c context, n *parse.TextNode) context { |
| s, written, i, b := n.Text, 0, 0, new(bytes.Buffer) |
| for i != len(s) { |
| c1, nread := contextAfterText(c, s[i:]) |
| i1 := i + nread |
| if c.state == stateText || c.state == stateRCDATA { |
| end := i1 |
| if c1.state != c.state { |
| for j := end - 1; j >= i; j-- { |
| if s[j] == '<' { |
| end = j |
| break |
| } |
| } |
| } |
| for j := i; j < end; j++ { |
| if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { |
| b.Write(s[written:j]) |
| b.WriteString("<") |
| written = j + 1 |
| } |
| } |
| } else if isComment(c.state) && c.delim == delimNone { |
| switch c.state { |
| case stateJSBlockCmt: |
| // https://es5.github.com/#x7.4: |
| // "Comments behave like white space and are |
| // discarded except that, if a MultiLineComment |
| // contains a line terminator character, then |
| // the entire comment is considered to be a |
| // LineTerminator for purposes of parsing by |
| // the syntactic grammar." |
| if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { |
| b.WriteByte('\n') |
| } else { |
| b.WriteByte(' ') |
| } |
| case stateCSSBlockCmt: |
| b.WriteByte(' ') |
| } |
| written = i1 |
| } |
| if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { |
| // Preserve the portion between written and the comment start. |
| cs := i1 - 2 |
| if c1.state == stateHTMLCmt { |
| // "<!--" instead of "/*" or "//" |
| cs -= 2 |
| } |
| b.Write(s[written:cs]) |
| written = i1 |
| } |
| if i == i1 && c.state == c1.state { |
| panic(fmt.Sprintf("infinite loop from %v to %v on %q..%q", c, c1, s[:i], s[i:])) |
| } |
| c, i = c1, i1 |
| } |
| |
| if written != 0 && c.state != stateError { |
| if !isComment(c.state) || c.delim != delimNone { |
| b.Write(n.Text[written:]) |
| } |
| e.editTextNode(n, b.Bytes()) |
| } |
| return c |
| } |
| |
| // contextAfterText starts in context c, consumes some tokens from the front of |
| // s, then returns the context after those tokens and the unprocessed suffix. |
| func contextAfterText(c context, s []byte) (context, int) { |
| if c.delim == delimNone { |
| c1, i := tSpecialTagEnd(c, s) |
| if i == 0 { |
| // A special end tag (`</script>`) has been seen and |
| // all content preceding it has been consumed. |
| return c1, 0 |
| } |
| // Consider all content up to any end tag. |
| return transitionFunc[c.state](c, s[:i]) |
| } |
| |
| // We are at the beginning of an attribute value. |
| |
| i := bytes.IndexAny(s, delimEnds[c.delim]) |
| if i == -1 { |
| i = len(s) |
| } |
| if c.delim == delimSpaceOrTagEnd { |
| // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state |
| // lists the runes below as error characters. |
| // Error out because HTML parsers may differ on whether |
| // "<a id= onclick=f(" ends inside id's or onclick's value, |
| // "<a class=`foo " ends inside a value, |
| // "<a style=font:'Arial'" needs open-quote fixup. |
| // IE treats '`' as a quotation character. |
| if j := bytes.IndexAny(s[:i], "\"'<=`"); j >= 0 { |
| return context{ |
| state: stateError, |
| err: errorf(ErrBadHTML, nil, 0, "%q in unquoted attr: %q", s[j:j+1], s[:i]), |
| }, len(s) |
| } |
| } |
| if i == len(s) { |
| // Remain inside the attribute. |
| // Decode the value so non-HTML rules can easily handle |
| // <button onclick="alert("Hi!")"> |
| // without having to entity decode token boundaries. |
| for u := []byte(html.UnescapeString(string(s))); len(u) != 0; { |
| c1, i1 := transitionFunc[c.state](c, u) |
| c, u = c1, u[i1:] |
| } |
| return c, len(s) |
| } |
| |
| element := c.element |
| |
| // If this is a non-JS "type" attribute inside "script" tag, do not treat the contents as JS. |
| if c.state == stateAttr && c.element == elementScript && c.attr == attrScriptType && !isJSType(string(s[:i])) { |
| element = elementNone |
| } |
| |
| if c.delim != delimSpaceOrTagEnd { |
| // Consume any quote. |
| i++ |
| } |
| // On exiting an attribute, we discard all state information |
| // except the state and element. |
| return context{state: stateTag, element: element}, i |
| } |
| |
| // editActionNode records a change to an action pipeline for later commit. |
| func (e *escaper) editActionNode(n *parse.ActionNode, cmds []string) { |
| if _, ok := e.actionNodeEdits[n]; ok { |
| panic(fmt.Sprintf("node %s shared between templates", n)) |
| } |
| e.actionNodeEdits[n] = cmds |
| } |
| |
| // editTemplateNode records a change to a {{template}} callee for later commit. |
| func (e *escaper) editTemplateNode(n *parse.TemplateNode, callee string) { |
| if _, ok := e.templateNodeEdits[n]; ok { |
| panic(fmt.Sprintf("node %s shared between templates", n)) |
| } |
| e.templateNodeEdits[n] = callee |
| } |
| |
| // editTextNode records a change to a text node for later commit. |
| func (e *escaper) editTextNode(n *parse.TextNode, text []byte) { |
| if _, ok := e.textNodeEdits[n]; ok { |
| panic(fmt.Sprintf("node %s shared between templates", n)) |
| } |
| e.textNodeEdits[n] = text |
| } |
| |
| // commit applies changes to actions and template calls needed to contextually |
| // autoescape content and adds any derived templates to the set. |
| func (e *escaper) commit() { |
| for name := range e.output { |
| e.template(name).Funcs(funcMap) |
| } |
| // Any template from the name space associated with this escaper can be used |
| // to add derived templates to the underlying text/template name space. |
| tmpl := e.arbitraryTemplate() |
| for _, t := range e.derived { |
| if _, err := tmpl.text.AddParseTree(t.Name(), t.Tree); err != nil { |
| panic("error adding derived template") |
| } |
| } |
| for n, s := range e.actionNodeEdits { |
| ensurePipelineContains(n.Pipe, s) |
| } |
| for n, name := range e.templateNodeEdits { |
| n.Name = name |
| } |
| for n, s := range e.textNodeEdits { |
| n.Text = s |
| } |
| // Reset state that is specific to this commit so that the same changes are |
| // not re-applied to the template on subsequent calls to commit. |
| e.called = make(map[string]bool) |
| e.actionNodeEdits = make(map[*parse.ActionNode][]string) |
| e.templateNodeEdits = make(map[*parse.TemplateNode]string) |
| e.textNodeEdits = make(map[*parse.TextNode][]byte) |
| } |
| |
| // template returns the named template given a mangled template name. |
| func (e *escaper) template(name string) *template.Template { |
| // Any template from the name space associated with this escaper can be used |
| // to look up templates in the underlying text/template name space. |
| t := e.arbitraryTemplate().text.Lookup(name) |
| if t == nil { |
| t = e.derived[name] |
| } |
| return t |
| } |
| |
| // arbitraryTemplate returns an arbitrary template from the name space |
| // associated with e and panics if no templates are found. |
| func (e *escaper) arbitraryTemplate() *Template { |
| for _, t := range e.ns.set { |
| return t |
| } |
| panic("no templates in name space") |
| } |
| |
| // Forwarding functions so that clients need only import this package |
| // to reach the general escaping functions of text/template. |
| |
| // HTMLEscape writes to w the escaped HTML equivalent of the plain text data b. |
| func HTMLEscape(w io.Writer, b []byte) { |
| template.HTMLEscape(w, b) |
| } |
| |
| // HTMLEscapeString returns the escaped HTML equivalent of the plain text data s. |
| func HTMLEscapeString(s string) string { |
| return template.HTMLEscapeString(s) |
| } |
| |
| // HTMLEscaper returns the escaped HTML equivalent of the textual |
| // representation of its arguments. |
| func HTMLEscaper(args ...interface{}) string { |
| return template.HTMLEscaper(args...) |
| } |
| |
| // JSEscape writes to w the escaped JavaScript equivalent of the plain text data b. |
| func JSEscape(w io.Writer, b []byte) { |
| template.JSEscape(w, b) |
| } |
| |
| // JSEscapeString returns the escaped JavaScript equivalent of the plain text data s. |
| func JSEscapeString(s string) string { |
| return template.JSEscapeString(s) |
| } |
| |
| // JSEscaper returns the escaped JavaScript equivalent of the textual |
| // representation of its arguments. |
| func JSEscaper(args ...interface{}) string { |
| return template.JSEscaper(args...) |
| } |
| |
| // URLQueryEscaper returns the escaped value of the textual representation of |
| // its arguments in a form suitable for embedding in a URL query. |
| func URLQueryEscaper(args ...interface{}) string { |
| return template.URLQueryEscaper(args...) |
| } |