| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package template |
| |
| import ( |
| "bytes" |
| "encoding/json" |
| "fmt" |
| "reflect" |
| "strings" |
| "unicode/utf8" |
| ) |
| |
// jsWhitespace holds every character that JavaScript treats as whitespace,
// i.e. the members of the regular expression character class \s.
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
const jsWhitespace = "\f\n\r\t\v" + // ASCII control whitespace
	"\u0020\u00a0\u1680" + // U+0020, U+00A0, U+1680
	"\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a" + // U+2000 through U+200A
	"\u2028\u2029" + // U+2028, U+2029
	"\u202f\u205f\u3000\ufeff" // U+202F, U+205F, U+3000, U+FEFF
| |
| // nextJSCtx returns the context that determines whether a slash after the |
| // given run of tokens starts a regular expression instead of a division |
| // operator: / or /=. |
| // |
| // This assumes that the token run does not include any string tokens, comment |
| // tokens, regular expression literal tokens, or division operators. |
| // |
| // This fails on some valid but nonsensical JavaScript programs like |
| // "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to |
| // fail on any known useful programs. It is based on the draft |
| // JavaScript 2.0 lexical grammar and requires one token of lookbehind: |
| // https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html |
| func nextJSCtx(s []byte, preceding jsCtx) jsCtx { |
| // Trim all JS whitespace characters |
| s = bytes.TrimRight(s, jsWhitespace) |
| if len(s) == 0 { |
| return preceding |
| } |
| |
| // All cases below are in the single-byte UTF-8 group. |
| switch c, n := s[len(s)-1], len(s); c { |
| case '+', '-': |
| // ++ and -- are not regexp preceders, but + and - are whether |
| // they are used as infix or prefix operators. |
| start := n - 1 |
| // Count the number of adjacent dashes or pluses. |
| for start > 0 && s[start-1] == c { |
| start-- |
| } |
| if (n-start)&1 == 1 { |
| // Reached for trailing minus signs since "---" is the |
| // same as "-- -". |
| return jsCtxRegexp |
| } |
| return jsCtxDivOp |
| case '.': |
| // Handle "42." |
| if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { |
| return jsCtxDivOp |
| } |
| return jsCtxRegexp |
| // Suffixes for all punctuators from section 7.7 of the language spec |
| // that only end binary operators not handled above. |
| case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': |
| return jsCtxRegexp |
| // Suffixes for all punctuators from section 7.7 of the language spec |
| // that are prefix operators not handled above. |
| case '!', '~': |
| return jsCtxRegexp |
| // Matches all the punctuators from section 7.7 of the language spec |
| // that are open brackets not handled above. |
| case '(', '[': |
| return jsCtxRegexp |
| // Matches all the punctuators from section 7.7 of the language spec |
| // that precede expression starts. |
| case ':', ';', '{': |
| return jsCtxRegexp |
| // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and |
| // are handled in the default except for '}' which can precede a |
| // division op as in |
| // ({ valueOf: function () { return 42 } } / 2 |
| // which is valid, but, in practice, developers don't divide object |
| // literals, so our heuristic works well for code like |
| // function () { ... } /foo/.test(x) && sideEffect(); |
| // The ')' punctuator can precede a regular expression as in |
| // if (b) /foo/.test(x) && ... |
| // but this is much less likely than |
| // (a + b) / c |
| case '}': |
| return jsCtxRegexp |
| default: |
| // Look for an IdentifierName and see if it is a keyword that |
| // can precede a regular expression. |
| j := n |
| for j > 0 && isJSIdentPart(rune(s[j-1])) { |
| j-- |
| } |
| if regexpPrecederKeywords[string(s[j:])] { |
| return jsCtxRegexp |
| } |
| } |
| // Otherwise is a punctuator not listed above, or |
| // a string which precedes a div op, or an identifier |
| // which precedes a div op. |
| return jsCtxDivOp |
| } |
| |
// regexpPrecederKeywords is the set of reserved JS keywords after which a
// regular expression literal may appear in JS source. Membership is tested
// with a plain index expression; absent words yield false.
var regexpPrecederKeywords = map[string]bool{
	// Keywords that transfer control.
	"break":    true,
	"continue": true,
	"return":   true,
	"throw":    true,

	// Keywords that introduce or continue a statement.
	"case":    true,
	"do":      true,
	"else":    true,
	"finally": true,
	"try":     true,

	// Keyword operators.
	"delete":     true,
	"in":         true,
	"instanceof": true,
	"typeof":     true,
	"void":       true,
}
| |
| var jsonMarshalType = reflect.TypeFor[json.Marshaler]() |
| |
| // indirectToJSONMarshaler returns the value, after dereferencing as many times |
| // as necessary to reach the base type (or nil) or an implementation of json.Marshal. |
| func indirectToJSONMarshaler(a any) any { |
| // text/template now supports passing untyped nil as a func call |
| // argument, so we must support it. Otherwise we'd panic below, as one |
| // cannot call the Type or Interface methods on an invalid |
| // reflect.Value. See golang.org/issue/18716. |
| if a == nil { |
| return nil |
| } |
| |
| v := reflect.ValueOf(a) |
| for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { |
| v = v.Elem() |
| } |
| return v.Interface() |
| } |
| |
| // jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has |
| // neither side-effects nor free variables outside (NaN, Infinity). |
| func jsValEscaper(args ...any) string { |
| var a any |
| if len(args) == 1 { |
| a = indirectToJSONMarshaler(args[0]) |
| switch t := a.(type) { |
| case JS: |
| return string(t) |
| case JSStr: |
| // TODO: normalize quotes. |
| return `"` + string(t) + `"` |
| case json.Marshaler: |
| // Do not treat as a Stringer. |
| case fmt.Stringer: |
| a = t.String() |
| } |
| } else { |
| for i, arg := range args { |
| args[i] = indirectToJSONMarshaler(arg) |
| } |
| a = fmt.Sprint(args...) |
| } |
| // TODO: detect cycles before calling Marshal which loops infinitely on |
| // cyclic data. This may be an unacceptable DoS risk. |
| b, err := json.Marshal(a) |
| if err != nil { |
| // While the standard JSON marshaller does not include user controlled |
| // information in the error message, if a type has a MarshalJSON method, |
| // the content of the error message is not guaranteed. Since we insert |
| // the error into the template, as part of a comment, we attempt to |
| // prevent the error from either terminating the comment, or the script |
| // block itself. |
| // |
| // In particular we: |
| // * replace "*/" comment end tokens with "* /", which does not |
| // terminate the comment |
| // * replace "</script" with "\x3C/script", and "<!--" with |
| // "\x3C!--", which prevents confusing script block termination |
| // semantics |
| // |
| // We also put a space before the comment so that if it is flush against |
| // a division operator it is not turned into a line comment: |
| // x/{{y}} |
| // turning into |
| // x//* error marshaling y: |
| // second line of error message */null |
| errStr := err.Error() |
| errStr = strings.ReplaceAll(errStr, "*/", "* /") |
| errStr = strings.ReplaceAll(errStr, "</script", `\x3C/script`) |
| errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`) |
| return fmt.Sprintf(" /* %s */null ", errStr) |
| } |
| |
| // TODO: maybe post-process output to prevent it from containing |
| // "<!--", "-->", "<![CDATA[", "]]>", or "</script" |
| // in case custom marshalers produce output containing those. |
| // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper |
| // supports ld+json content-type. |
| if len(b) == 0 { |
| // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should |
| // not cause the output `x=y/*z`. |
| return " null " |
| } |
| first, _ := utf8.DecodeRune(b) |
| last, _ := utf8.DecodeLastRune(b) |
| var buf strings.Builder |
| // Prevent IdentifierNames and NumericLiterals from running into |
| // keywords: in, instanceof, typeof, void |
| pad := isJSIdentPart(first) || isJSIdentPart(last) |
| if pad { |
| buf.WriteByte(' ') |
| } |
| written := 0 |
| // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 |
| // so it falls within the subset of JSON which is valid JS. |
| for i := 0; i < len(b); { |
| rune, n := utf8.DecodeRune(b[i:]) |
| repl := "" |
| if rune == 0x2028 { |
| repl = `\u2028` |
| } else if rune == 0x2029 { |
| repl = `\u2029` |
| } |
| if repl != "" { |
| buf.Write(b[written:i]) |
| buf.WriteString(repl) |
| written = i + n |
| } |
| i += n |
| } |
| if buf.Len() != 0 { |
| buf.Write(b[written:]) |
| if pad { |
| buf.WriteByte(' ') |
| } |
| return buf.String() |
| } |
| return string(b) |
| } |
| |
| // jsStrEscaper produces a string that can be included between quotes in |
| // JavaScript source, in JavaScript embedded in an HTML5 <script> element, |
| // or in an HTML5 event handler attribute such as onclick. |
| func jsStrEscaper(args ...any) string { |
| s, t := stringify(args...) |
| if t == contentTypeJSStr { |
| return replace(s, jsStrNormReplacementTable) |
| } |
| return replace(s, jsStrReplacementTable) |
| } |
| |
| func jsTmplLitEscaper(args ...any) string { |
| s, _ := stringify(args...) |
| return replace(s, jsBqStrReplacementTable) |
| } |
| |
| // jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression |
| // specials so the result is treated literally when included in a regular |
| // expression literal. /foo{{.X}}bar/ matches the string "foo" followed by |
| // the literal text of {{.X}} followed by the string "bar". |
| func jsRegexpEscaper(args ...any) string { |
| s, _ := stringify(args...) |
| s = replace(s, jsRegexpReplacementTable) |
| if s == "" { |
| // /{{.X}}/ should not produce a line comment when .X == "". |
| return "(?:)" |
| } |
| return s |
| } |
| |
// replace returns s with each rune r substituted as follows:
//   - if r < len(lowUnicodeReplacementTable), by lowUnicodeReplacementTable[r];
//   - otherwise, if r < len(replacementTable) and replacementTable[r] is
//     not the empty string, by replacementTable[r];
//   - otherwise, if r is U+2028 or U+2029, by the raw string `\u2028` or
//     `\u2029` respectively.
//
// All other runes are copied unchanged. When nothing is substituted, s itself
// is returned.
func replace(s string, replacementTable []string) string {
	var out strings.Builder
	copied := 0 // s[:copied] has already been emitted to out
	for pos := 0; pos < len(s); {
		r, width := utf8.DecodeRuneInString(s[pos:])
		start := pos
		pos += width

		var esc string
		switch code := int(r); {
		case code < len(lowUnicodeReplacementTable):
			esc = lowUnicodeReplacementTable[code]
		case code < len(replacementTable) && replacementTable[code] != "":
			esc = replacementTable[code]
		case r == '\u2028':
			esc = `\u2028`
		case r == '\u2029':
			esc = `\u2029`
		default:
			// Left for the bulk copy of the next substitution or the tail.
			continue
		}

		// The first substitution always moves copied past zero, so this
		// reserves space exactly once.
		if copied == 0 {
			out.Grow(len(s))
		}
		out.WriteString(s[copied:start])
		out.WriteString(esc)
		copied = pos
	}
	if copied == 0 {
		return s
	}
	out.WriteString(s[copied:])
	return out.String()
}

// lowUnicodeReplacementTable maps each code point from U+0000 to U+001F to
// its escaped form.
var lowUnicodeReplacementTable = []string{
	0x00: `\u0000`, 0x01: `\u0001`, 0x02: `\u0002`, 0x03: `\u0003`,
	0x04: `\u0004`, 0x05: `\u0005`, 0x06: `\u0006`, 0x07: `\u0007`,
	0x08: `\u0008`,
	// Whitespace controls; all but '\v' use their short escape.
	'\t': `\t`,
	'\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`,
	'\r': `\r`,
	0x0e: `\u000e`, 0x0f: `\u000f`,
	0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
	0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`,
	0x18: `\u0018`, 0x19: `\u0019`, 0x1a: `\u001a`, 0x1b: `\u001b`,
	0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
}
| |
// jsStrReplacementTable is indexed by code point and holds the escape used
// for that character inside a quoted JS string. An empty entry means the
// table has no replacement for the character.
var jsStrReplacementTable = []string{
	// NUL and whitespace controls.
	0:    `\u0000`,
	'\t': `\t`, '\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`, '\r': `\r`,

	// HTML specials are encoded as hex so the output can be embedded in
	// HTML attributes without further encoding. Listed by code point.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'\\': `\\`,
	'`':  `\u0060`,
}
| |
// jsBqStrReplacementTable carries the same entries as jsStrReplacementTable
// and adds the characters that are special in JS template literals:
// $, {, and }.
var jsBqStrReplacementTable = []string{
	// NUL and whitespace controls.
	0:    `\u0000`,
	'\t': `\t`, '\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`, '\r': `\r`,

	// HTML specials are encoded as hex so the output can be embedded in
	// HTML attributes without further encoding. Listed by code point.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'\\': `\\`,
	'`':  `\u0060`,

	// Template literal specials.
	'$': `\u0024`,
	'{': `\u007b`,
	'}': `\u007d`,
}
| |
// jsStrNormReplacementTable mirrors jsStrReplacementTable except that it has
// no entry for `\`, so escape sequences already present in the input are not
// overencoded.
var jsStrNormReplacementTable = []string{
	// NUL and whitespace controls.
	0:    `\u0000`,
	'\t': `\t`, '\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`, '\r': `\r`,

	// HTML specials are encoded as hex so the output can be embedded in
	// HTML attributes without further encoding. Listed by code point.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'/':  `\/`,
	'<':  `\u003c`,
	'>':  `\u003e`,
	'`':  `\u0060`,
}
// jsRegexpReplacementTable is indexed by code point and holds the escape used
// for that character inside a JS regular expression literal. Besides the
// control characters and HTML specials it backslash-escapes the characters
// listed in its last group.
var jsRegexpReplacementTable = []string{
	// NUL and whitespace controls.
	0:    `\u0000`,
	'\t': `\t`, '\n': `\n`,
	'\v': `\u000b`, // "\v" == "v" on IE 6.
	'\f': `\f`, '\r': `\r`,

	// HTML specials are encoded as hex so the output can be embedded in
	// HTML attributes without further encoding.
	'"':  `\u0022`,
	'&':  `\u0026`,
	'\'': `\u0027`,
	'+':  `\u002b`,
	'<':  `\u003c`,
	'>':  `\u003e`,

	// Characters escaped with a backslash, listed by code point.
	'$': `\$`, '(': `\(`, ')': `\)`, '*': `\*`,
	'-': `\-`, '.': `\.`, '/': `\/`, '?': `\?`,
	'[': `\[`, '\\': `\\`, ']': `\]`, '^': `\^`,
	'{': `\{`, '|': `\|`, '}': `\}`,
}
| |
// isJSIdentPart reports whether r may appear inside a JS identifier.
// Only '$', '_', ASCII digits and ASCII letters are recognized: non-Latin
// letters, joiners, and combining marks are not handled, but every code
// point that can occur in a numeric literal or a keyword is.
func isJSIdentPart(r rune) bool {
	if r == '$' || r == '_' {
		return true
	}
	digit := '0' <= r && r <= '9'
	upper := 'A' <= r && r <= 'Z'
	lower := 'a' <= r && r <= 'z'
	return digit || upper || lower
}
| |
// isJSType reports whether the given MIME type should be considered
// JavaScript.
//
// It is used to determine whether a script tag with a type attribute is a
// javascript container. Everything from the first ';' onward is discarded,
// and the remainder is compared case-insensitively with surrounding white
// space removed.
func isJSType(mimeType string) bool {
	// per
	//   https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
	//   https://tools.ietf.org/html/rfc7231#section-3.1.1
	//   https://tools.ietf.org/html/rfc4329#section-3
	//   https://www.ietf.org/rfc/rfc4627.txt

	// Drop any parameters, then normalize case and spacing.
	essence, _, _ := strings.Cut(mimeType, ";")
	essence = strings.TrimSpace(strings.ToLower(essence))

	switch essence {
	case "module":
		return true
	case "application/ecmascript",
		"application/javascript",
		"application/json",
		"application/ld+json",
		"application/x-ecmascript",
		"application/x-javascript":
		return true
	case "text/ecmascript",
		"text/javascript",
		"text/javascript1.0",
		"text/javascript1.1",
		"text/javascript1.2",
		"text/javascript1.3",
		"text/javascript1.4",
		"text/javascript1.5",
		"text/jscript",
		"text/livescript",
		"text/x-ecmascript",
		"text/x-javascript":
		return true
	}
	return false
}