Mike Samuel | 4670d9e | 2011-09-09 07:18:20 +1000 | [diff] [blame] | 1 | // Copyright 2011 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
Rob Pike | 6ab6c49 | 2011-11-08 15:38:47 -0800 | [diff] [blame] | 5 | package template |
Mike Samuel | 4670d9e | 2011-09-09 07:18:20 +1000 | [diff] [blame] | 6 | |
| 7 | import ( |
| 8 | "bytes" |
| 9 | "fmt" |
Mike Samuel | 967d68c | 2011-09-23 09:25:10 -0700 | [diff] [blame] | 10 | "strings" |
Rob Pike | 6ab6c49 | 2011-11-08 15:38:47 -0800 | [diff] [blame] | 11 | "unicode/utf8" |
Mike Samuel | 4670d9e | 2011-09-09 07:18:20 +1000 | [diff] [blame] | 12 | ) |
| 13 | |
| 14 | // htmlNospaceEscaper escapes for inclusion in unquoted attribute values. |
| 15 | func htmlNospaceEscaper(args ...interface{}) string { |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 16 | s, t := stringify(args...) |
| 17 | if t == contentTypeHTML { |
| 18 | return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false) |
| 19 | } |
| 20 | return htmlReplacer(s, htmlNospaceReplacementTable, false) |
| 21 | } |
Mike Samuel | 4670d9e | 2011-09-09 07:18:20 +1000 | [diff] [blame] | 22 | |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 23 | // attrEscaper escapes for inclusion in quoted attribute values. |
| 24 | func attrEscaper(args ...interface{}) string { |
| 25 | s, t := stringify(args...) |
| 26 | if t == contentTypeHTML { |
| 27 | return htmlReplacer(stripTags(s), htmlNormReplacementTable, true) |
| 28 | } |
| 29 | return htmlReplacer(s, htmlReplacementTable, true) |
| 30 | } |
Mike Samuel | 4670d9e | 2011-09-09 07:18:20 +1000 | [diff] [blame] | 31 | |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 32 | // rcdataEscaper escapes for inclusion in an RCDATA element body. |
| 33 | func rcdataEscaper(args ...interface{}) string { |
| 34 | s, t := stringify(args...) |
| 35 | if t == contentTypeHTML { |
| 36 | return htmlReplacer(s, htmlNormReplacementTable, true) |
| 37 | } |
| 38 | return htmlReplacer(s, htmlReplacementTable, true) |
| 39 | } |
| 40 | |
| 41 | // htmlEscaper escapes for inclusion in HTML text. |
| 42 | func htmlEscaper(args ...interface{}) string { |
| 43 | s, t := stringify(args...) |
| 44 | if t == contentTypeHTML { |
| 45 | return s |
| 46 | } |
| 47 | return htmlReplacer(s, htmlReplacementTable, true) |
| 48 | } |
| 49 | |
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
// The table is indexed by rune value; an empty entry means the rune is
// emitted as is.
var htmlReplacementTable = []string{
	// NUL is replaced rather than escaped. Per
	// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	// "U+0000 NULL: Parse error. Append a U+FFFD REPLACEMENT
	// CHARACTER character to the current attribute's value."
	// and similarly
	// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	0:    "\uFFFD",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
| 67 | |
// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
// avoid over-encoding existing entities.
// The table is indexed by rune value; an empty entry means the rune is
// emitted as is.
var htmlNormReplacementTable = []string{
	0:    "\uFFFD",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
| 78 | |
// htmlNospaceReplacementTable contains the runes that need to be escaped
// inside an unquoted attribute value.
// The table is indexed by rune value; an empty entry means the rune is
// emitted as is.
// The set of runes escaped is the union of the HTML specials and
// those determined by running the JS below in browsers:
// <div id=d></div>
// <script>(function () {
// var a = [], d = document.getElementById("d"), i, c, s;
// for (i = 0; i < 0x10000; ++i) {
//   c = String.fromCharCode(i);
//   d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//   s = d.getElementsByTagName("SPAN")[0];
//   if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
// }
// document.write(a.join(", "));
// })()</script>
var htmlNospaceReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
| 114 | |
// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
// without '&' to avoid over-encoding existing entities.
// The table is indexed by rune value; an empty entry means the rune is
// emitted as is.
var htmlNospaceNormReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
| 136 | |
// htmlReplacer returns s with runes replaced according to replacementTable.
//
// replacementTable is indexed by rune value; a rune whose entry is non-empty
// is replaced by that entry, and a rune whose entry is empty is copied as is.
// Runes beyond the end of the table are copied as is when badRunes is true.
// When badRunes is false, runes in U+FDD0..U+FDEF and U+FFF0..U+FFFF are
// written as hexadecimal character references instead, because IE does not
// allow these ranges in unquoted attributes.
//
// If nothing needs replacing, s itself is returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	var b bytes.Buffer
	written := 0 // s[:written] has already been copied into b.
	// 'for range s' cannot be used here: the width of each rune in the input
	// must be preserved. After a decoding error the input width is not
	// utf8.RuneLen(r), and trusting it would overrun the buffer.
	for i, w := 0, 0; i < len(s); i += w {
		var r rune
		r, w = utf8.DecodeRuneInString(s[i:])

		repl := ""
		switch {
		case int(r) < len(replacementTable):
			repl = replacementTable[r]
		case badRunes:
			// Bad runes are allowed through unescaped.
		case 0xfdd0 <= r && r <= 0xfdef, 0xfff0 <= r && r <= 0xffff:
			repl = fmt.Sprintf("&#x%x;", r)
		}
		if repl == "" {
			continue
		}
		b.WriteString(s[written:i])
		b.WriteString(repl)
		written = i + w
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 167 | |
| 168 | // stripTags takes a snippet of HTML and returns only the text content. |
| 169 | // For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `. |
| 170 | func stripTags(html string) string { |
| 171 | var b bytes.Buffer |
Mike Samuel | 1f577d2 | 2011-09-21 19:04:41 -0700 | [diff] [blame] | 172 | s, c, i, allText := []byte(html), context{}, 0, true |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 173 | // Using the transition funcs helps us avoid mangling |
| 174 | // `<div title="1>2">` or `I <3 Ponies!`. |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 175 | for i != len(s) { |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 176 | if c.delim == delimNone { |
Mike Samuel | 1f577d2 | 2011-09-21 19:04:41 -0700 | [diff] [blame] | 177 | st := c.state |
| 178 | // Use RCDATA instead of parsing into JS or CSS styles. |
| 179 | if c.element != elementNone && !isInTag(st) { |
| 180 | st = stateRCDATA |
| 181 | } |
| 182 | d, nread := transitionFunc[st](c, s[i:]) |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 183 | i1 := i + nread |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 184 | if c.state == stateText || c.state == stateRCDATA { |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 185 | // Emit text up to the start of the tag or comment. |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 186 | j := i1 |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 187 | if d.state != c.state { |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 188 | for j1 := j - 1; j1 >= i; j1-- { |
| 189 | if s[j1] == '<' { |
| 190 | j = j1 |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 191 | break |
| 192 | } |
| 193 | } |
| 194 | } |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 195 | b.Write(s[i:j]) |
Mike Samuel | 1f577d2 | 2011-09-21 19:04:41 -0700 | [diff] [blame] | 196 | } else { |
| 197 | allText = false |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 198 | } |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 199 | c, i = d, i1 |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 200 | continue |
| 201 | } |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 202 | i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) |
| 203 | if i1 < i { |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 204 | break |
| 205 | } |
| 206 | if c.delim != delimSpaceOrTagEnd { |
| 207 | // Consume any quote. |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 208 | i1++ |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 209 | } |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 210 | c, i = context{state: stateTag, element: c.element}, i1 |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 211 | } |
Mike Samuel | 1f577d2 | 2011-09-21 19:04:41 -0700 | [diff] [blame] | 212 | if allText { |
| 213 | return html |
| 214 | } else if c.state == stateText || c.state == stateRCDATA { |
Mike Samuel | 3a013f1 | 2011-09-19 20:52:14 -0700 | [diff] [blame] | 215 | b.Write(s[i:]) |
Mike Samuel | ce008f8 | 2011-09-15 08:51:55 -0700 | [diff] [blame] | 216 | } |
| 217 | return b.String() |
| 218 | } |
Mike Samuel | b4e1ca2 | 2011-09-18 19:10:15 -0700 | [diff] [blame] | 219 | |
| 220 | // htmlNameFilter accepts valid parts of an HTML attribute or tag name or |
| 221 | // a known-safe HTML attribute. |
| 222 | func htmlNameFilter(args ...interface{}) string { |
| 223 | s, t := stringify(args...) |
| 224 | if t == contentTypeHTMLAttr { |
| 225 | return s |
| 226 | } |
Mike Samuel | 967d68c | 2011-09-23 09:25:10 -0700 | [diff] [blame] | 227 | if len(s) == 0 { |
| 228 | // Avoid violation of structure preservation. |
| 229 | // <input checked {{.K}}={{.V}}>. |
| 230 | // Without this, if .K is empty then .V is the value of |
| 231 | // checked, but otherwise .V is the value of the attribute |
| 232 | // named .K. |
| 233 | return filterFailsafe |
| 234 | } |
| 235 | s = strings.ToLower(s) |
Mike Samuel | f17e3d2 | 2011-09-28 14:07:48 -0700 | [diff] [blame] | 236 | if t := attrType(s); t != contentTypePlain { |
Mike Samuel | 967d68c | 2011-09-23 09:25:10 -0700 | [diff] [blame] | 237 | // TODO: Split attr and element name part filters so we can whitelist |
| 238 | // attributes. |
| 239 | return filterFailsafe |
| 240 | } |
Mike Samuel | b4e1ca2 | 2011-09-18 19:10:15 -0700 | [diff] [blame] | 241 | for _, r := range s { |
| 242 | switch { |
| 243 | case '0' <= r && r <= '9': |
Mike Samuel | b4e1ca2 | 2011-09-18 19:10:15 -0700 | [diff] [blame] | 244 | case 'a' <= r && r <= 'z': |
| 245 | default: |
| 246 | return filterFailsafe |
| 247 | } |
| 248 | } |
| 249 | return s |
| 250 | } |
Mike Samuel | 8bc5ef6 | 2011-09-19 19:52:31 -0700 | [diff] [blame] | 251 | |
// commentEscaper returns the empty string regardless of input.
// Comment content does not correspond to any parsed structure or
// human-readable content, so the simplest and most secure policy is to drop
// content interpolated into comments.
// This approach is equally valid whether or not static comment content is
// removed from the template.
func commentEscaper(args ...interface{}) string {
	return ""
}