// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
Rob Pike6ab6c492011-11-08 15:38:47 -08005package template
Mike Samuel4670d9e2011-09-09 07:18:20 +10006
7import (
8 "bytes"
9 "fmt"
Mike Samuel967d68c2011-09-23 09:25:10 -070010 "strings"
Rob Pike6ab6c492011-11-08 15:38:47 -080011 "unicode/utf8"
Mike Samuel4670d9e2011-09-09 07:18:20 +100012)
13
14// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
15func htmlNospaceEscaper(args ...interface{}) string {
Mike Samuelce008f82011-09-15 08:51:55 -070016 s, t := stringify(args...)
17 if t == contentTypeHTML {
18 return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
19 }
20 return htmlReplacer(s, htmlNospaceReplacementTable, false)
21}
Mike Samuel4670d9e2011-09-09 07:18:20 +100022
Mike Samuelce008f82011-09-15 08:51:55 -070023// attrEscaper escapes for inclusion in quoted attribute values.
24func attrEscaper(args ...interface{}) string {
25 s, t := stringify(args...)
26 if t == contentTypeHTML {
27 return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
28 }
29 return htmlReplacer(s, htmlReplacementTable, true)
30}
Mike Samuel4670d9e2011-09-09 07:18:20 +100031
Mike Samuelce008f82011-09-15 08:51:55 -070032// rcdataEscaper escapes for inclusion in an RCDATA element body.
33func rcdataEscaper(args ...interface{}) string {
34 s, t := stringify(args...)
35 if t == contentTypeHTML {
36 return htmlReplacer(s, htmlNormReplacementTable, true)
37 }
38 return htmlReplacer(s, htmlReplacementTable, true)
39}
40
41// htmlEscaper escapes for inclusion in HTML text.
42func htmlEscaper(args ...interface{}) string {
43 s, t := stringify(args...)
44 if t == contentTypeHTML {
45 return s
46 }
47 return htmlReplacer(s, htmlReplacementTable, true)
48}
49
// htmlReplacementTable contains the runes that need to be escaped
// inside a quoted attribute value or in a text node.
// The table is indexed by code point; an empty entry means the rune is
// passed through unchanged.
var htmlReplacementTable = []string{
	// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
	// CHARACTER character to the current attribute's value.
	// "
	// and similarly
	// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
	0:    "\uFFFD",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
67
// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
// avoid over-encoding existing entities.
// The table is indexed by code point; an empty entry means the rune is
// passed through unchanged.
var htmlNormReplacementTable = []string{
	0:    "\uFFFD",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'>':  "&gt;",
}
78
// htmlNospaceReplacementTable contains the runes that need to be escaped
// inside an unquoted attribute value.
// The table is indexed by code point; an empty entry means the rune is
// passed through unchanged.
// The set of runes escaped is the union of the HTML specials and
// those determined by running the JS below in browsers:
//
//	<div id=d></div>
//	<script>(function () {
//	var a = [], d = document.getElementById("d"), i, c, s;
//	for (i = 0; i < 0x10000; ++i) {
//	  c = String.fromCharCode(i);
//	  d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
//	  s = d.getElementsByTagName("SPAN")[0];
//	  if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); }
//	}
//	document.write(a.join(", "));
//	})()</script>
var htmlNospaceReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'&':  "&amp;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
114
// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
// without '&' to avoid over-encoding existing entities.
// The table is indexed by code point; an empty entry means the rune is
// passed through unchanged.
var htmlNospaceNormReplacementTable = []string{
	0:    "&#xfffd;",
	'\t': "&#9;",
	'\n': "&#10;",
	'\v': "&#11;",
	'\f': "&#12;",
	'\r': "&#13;",
	' ':  "&#32;",
	'"':  "&#34;",
	'\'': "&#39;",
	'+':  "&#43;",
	'<':  "&lt;",
	'=':  "&#61;",
	'>':  "&gt;",
	// A parse error in the attribute value (unquoted) and
	// before attribute value states.
	// Treated as a quoting character by IE.
	'`': "&#96;",
}
136
// htmlReplacer returns s with runes replaced according to replacementTable
// and when badRunes is true, certain bad runes are allowed through unescaped.
//
// A rune whose code point indexes a non-empty entry of replacementTable is
// replaced by that entry. Runes beyond the end of the table are copied
// through unchanged when badRunes is true. When badRunes is false, runes in
// U+FDD0..U+FDEF and U+FFF0..U+FFFF are written as hexadecimal character
// references such as "&#xfdd0;"; this includes U+FFFD, which is what an
// invalid UTF-8 byte decodes to. If nothing needs replacing, s itself is
// returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	var b bytes.Buffer
	// written is the number of leading bytes of s already copied (or
	// replaced) into b; it stays 0 until the first replacement.
	written := 0
	r, w := rune(0), 0
	for i := 0; i < len(s); i += w {
		// Cannot use 'for range s' because we need to preserve the width
		// of the runes in the input. If we see a decoding error, the input
		// width will not be utf8.RuneLen(r) and we will overrun the buffer.
		r, w = utf8.DecodeRuneInString(s[i:])
		switch {
		case int(r) < len(replacementTable):
			if repl := replacementTable[r]; len(repl) != 0 {
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		case badRunes:
			// No-op: runes outside the table are allowed through.
		case 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff:
			// IE does not allow these ranges in unquoted attrs.
			fmt.Fprintf(&b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
Mike Samuelce008f82011-09-15 08:51:55 -0700167
168// stripTags takes a snippet of HTML and returns only the text content.
169// For example, `<b>&iexcl;Hi!</b> <script>...</script>` -> `&iexcl;Hi! `.
170func stripTags(html string) string {
171 var b bytes.Buffer
Mike Samuel1f577d22011-09-21 19:04:41 -0700172 s, c, i, allText := []byte(html), context{}, 0, true
Mike Samuelce008f82011-09-15 08:51:55 -0700173 // Using the transition funcs helps us avoid mangling
174 // `<div title="1>2">` or `I <3 Ponies!`.
Mike Samuel3a013f12011-09-19 20:52:14 -0700175 for i != len(s) {
Mike Samuelce008f82011-09-15 08:51:55 -0700176 if c.delim == delimNone {
Mike Samuel1f577d22011-09-21 19:04:41 -0700177 st := c.state
178 // Use RCDATA instead of parsing into JS or CSS styles.
179 if c.element != elementNone && !isInTag(st) {
180 st = stateRCDATA
181 }
182 d, nread := transitionFunc[st](c, s[i:])
Mike Samuel3a013f12011-09-19 20:52:14 -0700183 i1 := i + nread
Mike Samuelce008f82011-09-15 08:51:55 -0700184 if c.state == stateText || c.state == stateRCDATA {
Mike Samuelce008f82011-09-15 08:51:55 -0700185 // Emit text up to the start of the tag or comment.
Mike Samuel3a013f12011-09-19 20:52:14 -0700186 j := i1
Mike Samuelce008f82011-09-15 08:51:55 -0700187 if d.state != c.state {
Mike Samuel3a013f12011-09-19 20:52:14 -0700188 for j1 := j - 1; j1 >= i; j1-- {
189 if s[j1] == '<' {
190 j = j1
Mike Samuelce008f82011-09-15 08:51:55 -0700191 break
192 }
193 }
194 }
Mike Samuel3a013f12011-09-19 20:52:14 -0700195 b.Write(s[i:j])
Mike Samuel1f577d22011-09-21 19:04:41 -0700196 } else {
197 allText = false
Mike Samuelce008f82011-09-15 08:51:55 -0700198 }
Mike Samuel3a013f12011-09-19 20:52:14 -0700199 c, i = d, i1
Mike Samuelce008f82011-09-15 08:51:55 -0700200 continue
201 }
Mike Samuel3a013f12011-09-19 20:52:14 -0700202 i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
203 if i1 < i {
Mike Samuelce008f82011-09-15 08:51:55 -0700204 break
205 }
206 if c.delim != delimSpaceOrTagEnd {
207 // Consume any quote.
Mike Samuel3a013f12011-09-19 20:52:14 -0700208 i1++
Mike Samuelce008f82011-09-15 08:51:55 -0700209 }
Mike Samuel3a013f12011-09-19 20:52:14 -0700210 c, i = context{state: stateTag, element: c.element}, i1
Mike Samuelce008f82011-09-15 08:51:55 -0700211 }
Mike Samuel1f577d22011-09-21 19:04:41 -0700212 if allText {
213 return html
214 } else if c.state == stateText || c.state == stateRCDATA {
Mike Samuel3a013f12011-09-19 20:52:14 -0700215 b.Write(s[i:])
Mike Samuelce008f82011-09-15 08:51:55 -0700216 }
217 return b.String()
218}
Mike Samuelb4e1ca22011-09-18 19:10:15 -0700219
220// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
221// a known-safe HTML attribute.
222func htmlNameFilter(args ...interface{}) string {
223 s, t := stringify(args...)
224 if t == contentTypeHTMLAttr {
225 return s
226 }
Mike Samuel967d68c2011-09-23 09:25:10 -0700227 if len(s) == 0 {
228 // Avoid violation of structure preservation.
229 // <input checked {{.K}}={{.V}}>.
230 // Without this, if .K is empty then .V is the value of
231 // checked, but otherwise .V is the value of the attribute
232 // named .K.
233 return filterFailsafe
234 }
235 s = strings.ToLower(s)
Mike Samuelf17e3d22011-09-28 14:07:48 -0700236 if t := attrType(s); t != contentTypePlain {
Mike Samuel967d68c2011-09-23 09:25:10 -0700237 // TODO: Split attr and element name part filters so we can whitelist
238 // attributes.
239 return filterFailsafe
240 }
Mike Samuelb4e1ca22011-09-18 19:10:15 -0700241 for _, r := range s {
242 switch {
243 case '0' <= r && r <= '9':
Mike Samuelb4e1ca22011-09-18 19:10:15 -0700244 case 'a' <= r && r <= 'z':
245 default:
246 return filterFailsafe
247 }
248 }
249 return s
250}
Mike Samuel8bc5ef62011-09-19 19:52:31 -0700251
// commentEscaper returns the empty string regardless of input.
// Comment content does not correspond to any parsed structure or
// human-readable content, so the simplest and most secure policy is to drop
// content interpolated into comments.
// This approach is equally valid whether or not static comment content is
// removed from the template.
func commentEscaper(args ...interface{}) string {
	return ""
}