Blame - src/html/template/html.go - go

blob: de4aa4abb26efcc415adc45ddc0f2312aedb09ac [file] [log] [blame]

Mike Samuel	4670d9e	2011-09-09 07:18:20 +1000	[diff] [blame]	1	// Copyright 2011 The Go Authors. All rights reserved.
				2	// Use of this source code is governed by a BSD-style
				3	// license that can be found in the LICENSE file.
				4
Rob Pike	6ab6c49	2011-11-08 15:38:47 -0800	[diff] [blame]	5	package template
Mike Samuel	4670d9e	2011-09-09 07:18:20 +1000	[diff] [blame]	6
				7	import (
				8	"bytes"
				9	"fmt"
Mike Samuel	967d68c	2011-09-23 09:25:10 -0700	[diff] [blame]	10	"strings"
Rob Pike	6ab6c49	2011-11-08 15:38:47 -0800	[diff] [blame]	11	"unicode/utf8"
Mike Samuel	4670d9e	2011-09-09 07:18:20 +1000	[diff] [blame]	12	)
				13
				14	// htmlNospaceEscaper escapes for inclusion in unquoted attribute values.
				15	func htmlNospaceEscaper(args ...interface{}) string {
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	16	s, t := stringify(args...)
				17	if t == contentTypeHTML {
				18	return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false)
				19	}
				20	return htmlReplacer(s, htmlNospaceReplacementTable, false)
				21	}
Mike Samuel	4670d9e	2011-09-09 07:18:20 +1000	[diff] [blame]	22
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	23	// attrEscaper escapes for inclusion in quoted attribute values.
				24	func attrEscaper(args ...interface{}) string {
				25	s, t := stringify(args...)
				26	if t == contentTypeHTML {
				27	return htmlReplacer(stripTags(s), htmlNormReplacementTable, true)
				28	}
				29	return htmlReplacer(s, htmlReplacementTable, true)
				30	}
Mike Samuel	4670d9e	2011-09-09 07:18:20 +1000	[diff] [blame]	31
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	32	// rcdataEscaper escapes for inclusion in an RCDATA element body.
				33	func rcdataEscaper(args ...interface{}) string {
				34	s, t := stringify(args...)
				35	if t == contentTypeHTML {
				36	return htmlReplacer(s, htmlNormReplacementTable, true)
				37	}
				38	return htmlReplacer(s, htmlReplacementTable, true)
				39	}
				40
				41	// htmlEscaper escapes for inclusion in HTML text.
				42	func htmlEscaper(args ...interface{}) string {
				43	s, t := stringify(args...)
				44	if t == contentTypeHTML {
				45	return s
				46	}
				47	return htmlReplacer(s, htmlReplacementTable, true)
				48	}
				49
				50	// htmlReplacementTable contains the runes that need to be escaped
				51	// inside a quoted attribute value or in a text node.
				52	var htmlReplacementTable = []string{
Shenghou Ma	16b9550	2014-04-01 02:57:51 -0400	[diff] [blame]	53	// http://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	54	// U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT
				55	// CHARACTER character to the current attribute's value.
				56	// "
				57	// and similarly
Shenghou Ma	16b9550	2014-04-01 02:57:51 -0400	[diff] [blame]	58	// http://www.w3.org/TR/html5/syntax.html#before-attribute-value-state
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	59	0: "\uFFFD",
				60	'"': """,
				61	'&': "&",
				62	'\'': "'",
				63	'+': "+",
				64	'<': "<",
				65	'>': ">",
				66	}
				67
				68	// htmlNormReplacementTable is like htmlReplacementTable but without '&' to
				69	// avoid over-encoding existing entities.
				70	var htmlNormReplacementTable = []string{
				71	0: "\uFFFD",
				72	'"': """,
				73	'\'': "'",
				74	'+': "+",
				75	'<': "<",
				76	'>': ">",
				77	}
				78
				79	// htmlNospaceReplacementTable contains the runes that need to be escaped
				80	// inside an unquoted attribute value.
				81	// The set of runes escaped is the union of the HTML specials and
				82	// those determined by running the JS below in browsers:
				83	// <div id=d></div>
				84	// <script>(function () {
				85	// var a = [], d = document.getElementById("d"), i, c, s;
				86	// for (i = 0; i < 0x10000; ++i) {
				87	// c = String.fromCharCode(i);
				88	// d.innerHTML = "<span title=" + c + "lt" + c + "></span>"
				89	// s = d.getElementsByTagName("SPAN")[0];
				90	// if (!s \|\| s.title !== c + "lt" + c) { a.push(i.toString(16)); }
				91	// }
				92	// document.write(a.join(", "));
				93	// })()</script>
				94	var htmlNospaceReplacementTable = []string{
				95	0: "�",
				96	'\t': " ",
				97	'\n': " ",
				98	'\v': "",
				99	'\f': "",
				100	'\r': " ",
				101	' ': " ",
				102	'"': """,
				103	'&': "&",
				104	'\'': "'",
				105	'+': "+",
				106	'<': "<",
				107	'=': "=",
				108	'>': ">",
Robert Griesemer	465b9c3	2012-10-30 13:38:01 -0700	[diff] [blame]	109	// A parse error in the attribute value (unquoted) and
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	110	// before attribute value states.
				111	// Treated as a quoting character by IE.
				112	'`': "`",
				113	}
				114
				115	// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but
				116	// without '&' to avoid over-encoding existing entities.
				117	var htmlNospaceNormReplacementTable = []string{
				118	0: "�",
				119	'\t': " ",
				120	'\n': " ",
				121	'\v': "",
				122	'\f': "",
				123	'\r': " ",
				124	' ': " ",
				125	'"': """,
				126	'\'': "'",
				127	'+': "+",
				128	'<': "<",
				129	'=': "=",
				130	'>': ">",
Robert Griesemer	465b9c3	2012-10-30 13:38:01 -0700	[diff] [blame]	131	// A parse error in the attribute value (unquoted) and
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	132	// before attribute value states.
				133	// Treated as a quoting character by IE.
				134	'`': "`",
				135	}
				136
// htmlReplacer returns s with runes replaced according to replacementTable
// and when badRunes is true, certain bad runes are allowed through unescaped.
//
// A rune r with int(r) < len(replacementTable) is replaced by
// replacementTable[r] when that entry is non-empty and copied unchanged
// otherwise. For runes beyond the table, nothing is done when badRunes is
// true; when badRunes is false, runes in U+FDD0..U+FDEF and U+FFF0..U+FFFF
// are written as hexadecimal numeric character references. A byte that is
// not valid UTF-8 decodes to utf8.RuneError (U+FFFD) with width 1 and is
// therefore covered by the second range.
//
// If no replacement was made, s itself is returned.
func htmlReplacer(s string, replacementTable []string, badRunes bool) string {
	written, b := 0, new(bytes.Buffer)
	r, w := rune(0), 0
	for i := 0; i < len(s); i += w {
		// Cannot use 'for range s' because we need to preserve the width
		// of the runes in the input. If we see a decoding error, the input
		// width will not be utf8.RuneLen(r) and we will overrun the buffer.
		r, w = utf8.DecodeRuneInString(s[i:])
		if int(r) < len(replacementTable) {
			if repl := replacementTable[r]; len(repl) != 0 {
				// Flush the untouched span, then the replacement.
				b.WriteString(s[written:i])
				b.WriteString(repl)
				written = i + w
			}
		} else if badRunes {
			// No-op.
		} else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff {
			// IE does not allow these ranges in unquoted attrs.
			fmt.Fprintf(b, "%s&#x%x;", s[written:i], r)
			written = i + w
		}
	}
	// written only advances when something was replaced, so zero means
	// the input can be returned without copying.
	if written == 0 {
		return s
	}
	b.WriteString(s[written:])
	return b.String()
}
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	167
				168	// stripTags takes a snippet of HTML and returns only the text content.
				169	// For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `.
				170	func stripTags(html string) string {
				171	var b bytes.Buffer
Mike Samuel	1f577d2	2011-09-21 19:04:41 -0700	[diff] [blame]	172	s, c, i, allText := []byte(html), context{}, 0, true
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	173	// Using the transition funcs helps us avoid mangling
				174	// `<div title="1>2">` or `I <3 Ponies!`.
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	175	for i != len(s) {
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	176	if c.delim == delimNone {
Mike Samuel	1f577d2	2011-09-21 19:04:41 -0700	[diff] [blame]	177	st := c.state
				178	// Use RCDATA instead of parsing into JS or CSS styles.
				179	if c.element != elementNone && !isInTag(st) {
				180	st = stateRCDATA
				181	}
				182	d, nread := transitionFunc[st](c, s[i:])
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	183	i1 := i + nread
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	184	if c.state == stateText \|\| c.state == stateRCDATA {
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	185	// Emit text up to the start of the tag or comment.
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	186	j := i1
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	187	if d.state != c.state {
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	188	for j1 := j - 1; j1 >= i; j1-- {
				189	if s[j1] == '<' {
				190	j = j1
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	191	break
				192	}
				193	}
				194	}
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	195	b.Write(s[i:j])
Mike Samuel	1f577d2	2011-09-21 19:04:41 -0700	[diff] [blame]	196	} else {
				197	allText = false
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	198	}
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	199	c, i = d, i1
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	200	continue
				201	}
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	202	i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim])
				203	if i1 < i {
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	204	break
				205	}
				206	if c.delim != delimSpaceOrTagEnd {
				207	// Consume any quote.
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	208	i1++
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	209	}
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	210	c, i = context{state: stateTag, element: c.element}, i1
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	211	}
Mike Samuel	1f577d2	2011-09-21 19:04:41 -0700	[diff] [blame]	212	if allText {
				213	return html
				214	} else if c.state == stateText \|\| c.state == stateRCDATA {
Mike Samuel	3a013f1	2011-09-19 20:52:14 -0700	[diff] [blame]	215	b.Write(s[i:])
Mike Samuel	ce008f8	2011-09-15 08:51:55 -0700	[diff] [blame]	216	}
				217	return b.String()
				218	}
Mike Samuel	b4e1ca2	2011-09-18 19:10:15 -0700	[diff] [blame]	219
				220	// htmlNameFilter accepts valid parts of an HTML attribute or tag name or
				221	// a known-safe HTML attribute.
				222	func htmlNameFilter(args ...interface{}) string {
				223	s, t := stringify(args...)
				224	if t == contentTypeHTMLAttr {
				225	return s
				226	}
Mike Samuel	967d68c	2011-09-23 09:25:10 -0700	[diff] [blame]	227	if len(s) == 0 {
				228	// Avoid violation of structure preservation.
				229	// <input checked {{.K}}={{.V}}>.
				230	// Without this, if .K is empty then .V is the value of
				231	// checked, but otherwise .V is the value of the attribute
				232	// named .K.
				233	return filterFailsafe
				234	}
				235	s = strings.ToLower(s)
Mike Samuel	f17e3d2	2011-09-28 14:07:48 -0700	[diff] [blame]	236	if t := attrType(s); t != contentTypePlain {
Mike Samuel	967d68c	2011-09-23 09:25:10 -0700	[diff] [blame]	237	// TODO: Split attr and element name part filters so we can whitelist
				238	// attributes.
				239	return filterFailsafe
				240	}
Mike Samuel	b4e1ca2	2011-09-18 19:10:15 -0700	[diff] [blame]	241	for _, r := range s {
				242	switch {
				243	case '0' <= r && r <= '9':
Mike Samuel	b4e1ca2	2011-09-18 19:10:15 -0700	[diff] [blame]	244	case 'a' <= r && r <= 'z':
				245	default:
				246	return filterFailsafe
				247	}
				248	}
				249	return s
				250	}
Mike Samuel	8bc5ef6	2011-09-19 19:52:31 -0700	[diff] [blame]	251
				252	// commentEscaper returns the empty string regardless of input.
				253	// Comment content does not correspond to any parsed structure or
				254	// human-readable content, so the simplest and most secure policy is to drop
				255	// content interpolated into comments.
				256	// This approach is equally valid whether or not static comment content is
				257	// removed from the template.
				258	func commentEscaper(args ...interface{}) string {
				259	return ""
				260	}