blob: 497e132042b01fa0eb6062c7e8d3faf3478664d3 [file] [log] [blame]
Nigel Taoea127e82013-02-11 11:55:20 +11001// Copyright 2011 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package html
6
7import (
8 "bufio"
9 "errors"
10 "fmt"
11 "io"
12 "strings"
13)
14
// writer is the set of write methods the renderer relies on: bulk writes,
// single-byte writes and string writes. Render uses a destination directly
// when it already satisfies this interface and wraps it in a bufio.Writer
// otherwise.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
20
21// Render renders the parse tree n to the given writer.
22//
23// Rendering is done on a 'best effort' basis: calling Parse on the output of
24// Render will always result in something similar to the original tree, but it
25// is not necessarily an exact clone unless the original tree was 'well-formed'.
26// 'Well-formed' is not easily specified; the HTML5 specification is
27// complicated.
28//
29// Calling Parse on arbitrary input typically results in a 'well-formed' parse
30// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
31// For example, in a 'well-formed' parse tree, no <a> element is a child of
32// another <a> element: parsing "<a><a>" results in two sibling elements.
33// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
34// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
35// children; the <a> is reparented to the <table>'s parent. However, calling
36// Parse on "<a><table><a>" does not return an error, but the result has an <a>
37// element with an <a> child, and is therefore not 'well-formed'.
38//
39// Programmatically constructed trees are typically also 'well-formed', but it
40// is possible to construct a tree that looks innocuous but, when rendered and
41// re-parsed, results in a different tree. A simple example is that a solitary
42// text node would become a tree containing <html>, <head> and <body> elements.
43// Another example is that the programmatic equivalent of "a<head>b</head>c"
44// becomes "<html><head><head/><body>abc</body></html>".
45func Render(w io.Writer, n *Node) error {
46 if x, ok := w.(writer); ok {
47 return render(x, n)
48 }
49 buf := bufio.NewWriter(w)
50 if err := render(buf, n); err != nil {
51 return err
52 }
53 return buf.Flush()
54}
55
// plaintextAbort is a sentinel returned from render1 once a <plaintext>
// element has been rendered. It propagates up through the recursion so that
// no more end tags are written, and render turns it back into a nil error.
var plaintextAbort error = errors.New("html: internal error (plaintext abort)")
59
60func render(w writer, n *Node) error {
61 err := render1(w, n)
62 if err == plaintextAbort {
63 err = nil
64 }
65 return err
66}
67
68func render1(w writer, n *Node) error {
69 // Render non-element nodes; these are the easy cases.
70 switch n.Type {
71 case ErrorNode:
72 return errors.New("html: cannot render an ErrorNode node")
73 case TextNode:
74 return escape(w, n.Data)
75 case DocumentNode:
76 for c := n.FirstChild; c != nil; c = c.NextSibling {
77 if err := render1(w, c); err != nil {
78 return err
79 }
80 }
81 return nil
82 case ElementNode:
83 // No-op.
84 case CommentNode:
85 if _, err := w.WriteString("<!--"); err != nil {
86 return err
87 }
Nigel Tao06994582022-07-24 21:17:08 +100088 if err := escape(w, n.Data); err != nil {
Nigel Taoea127e82013-02-11 11:55:20 +110089 return err
90 }
91 if _, err := w.WriteString("-->"); err != nil {
92 return err
93 }
94 return nil
95 case DoctypeNode:
96 if _, err := w.WriteString("<!DOCTYPE "); err != nil {
97 return err
98 }
Nigel Tao06994582022-07-24 21:17:08 +100099 if err := escape(w, n.Data); err != nil {
Nigel Taoea127e82013-02-11 11:55:20 +1100100 return err
101 }
102 if n.Attr != nil {
103 var p, s string
104 for _, a := range n.Attr {
105 switch a.Key {
106 case "public":
107 p = a.Val
108 case "system":
109 s = a.Val
110 }
111 }
112 if p != "" {
113 if _, err := w.WriteString(" PUBLIC "); err != nil {
114 return err
115 }
116 if err := writeQuoted(w, p); err != nil {
117 return err
118 }
119 if s != "" {
120 if err := w.WriteByte(' '); err != nil {
121 return err
122 }
123 if err := writeQuoted(w, s); err != nil {
124 return err
125 }
126 }
127 } else if s != "" {
128 if _, err := w.WriteString(" SYSTEM "); err != nil {
129 return err
130 }
131 if err := writeQuoted(w, s); err != nil {
132 return err
133 }
134 }
135 }
136 return w.WriteByte('>')
Nigel Tao16171242020-01-30 21:58:50 +1100137 case RawNode:
138 _, err := w.WriteString(n.Data)
139 return err
Nigel Taoea127e82013-02-11 11:55:20 +1100140 default:
141 return errors.New("html: unknown node type")
142 }
143
144 // Render the <xxx> opening tag.
145 if err := w.WriteByte('<'); err != nil {
146 return err
147 }
148 if _, err := w.WriteString(n.Data); err != nil {
149 return err
150 }
151 for _, a := range n.Attr {
152 if err := w.WriteByte(' '); err != nil {
153 return err
154 }
155 if a.Namespace != "" {
156 if _, err := w.WriteString(a.Namespace); err != nil {
157 return err
158 }
159 if err := w.WriteByte(':'); err != nil {
160 return err
161 }
162 }
163 if _, err := w.WriteString(a.Key); err != nil {
164 return err
165 }
166 if _, err := w.WriteString(`="`); err != nil {
167 return err
168 }
169 if err := escape(w, a.Val); err != nil {
170 return err
171 }
172 if err := w.WriteByte('"'); err != nil {
173 return err
174 }
175 }
176 if voidElements[n.Data] {
177 if n.FirstChild != nil {
178 return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
179 }
180 _, err := w.WriteString("/>")
181 return err
182 }
183 if err := w.WriteByte('>'); err != nil {
184 return err
185 }
186
187 // Add initial newline where there is danger of a newline beging ignored.
188 if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
189 switch n.Data {
190 case "pre", "listing", "textarea":
191 if err := w.WriteByte('\n'); err != nil {
192 return err
193 }
194 }
195 }
196
197 // Render any child nodes.
198 switch n.Data {
199 case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
200 for c := n.FirstChild; c != nil; c = c.NextSibling {
201 if c.Type == TextNode {
202 if _, err := w.WriteString(c.Data); err != nil {
203 return err
204 }
205 } else {
206 if err := render1(w, c); err != nil {
207 return err
208 }
209 }
210 }
211 if n.Data == "plaintext" {
212 // Don't render anything else. <plaintext> must be the
213 // last element in the file, with no closing tag.
214 return plaintextAbort
215 }
216 default:
217 for c := n.FirstChild; c != nil; c = c.NextSibling {
218 if err := render1(w, c); err != nil {
219 return err
220 }
221 }
222 }
223
224 // Render the </xxx> closing tag.
225 if _, err := w.WriteString("</"); err != nil {
226 return err
227 }
228 if _, err := w.WriteString(n.Data); err != nil {
229 return err
230 }
231 return w.WriteByte('>')
232}
233
234// writeQuoted writes s to w surrounded by quotes. Normally it will use double
235// quotes, but if s contains a double quote, it will use single quotes.
236// It is used for writing the identifiers in a doctype declaration.
237// In valid HTML, they can't contain both types of quotes.
238func writeQuoted(w writer, s string) error {
239 var q byte = '"'
240 if strings.Contains(s, `"`) {
241 q = '\''
242 }
243 if err := w.WriteByte(q); err != nil {
244 return err
245 }
246 if _, err := w.WriteString(s); err != nil {
247 return err
248 }
249 if err := w.WriteByte(q); err != nil {
250 return err
251 }
252 return nil
253}
254
// voidElements is the set of void elements, i.e. those that can't have any
// contents, as listed in Section 12.1.2, "Elements". Look-ups are by element
// name; a name that is absent yields false.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true,
	"embed": true, "hr": true, "img": true, "input": true,
	// "keygen" has been removed from the spec, but is kept here for
	// backwards compatibility.
	"keygen": true,
	"link": true, "meta": true, "param": true, "source": true,
	"track": true, "wbr": true,
}