| // Copyright 2011 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package html |
| |
| import ( |
| "bufio" |
| "errors" |
| "fmt" |
| "io" |
| "strings" |
| ) |
| |
// writer is the set of output operations the renderer relies on: bulk byte
// writes, single-byte writes and string writes. Render wraps any io.Writer
// that does not provide all three in a bufio.Writer.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
| |
| // Render renders the parse tree n to the given writer. |
| // |
| // Rendering is done on a 'best effort' basis: calling Parse on the output of |
| // Render will always result in something similar to the original tree, but it |
| // is not necessarily an exact clone unless the original tree was 'well-formed'. |
| // 'Well-formed' is not easily specified; the HTML5 specification is |
| // complicated. |
| // |
| // Calling Parse on arbitrary input typically results in a 'well-formed' parse |
| // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. |
| // For example, in a 'well-formed' parse tree, no <a> element is a child of |
| // another <a> element: parsing "<a><a>" results in two sibling elements. |
| // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a |
| // <table> element: parsing "<p><table><a>" results in a <p> with two sibling |
| // children; the <a> is reparented to the <table>'s parent. However, calling |
| // Parse on "<a><table><a>" does not return an error, but the result has an <a> |
| // element with an <a> child, and is therefore not 'well-formed'. |
| // |
| // Programmatically constructed trees are typically also 'well-formed', but it |
| // is possible to construct a tree that looks innocuous but, when rendered and |
| // re-parsed, results in a different tree. A simple example is that a solitary |
| // text node would become a tree containing <html>, <head> and <body> elements. |
| // Another example is that the programmatic equivalent of "a<head>b</head>c" |
| // becomes "<html><head><head/><body>abc</body></html>". |
| func Render(w io.Writer, n *Node) error { |
| if x, ok := w.(writer); ok { |
| return render(x, n) |
| } |
| buf := bufio.NewWriter(w) |
| if err := render(buf, n); err != nil { |
| return err |
| } |
| return buf.Flush() |
| } |
| |
| // plaintextAbort is returned from render1 when a <plaintext> element |
| // has been rendered. No more end tags should be rendered after that. |
| var plaintextAbort = errors.New("html: internal error (plaintext abort)") |
| |
| func render(w writer, n *Node) error { |
| err := render1(w, n) |
| if err == plaintextAbort { |
| err = nil |
| } |
| return err |
| } |
| |
| func render1(w writer, n *Node) error { |
| // Render non-element nodes; these are the easy cases. |
| switch n.Type { |
| case ErrorNode: |
| return errors.New("html: cannot render an ErrorNode node") |
| case TextNode: |
| return escape(w, n.Data) |
| case DocumentNode: |
| for c := n.FirstChild; c != nil; c = c.NextSibling { |
| if err := render1(w, c); err != nil { |
| return err |
| } |
| } |
| return nil |
| case ElementNode: |
| // No-op. |
| case CommentNode: |
| if _, err := w.WriteString("<!--"); err != nil { |
| return err |
| } |
| if err := escapeComment(w, n.Data); err != nil { |
| return err |
| } |
| if _, err := w.WriteString("-->"); err != nil { |
| return err |
| } |
| return nil |
| case DoctypeNode: |
| if _, err := w.WriteString("<!DOCTYPE "); err != nil { |
| return err |
| } |
| if err := escape(w, n.Data); err != nil { |
| return err |
| } |
| if n.Attr != nil { |
| var p, s string |
| for _, a := range n.Attr { |
| switch a.Key { |
| case "public": |
| p = a.Val |
| case "system": |
| s = a.Val |
| } |
| } |
| if p != "" { |
| if _, err := w.WriteString(" PUBLIC "); err != nil { |
| return err |
| } |
| if err := writeQuoted(w, p); err != nil { |
| return err |
| } |
| if s != "" { |
| if err := w.WriteByte(' '); err != nil { |
| return err |
| } |
| if err := writeQuoted(w, s); err != nil { |
| return err |
| } |
| } |
| } else if s != "" { |
| if _, err := w.WriteString(" SYSTEM "); err != nil { |
| return err |
| } |
| if err := writeQuoted(w, s); err != nil { |
| return err |
| } |
| } |
| } |
| return w.WriteByte('>') |
| case RawNode: |
| _, err := w.WriteString(n.Data) |
| return err |
| default: |
| return errors.New("html: unknown node type") |
| } |
| |
| // Render the <xxx> opening tag. |
| if err := w.WriteByte('<'); err != nil { |
| return err |
| } |
| if _, err := w.WriteString(n.Data); err != nil { |
| return err |
| } |
| for _, a := range n.Attr { |
| if err := w.WriteByte(' '); err != nil { |
| return err |
| } |
| if a.Namespace != "" { |
| if _, err := w.WriteString(a.Namespace); err != nil { |
| return err |
| } |
| if err := w.WriteByte(':'); err != nil { |
| return err |
| } |
| } |
| if _, err := w.WriteString(a.Key); err != nil { |
| return err |
| } |
| if _, err := w.WriteString(`="`); err != nil { |
| return err |
| } |
| if err := escape(w, a.Val); err != nil { |
| return err |
| } |
| if err := w.WriteByte('"'); err != nil { |
| return err |
| } |
| } |
| if voidElements[n.Data] { |
| if n.FirstChild != nil { |
| return fmt.Errorf("html: void element <%s> has child nodes", n.Data) |
| } |
| _, err := w.WriteString("/>") |
| return err |
| } |
| if err := w.WriteByte('>'); err != nil { |
| return err |
| } |
| |
| // Add initial newline where there is danger of a newline beging ignored. |
| if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { |
| switch n.Data { |
| case "pre", "listing", "textarea": |
| if err := w.WriteByte('\n'); err != nil { |
| return err |
| } |
| } |
| } |
| |
| // Render any child nodes |
| if childTextNodesAreLiteral(n) { |
| for c := n.FirstChild; c != nil; c = c.NextSibling { |
| if c.Type == TextNode { |
| if _, err := w.WriteString(c.Data); err != nil { |
| return err |
| } |
| } else { |
| if err := render1(w, c); err != nil { |
| return err |
| } |
| } |
| } |
| if n.Data == "plaintext" { |
| // Don't render anything else. <plaintext> must be the |
| // last element in the file, with no closing tag. |
| return plaintextAbort |
| } |
| } else { |
| for c := n.FirstChild; c != nil; c = c.NextSibling { |
| if err := render1(w, c); err != nil { |
| return err |
| } |
| } |
| } |
| |
| // Render the </xxx> closing tag. |
| if _, err := w.WriteString("</"); err != nil { |
| return err |
| } |
| if _, err := w.WriteString(n.Data); err != nil { |
| return err |
| } |
| return w.WriteByte('>') |
| } |
| |
| func childTextNodesAreLiteral(n *Node) bool { |
| // Per WHATWG HTML 13.3, if the parent of the current node is a style, |
| // script, xmp, iframe, noembed, noframes, or plaintext element, and the |
| // current node is a text node, append the value of the node's data |
| // literally. The specification is not explicit about it, but we only |
| // enforce this if we are in the HTML namespace (i.e. when the namespace is |
| // ""). |
| // NOTE: we also always include noscript elements, although the |
| // specification states that they should only be rendered as such if |
| // scripting is enabled for the node (which is not something we track). |
| if n.Namespace != "" { |
| return false |
| } |
| switch n.Data { |
| case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": |
| return true |
| default: |
| return false |
| } |
| } |
| |
| // writeQuoted writes s to w surrounded by quotes. Normally it will use double |
| // quotes, but if s contains a double quote, it will use single quotes. |
| // It is used for writing the identifiers in a doctype declaration. |
| // In valid HTML, they can't contain both types of quotes. |
| func writeQuoted(w writer, s string) error { |
| var q byte = '"' |
| if strings.Contains(s, `"`) { |
| q = '\'' |
| } |
| if err := w.WriteByte(q); err != nil { |
| return err |
| } |
| if _, err := w.WriteString(s); err != nil { |
| return err |
| } |
| if err := w.WriteByte(q); err != nil { |
| return err |
| } |
| return nil |
| } |
| |
// voidElements is the set of void elements, i.e. elements that can't have any
// contents, as listed in Section 12.1.2, "Elements". Names that are absent
// from the map look up as false.
var voidElements = func() map[string]bool {
	names := []string{
		"area",
		"base",
		"br",
		"col",
		"embed",
		"hr",
		"img",
		"input",
		"keygen", // "keygen" has been removed from the spec, but is kept here for backwards compatibility.
		"link",
		"meta",
		"param",
		"source",
		"track",
		"wbr",
	}
	set := make(map[string]bool, len(names))
	for _, name := range names {
		set[name] = true
	}
	return set
}()