html: only render content literally in the HTML namespace
Per the WHATWG HTML specification, section 13.3, only append the literal
content of a text node if we are in the HTML namespace.
Thanks to Mohammad Thoriq Aziz for reporting this issue.
Fixes golang/go#61615
Fixes CVE-2023-3978
Change-Id: I332152904d4e7646bd2441602bcbe591fc655fa4
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1942896
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-on: https://go-review.googlesource.com/c/net/+/514896
Reviewed-by: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
diff --git a/html/render.go b/html/render.go
index 8b28031..e8c1233 100644
--- a/html/render.go
+++ b/html/render.go
@@ -194,9 +194,8 @@
}
}
- // Render any child nodes.
- switch n.Data {
- case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+ // Render any child nodes
+ if childTextNodesAreLiteral(n) {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
@@ -213,7 +212,7 @@
// last element in the file, with no closing tag.
return plaintextAbort
}
- default:
+ } else {
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
@@ -231,6 +230,27 @@
return w.WriteByte('>')
}
+func childTextNodesAreLiteral(n *Node) bool {
+ // Per WHATWG HTML 13.3, if the parent of the current node is a style,
+ // script, xmp, iframe, noembed, noframes, or plaintext element, and the
+ // current node is a text node, append the value of the node's data
+ // literally. The specification is not explicit about it, but we only
+ // enforce this if we are in the HTML namespace (i.e. when the namespace is
+ // "").
+ // NOTE: we also always include noscript elements, although the
+ // specification states that they should only be rendered as such if
+ // scripting is enabled for the node (which is not something we track).
+ if n.Namespace != "" {
+ return false
+ }
+ switch n.Data {
+ case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+ return true
+ default:
+ return false
+ }
+}
+
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
diff --git a/html/render_test.go b/html/render_test.go
index 08e592b..22d0864 100644
--- a/html/render_test.go
+++ b/html/render_test.go
@@ -6,6 +6,8 @@
import (
"bytes"
+ "fmt"
+ "strings"
"testing"
)
@@ -108,16 +110,16 @@
// just commentary. The "0:" prefixes are for easy cross-reference with
// the nodes array.
treeAsText := [...]string{
- 0: `<html>`,
- 1: `. <head>`,
- 2: `. <body>`,
- 3: `. . "0<1"`,
- 4: `. . <p id="A" foo="abc"def">`,
- 5: `. . . "2"`,
- 6: `. . . <b empty="">`,
- 7: `. . . . "3"`,
- 8: `. . . <i backslash="\">`,
- 9: `. . . . "&4"`,
+ 0: `<html>`,
+ 1: `. <head>`,
+ 2: `. <body>`,
+ 3: `. . "0<1"`,
+ 4: `. . <p id="A" foo="abc"def">`,
+ 5: `. . . "2"`,
+ 6: `. . . <b empty="">`,
+ 7: `. . . . "3"`,
+ 8: `. . . <i backslash="\">`,
+ 9: `. . . . "&4"`,
10: `. . "5"`,
11: `. . <blockquote>`,
12: `. . <br>`,
@@ -169,3 +171,37 @@
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}
+
+func TestRenderTextNodes(t *testing.T) {
+ elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
+ for _, namespace := range []string{
+ "", // html
+ "svg",
+ "math",
+ } {
+ for _, e := range elements {
+ var namespaceOpen, namespaceClose string
+ if namespace != "" {
+ namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
+ }
+ doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
+ n, err := Parse(strings.NewReader(doc))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := bytes.NewBuffer(nil)
+ if err := Render(b, n); err != nil {
+ t.Fatal(err)
+ }
+
+ expected := doc
+ if namespace != "" {
+ expected = strings.Replace(expected, "&", "&amp;", 1)
+ }
+
+ if b.String() != expected {
+ t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
+ }
+ }
+ }
+}