html: only render content literally in the HTML namespace

Per the WHATWG HTML specification, section 13.3, only append the literal
content of a text node if we are in the HTML namespace.

Thanks to Mohammad Thoriq Aziz for reporting this issue.

Fixes golang/go#61615
Fixes CVE-2023-3978

Change-Id: I332152904d4e7646bd2441602bcbe591fc655fa4
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1942896
Reviewed-by: Tatiana Bradley <tatianabradley@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-on: https://go-review.googlesource.com/c/net/+/514896
Reviewed-by: Roland Shoemaker <roland@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
diff --git a/html/render.go b/html/render.go
index 8b28031..e8c1233 100644
--- a/html/render.go
+++ b/html/render.go
@@ -194,9 +194,8 @@
 		}
 	}
 
-	// Render any child nodes.
-	switch n.Data {
-	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+	// Render any child nodes
+	if childTextNodesAreLiteral(n) {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			if c.Type == TextNode {
 				if _, err := w.WriteString(c.Data); err != nil {
@@ -213,7 +212,7 @@
 			// last element in the file, with no closing tag.
 			return plaintextAbort
 		}
-	default:
+	} else {
 		for c := n.FirstChild; c != nil; c = c.NextSibling {
 			if err := render1(w, c); err != nil {
 				return err
@@ -231,6 +230,27 @@
 	return w.WriteByte('>')
 }
 
+func childTextNodesAreLiteral(n *Node) bool {
+	// Reports whether text-node children of n are written without escaping.
+	// Per WHATWG HTML 13.3, text whose parent is a style, script, xmp,
+	// iframe, noembed, noframes, or plaintext element is appended literally.
+	// The specification is not explicit about it, but we only do this in
+	// the HTML namespace (i.e. when n.Namespace is ""); elements in foreign
+	// namespaces such as svg or math get their text escaped as usual.
+	// NOTE: we also always include noscript elements, although the
+	// specification states that they should only be rendered as such if
+	// scripting is enabled for the node (which is not something we track).
+	if n.Namespace != "" {
+		return false
+	}
+	switch n.Data {
+	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
+		return true
+	default:
+		return false
+	}
+}
+
 // writeQuoted writes s to w surrounded by quotes. Normally it will use double
 // quotes, but if s contains a double quote, it will use single quotes.
 // It is used for writing the identifiers in a doctype declaration.
diff --git a/html/render_test.go b/html/render_test.go
index 08e592b..22d0864 100644
--- a/html/render_test.go
+++ b/html/render_test.go
@@ -6,6 +6,8 @@
 
 import (
 	"bytes"
+	"fmt"
+	"strings"
 	"testing"
 )
 
@@ -108,16 +110,16 @@
 	// just commentary. The "0:" prefixes are for easy cross-reference with
 	// the nodes array.
 	treeAsText := [...]string{
-		0: `<html>`,
-		1: `.	<head>`,
-		2: `.	<body>`,
-		3: `.	.	"0&lt;1"`,
-		4: `.	.	<p id="A" foo="abc&#34;def">`,
-		5: `.	.	.	"2"`,
-		6: `.	.	.	<b empty="">`,
-		7: `.	.	.	.	"3"`,
-		8: `.	.	.	<i backslash="\">`,
-		9: `.	.	.	.	"&amp;4"`,
+		0:  `<html>`,
+		1:  `.	<head>`,
+		2:  `.	<body>`,
+		3:  `.	.	"0&lt;1"`,
+		4:  `.	.	<p id="A" foo="abc&#34;def">`,
+		5:  `.	.	.	"2"`,
+		6:  `.	.	.	<b empty="">`,
+		7:  `.	.	.	.	"3"`,
+		8:  `.	.	.	<i backslash="\">`,
+		9:  `.	.	.	.	"&amp;4"`,
 		10: `.	.	"5"`,
 		11: `.	.	<blockquote>`,
 		12: `.	.	<br>`,
@@ -169,3 +171,37 @@
 		t.Errorf("got vs want:\n%s\n%s\n", got, want)
 	}
 }
+
+func TestRenderTextNodes(t *testing.T) {
+	elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
+	for _, namespace := range []string{
+		"", // HTML namespace: elements are placed directly in <body>
+		"svg",
+		"math",
+	} {
+		for _, e := range elements {
+			var namespaceOpen, namespaceClose string
+			if namespace != "" {
+				namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
+			}
+			doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
+			n, err := Parse(strings.NewReader(doc))
+			if err != nil {
+				t.Fatal(err)
+			}
+			b := bytes.NewBuffer(nil)
+			if err := Render(b, n); err != nil {
+				t.Fatal(err)
+			}
+			// HTML-namespace text must round-trip as-is; under svg/math "&" is escaped.
+			expected := doc
+			if namespace != "" {
+				expected = strings.Replace(expected, "&", "&amp;", 1)
+			}
+
+			if b.String() != expected {
+				t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
+			}
+		}
+	}
+}