html: ignore templates nested within foreign content

Fixes #46288
Fixes CVE-2021-33194

Change-Id: I2fe39702de8e9aab29965c1526e377a6f9cdf056
Reviewed-on: https://go-review.googlesource.com/c/net/+/311090
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Run-TryBot: Filippo Valsorda <filippo@golang.org>
Trust: Roland Shoemaker <roland@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
diff --git a/html/parse.go b/html/parse.go
index f91466f..038941d 100644
--- a/html/parse.go
+++ b/html/parse.go
@@ -663,6 +663,24 @@
 			// Ignore the token.
 			return true
 		case a.Template:
+			// TODO: remove this divergence from the HTML5 spec.
+			//
+			// We don't handle all of the corner cases when mixing foreign
+			// content (i.e. <math> or <svg>) with <template>. Without this
+			// early return, we can get into an infinite loop, possibly because
+			// of the "TODO... further divergence" a little below.
+			//
+			// As a workaround, if we are mixing foreign content and templates,
+			// just ignore the rest of the HTML. Foreign content is rare and a
+			// relatively old HTML feature. Templates are also rare and a
+			// relatively new HTML feature. Their combination is very rare.
+			for _, e := range p.oe {
+				if e.Namespace != "" {
+					p.im = ignoreTheRemainingTokens
+					return true
+				}
+			}
+
 			p.addElement()
 			p.afe = append(p.afe, &scopeMarker)
 			p.framesetOK = false
@@ -683,7 +701,7 @@
 			if !p.oe.contains(a.Template) {
 				return true
 			}
-			// TODO: remove this divergence from the HTML5 spec.
+			// TODO: remove this further divergence from the HTML5 spec.
 			//
 			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 			p.generateImpliedEndTags()
@@ -2127,6 +2145,10 @@
 	return true
 }
 
+func ignoreTheRemainingTokens(p *parser) bool {
+	return true
+}
+
 const whitespaceOrNUL = whitespace + "\x00"
 
 // Section 12.2.6.5
diff --git a/html/parse_test.go b/html/parse_test.go
index 58dce5f..019333d 100644
--- a/html/parse_test.go
+++ b/html/parse_test.go
@@ -267,6 +267,9 @@
 				if err != nil {
 					t.Fatal(err)
 				}
+				if parseTestBlacklist[ta.text] {
+					continue
+				}
 
 				err = testParseCase(ta.text, ta.want, ta.context, ParseOptionEnableScripting(ta.scripting))
 
@@ -379,6 +382,14 @@
 	return nil
 }
 
+// Some test inputs are skipped entirely, because the parser would otherwise
+// fail the test on them. We blacklist such inputs from the parse test.
+var parseTestBlacklist = map[string]bool{
+	// See the a.Template TODO in inHeadIM.
+	`<math><template><mo><template>`:                                     true,
+	`<template><svg><foo><template><foreignObject><div></template><div>`: true,
+}
+
 // Some test input result in parse trees are not 'well-formed' despite
 // following the HTML5 recovery algorithms. Rendering and re-parsing such a
 // tree will not result in an exact clone of that tree. We blacklist such
@@ -454,6 +465,17 @@
 	ParseFragment(strings.NewReader("<p>hello</p>"), nil)
 }
 
+func TestParseFragmentForeignContentTemplates(t *testing.T) {
+	srcs := []string{
+		"<math><html><template><mn><template></template></template>",
+		"<math><math><head><mi><template>",
+	}
+	for _, src := range srcs {
+		// The next line shouldn't infinite-loop.
+		ParseFragment(strings.NewReader(src), nil)
+	}
+}
+
 func BenchmarkParser(b *testing.B) {
 	buf, err := ioutil.ReadFile("testdata/go1.html")
 	if err != nil {