html: avoid raw text mode for raw tags that are ignored in the "in select" insertion mode

This follows up on https://golang.org/cl/264977

Change-Id: I5d0e2f39173a8bbd07ca53de4df2a7e8772d4197
Reviewed-on: https://go-review.googlesource.com/c/net/+/265960
Trust: Kunpei Sakai <namusyaka@gmail.com>
Trust: Nigel Tao <nigeltao@golang.org>
Run-TryBot: Kunpei Sakai <namusyaka@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/html/parse.go b/html/parse.go
index 2cd12fc..392327a 100644
--- a/html/parse.go
+++ b/html/parse.go
@@ -1790,6 +1790,13 @@
 			return true
 		case a.Script, a.Template:
 			return inHeadIM(p)
+		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
+			// Don't let the tokenizer go into raw text mode when there are raw tags
+			// to be ignored. The parser drops these start tags here, so the
+			// tokenizer must keep treating what follows as ordinary markup.
+			p.tokenizer.NextIsNotRawText()
+			// Ignore the token.
+			return true
 		}
 	case EndTagToken:
 		switch p.tok.DataAtom {
diff --git a/html/parse_test.go b/html/parse_test.go
index f1eba07..333dd59 100644
--- a/html/parse_test.go
+++ b/html/parse_test.go
@@ -289,10 +289,6 @@
 // text is the HTML to be parsed, want is a dump of the correct parse tree,
 // and context is the name of the context node, if any.
 func testParseCase(text, want, context string, opts ...ParseOption) (err error) {
-	if parserTestBlacklist[text] {
-		return nil
-	}
-
 	defer func() {
 		if x := recover(); x != nil {
 			switch e := x.(type) {
@@ -370,18 +366,6 @@
 	return nil
 }
 
-// Some tests of html5lib-tests are beyond the scope of the parsing algorithm
-// and are out of scope for the go's parser. The items listed here are limited
-// to testing for behavior outside the whatwg parsing algorithm.
-var parserTestBlacklist = map[string]bool{
-	// Even if there is a <plaintext> tag inside a <select> tag, the tokenizer
-	// should not go into the PLAINTEXT state, but it is not mentioned in the
-	// parsing algorithm.
-	// See: https://github.com/whatwg/html/issues/2252
-	`<!doctype html><select><plaintext></plaintext>X`:      true,
-	`<!doctype html><table><select><plaintext>a<caption>b`: true,
-}
-
 // Some test input result in parse trees are not 'well-formed' despite
 // following the HTML5 recovery algorithms. Rendering and re-parsing such a
 // tree will not result in an exact clone of that tree. We blacklist such
diff --git a/html/testdata/go/raw_tags_to_be_ignored.dat b/html/testdata/go/raw_tags_to_be_ignored.dat
new file mode 100644
index 0000000..50bac59
--- /dev/null
+++ b/html/testdata/go/raw_tags_to_be_ignored.dat
@@ -0,0 +1,97 @@
+#data
+<!doctype html><table><select><iframe>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><noembed>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><noframes>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><noscript>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><style>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><title>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><table><select><xmp>a<caption>b
+#errors
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"