html: skip tests for behavior outside the parsing algorithm

This also updates webkit/tests18.dat to latest.

Change-Id: I4ed37e918a7db63afd8d515dd3a2494699cc5b74
Reviewed-on: https://go-review.googlesource.com/c/net/+/264977
Trust: Kunpei Sakai <namusyaka@gmail.com>
Trust: Nigel Tao <nigeltao@golang.org>
Run-TryBot: Kunpei Sakai <namusyaka@gmail.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/html/parse_test.go b/html/parse_test.go
index f864503..f1eba07 100644
--- a/html/parse_test.go
+++ b/html/parse_test.go
@@ -289,6 +289,10 @@
 // text is the HTML to be parsed, want is a dump of the correct parse tree,
 // and context is the name of the context node, if any.
 func testParseCase(text, want, context string, opts ...ParseOption) (err error) {
+	if parserTestBlacklist[text] {
+		return nil
+	}
+
 	defer func() {
 		if x := recover(); x != nil {
 			switch e := x.(type) {
@@ -366,6 +370,18 @@
 	return nil
 }
 
+// Some html5lib-tests cases exercise behavior that lies outside the WHATWG
+// parsing algorithm and is therefore out of scope for Go's parser. Inputs
+// listed here are skipped by testParseCase, which returns nil for them.
+var parserTestBlacklist = map[string]bool{
+	// Even if there is a <plaintext> tag inside a <select> tag, the tokenizer
+	// should not go into the PLAINTEXT state, but that rule is not stated in
+	// the parsing algorithm itself.
+	// See: https://github.com/whatwg/html/issues/2252
+	`<!doctype html><select><plaintext></plaintext>X`:      true,
+	`<!doctype html><table><select><plaintext>a<caption>b`: true,
+}
+
 // Some test input result in parse trees are not 'well-formed' despite
 // following the HTML5 recovery algorithms. Rendering and re-parsing such a
 // tree will not result in an exact clone of that tree. We blacklist such
@@ -418,8 +434,10 @@
 	`<script><!--<script </s`:                      true,
 	// Reconstructing the active formatting elements results in a <plaintext>
 	// element that contains an <a> element.
-	`<!doctype html><p><a><plaintext>b`:         true,
-	`<table><math><select><mi><select></table>`: true,
+	`<!doctype html><p><a><plaintext>b`:                       true,
+	`<table><math><select><mi><select></table>`:               true,
+	`<!doctype html><table><colgroup><plaintext></plaintext>`: true,
+	`<!doctype html><svg><plaintext>a</plaintext>b`:           true,
 }
 
 func TestNodeConsistency(t *testing.T) {
diff --git a/html/testdata/webkit/tests18.dat b/html/testdata/webkit/tests18.dat
index 926bccb..05363b3 100644
--- a/html/testdata/webkit/tests18.dat
+++ b/html/testdata/webkit/tests18.dat
@@ -1,4 +1,17 @@
 #data
+<plaintext></plaintext>
+#errors
+11: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
+23: End of file seen and there were open elements.
+11: Unclosed element “plaintext”.
+#document
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
 <!doctype html><plaintext></plaintext>
 #errors
 (1,38): expected-closing-tag-but-got-eof
@@ -11,20 +24,77 @@
 |       "</plaintext>"
 
 #data
+<!doctype html><html><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><head><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><html><noscript><plaintext></plaintext>
+#errors
+42: Bad start tag in “plaintext” in “head”.
+54: End of file seen and there were open elements.
+42: Unclosed element “plaintext”.
+#script-off
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <noscript>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html></head><plaintext></plaintext>
+#errors
+45: End of file seen and there were open elements.
+33: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><body><plaintext></plaintext>
+#errors
+44: End of file seen and there were open elements.
+32: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
 <!doctype html><table><plaintext></plaintext>
 #errors
 (1,33): foster-parenting-start-tag
-(1,34): foster-parenting-character
-(1,35): foster-parenting-character
-(1,36): foster-parenting-character
-(1,37): foster-parenting-character
-(1,38): foster-parenting-character
-(1,39): foster-parenting-character
-(1,40): foster-parenting-character
-(1,41): foster-parenting-character
-(1,42): foster-parenting-character
-(1,43): foster-parenting-character
-(1,44): foster-parenting-character
 (1,45): foster-parenting-character
 (1,45): eof-in-table
 #document
@@ -41,17 +111,6 @@
 #errors
 (1,40): foster-parenting-start-tag
 (1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
-(1,41): foster-parenting-character
 (1,52): eof-in-table
 #document
 | <!DOCTYPE html>
@@ -67,17 +126,6 @@
 <!doctype html><table><tbody><tr><plaintext></plaintext>
 #errors
 (1,44): foster-parenting-start-tag
-(1,45): foster-parenting-character
-(1,46): foster-parenting-character
-(1,47): foster-parenting-character
-(1,48): foster-parenting-character
-(1,49): foster-parenting-character
-(1,50): foster-parenting-character
-(1,51): foster-parenting-character
-(1,52): foster-parenting-character
-(1,53): foster-parenting-character
-(1,54): foster-parenting-character
-(1,55): foster-parenting-character
 (1,56): foster-parenting-character
 (1,56): eof-in-table
 #document
@@ -123,6 +171,170 @@
 |           "</plaintext>"
 
 #data
+<!doctype html><table><colgroup><plaintext></plaintext>
+#errors
+43: Start tag “plaintext” seen in “table”.
+55: Misplaced non-space characters inside a table.
+55: End of file seen and there were open elements.
+43: Unclosed element “plaintext”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+|     <table>
+|       <colgroup>
+
+#data
+<!doctype html><select><plaintext></plaintext>X
+#errors
+34: Stray start tag “plaintext”.
+46: Stray end tag “plaintext”.
+47: End of file seen and there were open elements.
+23: Unclosed element “select”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "X"
+
+#data
+<!doctype html><table><select><plaintext>a<caption>b
+#errors
+30: Start tag “select” seen in “table”.
+41: Stray start tag “plaintext”.
+51: “caption” start tag with “select” open.
+52: End of file seen and there were open elements.
+51: Unclosed element “caption”.
+22: Unclosed element “table”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <select>
+|       "a"
+|     <table>
+|       <caption>
+|         "b"
+
+#data
+<!doctype html><template><plaintext>a</template>b
+#errors
+49: End of file seen and there were open elements.
+36: Unclosed element “plaintext”.
+25: Unclosed element “template”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|     <template>
+|       content
+|         <plaintext>
+|           "a</template>b"
+|   <body>
+
+#data
+<!doctype html><body></body><plaintext></plaintext>
+#errors
+39: Stray start tag “plaintext”.
+51: End of file seen and there were open elements.
+39: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><frameset><plaintext></plaintext>
+#errors
+36: Stray start tag “plaintext”.
+48: Stray end tag “plaintext”.
+48: End of file seen and there were open elements.
+25: Unclosed element “frameset”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><frameset></frameset><plaintext></plaintext>
+#errors
+47: Stray start tag “plaintext”.
+59: Stray end tag “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><body></body></html><plaintext></plaintext>
+#errors
+46: Stray start tag “plaintext”.
+58: End of file seen and there were open elements.
+46: Unclosed element “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <plaintext>
+|       "</plaintext>"
+
+#data
+<!doctype html><frameset></frameset></html><plaintext></plaintext>
+#errors
+54: Stray start tag “plaintext”.
+66: Stray end tag “plaintext”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <frameset>
+
+#data
+<!doctype html><svg><plaintext>a</plaintext>b
+#errors
+45: End of file seen and there were open elements.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg plaintext>
+|         "a"
+|       "b"
+
+#data
+<!doctype html><svg><title><plaintext>a</plaintext>b
+#errors
+52: End of file seen and there were open elements.
+38: Unclosed element “plaintext”.
+27: Unclosed element “title”.
+20: Unclosed element “svg”.
+#document
+| <!DOCTYPE html>
+| <html>
+|   <head>
+|   <body>
+|     <svg svg>
+|       <svg title>
+|         <plaintext>
+|           "a</plaintext>b"
+
+#data
 <!doctype html><table><tr><style></script></style>abc
 #errors
 (1,51): foster-parenting-character