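html: handle end-of-file cases correctly

In inTableIM and inSelectIM, an ErrorToken (end of file) no longer
stops parsing immediately; it is now handled by inBodyIM. The
column-group handling gains the same ErrorToken case.

The two code paths that pop the stack of open elements down to a
template element no longer use popUntil(defaultScope, a.Template).
They now call generateImpliedEndTags and then truncate the stack at
the nearest template element with an empty Namespace. One of these
paths is now an explicit ErrorToken case. The package-local helper
generateAllImpliedEndTags is removed.

Add template.dat cases for nested templates that reach end of file
inside table-related elements, select and frame.

Updates golang/go#23071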
Change-Id: I02a61109b5738759a9ee3e448981778de7d0ff62
Reviewed-on: https://go-review.googlesource.com/130795
Run-TryBot: Kunpei Sakai <namusyaka@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Nigel Tao <nigeltao@golang.org>
diff --git a/html/parse.go b/html/parse.go
index 1489ff9..bbe0116 100644
--- a/html/parse.go
+++ b/html/parse.go
@@ -209,27 +209,6 @@
p.oe = p.oe[:i+1]
}
-// generateAllImpliedEndTags pops nodes off the stack of open elements as long as
-// the top node has a tag name of caption, colgroup, dd, div, dt, li, optgroup, option, p, rb,
-// rp, rt, rtc, span, tbody, td, tfoot, th, thead or tr.
-func (p *parser) generateAllImpliedEndTags() {
- var i int
- for i = len(p.oe) - 1; i >= 0; i-- {
- n := p.oe[i]
- if n.Type == ElementNode {
- switch n.DataAtom {
- // TODO: remove this divergence from the HTML5 spec
- case a.Caption, a.Colgroup, a.Dd, a.Div, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb,
- a.Rp, a.Rt, a.Rtc, a.Span, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
- continue
- }
- }
- break
- }
-
- p.oe = p.oe[:i+1]
-}
-
// addChild adds a child node n to the top element, and pushes n onto the stack
// of open elements if it is an element node.
func (p *parser) addChild(n *Node) {
@@ -679,8 +658,16 @@
if !p.oe.contains(a.Template) {
return true
}
- p.generateAllImpliedEndTags()
- p.popUntil(defaultScope, a.Template)
+ // TODO: remove this divergence from the HTML5 spec.
+ //
+ // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
+ p.generateImpliedEndTags()
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
+ p.oe = p.oe[:i]
+ break
+ }
+ }
p.clearActiveFormattingElements()
p.templateStack.pop()
p.resetInsertionMode()
@@ -1342,9 +1329,6 @@
// Section 12.2.6.4.9.
func inTableIM(p *parser) bool {
switch p.tok.Type {
- case ErrorToken:
- // Stop parsing.
- return true
case TextToken:
p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
switch p.oe.top().DataAtom {
@@ -1439,6 +1423,8 @@
case DoctypeToken:
// Ignore the token.
return true
+ case ErrorToken:
+ return inBodyIM(p)
}
p.fosterParenting = true
@@ -1541,6 +1527,8 @@
case a.Template:
return inHeadIM(p)
}
+ case ErrorToken:
+ return inBodyIM(p)
}
if p.oe.top().DataAtom != a.Colgroup {
return true
@@ -1705,9 +1693,6 @@
// Section 12.2.6.4.16.
func inSelectIM(p *parser) bool {
switch p.tok.Type {
- case ErrorToken:
- // Stop parsing.
- return true
case TextToken:
p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
case StartTagToken:
@@ -1771,6 +1756,8 @@
case DoctypeToken:
// Ignore the token.
return true
+ case ErrorToken:
+ return inBodyIM(p)
}
return true
@@ -1837,15 +1824,26 @@
// Ignore the token.
return true
}
+ case ErrorToken:
+ if !p.oe.contains(a.Template) {
+ // Ignore the token.
+ return true
+ }
+ // TODO: remove this divergence from the HTML5 spec.
+ //
+ // See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
+ p.generateImpliedEndTags()
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
+ p.oe = p.oe[:i]
+ break
+ }
+ }
+ p.clearActiveFormattingElements()
+ p.templateStack.pop()
+ p.resetInsertionMode()
+ return false
}
- if !p.oe.contains(a.Template) {
- // Ignore the token.
- return true
- }
- p.popUntil(defaultScope, a.Template)
- p.clearActiveFormattingElements()
- p.templateStack.pop()
- p.resetInsertionMode()
return false
}
diff --git a/html/testdata/webkit/template.dat b/html/testdata/webkit/template.dat
index f130961..641419c 100644
--- a/html/testdata/webkit/template.dat
+++ b/html/testdata/webkit/template.dat
@@ -1026,6 +1026,123 @@
| <body>
#data
+<template><template><table>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <table>
+| <body>
+
+#data
+<template><template><tbody>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <tbody>
+| <body>
+
+#data
+<template><template><tr>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <tr>
+| <body>
+
+#data
+<template><template><td>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <td>
+| <body>
+
+#data
+<template><template><caption>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <caption>
+| <body>
+
+#data
+<template><template><colgroup>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <colgroup>
+| <body>
+
+#data
+<template><template><col>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <col>
+| <body>
+
+#data
+<template><template><tbody><select>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <tbody>
+| <select>
+| <body>
+
+#data
+<template><template><frame>
+#errors
+#document
+| <html>
+| <head>
+| <template>
+| content
+| <template>
+| content
+| <body>
+
+#data
<template><template><script>var i
#errors
#document