html: improve parsing of lists
Make a <li> tag close the previous <li> element.
Make a </ul> tag close <li> elements.
Pass tests1.dat, test 33:
<!DOCTYPE html><li>hello<li>world<ul>how<li>do</ul>you</body><!--do-->
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <li>
| "hello"
| <li>
| "world"
| <ul>
| "how"
| <li>
| "do"
| "you"
| <!-- do -->
R=nigeltao
CC=golang-dev
https://golang.org/cl/5321051
diff --git a/src/pkg/html/parse.go b/src/pkg/html/parse.go
index 292fbaf..530942a 100644
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -576,6 +576,24 @@
p.framesetOK = false
// TODO: detect <select> inside a table.
return inSelectIM, true
+ case "li":
+ p.framesetOK = false
+ for i := len(p.oe) - 1; i >= 0; i-- {
+ node := p.oe[i]
+ switch node.Data {
+ case "li":
+ p.popUntil(listItemScopeStopTags, "li")
+ case "address", "div", "p":
+ continue
+ default:
+ if !isSpecialElement[node.Data] {
+ continue
+ }
+ }
+ break
+ }
+ p.popUntil(buttonScopeStopTags, "p")
+ p.addElement("li", p.tok.Attr)
default:
// TODO.
p.addElement(p.tok.Data, p.tok.Attr)
@@ -592,6 +610,8 @@
p.popUntil(buttonScopeStopTags, "p")
case "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u":
p.inBodyEndTagFormatting(p.tok.Data)
+ case "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul":
+ p.popUntil(defaultScopeStopTags, p.tok.Data)
default:
p.inBodyEndTagOther(p.tok.Data)
}
diff --git a/src/pkg/html/parse_test.go b/src/pkg/html/parse_test.go
index 865a47d..b0ddd92 100644
--- a/src/pkg/html/parse_test.go
+++ b/src/pkg/html/parse_test.go
@@ -132,7 +132,7 @@
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
- for i := 0; i < 33; i++ {
+ for i := 0; i < 34; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {