internal/testing/htmlcheck: replace use of cascadia in In
This cl implements the :nth-child() and :nth-of-type() pseudoclasses
for the css selector query function, allowing it to be used for
htmlcheck.In.
It also replaces the single use of a a child combinator '>' in a test
with a descendant combinator ' ' so that we don't need to implement
the child combinator. The test should have the same behavior.
For golang/go#61399
Change-Id: I09d9b8fbcd0eafd37aceb5994515687c85244ef8
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/542058
kokoro-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/testing/htmlcheck/htmlcheck.go b/internal/testing/htmlcheck/htmlcheck.go
index 46cdf87..4accdb5 100644
--- a/internal/testing/htmlcheck/htmlcheck.go
+++ b/internal/testing/htmlcheck/htmlcheck.go
@@ -12,7 +12,6 @@
"regexp"
"strings"
- "github.com/andybalholm/cascadia"
"golang.org/x/net/html"
)
@@ -38,15 +37,9 @@
//
// A nil Checker is valid and always succeeds.
func In(selector string, checkers ...Checker) Checker {
- sel := mustParseCascadiaSelector(selector)
+ sel := mustParseSelector(selector)
return func(n *html.Node) error {
- var m *html.Node
- // cascadia.Query does not test against its argument node.
- if sel.Match(n) {
- m = n
- } else {
- m = cascadia.Query(n, sel)
- }
+ m := query(n, sel)
if m == nil {
return fmt.Errorf("no element matches selector %q", selector)
}
@@ -84,19 +77,6 @@
return nil
}
-// mustParseCascadiaSelector parses the given CSS selector. An empty string
-// is treated as "*" (match everything).
-func mustParseCascadiaSelector(s string) cascadia.Sel {
- if s == "" {
- s = "*"
- }
- sel, err := cascadia.Parse(s)
- if err != nil {
- panic(fmt.Sprintf("parsing %q: %v", s, err))
- }
- return sel
-}
-
// mustParseSelector parses the given CSS selector. An empty string
// is treated as matching everything.
func mustParseSelector(s string) *selector {
diff --git a/internal/testing/htmlcheck/query.go b/internal/testing/htmlcheck/query.go
index 1e5a671..75a196d 100644
--- a/internal/testing/htmlcheck/query.go
+++ b/internal/testing/htmlcheck/query.go
@@ -7,6 +7,7 @@
import (
"errors"
"fmt"
+ "strconv"
"strings"
"golang.org/x/net/html"
@@ -122,13 +123,62 @@
}
sel.next = next
return sel, nil
+ case s[0] == ':':
+ atom, rest, err := parsePseudoClass(s)
+ if err != nil {
+ return nil, err
+ }
+ sel.atoms = append(sel.atoms, atom)
+ s = rest
default:
- return nil, fmt.Errorf("unexpected character %q in input", s[0])
+ return nil, fmt.Errorf("unexpected character '%v' in input", string(s[0]))
}
}
return sel, nil
}
+// parsePseudoClass parses a :nth-child(n) or :nth-of-type(n). It only supports those functions and
+// number arguments to those functions, not even, odd, or an+b.
+func parsePseudoClass(s string) (selectorAtom, string, error) {
+ if s[0] != ':' {
+ return nil, "", errors.New("expected ':' at beginning of pseudo class")
+ }
+ ident, rest := consumeIdentifier(s[1:])
+ if len(ident) == 0 {
+ return nil, "", errors.New("expected identifier after : in pseudo class")
+ }
+ if ident != "nth-of-type" && ident != "nth-child" {
+ return nil, "", errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported")
+ }
+ s = rest
+ if len(s) == 0 || s[0] != '(' {
+ return nil, "", errors.New("expected '(' after :nth-of-type or nth-child")
+ }
+ numstr, rest := consumeNumber(s[1:])
+ if len(numstr) == 0 {
+ return nil, "", errors.New("only number arguments are supported for :nth-of-type() or :nth-child()")
+ }
+ num, err := strconv.Atoi(numstr)
+ if err != nil {
+ // This shouldn't ever happen because consumeNumber should only return valid numbers and we
+ // check that the length is greater than 0.
+ panic(fmt.Errorf("unexpected parse error for number: %v", err))
+ }
+ s = rest
+ if len(s) == 0 || s[0] != ')' {
+ return nil, "", errors.New("expected ')' after number argument to nth-of-type or nth-child")
+ }
+ rest = s[1:]
+ switch ident {
+ case "nth-of-type":
+ return &nthOfType{num}, rest, nil
+ case "nth-child":
+ return &nthChild{num}, rest, nil
+ default:
+ panic("we should only have allowed nth-of-type or nth-child up to this point")
+ }
+}
+
// parseAttributeSelector parses an attribute selector of the form [attribute-name="attribute=value"]
func parseAttributeSelector(s string) (*attributeSelector, string, error) {
if s[0] != '[' {
@@ -192,6 +242,15 @@
return s[:i], s[i:]
}
+// consumeNumber consumes and returns a (0-9)+ number at the beginning
+// of the given string, and the rest of the string.
+func consumeNumber(s string) (letters, rest string) {
+ i := 0
+ for ; i < len(s) && isNumber(s[i]); i++ {
+ }
+ return s[:i], s[i:]
+}
+
func isAscii(s string) bool {
for i := 0; i < len(s); i++ {
if s[i] > 127 {
@@ -268,3 +327,65 @@
}
return false
}
+
+// nthOfType implements the :nth-of-type() pseudoclass.
+type nthOfType struct {
+ n int
+}
+
+func (s *nthOfType) match(n *html.Node) bool {
+ if n.Type != html.ElementNode || n.Parent == nil {
+ return false
+ }
+ curChild := n.Parent.FirstChild
+ i := 0
+ for {
+ if curChild.Type == html.ElementNode && curChild.Data == n.Data {
+ i++
+ if i == s.n {
+ break
+ }
+ }
+ if curChild.NextSibling == nil {
+ break
+ }
+ curChild = curChild.NextSibling
+ }
+ if i != s.n {
+ // there were fewer than n children of this element type.
+ return false
+ }
+ return curChild == n
+}
+
+// nthChild implements the :nth-child() pseudoclass
+type nthChild struct {
+ n int
+}
+
+func (s *nthChild) match(n *html.Node) bool {
+ if n.Type != html.ElementNode || n.Parent == nil {
+ return false
+ }
+ curChild := n.Parent.FirstChild
+ // Advance to next element node.
+ for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
+ curChild = curChild.NextSibling
+ }
+ i := 1
+ for ; i < s.n; i++ {
+ if curChild.NextSibling == nil {
+ break
+ }
+ curChild = curChild.NextSibling
+ // Advance to next element node.
+ for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
+ curChild = curChild.NextSibling
+ }
+ }
+ if i != s.n {
+ // there were fewer than n children.
+ return false
+ }
+ return curChild == n
+}
diff --git a/internal/testing/htmlcheck/query_test.go b/internal/testing/htmlcheck/query_test.go
index f128289..39081f1 100644
--- a/internal/testing/htmlcheck/query_test.go
+++ b/internal/testing/htmlcheck/query_test.go
@@ -120,6 +120,52 @@
nil,
errors.New("expected ']' at end of attribute selector"),
},
+ {
+ `.VulnMain-title:nth-of-type(4)`,
+ &selector{atoms: []selectorAtom{&classSelector{"VulnMain-title"}, &nthOfType{4}}},
+ nil,
+ },
+ {
+ `th:nth-child(2)`,
+ &selector{atoms: []selectorAtom{&elementSelector{"th"}, &nthChild{2}}},
+ nil,
+ },
+ {
+ `th:(2)`,
+ nil,
+ errors.New("expected identifier after : in pseudo class"),
+ },
+ {
+ `th:32(2)`,
+ nil,
+ errors.New("expected identifier after : in pseudo class"),
+ },
+ {
+ `th:active`,
+ nil,
+ errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported"),
+ },
+ {
+ `th:nth-child`,
+ nil,
+ errors.New("expected '(' after :nth-of-type or nth-child"),
+ },
+ {
+ `th:nth-child(odd)`,
+ nil,
+ errors.New("only number arguments are supported for :nth-of-type() or :nth-child()"),
+ },
+ {
+ `th:nth-child(14`,
+ nil,
+ errors.New("expected ')' after number argument to nth-of-type or nth-child"),
+ },
+ // We don't support the child combinator. Make sure it returns a parse error.
+ {
+ ".Documentation-sinceVersion > .Documentation-sinceVersionVersion",
+ nil,
+ errors.New("unexpected character '>' in input"),
+ },
}
for _, tc := range testCases {
sel, err := parse(tc.text)
@@ -156,6 +202,17 @@
{`<div></div><div><div>wrong</div></div><div id="wrong-id"><div class="my-class">also wrong</div></div><div id="my-id"><div class="wrong-class">still wrong</div></div><div id="my-id"><div class="my-class">match</div></div>`, "div#my-id div.my-class", `<div class="my-class">match</div>`},
{`<a></a><div class="UnitMeta-repo"><a href="foo" title="">link body</a></div>`, ".UnitMeta-repo a", `<a href="foo" title="">link body</a>`},
{`<ul class="UnitFiles-fileList"><li><a href="foo">a.go</a></li></ul>`, ".UnitFiles-fileList a", `<a href="foo">a.go</a>`},
+ {`<ul><li>first child</li><li>second child</li></ul>`, "li:nth-child(2)", `<li>second child</li>`},
+ {`<ul> <li>first child</li> <li>second child</li> </ul>`, "li:nth-child(2)", `<li>second child</li>`},
+ {`<div><div>not counted</div><p class="class">first paragraph</p></div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
+ {`<div><div>not counted</div> <p class="class">first paragraph</p> </div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
+ {`<div><div class="class">not counted</div><p class="class">first paragraph</p>`, ".class:nth-of-type(2)", ``},
+ {`<div><div class="class">not counted</div><p class="class">first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
+ {`<div><div>not counted</div><p>first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
+ {`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
+ {`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p><td>also not counted</td><p>third paragraph</p><p>fourth paragraph</p><p class="class">fifth paragraph</p></div>`, ".class:nth-of-type(5)", `<p class="class">fifth paragraph</p>`},
+ {`<table class="UnitDirectories-table"><tbody><tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis">Synopsis</th></tr>`, "th:nth-child(1)", "<th>Path</th>"},
+ {`<table class="UnitDirectories-table"> <tbody> <tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis"> Synopsis </th> </tr>`, "th:nth-child(1)", "<th>Path</th>"},
}
for _, tc := range testCases {
n, err := html.Parse(strings.NewReader(tc.queriedText))
diff --git a/internal/testing/integration/frontend_main_test.go b/internal/testing/integration/frontend_main_test.go
index 009fc2b..fea314a 100644
--- a/internal/testing/integration/frontend_main_test.go
+++ b/internal/testing/integration/frontend_main_test.go
@@ -51,7 +51,7 @@
in("#S2",
in(".Documentation-sinceVersion", hasText(""))),
in("#String",
- in(".Documentation-sinceVersion > .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
+ in(".Documentation-sinceVersion .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
in("#T",
in(".Documentation-sinceVersion", hasText(""))),
in("#TF",