internal/testing/htmlcheck: replace use of cascadia in In

This cl implements the :nth-child() and :nth-of-type() pseudoclasses
for the css selector query function, allowing it to be used for
htmlcheck.In.

It also replaces the single use of a a child combinator '>' in a test
with a descendant combinator ' ' so that we don't need to implement
the child combinator. The test should have the same behavior.

For golang/go#61399

Change-Id: I09d9b8fbcd0eafd37aceb5994515687c85244ef8
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/542058
kokoro-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/testing/htmlcheck/htmlcheck.go b/internal/testing/htmlcheck/htmlcheck.go
index 46cdf87..4accdb5 100644
--- a/internal/testing/htmlcheck/htmlcheck.go
+++ b/internal/testing/htmlcheck/htmlcheck.go
@@ -12,7 +12,6 @@
 	"regexp"
 	"strings"
 
-	"github.com/andybalholm/cascadia"
 	"golang.org/x/net/html"
 )
 
@@ -38,15 +37,9 @@
 //
 // A nil Checker is valid and always succeeds.
 func In(selector string, checkers ...Checker) Checker {
-	sel := mustParseCascadiaSelector(selector)
+	sel := mustParseSelector(selector)
 	return func(n *html.Node) error {
-		var m *html.Node
-		// cascadia.Query does not test against its argument node.
-		if sel.Match(n) {
-			m = n
-		} else {
-			m = cascadia.Query(n, sel)
-		}
+		m := query(n, sel)
 		if m == nil {
 			return fmt.Errorf("no element matches selector %q", selector)
 		}
@@ -84,19 +77,6 @@
 	return nil
 }
 
-// mustParseCascadiaSelector parses the given CSS selector. An empty string
-// is treated as "*" (match everything).
-func mustParseCascadiaSelector(s string) cascadia.Sel {
-	if s == "" {
-		s = "*"
-	}
-	sel, err := cascadia.Parse(s)
-	if err != nil {
-		panic(fmt.Sprintf("parsing %q: %v", s, err))
-	}
-	return sel
-}
-
 // mustParseSelector parses the given CSS selector. An empty string
 // is treated as matching everything.
 func mustParseSelector(s string) *selector {
diff --git a/internal/testing/htmlcheck/query.go b/internal/testing/htmlcheck/query.go
index 1e5a671..75a196d 100644
--- a/internal/testing/htmlcheck/query.go
+++ b/internal/testing/htmlcheck/query.go
@@ -7,6 +7,7 @@
 import (
 	"errors"
 	"fmt"
+	"strconv"
 	"strings"
 
 	"golang.org/x/net/html"
@@ -122,13 +123,62 @@
 			}
 			sel.next = next
 			return sel, nil
+		case s[0] == ':':
+			atom, rest, err := parsePseudoClass(s)
+			if err != nil {
+				return nil, err
+			}
+			sel.atoms = append(sel.atoms, atom)
+			s = rest
 		default:
-			return nil, fmt.Errorf("unexpected character %q in input", s[0])
+			return nil, fmt.Errorf("unexpected character '%v' in input", string(s[0]))
 		}
 	}
 	return sel, nil
 }
 
+// parsePseudoClass parses a :nth-child(n) or :nth-of-type(n). It only supports those functions and
+// number arguments to those functions, not even, odd, or an+b.
+func parsePseudoClass(s string) (selectorAtom, string, error) {
+	if s[0] != ':' {
+		return nil, "", errors.New("expected ':' at beginning of pseudo class")
+	}
+	ident, rest := consumeIdentifier(s[1:])
+	if len(ident) == 0 {
+		return nil, "", errors.New("expected identifier after : in pseudo class")
+	}
+	if ident != "nth-of-type" && ident != "nth-child" {
+		return nil, "", errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported")
+	}
+	s = rest
+	if len(s) == 0 || s[0] != '(' {
+		return nil, "", errors.New("expected '(' after :nth-of-type or nth-child")
+	}
+	numstr, rest := consumeNumber(s[1:])
+	if len(numstr) == 0 {
+		return nil, "", errors.New("only number arguments are supported for :nth-of-type() or :nth-child()")
+	}
+	num, err := strconv.Atoi(numstr)
+	if err != nil {
+		// This shouldn't ever happen because consumeNumber should only return valid numbers and we
+		// check that the length is greater than 0.
+		panic(fmt.Errorf("unexpected parse error for number: %v", err))
+	}
+	s = rest
+	if len(s) == 0 || s[0] != ')' {
+		return nil, "", errors.New("expected ')' after number argument to nth-of-type or nth-child")
+	}
+	rest = s[1:]
+	switch ident {
+	case "nth-of-type":
+		return &nthOfType{num}, rest, nil
+	case "nth-child":
+		return &nthChild{num}, rest, nil
+	default:
+		panic("we should only have allowed nth-of-type or nth-child up to this point")
+	}
+}
+
 // parseAttributeSelector parses an attribute selector of the form [attribute-name="attribute=value"]
 func parseAttributeSelector(s string) (*attributeSelector, string, error) {
 	if s[0] != '[' {
@@ -192,6 +242,15 @@
 	return s[:i], s[i:]
 }
 
+// consumeNumber consumes and returns a (0-9)+ number at the beginning
+// of the given string, and the rest of the string.
+func consumeNumber(s string) (letters, rest string) {
+	i := 0
+	for ; i < len(s) && isNumber(s[i]); i++ {
+	}
+	return s[:i], s[i:]
+}
+
 func isAscii(s string) bool {
 	for i := 0; i < len(s); i++ {
 		if s[i] > 127 {
@@ -268,3 +327,65 @@
 	}
 	return false
 }
+
+// nthOfType implements the :nth-of-type() pseudoclass.
+type nthOfType struct {
+	n int
+}
+
+func (s *nthOfType) match(n *html.Node) bool {
+	if n.Type != html.ElementNode || n.Parent == nil {
+		return false
+	}
+	curChild := n.Parent.FirstChild
+	i := 0
+	for {
+		if curChild.Type == html.ElementNode && curChild.Data == n.Data {
+			i++
+			if i == s.n {
+				break
+			}
+		}
+		if curChild.NextSibling == nil {
+			break
+		}
+		curChild = curChild.NextSibling
+	}
+	if i != s.n {
+		// there were fewer than n children of this element type.
+		return false
+	}
+	return curChild == n
+}
+
+// nthChild implements the :nth-child() pseudoclass
+type nthChild struct {
+	n int
+}
+
+func (s *nthChild) match(n *html.Node) bool {
+	if n.Type != html.ElementNode || n.Parent == nil {
+		return false
+	}
+	curChild := n.Parent.FirstChild
+	// Advance to next element node.
+	for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
+		curChild = curChild.NextSibling
+	}
+	i := 1
+	for ; i < s.n; i++ {
+		if curChild.NextSibling == nil {
+			break
+		}
+		curChild = curChild.NextSibling
+		// Advance to next element node.
+		for curChild.Type != html.ElementNode && curChild.NextSibling != nil {
+			curChild = curChild.NextSibling
+		}
+	}
+	if i != s.n {
+		// there were fewer than n children.
+		return false
+	}
+	return curChild == n
+}
diff --git a/internal/testing/htmlcheck/query_test.go b/internal/testing/htmlcheck/query_test.go
index f128289..39081f1 100644
--- a/internal/testing/htmlcheck/query_test.go
+++ b/internal/testing/htmlcheck/query_test.go
@@ -120,6 +120,52 @@
 			nil,
 			errors.New("expected ']' at end of attribute selector"),
 		},
+		{
+			`.VulnMain-title:nth-of-type(4)`,
+			&selector{atoms: []selectorAtom{&classSelector{"VulnMain-title"}, &nthOfType{4}}},
+			nil,
+		},
+		{
+			`th:nth-child(2)`,
+			&selector{atoms: []selectorAtom{&elementSelector{"th"}, &nthChild{2}}},
+			nil,
+		},
+		{
+			`th:(2)`,
+			nil,
+			errors.New("expected identifier after : in pseudo class"),
+		},
+		{
+			`th:32(2)`,
+			nil,
+			errors.New("expected identifier after : in pseudo class"),
+		},
+		{
+			`th:active`,
+			nil,
+			errors.New("only :nth-of-type() and :nth-child() pseudoclasses are supported"),
+		},
+		{
+			`th:nth-child`,
+			nil,
+			errors.New("expected '(' after :nth-of-type or nth-child"),
+		},
+		{
+			`th:nth-child(odd)`,
+			nil,
+			errors.New("only number arguments are supported for :nth-of-type() or :nth-child()"),
+		},
+		{
+			`th:nth-child(14`,
+			nil,
+			errors.New("expected ')' after number argument to nth-of-type or nth-child"),
+		},
+		// We don't support the child combinator. Make sure it returns a parse error.
+		{
+			".Documentation-sinceVersion > .Documentation-sinceVersionVersion",
+			nil,
+			errors.New("unexpected character '>' in input"),
+		},
 	}
 	for _, tc := range testCases {
 		sel, err := parse(tc.text)
@@ -156,6 +202,17 @@
 		{`<div></div><div><div>wrong</div></div><div id="wrong-id"><div class="my-class">also wrong</div></div><div id="my-id"><div class="wrong-class">still wrong</div></div><div id="my-id"><div class="my-class">match</div></div>`, "div#my-id div.my-class", `<div class="my-class">match</div>`},
 		{`<a></a><div class="UnitMeta-repo"><a href="foo" title="">link body</a></div>`, ".UnitMeta-repo a", `<a href="foo" title="">link body</a>`},
 		{`<ul class="UnitFiles-fileList"><li><a href="foo">a.go</a></li></ul>`, ".UnitFiles-fileList a", `<a href="foo">a.go</a>`},
+		{`<ul><li>first child</li><li>second child</li></ul>`, "li:nth-child(2)", `<li>second child</li>`},
+		{`<ul> <li>first child</li> <li>second child</li> </ul>`, "li:nth-child(2)", `<li>second child</li>`},
+		{`<div><div>not counted</div><p class="class">first paragraph</p></div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
+		{`<div><div>not counted</div> <p class="class">first paragraph</p> </div>`, ".class:nth-of-type(1)", `<p class="class">first paragraph</p>`},
+		{`<div><div class="class">not counted</div><p class="class">first paragraph</p>`, ".class:nth-of-type(2)", ``},
+		{`<div><div class="class">not counted</div><p class="class">first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
+		{`<div><div>not counted</div><p>first paragraph</p><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
+		{`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p></div>`, ".class:nth-of-type(2)", `<p class="class">second paragraph</p>`},
+		{`<div><div>not counted</div><p>first paragraph</p><div>also not counted</div><p class="class">second paragraph</p><td>also not counted</td><p>third paragraph</p><p>fourth paragraph</p><p class="class">fifth paragraph</p></div>`, ".class:nth-of-type(5)", `<p class="class">fifth paragraph</p>`},
+		{`<table class="UnitDirectories-table"><tbody><tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis">Synopsis</th></tr>`, "th:nth-child(1)", "<th>Path</th>"},
+		{`<table class="UnitDirectories-table"> <tbody> <tr class="UnitDirectories-tableHeader"> <th>Path</th> <th class="UnitDirectories-desktopSynopsis"> Synopsis </th> </tr>`, "th:nth-child(1)", "<th>Path</th>"},
 	}
 	for _, tc := range testCases {
 		n, err := html.Parse(strings.NewReader(tc.queriedText))
diff --git a/internal/testing/integration/frontend_main_test.go b/internal/testing/integration/frontend_main_test.go
index 009fc2b..fea314a 100644
--- a/internal/testing/integration/frontend_main_test.go
+++ b/internal/testing/integration/frontend_main_test.go
@@ -51,7 +51,7 @@
 				in("#S2",
 					in(".Documentation-sinceVersion", hasText(""))),
 				in("#String",
-					in(".Documentation-sinceVersion > .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
+					in(".Documentation-sinceVersion .Documentation-sinceVersionVersion", hasText("v1.1.0"))),
 				in("#T",
 					in(".Documentation-sinceVersion", hasText(""))),
 				in("#TF",