html: add Node.{Ancestors,ChildNodes,Descendants}()

Adds iterators for the parents, immediate children, and all children of a Node respectively.

Fixes golang/go#62113

Change-Id: Iab015872cc3a20fe5e7cae3bc90b89cba68cc3f8
GitHub-Last-Rev: d99de580ab8c30bb04c9fee401d6bfc5dd55d1ec
GitHub-Pull-Request: golang/net#215
Reviewed-on: https://go-review.googlesource.com/c/net/+/594195
Reviewed-by: Ian Lance Taylor <iant@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Ian Lance Taylor <iant@google.com>
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/html/doc.go b/html/doc.go
index 3a7e5ab..885c4c5 100644
--- a/html/doc.go
+++ b/html/doc.go
@@ -78,16 +78,11 @@
 	if err != nil {
 		// ...
 	}
-	var f func(*html.Node)
-	f = func(n *html.Node) {
+	for n := range doc.Descendants() {
 		if n.Type == html.ElementNode && n.Data == "a" {
 			// Do something with n...
 		}
-		for c := n.FirstChild; c != nil; c = c.NextSibling {
-			f(c)
-		}
 	}
-	f(doc)
 
 The relevant specifications include:
 https://html.spec.whatwg.org/multipage/syntax.html and
diff --git a/html/example_test.go b/html/example_test.go
index 0b06ed7..830f0b2 100644
--- a/html/example_test.go
+++ b/html/example_test.go
@@ -2,6 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+//go:build go1.23
+
 // This example demonstrates parsing HTML data and walking the resulting tree.
 package html_test
 
@@ -11,6 +13,7 @@
 	"strings"
 
 	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
 )
 
 func ExampleParse() {
@@ -19,9 +22,8 @@
 	if err != nil {
 		log.Fatal(err)
 	}
-	var f func(*html.Node)
-	f = func(n *html.Node) {
-		if n.Type == html.ElementNode && n.Data == "a" {
+	for n := range doc.Descendants() {
+		if n.Type == html.ElementNode && n.DataAtom == atom.A {
 			for _, a := range n.Attr {
 				if a.Key == "href" {
 					fmt.Println(a.Val)
@@ -29,11 +31,8 @@
 				}
 			}
 		}
-		for c := n.FirstChild; c != nil; c = c.NextSibling {
-			f(c)
-		}
 	}
-	f(doc)
+
 	// Output:
 	// foo
 	// /bar/baz
diff --git a/html/iter.go b/html/iter.go
new file mode 100644
index 0000000..54be8fd
--- /dev/null
+++ b/html/iter.go
@@ -0,0 +1,56 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.23
+
+package html
+
+import "iter"
+
+// Ancestors returns an iterator over the ancestors of n, starting with n.Parent.
+//
+// Mutating a Node or its parents while iterating may have unexpected results.
+func (n *Node) Ancestors() iter.Seq[*Node] {
+	_ = n.Parent // eager nil check
+
+	return func(yield func(*Node) bool) {
+		for p := n.Parent; p != nil && yield(p); p = p.Parent {
+		}
+	}
+}
+
+// ChildNodes returns an iterator over the immediate children of n,
+// starting with n.FirstChild.
+//
+// Mutating a Node or its children while iterating may have unexpected results.
+func (n *Node) ChildNodes() iter.Seq[*Node] {
+	_ = n.FirstChild // eager nil check
+
+	return func(yield func(*Node) bool) {
+		for c := n.FirstChild; c != nil && yield(c); c = c.NextSibling {
+		}
+	}
+
+}
+
+// Descendants returns an iterator over all nodes recursively beneath
+// n, excluding n itself. Nodes are visited in depth-first preorder.
+//
+// Mutating a Node or its descendants while iterating may have unexpected results.
+func (n *Node) Descendants() iter.Seq[*Node] {
+	_ = n.FirstChild // eager nil check
+
+	return func(yield func(*Node) bool) {
+		n.descendants(yield)
+	}
+}
+
+func (n *Node) descendants(yield func(*Node) bool) bool {
+	for c := range n.ChildNodes() {
+		if !yield(c) || !c.descendants(yield) {
+			return false
+		}
+	}
+	return true
+}
diff --git a/html/iter_test.go b/html/iter_test.go
new file mode 100644
index 0000000..cca7f82
--- /dev/null
+++ b/html/iter_test.go
@@ -0,0 +1,96 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.23
+
+package html
+
+import (
+	"strings"
+	"testing"
+)
+
+func TestNode_ChildNodes(t *testing.T) {
+	tests := []struct {
+		in   string
+		want string
+	}{
+		{"", ""},
+		{"<a></a>", "a"},
+		{"a", "a"},
+		{"<a></a><!--b-->", "a b"},
+		{"a<b></b>c", "a b c"},
+		{"a<b><!--c--></b>d", "a b d"},
+		{"<a><b>c<!--d-->e</b></a>f<!--g--><h>i</h>", "a f g h"},
+	}
+	for _, test := range tests {
+		doc, err := Parse(strings.NewReader(test.in))
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Drill to <html><head></head><body>
+		n := doc.FirstChild.FirstChild.NextSibling
+		var results []string
+		for c := range n.ChildNodes() {
+			results = append(results, c.Data)
+		}
+		if got := strings.Join(results, " "); got != test.want {
+			t.Errorf("ChildNodes = %q, want %q", got, test.want)
+		}
+	}
+}
+
+func TestNode_Descendants(t *testing.T) {
+	tests := []struct {
+		in   string
+		want string
+	}{
+		{"", ""},
+		{"<a></a>", "a"},
+		{"<a><b></b></a>", "a b"},
+		{"<a>b</a>", "a b"},
+		{"<a><!--b--></a>", "a b"},
+		{"<a>b<c></c>d</a>", "a b c d"},
+		{"<a>b<c><!--d--></c>e</a>", "a b c d e"},
+		{"<a><b><c>d<!--e-->f</c></b>g<!--h--><i>j</i></a>", "a b c d e f g h i j"},
+	}
+	for _, test := range tests {
+		doc, err := Parse(strings.NewReader(test.in))
+		if err != nil {
+			t.Fatal(err)
+		}
+		// Drill to <html><head></head><body>
+		n := doc.FirstChild.FirstChild.NextSibling
+		var results []string
+		for c := range n.Descendants() {
+			results = append(results, c.Data)
+		}
+		if got := strings.Join(results, " "); got != test.want {
+			t.Errorf("Descendants = %q; want: %q", got, test.want)
+		}
+	}
+}
+
+func TestNode_Ancestors(t *testing.T) {
+	for _, size := range []int{0, 1, 2, 10, 100, 10_000} {
+		n := buildChain(size)
+		nParents := 0
+		for _ = range n.Ancestors() {
+			nParents++
+		}
+		if nParents != size {
+			t.Errorf("number of Ancestors = %d; want: %d", nParents, size)
+		}
+	}
+}
+
+func buildChain(size int) *Node {
+	child := new(Node)
+	for range size {
+		parent := child
+		child = new(Node)
+		parent.AppendChild(child)
+	}
+	return child
+}
diff --git a/html/node.go b/html/node.go
index 1350eef..77741a1 100644
--- a/html/node.go
+++ b/html/node.go
@@ -38,6 +38,10 @@
 // that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
 // is the atom for Data, or zero if Data is not a known tag name.
 //
+// Node trees may be navigated using the link fields (Parent,
+// FirstChild, and so on) or a range loop over iterators such as
+// [Node.Descendants].
+//
 // An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
 // Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
 // "svg" is short for "http://www.w3.org/2000/svg".