[x/go.dev] cmd/internal/site: switch to Goldmark

x/website uses Goldmark, so this brings go.dev in line with golang.org.

The only changes to the golden output are a few automatically
generated heading IDs. It used to be that

	# Go's Concurrency

produced <h1 id="go-s-concurrency">.
Now it produces <h1 id="gos-concurrency">.

Similarly,

	# Go & You

used to produce <h1 id="go-you">.
Now it produces <h1 id="go--you">.

In theory this could lead to slightly broken links whose
fragments no longer match the heading IDs they were meant to target.
In practice it is doubtful that people have links to the
specific affected sections.

Change-Id: Ia0827e2f13fbd7f304983025dade694c50c49d6e
X-GoDev-Commit: 416036ecdf33ce51eeeb0d2bbd8ca98fd5fc7dc9
diff --git a/go.dev/cmd/internal/site/md.go b/go.dev/cmd/internal/site/md.go
index 5faa799..72ac255 100644
--- a/go.dev/cmd/internal/site/md.go
+++ b/go.dev/cmd/internal/site/md.go
@@ -6,37 +6,77 @@
 
 import (
 	"bytes"
+	"regexp"
 	"strings"
 
-	"github.com/russross/blackfriday"
+	"github.com/yuin/goldmark"
+	"github.com/yuin/goldmark/ast"
+	"github.com/yuin/goldmark/extension"
+	"github.com/yuin/goldmark/parser"
+	"github.com/yuin/goldmark/renderer/html"
+	"github.com/yuin/goldmark/text"
+	"github.com/yuin/goldmark/util"
 	"golang.org/x/go.dev/cmd/internal/html/template"
 	"golang.org/x/go.dev/cmd/internal/tmplfunc"
 )
 
 // markdownToHTML converts markdown to HTML using the renderer and settings that Hugo uses.
-func markdownToHTML(markdown string) template.HTML {
-	markdown = strings.TrimLeft(markdown, "\n")
-	renderer := blackfriday.HtmlRenderer(blackfriday.HTML_USE_XHTML|
-		blackfriday.HTML_USE_SMARTYPANTS|
-		blackfriday.HTML_SMARTYPANTS_FRACTIONS|
-		blackfriday.HTML_SMARTYPANTS_DASHES|
-		blackfriday.HTML_SMARTYPANTS_LATEX_DASHES|
-		blackfriday.HTML_NOREFERRER_LINKS|
-		blackfriday.HTML_HREF_TARGET_BLANK,
-		"", "")
-	options := blackfriday.Options{
-		Extensions: blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
-			blackfriday.EXTENSION_TABLES |
-			blackfriday.EXTENSION_FENCED_CODE |
-			blackfriday.EXTENSION_AUTOLINK |
-			blackfriday.EXTENSION_STRIKETHROUGH |
-			blackfriday.EXTENSION_SPACE_HEADERS |
-			blackfriday.EXTENSION_HEADER_IDS |
-			blackfriday.EXTENSION_BACKSLASH_LINE_BREAK |
-			blackfriday.EXTENSION_DEFINITION_LISTS |
-			blackfriday.EXTENSION_AUTO_HEADER_IDS,
+func markdownToHTML(markdown string) (template.HTML, error) {
+	// parser.WithHeadingAttribute allows custom ids on headings.
+	// html.WithUnsafe allows use of raw HTML, which we need for tables.
+	md := goldmark.New(
+		goldmark.WithParserOptions(
+			parser.WithHeadingAttribute(),
+			parser.WithAutoHeadingID(),
+			parser.WithASTTransformers(util.Prioritized(mdTransformFunc(mdLink), 1)),
+		),
+		goldmark.WithRendererOptions(html.WithUnsafe()),
+		goldmark.WithExtensions(
+			extension.NewTypographer(),
+			extension.NewLinkify(
+				extension.WithLinkifyAllowedProtocols([][]byte{[]byte("http"), []byte("https")}),
+				extension.WithLinkifyEmailRegexp(regexp.MustCompile(`[^\x00-\x{10FFFF}]`)), // impossible
+			),
+		),
+	)
+	var buf bytes.Buffer
+	if err := md.Convert([]byte(markdown), &buf); err != nil {
+		return "", err
 	}
-	return template.HTML(blackfriday.MarkdownOptions([]byte(markdown), renderer, options))
+	return template.HTML(buf.Bytes()), nil
+}
+
+// mdTransformFunc is a func implementing parser.ASTTransformer.
+type mdTransformFunc func(*ast.Document, text.Reader, parser.Context)
+
+func (f mdTransformFunc) Transform(node *ast.Document, reader text.Reader, pc parser.Context) {
+	f(node, reader, pc)
+}
+
+// mdLink walks doc, adding rel=noreferrer target=_blank to non-relative links.
+func mdLink(doc *ast.Document, _ text.Reader, _ parser.Context) {
+	mdLinkWalk(doc)
+}
+
+func mdLinkWalk(n ast.Node) {
+	switch n := n.(type) {
+	case *ast.Link:
+		dest := string(n.Destination)
+		if strings.HasPrefix(dest, "https://") || strings.HasPrefix(dest, "http://") {
+			n.SetAttributeString("rel", []byte("noreferrer"))
+			n.SetAttributeString("target", []byte("_blank"))
+		}
+		return
+	case *ast.AutoLink:
+		// All autolinks are non-relative.
+		n.SetAttributeString("rel", []byte("noreferrer"))
+		n.SetAttributeString("target", []byte("_blank"))
+		return
+	}
+
+	for child := n.FirstChild(); child != nil; child = child.NextSibling() {
+		mdLinkWalk(child)
+	}
 }
 
 // markdownTemplateToHTML converts a markdown template to HTML,
@@ -52,5 +92,5 @@
 	if err := t.Execute(&buf, p.params); err != nil {
 		return "", err
 	}
-	return markdownToHTML(buf.String()), nil
+	return markdownToHTML(buf.String())
 }
diff --git a/go.dev/cmd/internal/site/site_test.go b/go.dev/cmd/internal/site/site_test.go
index 566793d..f19b95c 100644
--- a/go.dev/cmd/internal/site/site_test.go
+++ b/go.dev/cmd/internal/site/site_test.go
@@ -10,6 +10,7 @@
 	"os"
 	"path"
 	"path/filepath"
+	"regexp"
 	"testing"
 	"time"
 
@@ -102,6 +103,11 @@
 // to match Hugo's whitespace heuristics exactly or where we are
 // refactoring templates a little which changes spacing in inconsequential ways.
 func canonicalize(data []byte) []byte {
+	data = bytes.ReplaceAll(data, []byte("<li>"), []byte("<li>\n"))
+	data = bytes.ReplaceAll(data, []byte("</p>"), []byte("</p>\n"))
+	data = bytes.ReplaceAll(data, []byte("</ul>"), []byte("</ul>\n"))
+	data = regexp.MustCompile(`(<(img|hr)([^<>]*[^ <>])?) */>`).ReplaceAll(data, []byte("$1>")) // <img/> to <img>
+
 	lines := bytes.Split(data, []byte("\n"))
 	for i, line := range lines {
 		lines[i] = bytes.Trim(line, " \t")
diff --git a/go.dev/cmd/internal/site/tmpl.go b/go.dev/cmd/internal/site/tmpl.go
index 1b33c12..e245fd0 100644
--- a/go.dev/cmd/internal/site/tmpl.go
+++ b/go.dev/cmd/internal/site/tmpl.go
@@ -89,13 +89,16 @@
 }
 
 // markdown is the function provided to templates.
-func markdown(data interface{}) template.HTML {
-	h := markdownToHTML(toString(data))
+func markdown(data interface{}) (template.HTML, error) {
+	h, err := markdownToHTML(toString(data))
+	if err != nil {
+		return "", err
+	}
 	s := strings.TrimSpace(string(h))
 	if strings.HasPrefix(s, "<p>") && strings.HasSuffix(s, "</p>") && strings.Count(s, "<p>") == 1 {
 		h = template.HTML(strings.TrimSpace(s[len("<p>") : len(s)-len("</p>")]))
 	}
-	return h
+	return h, nil
 }
 
 func replace(input, x, y interface{}) string {
diff --git a/go.dev/go.mod b/go.dev/go.mod
index 4ffd817..91e27b1 100644
--- a/go.dev/go.mod
+++ b/go.dev/go.mod
@@ -5,7 +5,7 @@
 require (
 	github.com/google/go-cmp v0.3.1
 	github.com/microcosm-cc/bluemonday v1.0.2
-	github.com/russross/blackfriday v1.6.0
+	github.com/yuin/goldmark v1.2.1
 	google.golang.org/api v0.13.0
 	gopkg.in/yaml.v2 v2.2.2
 	gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
diff --git a/go.dev/go.sum b/go.dev/go.sum
index 5b62548..e2f0231 100644
--- a/go.dev/go.sum
+++ b/go.dev/go.sum
@@ -27,8 +27,7 @@
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
 github.com/microcosm-cc/bluemonday v1.0.2 h1:5lPfLTTAvAbtS0VqT+94yOtFnGfUWYyx0+iToC3Os3s=
 github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc=
-github.com/russross/blackfriday v1.6.0 h1:KqfZb0pUVN2lYqZUYRddxF4OR8ZMURnJIG5Y3VRLtww=
-github.com/russross/blackfriday v1.6.0/go.mod h1:ti0ldHuxg49ri4ksnFxlkCfN+hvslNlmVHqNRXXJNAY=
+github.com/yuin/goldmark v1.2.1 h1:ruQGxdhGHe7FWOJPT0mKs5+pD2Xs1Bm/kdGlHO04FmM=
 github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
 go.opencensus.io v0.21.0 h1:mU6zScU4U1YAFPHEHYk+3JC4SY7JxgkqS10ZOSyksNg=
 go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
diff --git a/go.dev/testdata/golden/solutions/devops/index.html b/go.dev/testdata/golden/solutions/devops/index.html
index 54d57dd..f0d19dc 100644
--- a/go.dev/testdata/golden/solutions/devops/index.html
+++ b/go.dev/testdata/golden/solutions/devops/index.html
@@ -216,16 +216,16 @@
 
 <h2 id="key-benefits" class="sectionHeading">Key Benefits</h2>
 
-<h3 id="easily-build-small-scripts-with-go-s-robust-standard-library-and-static-typing">Easily build small scripts with Go’s robust standard library and static typing</h3>
+<h3 id="easily-build-small-scripts-with-gos-robust-standard-library-and-static-typing">Easily build small scripts with Go’s robust standard library and static typing</h3>
 
 <p>Go’s fast build and startup times. Go’s extensive standard library—including packages for
 common needs like HTTP, file I/O, time, regular expressions, exec, and JSON/CSV formats—lets DevOps/SREs get right into their business logic. Plus, Go’s static type system and explicit error handling make even small scripts more robust.</p>
 
-<h3 id="quickly-deploy-clis-with-go-s-fast-build-times">Quickly deploy CLIs with Go’s fast build times</h3>
+<h3 id="quickly-deploy-clis-with-gos-fast-build-times">Quickly deploy CLIs with Go’s fast build times</h3>
 
 <p>Every site reliability engineer has written “one-time use” scripts that turned into CLIs used by dozens of other engineers every day. And small deployment automation scripts turn into rollout management services. With Go, DevOps/SREs are in a great position to be successful when software scope inevitably creeps. Starting with Go puts you in a great position to be successful when that happens.</p>
 
-<h3 id="scale-and-maintain-larger-applications-with-go-s-low-memory-footprint-and-doc-generator">Scale and maintain larger applications with Go’s low memory footprint and doc generator</h3>
+<h3 id="scale-and-maintain-larger-applications-with-gos-low-memory-footprint-and-doc-generator">Scale and maintain larger applications with Go’s low memory footprint and doc generator</h3>
 
 <p>Go’s garbage collector means DevOps/SRE teams don’t have to worry about memory management. And Go’s automatic documentation generator (godoc) makes code self-documenting–lowering maintenance overhead and establishing best practices from the get-go.</p>
 
@@ -462,7 +462,7 @@
 
 <h2 id="get-started" class="sectionHeading">Get Started</h2>
 
-<h3 id="go-books-on-devops-sre">Go books on DevOps &amp; SRE</h3>
+<h3 id="go-books-on-devops--sre">Go books on DevOps &amp; SRE</h3>
 
 <ul class="Learn-tileList">
 
diff --git a/go.dev/testdata/golden/solutions/google/index.html b/go.dev/testdata/golden/solutions/google/index.html
index 1a2856b..fa6f8e2 100644
--- a/go.dev/testdata/golden/solutions/google/index.html
+++ b/go.dev/testdata/golden/solutions/google/index.html
@@ -207,7 +207,7 @@
 
 <p>The following stories are a small sample of the many ways that Go is used at Google.</p>
 
-<h3 id="how-google-s-core-data-solutions-team-uses-go">How Google&rsquo;s Core Data Solutions Team Uses Go</h3>
+<h3 id="how-googles-core-data-solutions-team-uses-go">How Google&rsquo;s Core Data Solutions Team Uses Go</h3>
 
 <p>Google&rsquo;s mission is “to organize the world&rsquo;s information and make it universally
 accessible and useful.”  One of the teams responsible for organizing that
@@ -236,7 +236,7 @@
 
 <hr />
 
-<h3 id="actuating-google-production-how-google-s-site-reliability-engineering-team-uses-go">Actuating Google Production: How Google’s Site Reliability Engineering Team Uses Go</h3>
+<h3 id="actuating-google-production-how-googles-site-reliability-engineering-team-uses-go">Actuating Google Production: How Google’s Site Reliability Engineering Team Uses Go</h3>
 
 <p>Google runs a small number of very large services. Those services are powered by a global infrastructure covering everything one needs: storage systems, load balancers, network, logging, monitoring, and many more. Nevertheless, it is not a static system - it cannot be. Architecture evolves, new products and ideas are created, new versions must be rolled out, configs pushed, database schema updated, and more. We end up deploying changes to our systems dozens of times per second.</p>
 
diff --git a/go.dev/testdata/golden/solutions/paypal/index.html b/go.dev/testdata/golden/solutions/paypal/index.html
index d6f1277..8794e43 100644
--- a/go.dev/testdata/golden/solutions/paypal/index.html
+++ b/go.dev/testdata/golden/solutions/paypal/index.html
@@ -253,7 +253,7 @@
 
 <p>Most importantly, PayPal developers have increased their productivity with Go. Go’s concurrency mechanisms have made it easy to write programs that get the most out of PayPal’s multicore and networked machines. Developers using Go also benefit from the fact that it compiles quickly to machine code and their apps gain the convenience of garbage collection and the power of run-time reflection.</p>
 
-<h2 id="speeding-paypal-s-time-to-market">Speeding PayPal’s time to market</h2>
+<h2 id="speeding-paypals-time-to-market">Speeding PayPal’s time to market</h2>
 
 <p>The first-class languages at PayPal today are Java and Node, with Go primarily used as an infrastructure language. While Go may never replace Node.js for certain applications, Natarajan is pushing to make Go a first-class language at PayPal.</p>
 
diff --git a/go.dev/testdata/golden/solutions/webdev/index.html b/go.dev/testdata/golden/solutions/webdev/index.html
index 20de566..252de81 100644
--- a/go.dev/testdata/golden/solutions/webdev/index.html
+++ b/go.dev/testdata/golden/solutions/webdev/index.html
@@ -206,7 +206,7 @@
 
 <p>For enterprises, Go is preferred for providing rapid cross-platform deployment. With its goroutines, native compilation, and the URI-based package namespacing, Go code compiles to a single, small binary—with zero dependencies—making it very fast.</p>
 
-<h3 id="leverage-go-s-out-of-the-box-performance-to-scale-with-ease">Leverage Go’s out-of-the-box performance to scale with ease</h3>
+<h3 id="leverage-gos-out-of-the-box-performance-to-scale-with-ease">Leverage Go’s out-of-the-box performance to scale with ease</h3>
 
 <p>Tigran Bayburtsyan, Co-Founder and CTO at Hexact Inc., summarizes five key reasons his company switched to Go:</p>