internal/frontend: limit heading ids to ASCII characters

It seems like bluemonday intended to allow only ASCII ids. Limit the
generated heading ids to ASCII letters and digits, in keeping with what
we think the intended behavior is.

Change-Id: Ifa9aaad5fcc5308d9efcaa75fafb65547839fde2
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/544356
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
kokoro-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/internal/frontend/goldmark.go b/internal/frontend/goldmark.go
index ab75979..4ba5666 100644
--- a/internal/frontend/goldmark.go
+++ b/internal/frontend/goldmark.go
@@ -12,7 +12,6 @@
 	"fmt"
 	"regexp"
 	"strings"
-	"unicode"
 
 	"github.com/yuin/goldmark/ast"
 	"github.com/yuin/goldmark/parser"
@@ -180,7 +179,7 @@
 }
 
 // Generate turns heading content from a markdown document into a heading id.
-// First HTML markup and markdown images are stripped then unicode letters
+// First HTML markup and markdown images are stripped then ASCII letters
 // and numbers are used to generate the final result. Finally, all heading ids
 // are prefixed with "readme-" to avoid name collisions with other ids on the
 // unit page. Duplicated heading ids are given an incremental suffix. See
@@ -190,7 +189,7 @@
 	r := regexp.MustCompile(`(<[^<>]+>|\[\!\[[^\]]+]\([^\)]+\)\]\([^\)]+\))`)
 	str := r.ReplaceAllString(string(value), "")
 	f := func(c rune) bool {
-		return !unicode.IsLetter(c) && !unicode.IsNumber(c)
+		return !('a' <= c && c <= 'z') && !('A' <= c && c <= 'Z') && !('0' <= c && c <= '9')
 	}
 	str = strings.Join(strings.FieldsFunc(str, f), "-")
 	str = strings.ToLower(str)
diff --git a/internal/frontend/readme_test.go b/internal/frontend/readme_test.go
index 9878452..be41a09 100644
--- a/internal/frontend/readme_test.go
+++ b/internal/frontend/readme_test.go
@@ -145,6 +145,16 @@
 			},
 		},
 		{
+			name: "Non-ASCII Heading",
+			unit: unit,
+			readme: &internal.Readme{
+				Filepath: sample.ReadmeFilePath,
+				Contents: "# 中文¹",
+			},
+			wantHTML:    "<h3 class=\"h1\" id=\"readme-heading\">中文¹</h3>",
+			wantOutline: []*Heading{{Level: 1, Text: "中文¹", ID: "readme-heading"}},
+		},
+		{
 			name: "Github markdown emoji markup is properly rendered",
 			unit: unit,
 			readme: &internal.Readme{