godoc: fix quadratic and ASCII-only struct field linkification

Fixes two problems with adding the #StructType.FieldName anchors for
linkified struct fields:

* the old code was quadratic

* the old code only dealt with ASCII only

Change-Id: If03a367a94d05d3d470e1326dfb573037088ff78
Reviewed-on: https://go-review.googlesource.com/35486
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Robert Griesemer <gri@golang.org>
diff --git a/godoc/godoc.go b/godoc/godoc.go
index 8bda89a..1063244 100644
--- a/godoc/godoc.go
+++ b/godoc/godoc.go
@@ -233,24 +233,28 @@
 	if st.Fields == nil {
 		return
 	}
-	var scratch bytes.Buffer
+	// needsLink is a set of identifiers that still need to be
+	// linked, where value == key, to avoid an allocation in func
+	// linkedField.
+	needsLink := make(map[string]string)
+
 	for _, f := range st.Fields.List {
 		if len(f.Names) == 0 {
 			continue
 		}
 		fieldName := f.Names[0].Name
-		scratch.Reset()
-		var added bool
-		foreachLine(buf.Bytes(), func(line []byte) {
-			if !added && isLineForStructFieldID(line, fieldName) {
-				added = true
-				fmt.Fprintf(&scratch, `<span id="%s.%s"></span>`, name, fieldName)
-			}
-			scratch.Write(line)
-		})
-		buf.Reset()
-		buf.Write(scratch.Bytes())
+		needsLink[fieldName] = fieldName
 	}
+	var newBuf bytes.Buffer
+	foreachLine(buf.Bytes(), func(line []byte) {
+		if fieldName := linkedField(line, needsLink); fieldName != "" {
+			fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
+			delete(needsLink, fieldName)
+		}
+		newBuf.Write(line)
+	})
+	buf.Reset()
+	buf.Write(newBuf.Bytes())
 }
 
 // foreachLine calls fn for each line of in, where a line includes
@@ -270,9 +274,12 @@
 // commentPrefix is the line prefix for comments after they've been HTMLified.
 var commentPrefix = []byte(`<span class="comment">// `)
 
-// isLineForStructFieldID reports whether line is a line we should
-// add a <span id="#StructName.FieldName"> to. Only the fieldName is provided.
-func isLineForStructFieldID(line []byte, fieldName string) bool {
+// linkedField determines whether the given line starts with an
+// identifer in the provided ids map (mapping from identifier to the
+// same identifier). The line can start with either an identifier or
+// an identifier in a comment. If one matches, it returns the
+// identifier that matched. Otherwise it returns the empty string.
+func linkedField(line []byte, ids map[string]string) string {
 	line = bytes.TrimSpace(line)
 
 	// For fields with a doc string of the
@@ -292,13 +299,39 @@
 	//
 	// TODO: do this better, so it works for all
 	// comments, including unconventional ones.
-	// For comments
 	if bytes.HasPrefix(line, commentPrefix) {
-		if matchesIdentBoundary(line[len(commentPrefix):], fieldName) {
-			return true
-		}
+		line = line[len(commentPrefix):]
 	}
-	return matchesIdentBoundary(line, fieldName)
+	id := scanIdentifier(line)
+	if len(id) == 0 {
+		// No leading identifier. Avoid map lookup for
+		// somewhat common case.
+		return ""
+	}
+	return ids[string(id)]
+}
+
+// scanIdentifier scans a valid Go identifier off the front of v and
+// either returns a subslice of v if there's a valid identifier, or
+// returns a zero-length slice.
+func scanIdentifier(v []byte) []byte {
+	var n int // number of leading bytes of v belonging to an identifier
+	for {
+		r, width := utf8.DecodeRune(v[n:])
+		if !(isLetter(r) || n > 0 && isDigit(r)) {
+			break
+		}
+		n += width
+	}
+	return v[:n]
+}
+
+func isLetter(ch rune) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
+}
+
+func isDigit(ch rune) bool {
+	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
 }
 
 // matchesIdentBoundary reports whether line matches /^ident\b/.