internal/godoc: split text formatting into internal/texthtml

Change-Id: Iaef43e66c32dbeb7c3fc3a4a769112d153c57ce1
Reviewed-on: https://go-review.googlesource.com/c/website/+/295409
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>

commit: 4eb9b856a32a6593f95a0083eff258e19ca35647 [log] [tgz]
author: Russ Cox <rsc@golang.org> Wed Feb 17 22:50:57 2021 -0500
committer: Russ Cox <rsc@golang.org> Fri Mar 12 19:58:00 2021 +0000
tree: e74b377024c36c46ff96e7a458c42ac196daa3c9
parent: 2d643c47cec3fefc956fac35b780e9e6ddaf9c49 [diff]
diff --git a/internal/godoc/format.go b/internal/godoc/format.go
deleted file mode 100644
index e005dcb..0000000
--- a/internal/godoc/format.go
+++ /dev/null

@@ -1,374 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.16
-// +build go1.16
-
-// This file implements FormatSelections and FormatText.
-// FormatText is used to HTML-format Go and non-Go source
-// text with line numbers and highlighted sections. It is
-// built on top of FormatSelections, a generic formatter
-// for "selected" text.
-
-package godoc
-
-import (
-	"fmt"
-	"go/scanner"
-	"go/token"
-	"io"
-	"regexp"
-	"strconv"
-	"text/template"
-)
-
-// ----------------------------------------------------------------------------
-// Implementation of FormatSelections
-
-// A Segment describes a text segment [start, end).
-// The zero value of a Segment is a ready-to-use empty segment.
-//
-type Segment struct {
-	start, end int
-}
-
-func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }
-
-// A Selection is an "iterator" function returning a text segment.
-// Repeated calls to a selection return consecutive, non-overlapping,
-// non-empty segments, followed by an infinite sequence of empty
-// segments. The first empty segment marks the end of the selection.
-//
-type Selection func() Segment
-
-// A LinkWriter writes some start or end "tag" to w for the text offset offs.
-// It is called by FormatSelections at the start or end of each link segment.
-//
-type LinkWriter func(w io.Writer, offs int, start bool)
-
-// A SegmentWriter formats a text according to selections and writes it to w.
-// The selections parameter is a bit set indicating which selections provided
-// to FormatSelections overlap with the text segment: If the n'th bit is set
-// in selections, the n'th selection provided to FormatSelections is overlapping
-// with the text.
-//
-type SegmentWriter func(w io.Writer, text []byte, selections int)
-
-// FormatSelections takes a text and writes it to w using link and segment
-// writers lw and sw as follows: lw is invoked for consecutive segment starts
-// and ends as specified through the links selection, and sw is invoked for
-// consecutive segments of text overlapped by the same selections as specified
-// by selections. The link writer lw may be nil, in which case the links
-// Selection is ignored.
-//
-func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
-	// If we have a link writer, make the links
-	// selection the last entry in selections
-	if lw != nil {
-		selections = append(selections, links)
-	}
-
-	// compute the sequence of consecutive segment changes
-	changes := newMerger(selections)
-
-	// The i'th bit in bitset indicates that the text
-	// at the current offset is covered by selections[i].
-	bitset := 0
-	lastOffs := 0
-
-	// Text segments are written in a delayed fashion
-	// such that consecutive segments belonging to the
-	// same selection can be combined (peephole optimization).
-	// last describes the last segment which has not yet been written.
-	var last struct {
-		begin, end int // valid if begin < end
-		bitset     int
-	}
-
-	// flush writes the last delayed text segment
-	flush := func() {
-		if last.begin < last.end {
-			sw(w, text[last.begin:last.end], last.bitset)
-		}
-		last.begin = last.end // invalidate last
-	}
-
-	// segment runs the segment [lastOffs, end) with the selection
-	// indicated by bitset through the segment peephole optimizer.
-	segment := func(end int) {
-		if lastOffs < end { // ignore empty segments
-			if last.end != lastOffs || last.bitset != bitset {
-				// the last segment is not adjacent to or
-				// differs from the new one
-				flush()
-				// start a new segment
-				last.begin = lastOffs
-			}
-			last.end = end
-			last.bitset = bitset
-		}
-	}
-
-	for {
-		// get the next segment change
-		index, offs, start := changes.next()
-		if index < 0 || offs > len(text) {
-			// no more segment changes or the next change
-			// is past the end of the text - we're done
-			break
-		}
-		// determine the kind of segment change
-		if lw != nil && index == len(selections)-1 {
-			// we have a link segment change (see start of this function):
-			// format the previous selection segment, write the
-			// link tag and start a new selection segment
-			segment(offs)
-			flush()
-			lastOffs = offs
-			lw(w, offs, start)
-		} else {
-			// we have a selection change:
-			// format the previous selection segment, determine
-			// the new selection bitset and start a new segment
-			segment(offs)
-			lastOffs = offs
-			mask := 1 << uint(index)
-			if start {
-				bitset |= mask
-			} else {
-				bitset &^= mask
-			}
-		}
-	}
-	segment(len(text))
-	flush()
-}
-
-// A merger merges a slice of Selections and produces a sequence of
-// consecutive segment change events through repeated next() calls.
-//
-type merger struct {
-	selections []Selection
-	segments   []Segment // segments[i] is the next segment of selections[i]
-}
-
-const infinity int = 2e9
-
-func newMerger(selections []Selection) *merger {
-	segments := make([]Segment, len(selections))
-	for i, sel := range selections {
-		segments[i] = Segment{infinity, infinity}
-		if sel != nil {
-			if seg := sel(); !seg.isEmpty() {
-				segments[i] = seg
-			}
-		}
-	}
-	return &merger{selections, segments}
-}
-
-// next returns the next segment change: index specifies the Selection
-// to which the segment belongs, offs is the segment start or end offset
-// as determined by the start value. If there are no more segment changes,
-// next returns an index value < 0.
-//
-func (m *merger) next() (index, offs int, start bool) {
-	// find the next smallest offset where a segment starts or ends
-	offs = infinity
-	index = -1
-	for i, seg := range m.segments {
-		switch {
-		case seg.start < offs:
-			offs = seg.start
-			index = i
-			start = true
-		case seg.end < offs:
-			offs = seg.end
-			index = i
-			start = false
-		}
-	}
-	if index < 0 {
-		// no offset found => all selections merged
-		return
-	}
-	// offset found - it's either the start or end offset but
-	// either way it is ok to consume the start offset: set it
-	// to infinity so it won't be considered in the following
-	// next call
-	m.segments[index].start = infinity
-	if start {
-		return
-	}
-	// end offset found - consume it
-	m.segments[index].end = infinity
-	// advance to the next segment for that selection
-	seg := m.selections[index]()
-	if !seg.isEmpty() {
-		m.segments[index] = seg
-	}
-	return
-}
-
-// ----------------------------------------------------------------------------
-// Implementation of FormatText
-
-// lineSelection returns the line segments for text as a Selection.
-func lineSelection(text []byte) Selection {
-	i, j := 0, 0
-	return func() (seg Segment) {
-		// find next newline, if any
-		for j < len(text) {
-			j++
-			if text[j-1] == '\n' {
-				break
-			}
-		}
-		if i < j {
-			// text[i:j] constitutes a line
-			seg = Segment{i, j}
-			i = j
-		}
-		return
-	}
-}
-
-// tokenSelection returns, as a selection, the sequence of
-// consecutive occurrences of token sel in the Go src text.
-//
-func tokenSelection(src []byte, sel token.Token) Selection {
-	var s scanner.Scanner
-	fset := token.NewFileSet()
-	file := fset.AddFile("", fset.Base(), len(src))
-	s.Init(file, src, nil, scanner.ScanComments)
-	return func() (seg Segment) {
-		for {
-			pos, tok, lit := s.Scan()
-			if tok == token.EOF {
-				break
-			}
-			offs := file.Offset(pos)
-			if tok == sel {
-				seg = Segment{offs, offs + len(lit)}
-				break
-			}
-		}
-		return
-	}
-}
-
-// makeSelection is a helper function to make a Selection from a slice of pairs.
-// Pairs describing empty segments are ignored.
-//
-func makeSelection(matches [][]int) Selection {
-	i := 0
-	return func() Segment {
-		for i < len(matches) {
-			m := matches[i]
-			i++
-			if m[0] < m[1] {
-				// non-empty segment
-				return Segment{m[0], m[1]}
-			}
-		}
-		return Segment{}
-	}
-}
-
-// regexpSelection computes the Selection for the regular expression expr in text.
-func regexpSelection(text []byte, expr string) Selection {
-	var matches [][]int
-	if rx, err := regexp.Compile(expr); err == nil {
-		matches = rx.FindAllIndex(text, -1)
-	}
-	return makeSelection(matches)
-}
-
-var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
-
-// RangeSelection computes the Selection for a text range described
-// by the argument str; the range description must match the selRx
-// regular expression.
-func RangeSelection(str string) Selection {
-	m := selRx.FindStringSubmatch(str)
-	if len(m) >= 2 {
-		from, _ := strconv.Atoi(m[1])
-		to, _ := strconv.Atoi(m[2])
-		if from < to {
-			return makeSelection([][]int{{from, to}})
-		}
-	}
-	return nil
-}
-
-// Span tags for all the possible selection combinations that may
-// be generated by FormatText. Selections are indicated by a bitset,
-// and the value of the bitset specifies the tag to be used.
-//
-// bit 0: comments
-// bit 1: highlights
-// bit 2: selections
-//
-var startTags = [][]byte{
-	/* 000 */ []byte(``),
-	/* 001 */ []byte(`<span class="comment">`),
-	/* 010 */ []byte(`<span class="highlight">`),
-	/* 011 */ []byte(`<span class="highlight-comment">`),
-	/* 100 */ []byte(`<span class="selection">`),
-	/* 101 */ []byte(`<span class="selection-comment">`),
-	/* 110 */ []byte(`<span class="selection-highlight">`),
-	/* 111 */ []byte(`<span class="selection-highlight-comment">`),
-}
-
-var endTag = []byte(`</span>`)
-
-func selectionTag(w io.Writer, text []byte, selections int) {
-	if selections < len(startTags) {
-		if tag := startTags[selections]; len(tag) > 0 {
-			w.Write(tag)
-			template.HTMLEscape(w, text)
-			w.Write(endTag)
-			return
-		}
-	}
-	template.HTMLEscape(w, text)
-}
-
-// FormatText HTML-escapes text and writes it to w.
-// Consecutive text segments are wrapped in HTML spans (with tags as
-// defined by startTags and endTag) as follows:
-//
-//	- if line >= 0, line number (ln) spans are inserted before each line,
-//	  starting with the value of line
-//	- if the text is Go source, comments get the "comment" span class
-//	- each occurrence of the regular expression pattern gets the "highlight"
-//	  span class
-//	- text segments covered by selection get the "selection" span class
-//
-// Comments, highlights, and selections may overlap arbitrarily; the respective
-// HTML span classes are specified in the startTags variable.
-//
-func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
-	var comments, highlights Selection
-	if goSource {
-		comments = tokenSelection(text, token.COMMENT)
-	}
-	if pattern != "" {
-		highlights = regexpSelection(text, pattern)
-	}
-	if line >= 0 || comments != nil || highlights != nil || selection != nil {
-		var lineTag LinkWriter
-		if line >= 0 {
-			lineTag = func(w io.Writer, _ int, start bool) {
-				if start {
-					fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line)
-					line++
-				}
-			}
-		}
-		FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
-	} else {
-		template.HTMLEscape(w, text)
-	}
-}

diff --git a/internal/godoc/godoc.go b/internal/godoc/godoc.go
index c9b1836..25b3063 100644
--- a/internal/godoc/godoc.go
+++ b/internal/godoc/godoc.go

@@ -31,6 +31,8 @@
 	"time"
 	"unicode"
 	"unicode/utf8"
+
+	"golang.org/x/website/internal/texthtml"
 )
 
 // Fake relative package path for built-ins. Documentation for all globals
@@ -127,150 +129,17 @@
 	p.writeNode(&buf1, info, info.FSet, node)
 
 	var buf2 bytes.Buffer
-	if n, _ := node.(ast.Node); n != nil && linkify && p.DeclLinks {
-		LinkifyText(&buf2, buf1.Bytes(), n)
-		if st, name := isStructTypeDecl(n); st != nil {
-			addStructFieldIDAttributes(&buf2, name, st)
-		}
-	} else {
-		FormatText(&buf2, buf1.Bytes(), -1, true, "", nil)
+	var n ast.Node
+	if linkify && p.DeclLinks {
+		n, _ = node.(ast.Node)
 	}
-
+	buf2.Write(texthtml.Format(buf1.Bytes(), texthtml.Config{
+		AST:        n,
+		GoComments: true,
+	}))
 	return buf2.String()
 }
 
-// isStructTypeDecl checks whether n is a struct declaration.
-// It either returns a non-nil StructType and its name, or zero values.
-func isStructTypeDecl(n ast.Node) (st *ast.StructType, name string) {
-	gd, ok := n.(*ast.GenDecl)
-	if !ok || gd.Tok != token.TYPE {
-		return nil, ""
-	}
-	if gd.Lparen > 0 {
-		// Parenthesized type. Who does that, anyway?
-		// TODO: Reportedly gri does. Fix this to handle that too.
-		return nil, ""
-	}
-	if len(gd.Specs) != 1 {
-		return nil, ""
-	}
-	ts, ok := gd.Specs[0].(*ast.TypeSpec)
-	if !ok {
-		return nil, ""
-	}
-	st, ok = ts.Type.(*ast.StructType)
-	if !ok {
-		return nil, ""
-	}
-	return st, ts.Name.Name
-}
-
-// addStructFieldIDAttributes modifies the contents of buf such that
-// all struct fields of the named struct have <span id='name.Field'>
-// in them, so people can link to /#Struct.Field.
-func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructType) {
-	if st.Fields == nil {
-		return
-	}
-	// needsLink is a set of identifiers that still need to be
-	// linked, where value == key, to avoid an allocation in func
-	// linkedField.
-	needsLink := make(map[string]string)
-
-	for _, f := range st.Fields.List {
-		if len(f.Names) == 0 {
-			continue
-		}
-		fieldName := f.Names[0].Name
-		needsLink[fieldName] = fieldName
-	}
-	var newBuf bytes.Buffer
-	foreachLine(buf.Bytes(), func(line []byte) {
-		if fieldName := linkedField(line, needsLink); fieldName != "" {
-			fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
-			delete(needsLink, fieldName)
-		}
-		newBuf.Write(line)
-	})
-	buf.Reset()
-	buf.Write(newBuf.Bytes())
-}
-
-// foreachLine calls fn for each line of in, where a line includes
-// the trailing "\n", except on the last line, if it doesn't exist.
-func foreachLine(in []byte, fn func(line []byte)) {
-	for len(in) > 0 {
-		nl := bytes.IndexByte(in, '\n')
-		if nl == -1 {
-			fn(in)
-			return
-		}
-		fn(in[:nl+1])
-		in = in[nl+1:]
-	}
-}
-
-// commentPrefix is the line prefix for comments after they've been HTMLified.
-var commentPrefix = []byte(`<span class="comment">// `)
-
-// linkedField determines whether the given line starts with an
-// identifier in the provided ids map (mapping from identifier to the
-// same identifier). The line can start with either an identifier or
-// an identifier in a comment. If one matches, it returns the
-// identifier that matched. Otherwise it returns the empty string.
-func linkedField(line []byte, ids map[string]string) string {
-	line = bytes.TrimSpace(line)
-
-	// For fields with a doc string of the
-	// conventional form, we put the new span into
-	// the comment instead of the field.
-	// The "conventional" form is a complete sentence
-	// per https://golang.org/s/style#comment-sentences like:
-	//
-	//    // Foo is an optional Fooer to foo the foos.
-	//    Foo Fooer
-	//
-	// In this case, we want the #StructName.Foo
-	// link to make the browser go to the comment
-	// line "Foo is an optional Fooer" instead of
-	// the "Foo Fooer" line, which could otherwise
-	// obscure the docs above the browser's "fold".
-	//
-	// TODO: do this better, so it works for all
-	// comments, including unconventional ones.
-	line = bytes.TrimPrefix(line, commentPrefix)
-	id := scanIdentifier(line)
-	if len(id) == 0 {
-		// No leading identifier. Avoid map lookup for
-		// somewhat common case.
-		return ""
-	}
-	return ids[string(id)]
-}
-
-// scanIdentifier scans a valid Go identifier off the front of v and
-// either returns a subslice of v if there's a valid identifier, or
-// returns a zero-length slice.
-func scanIdentifier(v []byte) []byte {
-	var n int // number of leading bytes of v belonging to an identifier
-	for {
-		r, width := utf8.DecodeRune(v[n:])
-		if !(isLetter(r) || n > 0 && isDigit(r)) {
-			break
-		}
-		n += width
-	}
-	return v[:n]
-}
-
-func isLetter(ch rune) bool {
-	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
-}
-
-func isDigit(ch rune) bool {
-	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
-}
-
 func comment_htmlFunc(comment string) string {
 	var buf bytes.Buffer
 	// TODO(gri) Provide list of words (e.g. function parameters)

diff --git a/internal/godoc/godoc_test.go b/internal/godoc/godoc_test.go
index fd65c7e..69914cb 100644
--- a/internal/godoc/godoc_test.go
+++ b/internal/godoc/godoc_test.go

@@ -250,25 +250,6 @@
 	return buf.String()
 }
 
-func TestScanIdentifier(t *testing.T) {
-	tests := []struct {
-		in, want string
-	}{
-		{"foo bar", "foo"},
-		{"foo/bar", "foo"},
-		{" foo", ""},
-		{"фоо", "фоо"},
-		{"f123", "f123"},
-		{"123f", ""},
-	}
-	for _, tt := range tests {
-		got := scanIdentifier([]byte(tt.in))
-		if string(got) != tt.want {
-			t.Errorf("scanIdentifier(%q) = %q; want %q", tt.in, got, tt.want)
-		}
-	}
-}
-
 func TestReplaceLeadingIndentation(t *testing.T) {
 	oldIndent := strings.Repeat(" ", 2)
 	newIndent := strings.Repeat(" ", 4)

diff --git a/internal/godoc/linkify.go b/internal/godoc/linkify.go
deleted file mode 100644
index 3f44ee6..0000000
--- a/internal/godoc/linkify.go
+++ /dev/null

@@ -1,198 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.16
-// +build go1.16
-
-// This file implements LinkifyText which introduces
-// links for identifiers pointing to their declarations.
-// The approach does not cover all cases because godoc
-// doesn't have complete type information, but it's
-// reasonably good for browsing.
-
-package godoc
-
-import (
-	"fmt"
-	"go/ast"
-	"go/doc"
-	"go/token"
-	"io"
-	"strconv"
-)
-
-// LinkifyText HTML-escapes source text and writes it to w.
-// Identifiers that are in a "use" position (i.e., that are
-// not being declared), are wrapped with HTML links pointing
-// to the respective declaration, if possible. Comments are
-// formatted the same way as with FormatText.
-//
-func LinkifyText(w io.Writer, text []byte, n ast.Node) {
-	links := linksFor(n)
-
-	i := 0     // links index
-	prev := "" // prev HTML tag
-	linkWriter := func(w io.Writer, _ int, start bool) {
-		// end tag
-		if !start {
-			if prev != "" {
-				fmt.Fprintf(w, `</%s>`, prev)
-				prev = ""
-			}
-			return
-		}
-
-		// start tag
-		prev = ""
-		if i < len(links) {
-			switch info := links[i]; {
-			case info.path != "" && info.name == "":
-				// package path
-				fmt.Fprintf(w, `<a href="/pkg/%s/">`, info.path)
-				prev = "a"
-			case info.path != "" && info.name != "":
-				// qualified identifier
-				fmt.Fprintf(w, `<a href="/pkg/%s/#%s">`, info.path, info.name)
-				prev = "a"
-			case info.path == "" && info.name != "":
-				// local identifier
-				if info.isVal {
-					fmt.Fprintf(w, `<span id="%s">`, info.name)
-					prev = "span"
-				} else if ast.IsExported(info.name) {
-					fmt.Fprintf(w, `<a href="#%s">`, info.name)
-					prev = "a"
-				}
-			}
-			i++
-		}
-	}
-
-	idents := tokenSelection(text, token.IDENT)
-	comments := tokenSelection(text, token.COMMENT)
-	FormatSelections(w, text, linkWriter, idents, selectionTag, comments)
-}
-
-// A link describes the (HTML) link information for an identifier.
-// The zero value of a link represents "no link".
-//
-type link struct {
-	path, name string // package path, identifier name
-	isVal      bool   // identifier is defined in a const or var declaration
-}
-
-// linksFor returns the list of links for the identifiers used
-// by node in the same order as they appear in the source.
-//
-func linksFor(node ast.Node) (links []link) {
-	// linkMap tracks link information for each ast.Ident node. Entries may
-	// be created out of source order (for example, when we visit a parent
-	// definition node). These links are appended to the returned slice when
-	// their ast.Ident nodes are visited.
-	linkMap := make(map[*ast.Ident]link)
-
-	ast.Inspect(node, func(node ast.Node) bool {
-		switch n := node.(type) {
-		case *ast.Field:
-			for _, n := range n.Names {
-				linkMap[n] = link{}
-			}
-		case *ast.ImportSpec:
-			if name := n.Name; name != nil {
-				linkMap[name] = link{}
-			}
-		case *ast.ValueSpec:
-			for _, n := range n.Names {
-				linkMap[n] = link{name: n.Name, isVal: true}
-			}
-		case *ast.FuncDecl:
-			linkMap[n.Name] = link{}
-		case *ast.TypeSpec:
-			linkMap[n.Name] = link{}
-		case *ast.AssignStmt:
-			// Short variable declarations only show up if we apply
-			// this code to all source code (as opposed to exported
-			// declarations only).
-			if n.Tok == token.DEFINE {
-				// Some of the lhs variables may be re-declared,
-				// so technically they are not defs. We don't
-				// care for now.
-				for _, x := range n.Lhs {
-					// Each lhs expression should be an
-					// ident, but we are conservative and check.
-					if n, _ := x.(*ast.Ident); n != nil {
-						linkMap[n] = link{isVal: true}
-					}
-				}
-			}
-		case *ast.SelectorExpr:
-			// Detect qualified identifiers of the form pkg.ident.
-			// If anything fails we return true and collect individual
-			// identifiers instead.
-			if x, _ := n.X.(*ast.Ident); x != nil {
-				// Create links only if x is a qualified identifier.
-				if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
-					if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
-						// spec.Path.Value is the import path
-						if path, err := strconv.Unquote(spec.Path.Value); err == nil {
-							// Register two links, one for the package
-							// and one for the qualified identifier.
-							linkMap[x] = link{path: path}
-							linkMap[n.Sel] = link{path: path, name: n.Sel.Name}
-						}
-					}
-				}
-			}
-		case *ast.CompositeLit:
-			// Detect field names within composite literals. These links should
-			// be prefixed by the type name.
-			fieldPath := ""
-			prefix := ""
-			switch typ := n.Type.(type) {
-			case *ast.Ident:
-				prefix = typ.Name + "."
-			case *ast.SelectorExpr:
-				if x, _ := typ.X.(*ast.Ident); x != nil {
-					// Create links only if x is a qualified identifier.
-					if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
-						if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
-							// spec.Path.Value is the import path
-							if path, err := strconv.Unquote(spec.Path.Value); err == nil {
-								// Register two links, one for the package
-								// and one for the qualified identifier.
-								linkMap[x] = link{path: path}
-								linkMap[typ.Sel] = link{path: path, name: typ.Sel.Name}
-								fieldPath = path
-								prefix = typ.Sel.Name + "."
-							}
-						}
-					}
-				}
-			}
-			for _, e := range n.Elts {
-				if kv, ok := e.(*ast.KeyValueExpr); ok {
-					if k, ok := kv.Key.(*ast.Ident); ok {
-						// Note: there is some syntactic ambiguity here. We cannot determine
-						// if this is a struct literal or a map literal without type
-						// information. We assume struct literal.
-						name := prefix + k.Name
-						linkMap[k] = link{path: fieldPath, name: name}
-					}
-				}
-			}
-		case *ast.Ident:
-			if l, ok := linkMap[n]; ok {
-				links = append(links, l)
-			} else {
-				l := link{name: n.Name}
-				if n.Obj == nil && doc.IsPredeclared(n.Name) {
-					l.path = builtinPkgPath
-				}
-				links = append(links, l)
-			}
-		}
-		return true
-	})
-	return
-}

diff --git a/internal/godoc/server.go b/internal/godoc/server.go
index b16449c..2cf200b 100644
--- a/internal/godoc/server.go
+++ b/internal/godoc/server.go

@@ -25,10 +25,14 @@
 	"os"
 	pathpkg "path"
 	"path/filepath"
+	"regexp"
 	"sort"
+	"strconv"
 	"strings"
 	"text/template"
 	"time"
+
+	"golang.org/x/website/internal/texthtml"
 )
 
 // handlerServer is a migration from an old godoc http Handler type.
@@ -555,6 +559,23 @@
 	return
 }
 
+var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
+
+// rangeSelection computes the Selection for a text range described
+// by the argument str, of the form Start:End, where Start and End
+// are decimal byte offsets.
+func rangeSelection(str string) texthtml.Selection {
+	m := selRx.FindStringSubmatch(str)
+	if len(m) >= 2 {
+		from, _ := strconv.Atoi(m[1])
+		to, _ := strconv.Atoi(m[2])
+		if from < to {
+			return texthtml.Spans(texthtml.Span{Start: from, End: to})
+		}
+	}
+	return nil
+}
+
 func (p *Presentation) serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) {
 	src, err := fs.ReadFile(p.Corpus.fs, toFS(abspath))
 	if err != nil {
@@ -568,19 +589,18 @@
 		return
 	}
 
-	h := r.FormValue("h")
-	s := RangeSelection(r.FormValue("s"))
+	cfg := texthtml.Config{
+		GoComments: pathpkg.Ext(abspath) == ".go",
+		Highlight:  r.FormValue("h"),
+		Selection:  rangeSelection(r.FormValue("s")),
+		Line:       1,
+	}
 
 	var buf bytes.Buffer
-	if pathpkg.Ext(abspath) == ".go" {
-		buf.WriteString("<pre>")
-		formatGoSource(&buf, src, h, s)
-		buf.WriteString("</pre>")
-	} else {
-		buf.WriteString("<pre>")
-		FormatText(&buf, src, 1, false, h, s)
-		buf.WriteString("</pre>")
-	}
+	buf.WriteString("<pre>")
+	buf.Write(texthtml.Format(src, cfg))
+	buf.WriteString("</pre>")
+
 	fmt.Fprintf(&buf, `<p><a href="/%s?m=text">View as plain text</a></p>`, htmlpkg.EscapeString(relpath))
 
 	p.ServePage(w, Page{
@@ -592,49 +612,6 @@
 	})
 }
 
-// formatGoSource HTML-escapes Go source text and writes it to w.
-func formatGoSource(buf *bytes.Buffer, text []byte, pattern string, selection Selection) {
-	// Emit to a temp buffer so that we can add line anchors at the end.
-	saved, buf := buf, new(bytes.Buffer)
-
-	comments := tokenSelection(text, token.COMMENT)
-	var highlights Selection
-	if pattern != "" {
-		highlights = regexpSelection(text, pattern)
-	}
-
-	FormatSelections(buf, text, nil, nil, selectionTag, comments, highlights, selection)
-
-	// Now copy buf to saved, adding line anchors.
-
-	// The lineSelection mechanism can't be composed with our
-	// linkWriter, so we have to add line spans as another pass.
-	n := 1
-	for _, line := range bytes.Split(buf.Bytes(), []byte("\n")) {
-		// The line numbers are inserted into the document via a CSS ::before
-		// pseudo-element. This prevents them from being copied when users
-		// highlight and copy text.
-		// ::before is supported in 98% of browsers: https://caniuse.com/#feat=css-gencontent
-		// This is also the trick Github uses to hide line numbers.
-		//
-		// The first tab for the code snippet needs to start in column 9, so
-		// it indents a full 8 spaces, hence the two nbsp's. Otherwise the tab
-		// character only indents a short amount.
-		//
-		// Due to rounding and font width Firefox might not treat 8 rendered
-		// characters as 8 characters wide, and subsequently may treat the tab
-		// character in the 9th position as moving the width from (7.5 or so) up
-		// to 8. See
-		// https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
-		// for a fuller explanation. The solution is to add a CSS class to
-		// explicitly declare the width to be 8 characters.
-		fmt.Fprintf(saved, `<span id="L%d" class="ln">%6d&nbsp;&nbsp;</span>`, n, n)
-		n++
-		saved.Write(line)
-		saved.WriteByte('\n')
-	}
-}
-
 func (p *Presentation) serveDirectory(w http.ResponseWriter, r *http.Request, abspath, relpath string) {
 	if redirect(w, r) {
 		return

diff --git a/internal/godoc/template.go b/internal/godoc/template.go
index 3e52453..5baa140 100644
--- a/internal/godoc/template.go
+++ b/internal/godoc/template.go

@@ -41,6 +41,8 @@
 	"log"
 	"regexp"
 	"strings"
+
+	"golang.org/x/website/internal/texthtml"
 )
 
 // Functions in this file panic on error, but the panic is recovered
@@ -100,7 +102,7 @@
 	text = strings.Replace(text, "\t", "    ", -1)
 	var buf bytes.Buffer
 	// HTML-escape text and syntax-color comments like elsewhere.
-	FormatText(&buf, []byte(text), -1, true, "", nil)
+	buf.Write(texthtml.Format([]byte(text), texthtml.Config{GoComments: true}))
 	// Include the command as a comment.
 	text = fmt.Sprintf("<pre><!--{{%s}}\n-->%s</pre>", command, buf.Bytes())
 	return text, nil

diff --git a/internal/texthtml/ast.go b/internal/texthtml/ast.go
new file mode 100644
index 0000000..76bd4bb
--- /dev/null
+++ b/internal/texthtml/ast.go

@@ -0,0 +1,298 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package texthtml
+
+import (
+	"bytes"
+	"fmt"
+	"go/ast"
+	"go/doc"
+	"go/token"
+	"strconv"
+	"unicode"
+	"unicode/utf8"
+)
+
+// A goLink describes the (HTML) link information for a Go identifier.
+// The zero value of a link represents "no link".
+type goLink struct {
+	path, name string // package path, identifier name
+	isVal      bool   // identifier is defined in a const or var declaration
+}
+
+func (l *goLink) tags() (start, end string) {
+	switch {
+	case l.path != "" && l.name == "":
+		// package path
+		return `<a href="/pkg/` + l.path + `/">`, `</a>`
+	case l.path != "" && l.name != "":
+		// qualified identifier
+		return `<a href="/pkg/` + l.path + `/#` + l.name + `">`, `</a>`
+	case l.path == "" && l.name != "":
+		// local identifier
+		if l.isVal {
+			return `<span id="` + l.name + `">`, `</span>`
+		}
+		if ast.IsExported(l.name) {
+			return `<a href="#` + l.name + `">`, `</a>`
+		}
+	}
+	return "", ""
+}
+
+// goLinksFor returns the list of links for the identifiers used
+// by node in the same order as they appear in the source.
+func goLinksFor(node ast.Node) (links []goLink) {
+	// linkMap tracks link information for each ast.Ident node. Entries may
+	// be created out of source order (for example, when we visit a parent
+	// definition node). These links are appended to the returned slice when
+	// their ast.Ident nodes are visited.
+	linkMap := make(map[*ast.Ident]goLink)
+
+	ast.Inspect(node, func(node ast.Node) bool {
+		switch n := node.(type) {
+		case *ast.Field:
+			for _, n := range n.Names {
+				linkMap[n] = goLink{}
+			}
+		case *ast.ImportSpec:
+			if name := n.Name; name != nil {
+				linkMap[name] = goLink{}
+			}
+		case *ast.ValueSpec:
+			for _, n := range n.Names {
+				linkMap[n] = goLink{name: n.Name, isVal: true}
+			}
+		case *ast.FuncDecl:
+			linkMap[n.Name] = goLink{}
+		case *ast.TypeSpec:
+			linkMap[n.Name] = goLink{}
+		case *ast.AssignStmt:
+			// Short variable declarations only show up if we apply
+			// this code to all source code (as opposed to exported
+			// declarations only).
+			if n.Tok == token.DEFINE {
+				// Some of the lhs variables may be re-declared,
+				// so technically they are not defs. We don't
+				// care for now.
+				for _, x := range n.Lhs {
+					// Each lhs expression should be an
+					// ident, but we are conservative and check.
+					if n, _ := x.(*ast.Ident); n != nil {
+						linkMap[n] = goLink{isVal: true}
+					}
+				}
+			}
+		case *ast.SelectorExpr:
+			// Detect qualified identifiers of the form pkg.ident.
+			// If anything fails we return true and collect individual
+			// identifiers instead.
+			if x, _ := n.X.(*ast.Ident); x != nil {
+				// Create links only if x is a qualified identifier.
+				if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
+					if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
+						// spec.Path.Value is the import path
+						if path, err := strconv.Unquote(spec.Path.Value); err == nil {
+							// Register two links, one for the package
+							// and one for the qualified identifier.
+							linkMap[x] = goLink{path: path}
+							linkMap[n.Sel] = goLink{path: path, name: n.Sel.Name}
+						}
+					}
+				}
+			}
+		case *ast.CompositeLit:
+			// Detect field names within composite literals. These links should
+			// be prefixed by the type name.
+			fieldPath := ""
+			prefix := ""
+			switch typ := n.Type.(type) {
+			case *ast.Ident:
+				prefix = typ.Name + "."
+			case *ast.SelectorExpr:
+				if x, _ := typ.X.(*ast.Ident); x != nil {
+					// Create links only if x is a qualified identifier.
+					if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
+						if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
+							// spec.Path.Value is the import path
+							if path, err := strconv.Unquote(spec.Path.Value); err == nil {
+								// Register two links, one for the package
+								// and one for the qualified identifier.
+								linkMap[x] = goLink{path: path}
+								linkMap[typ.Sel] = goLink{path: path, name: typ.Sel.Name}
+								fieldPath = path
+								prefix = typ.Sel.Name + "."
+							}
+						}
+					}
+				}
+			}
+			for _, e := range n.Elts {
+				if kv, ok := e.(*ast.KeyValueExpr); ok {
+					if k, ok := kv.Key.(*ast.Ident); ok {
+						// Note: there is some syntactic ambiguity here. We cannot determine
+						// if this is a struct literal or a map literal without type
+						// information. We assume struct literal.
+						name := prefix + k.Name
+						linkMap[k] = goLink{path: fieldPath, name: name}
+					}
+				}
+			}
+		case *ast.Ident:
+			if l, ok := linkMap[n]; ok {
+				links = append(links, l)
+			} else {
+				l := goLink{name: n.Name}
+				if n.Obj == nil && doc.IsPredeclared(n.Name) {
+					l.path = "builtin"
+				}
+				links = append(links, l)
+			}
+		}
+		return true
+	})
+	return
+}
+
+// postFormatAST makes any appropriate changes to the formatting of node in buf.
+// Specifically, it adds span links to each struct field, so they can be linked properly.
+// TODO(rsc): Why not do this as part of the linking above?
+func postFormatAST(buf *bytes.Buffer, node ast.Node) {
+	if st, name := isStructTypeDecl(node); st != nil {
+		addStructFieldIDAttributes(buf, name, st)
+	}
+}
+
+// isStructTypeDecl checks whether n is a struct declaration.
+// It either returns a non-nil StructType and its name, or zero values.
+func isStructTypeDecl(n ast.Node) (st *ast.StructType, name string) {
+	gd, ok := n.(*ast.GenDecl)
+	if !ok || gd.Tok != token.TYPE {
+		return nil, ""
+	}
+	if gd.Lparen > 0 {
+		// Parenthesized type. Who does that, anyway?
+		// TODO: Reportedly gri does. Fix this to handle that too.
+		return nil, ""
+	}
+	if len(gd.Specs) != 1 {
+		return nil, ""
+	}
+	ts, ok := gd.Specs[0].(*ast.TypeSpec)
+	if !ok {
+		return nil, ""
+	}
+	st, ok = ts.Type.(*ast.StructType)
+	if !ok {
+		return nil, ""
+	}
+	return st, ts.Name.Name
+}
+
+// addStructFieldIDAttributes modifies the contents of buf, inserting an empty
+// <span id="name.Field"></span> before the first line that begins with the
+// first name of each named struct field, so people can link to /#Struct.Field.
+func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructType) {
+	if st.Fields == nil {
+		return
+	}
+	// needsLink is a set of identifiers that still need to be
+	// linked, where value == key, to avoid an allocation in func
+	// linkedField.
+	needsLink := make(map[string]string)
+
+	for _, f := range st.Fields.List {
+		if len(f.Names) == 0 {
+			continue
+		}
+		fieldName := f.Names[0].Name
+		needsLink[fieldName] = fieldName
+	}
+	var newBuf bytes.Buffer
+	foreachLine(buf.Bytes(), func(line []byte) {
+		if fieldName := linkedField(line, needsLink); fieldName != "" {
+			fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
+			delete(needsLink, fieldName)
+		}
+		newBuf.Write(line)
+	})
+	buf.Reset()
+	buf.Write(newBuf.Bytes())
+}
+
+// foreachLine calls fn for each line of in. Each line includes its
+// trailing "\n", except the last line when in does not end in "\n".
+func foreachLine(in []byte, fn func(line []byte)) {
+	for len(in) > 0 {
+		nl := bytes.IndexByte(in, '\n')
+		if nl == -1 {
+			fn(in)
+			return
+		}
+		fn(in[:nl+1])
+		in = in[nl+1:]
+	}
+}
+
+// commentPrefix is the line prefix for comments after they've been HTMLified.
+var commentPrefix = []byte(`<span class="comment">// `)
+
+// linkedField determines whether the given line starts with an
+// identifier in the provided ids map (mapping from identifier to the
+// same identifier). The line can start with either an identifier or
+// an identifier in a comment. If one matches, it returns the
+// identifier that matched. Otherwise it returns the empty string.
+func linkedField(line []byte, ids map[string]string) string {
+	line = bytes.TrimSpace(line)
+
+	// For fields with a doc string of the
+	// conventional form, we put the new span into
+	// the comment instead of the field.
+	// The "conventional" form is a complete sentence
+	// per https://golang.org/s/style#comment-sentences like:
+	//
+	//    // Foo is an optional Fooer to foo the foos.
+	//    Foo Fooer
+	//
+	// In this case, we want the #StructName.Foo
+	// link to make the browser go to the comment
+	// line "Foo is an optional Fooer" instead of
+	// the "Foo Fooer" line, which could otherwise
+	// obscure the docs above the browser's "fold".
+	//
+	// TODO: do this better, so it works for all
+	// comments, including unconventional ones.
+	line = bytes.TrimPrefix(line, commentPrefix)
+	id := scanIdentifier(line)
+	if len(id) == 0 {
+		// No leading identifier. Avoid map lookup for
+		// somewhat common case.
+		return ""
+	}
+	return ids[string(id)]
+}
+
+// scanIdentifier scans a valid Go identifier off the front of v and
+// either returns a subslice of v if there's a valid identifier, or
+// returns a zero-length slice.
+func scanIdentifier(v []byte) []byte {
+	var n int // number of leading bytes of v belonging to an identifier
+	for {
+		r, width := utf8.DecodeRune(v[n:])
+		if !(isLetter(r) || n > 0 && isDigit(r)) {
+			break
+		}
+		n += width
+	}
+	return v[:n]
+}
+
+func isLetter(ch rune) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
+}
+
+func isDigit(ch rune) bool {
+	return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
+}

diff --git a/internal/texthtml/texthtml.go b/internal/texthtml/texthtml.go
new file mode 100644
index 0000000..1175fe1
--- /dev/null
+++ b/internal/texthtml/texthtml.go

@@ -0,0 +1,355 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package texthtml formats text files to HTML.
+package texthtml
+
+import (
+	"bytes"
+	"fmt"
+	"go/ast"
+	"go/scanner"
+	"go/token"
+	"io"
+	"regexp"
+	"text/template"
+)
+
+// A Span describes a text span [start, end).
+// The zero value of a Span is an empty span.
+type Span struct {
+	Start, End int
+}
+
+func (s *Span) isEmpty() bool { return s.Start >= s.End }
+
+// A Selection is an "iterator" function returning a text span.
+// Repeated calls to a selection return consecutive, non-overlapping,
+// non-empty spans, followed by an infinite sequence of empty
+// spans. The first empty span marks the end of the selection.
+type Selection func() Span
+
+// A Config configures how to format text as HTML.
+type Config struct {
+	Line       int       // if >= 1, number lines beginning with number Line, with <span class="ln">
+	GoComments bool      // mark comments in Go text with <span class="comment">
+	Highlight  string    // highlight matches for this regexp with <span class="highlight">
+	Selection  Selection // mark selected spans with <span class="selection">
+	AST        ast.Node  // link uses to declarations, assuming text is formatting of AST
+}
+
+// Format formats text to HTML according to the configuration cfg.
+func Format(text []byte, cfg Config) (html []byte) {
+	var comments, highlights Selection
+	if cfg.GoComments {
+		comments = tokenSelection(text, token.COMMENT)
+	}
+	if cfg.Highlight != "" {
+		highlights = regexpSelection(text, cfg.Highlight)
+	}
+
+	var buf bytes.Buffer
+	var idents Selection = Spans()
+	var goLinks []goLink
+	if cfg.AST != nil {
+		idents = tokenSelection(text, token.IDENT)
+		goLinks = goLinksFor(cfg.AST)
+	}
+
+	formatSelections(&buf, text, goLinks, comments, highlights, cfg.Selection, idents)
+
+	if cfg.AST != nil {
+		postFormatAST(&buf, cfg.AST)
+	}
+
+	if cfg.Line > 0 {
+		// Add line numbers in a separate pass.
+		old := buf.Bytes()
+		buf = bytes.Buffer{}
+		n := cfg.Line
+		for _, line := range bytes.Split(old, []byte("\n")) {
+			// The line numbers are inserted into the document via a CSS ::before
+			// pseudo-element. This prevents them from being copied when users
+			// highlight and copy text.
+			// ::before is supported in 98% of browsers: https://caniuse.com/#feat=css-gencontent
+			// This is also the trick Github uses to hide line numbers.
+			//
+			// The first tab for the code snippet needs to start in column 9, so
+			// it indents a full 8 spaces, hence the two nbsp's. Otherwise the tab
+			// character only indents a short amount.
+			//
+			// Due to rounding and font width Firefox might not treat 8 rendered
+			// characters as 8 characters wide, and subsequently may treat the tab
+			// character in the 9th position as moving the width from (7.5 or so) up
+			// to 8. See
+			// https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
+			// for a fuller explanation. The solution is to add a CSS class to
+			// explicitly declare the width to be 8 characters.
+			fmt.Fprintf(&buf, `<span id="L%d" class="ln">%6d&nbsp;&nbsp;</span>`, n, n)
+			n++
+			buf.Write(line)
+			buf.WriteByte('\n')
+		}
+	}
+	return buf.Bytes()
+}
+
+// formatSelections HTML-escapes text and writes it to w. Each run of text
+// covered by the same combination of selections 0-2 (comments, highlights,
+// selection) is wrapped in the matching span tag by selectionTag. The
+// selection at index 3 marks Go identifiers: each of its spans is wrapped
+// in the start and end tags of the next unused entry of goLinks.
+func formatSelections(w io.Writer, text []byte, goLinks []goLink, selections ...Selection) {
+	// compute the sequence of consecutive span changes
+	changes := newMerger(selections)
+
+	// The i'th bit in bitset indicates that the text
+	// at the current offset is covered by selections[i].
+	bitset := 0
+	lastOffs := 0
+
+	// Text spans are written in a delayed fashion
+	// such that consecutive spans belonging to the
+	// same selection can be combined (peephole optimization).
+	// last describes the last span which has not yet been written.
+	var last struct {
+		begin, end int // valid if begin < end
+		bitset     int
+	}
+
+	// flush writes the last delayed text span
+	flush := func() {
+		if last.begin < last.end {
+			selectionTag(w, text[last.begin:last.end], last.bitset)
+		}
+		last.begin = last.end // invalidate last
+	}
+
+	// span runs the span [lastOffs, end) with the selection
+	// indicated by bitset through the span peephole optimizer.
+	span := func(end int) {
+		if lastOffs < end { // ignore empty spans
+			if last.end != lastOffs || last.bitset != bitset {
+				// the last span is not adjacent to or
+				// differs from the new one
+				flush()
+				// start a new span
+				last.begin = lastOffs
+			}
+			last.end = end
+			last.bitset = bitset
+		}
+	}
+
+	linkEnd := ""
+	for {
+		// get the next span change
+		index, offs, start := changes.next()
+		if index < 0 || offs > len(text) {
+			// no more span changes or the next change
+			// is past the end of the text - we're done
+			break
+		}
+
+		// format the previous selection span, determine
+		// the new selection bitset and start a new span
+		span(offs)
+		if index == 3 { // Go link
+			flush()
+			if start {
+				if len(goLinks) > 0 {
+					start, end := goLinks[0].tags()
+					io.WriteString(w, start)
+					linkEnd = end
+					goLinks = goLinks[1:]
+				}
+			} else {
+				if linkEnd != "" {
+					io.WriteString(w, linkEnd)
+					linkEnd = ""
+				}
+			}
+		} else {
+			mask := 1 << uint(index)
+			if start {
+				bitset |= mask
+			} else {
+				bitset &^= mask
+			}
+		}
+		lastOffs = offs
+	}
+	span(len(text))
+	flush()
+}
+
+// A merger merges a slice of Selections and produces a sequence of
+// consecutive span change events through repeated next() calls.
+type merger struct {
+	selections []Selection
+	spans      []Span // spans[i] is the next span of selections[i]
+}
+
+const infinity int = 2e9
+
+func newMerger(selections []Selection) *merger {
+	spans := make([]Span, len(selections))
+	for i, sel := range selections {
+		spans[i] = Span{infinity, infinity}
+		if sel != nil {
+			if seg := sel(); !seg.isEmpty() {
+				spans[i] = seg
+			}
+		}
+	}
+	return &merger{selections, spans}
+}
+
+// next returns the next span change: index specifies the Selection
+// to which the span belongs, offs is the span start or end offset
+// as determined by the start value. If there are no more span changes,
+// next returns an index value < 0.
+func (m *merger) next() (index, offs int, start bool) {
+	// find the next smallest offset where a span starts or ends
+	offs = infinity
+	index = -1
+	for i, seg := range m.spans {
+		switch {
+		case seg.Start < offs:
+			offs = seg.Start
+			index = i
+			start = true
+		case seg.End < offs:
+			offs = seg.End
+			index = i
+			start = false
+		}
+	}
+	if index < 0 {
+		// no offset found => all selections merged
+		return
+	}
+	// offset found - it's either the start or end offset but
+	// either way it is ok to consume the start offset: set it
+	// to infinity so it won't be considered in the following
+	// next call
+	m.spans[index].Start = infinity
+	if start {
+		return
+	}
+	// end offset found - consume it
+	m.spans[index].End = infinity
+	// advance to the next span for that selection
+	seg := m.selections[index]()
+	if !seg.isEmpty() {
+		m.spans[index] = seg
+	}
+	return
+}
+
+// lineSelection returns the line spans for text as a Selection.
+func lineSelection(text []byte) Selection {
+	i, j := 0, 0
+	return func() (seg Span) {
+		// find next newline, if any
+		for j < len(text) {
+			j++
+			if text[j-1] == '\n' {
+				break
+			}
+		}
+		if i < j {
+			// text[i:j] constitutes a line
+			seg = Span{i, j}
+			i = j
+		}
+		return
+	}
+}
+
+// tokenSelection returns, as a selection, the sequence of
+// consecutive occurrences of token sel in the Go src text.
+func tokenSelection(src []byte, sel token.Token) Selection {
+	var s scanner.Scanner
+	fset := token.NewFileSet()
+	file := fset.AddFile("", fset.Base(), len(src))
+	s.Init(file, src, nil, scanner.ScanComments)
+	return func() (seg Span) {
+		for {
+			pos, tok, lit := s.Scan()
+			if tok == token.EOF {
+				break
+			}
+			offs := file.Offset(pos)
+			if tok == sel {
+				seg = Span{offs, offs + len(lit)}
+				break
+			}
+		}
+		return
+	}
+}
+
+// Spans is a helper function to make a Selection from a slice of spans.
+// Empty spans are discarded.
+func Spans(spans ...Span) Selection {
+	i := 0
+	return func() Span {
+		for i < len(spans) {
+			s := spans[i]
+			i++
+			if s.Start < s.End {
+				// non-empty
+				return s
+			}
+		}
+		return Span{}
+	}
+}
+
+// regexpSelection computes the Selection for the regular expression expr in text.
+func regexpSelection(text []byte, expr string) Selection {
+	var matches [][]int
+	if rx, err := regexp.Compile(expr); err == nil {
+		matches = rx.FindAllIndex(text, -1)
+	}
+	var spans []Span
+	for _, m := range matches {
+		spans = append(spans, Span{m[0], m[1]})
+	}
+	return Spans(spans...)
+}
+
+// Span tags for all the possible selection combinations that may
+// be generated by Format. Selections are indicated by a bitset,
+// and the value of the bitset specifies the tag to be used.
+//
+// bit 0: comments
+// bit 1: highlights
+// bit 2: selections
+//
+var startTags = [][]byte{
+	/* 000 */ []byte(``),
+	/* 001 */ []byte(`<span class="comment">`),
+	/* 010 */ []byte(`<span class="highlight">`),
+	/* 011 */ []byte(`<span class="highlight-comment">`),
+	/* 100 */ []byte(`<span class="selection">`),
+	/* 101 */ []byte(`<span class="selection-comment">`),
+	/* 110 */ []byte(`<span class="selection-highlight">`),
+	/* 111 */ []byte(`<span class="selection-highlight-comment">`),
+}
+
+var endTag = []byte(`</span>`)
+
+func selectionTag(w io.Writer, text []byte, selections int) {
+	if selections < len(startTags) {
+		if tag := startTags[selections]; len(tag) > 0 {
+			w.Write(tag)
+			template.HTMLEscape(w, text)
+			w.Write(endTag)
+			return
+		}
+	}
+	template.HTMLEscape(w, text)
+}
commit	4eb9b856a32a6593f95a0083eff258e19ca35647	[log] [tgz]
author	Russ Cox <rsc@golang.org>	Wed Feb 17 22:50:57 2021 -0500
committer	Russ Cox <rsc@golang.org>	Fri Mar 12 19:58:00 2021 +0000
tree	e74b377024c36c46ff96e7a458c42ac196daa3c9
parent	2d643c47cec3fefc956fac35b780e9e6ddaf9c49 [diff]