internal/godoc: split text formatting into internal/texthtml
Change-Id: Iaef43e66c32dbeb7c3fc3a4a769112d153c57ce1
Reviewed-on: https://go-review.googlesource.com/c/website/+/295409
Trust: Russ Cox <rsc@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
diff --git a/internal/godoc/format.go b/internal/godoc/format.go
deleted file mode 100644
index e005dcb..0000000
--- a/internal/godoc/format.go
+++ /dev/null
@@ -1,374 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.16
-// +build go1.16
-
-// This file implements FormatSelections and FormatText.
-// FormatText is used to HTML-format Go and non-Go source
-// text with line numbers and highlighted sections. It is
-// built on top of FormatSelections, a generic formatter
-// for "selected" text.
-
-package godoc
-
-import (
- "fmt"
- "go/scanner"
- "go/token"
- "io"
- "regexp"
- "strconv"
- "text/template"
-)
-
-// ----------------------------------------------------------------------------
-// Implementation of FormatSelections
-
-// A Segment describes a text segment [start, end).
-// The zero value of a Segment is a ready-to-use empty segment.
-//
-type Segment struct {
- start, end int
-}
-
-func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }
-
-// A Selection is an "iterator" function returning a text segment.
-// Repeated calls to a selection return consecutive, non-overlapping,
-// non-empty segments, followed by an infinite sequence of empty
-// segments. The first empty segment marks the end of the selection.
-//
-type Selection func() Segment
-
-// A LinkWriter writes some start or end "tag" to w for the text offset offs.
-// It is called by FormatSelections at the start or end of each link segment.
-//
-type LinkWriter func(w io.Writer, offs int, start bool)
-
-// A SegmentWriter formats a text according to selections and writes it to w.
-// The selections parameter is a bit set indicating which selections provided
-// to FormatSelections overlap with the text segment: If the n'th bit is set
-// in selections, the n'th selection provided to FormatSelections is overlapping
-// with the text.
-//
-type SegmentWriter func(w io.Writer, text []byte, selections int)
-
-// FormatSelections takes a text and writes it to w using link and segment
-// writers lw and sw as follows: lw is invoked for consecutive segment starts
-// and ends as specified through the links selection, and sw is invoked for
-// consecutive segments of text overlapped by the same selections as specified
-// by selections. The link writer lw may be nil, in which case the links
-// Selection is ignored.
-//
-func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
- // If we have a link writer, make the links
- // selection the last entry in selections
- if lw != nil {
- selections = append(selections, links)
- }
-
- // compute the sequence of consecutive segment changes
- changes := newMerger(selections)
-
- // The i'th bit in bitset indicates that the text
- // at the current offset is covered by selections[i].
- bitset := 0
- lastOffs := 0
-
- // Text segments are written in a delayed fashion
- // such that consecutive segments belonging to the
- // same selection can be combined (peephole optimization).
- // last describes the last segment which has not yet been written.
- var last struct {
- begin, end int // valid if begin < end
- bitset int
- }
-
- // flush writes the last delayed text segment
- flush := func() {
- if last.begin < last.end {
- sw(w, text[last.begin:last.end], last.bitset)
- }
- last.begin = last.end // invalidate last
- }
-
- // segment runs the segment [lastOffs, end) with the selection
- // indicated by bitset through the segment peephole optimizer.
- segment := func(end int) {
- if lastOffs < end { // ignore empty segments
- if last.end != lastOffs || last.bitset != bitset {
- // the last segment is not adjacent to or
- // differs from the new one
- flush()
- // start a new segment
- last.begin = lastOffs
- }
- last.end = end
- last.bitset = bitset
- }
- }
-
- for {
- // get the next segment change
- index, offs, start := changes.next()
- if index < 0 || offs > len(text) {
- // no more segment changes or the next change
- // is past the end of the text - we're done
- break
- }
- // determine the kind of segment change
- if lw != nil && index == len(selections)-1 {
- // we have a link segment change (see start of this function):
- // format the previous selection segment, write the
- // link tag and start a new selection segment
- segment(offs)
- flush()
- lastOffs = offs
- lw(w, offs, start)
- } else {
- // we have a selection change:
- // format the previous selection segment, determine
- // the new selection bitset and start a new segment
- segment(offs)
- lastOffs = offs
- mask := 1 << uint(index)
- if start {
- bitset |= mask
- } else {
- bitset &^= mask
- }
- }
- }
- segment(len(text))
- flush()
-}
-
-// A merger merges a slice of Selections and produces a sequence of
-// consecutive segment change events through repeated next() calls.
-//
-type merger struct {
- selections []Selection
- segments []Segment // segments[i] is the next segment of selections[i]
-}
-
-const infinity int = 2e9
-
-func newMerger(selections []Selection) *merger {
- segments := make([]Segment, len(selections))
- for i, sel := range selections {
- segments[i] = Segment{infinity, infinity}
- if sel != nil {
- if seg := sel(); !seg.isEmpty() {
- segments[i] = seg
- }
- }
- }
- return &merger{selections, segments}
-}
-
-// next returns the next segment change: index specifies the Selection
-// to which the segment belongs, offs is the segment start or end offset
-// as determined by the start value. If there are no more segment changes,
-// next returns an index value < 0.
-//
-func (m *merger) next() (index, offs int, start bool) {
- // find the next smallest offset where a segment starts or ends
- offs = infinity
- index = -1
- for i, seg := range m.segments {
- switch {
- case seg.start < offs:
- offs = seg.start
- index = i
- start = true
- case seg.end < offs:
- offs = seg.end
- index = i
- start = false
- }
- }
- if index < 0 {
- // no offset found => all selections merged
- return
- }
- // offset found - it's either the start or end offset but
- // either way it is ok to consume the start offset: set it
- // to infinity so it won't be considered in the following
- // next call
- m.segments[index].start = infinity
- if start {
- return
- }
- // end offset found - consume it
- m.segments[index].end = infinity
- // advance to the next segment for that selection
- seg := m.selections[index]()
- if !seg.isEmpty() {
- m.segments[index] = seg
- }
- return
-}
-
-// ----------------------------------------------------------------------------
-// Implementation of FormatText
-
-// lineSelection returns the line segments for text as a Selection.
-func lineSelection(text []byte) Selection {
- i, j := 0, 0
- return func() (seg Segment) {
- // find next newline, if any
- for j < len(text) {
- j++
- if text[j-1] == '\n' {
- break
- }
- }
- if i < j {
- // text[i:j] constitutes a line
- seg = Segment{i, j}
- i = j
- }
- return
- }
-}
-
-// tokenSelection returns, as a selection, the sequence of
-// consecutive occurrences of token sel in the Go src text.
-//
-func tokenSelection(src []byte, sel token.Token) Selection {
- var s scanner.Scanner
- fset := token.NewFileSet()
- file := fset.AddFile("", fset.Base(), len(src))
- s.Init(file, src, nil, scanner.ScanComments)
- return func() (seg Segment) {
- for {
- pos, tok, lit := s.Scan()
- if tok == token.EOF {
- break
- }
- offs := file.Offset(pos)
- if tok == sel {
- seg = Segment{offs, offs + len(lit)}
- break
- }
- }
- return
- }
-}
-
-// makeSelection is a helper function to make a Selection from a slice of pairs.
-// Pairs describing empty segments are ignored.
-//
-func makeSelection(matches [][]int) Selection {
- i := 0
- return func() Segment {
- for i < len(matches) {
- m := matches[i]
- i++
- if m[0] < m[1] {
- // non-empty segment
- return Segment{m[0], m[1]}
- }
- }
- return Segment{}
- }
-}
-
-// regexpSelection computes the Selection for the regular expression expr in text.
-func regexpSelection(text []byte, expr string) Selection {
- var matches [][]int
- if rx, err := regexp.Compile(expr); err == nil {
- matches = rx.FindAllIndex(text, -1)
- }
- return makeSelection(matches)
-}
-
-var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
-
-// RangeSelection computes the Selection for a text range described
-// by the argument str; the range description must match the selRx
-// regular expression.
-func RangeSelection(str string) Selection {
- m := selRx.FindStringSubmatch(str)
- if len(m) >= 2 {
- from, _ := strconv.Atoi(m[1])
- to, _ := strconv.Atoi(m[2])
- if from < to {
- return makeSelection([][]int{{from, to}})
- }
- }
- return nil
-}
-
-// Span tags for all the possible selection combinations that may
-// be generated by FormatText. Selections are indicated by a bitset,
-// and the value of the bitset specifies the tag to be used.
-//
-// bit 0: comments
-// bit 1: highlights
-// bit 2: selections
-//
-var startTags = [][]byte{
- /* 000 */ []byte(``),
- /* 001 */ []byte(`<span class="comment">`),
- /* 010 */ []byte(`<span class="highlight">`),
- /* 011 */ []byte(`<span class="highlight-comment">`),
- /* 100 */ []byte(`<span class="selection">`),
- /* 101 */ []byte(`<span class="selection-comment">`),
- /* 110 */ []byte(`<span class="selection-highlight">`),
- /* 111 */ []byte(`<span class="selection-highlight-comment">`),
-}
-
-var endTag = []byte(`</span>`)
-
-func selectionTag(w io.Writer, text []byte, selections int) {
- if selections < len(startTags) {
- if tag := startTags[selections]; len(tag) > 0 {
- w.Write(tag)
- template.HTMLEscape(w, text)
- w.Write(endTag)
- return
- }
- }
- template.HTMLEscape(w, text)
-}
-
-// FormatText HTML-escapes text and writes it to w.
-// Consecutive text segments are wrapped in HTML spans (with tags as
-// defined by startTags and endTag) as follows:
-//
-// - if line >= 0, line number (ln) spans are inserted before each line,
-// starting with the value of line
-// - if the text is Go source, comments get the "comment" span class
-// - each occurrence of the regular expression pattern gets the "highlight"
-// span class
-// - text segments covered by selection get the "selection" span class
-//
-// Comments, highlights, and selections may overlap arbitrarily; the respective
-// HTML span classes are specified in the startTags variable.
-//
-func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
- var comments, highlights Selection
- if goSource {
- comments = tokenSelection(text, token.COMMENT)
- }
- if pattern != "" {
- highlights = regexpSelection(text, pattern)
- }
- if line >= 0 || comments != nil || highlights != nil || selection != nil {
- var lineTag LinkWriter
- if line >= 0 {
- lineTag = func(w io.Writer, _ int, start bool) {
- if start {
- fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line)
- line++
- }
- }
- }
- FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
- } else {
- template.HTMLEscape(w, text)
- }
-}
diff --git a/internal/godoc/godoc.go b/internal/godoc/godoc.go
index c9b1836..25b3063 100644
--- a/internal/godoc/godoc.go
+++ b/internal/godoc/godoc.go
@@ -31,6 +31,8 @@
"time"
"unicode"
"unicode/utf8"
+
+ "golang.org/x/website/internal/texthtml"
)
// Fake relative package path for built-ins. Documentation for all globals
@@ -127,150 +129,17 @@
p.writeNode(&buf1, info, info.FSet, node)
var buf2 bytes.Buffer
- if n, _ := node.(ast.Node); n != nil && linkify && p.DeclLinks {
- LinkifyText(&buf2, buf1.Bytes(), n)
- if st, name := isStructTypeDecl(n); st != nil {
- addStructFieldIDAttributes(&buf2, name, st)
- }
- } else {
- FormatText(&buf2, buf1.Bytes(), -1, true, "", nil)
+ var n ast.Node
+ if linkify && p.DeclLinks {
+ n, _ = node.(ast.Node)
}
-
+ buf2.Write(texthtml.Format(buf1.Bytes(), texthtml.Config{
+ AST: n,
+ GoComments: true,
+ }))
return buf2.String()
}
-// isStructTypeDecl checks whether n is a struct declaration.
-// It either returns a non-nil StructType and its name, or zero values.
-func isStructTypeDecl(n ast.Node) (st *ast.StructType, name string) {
- gd, ok := n.(*ast.GenDecl)
- if !ok || gd.Tok != token.TYPE {
- return nil, ""
- }
- if gd.Lparen > 0 {
- // Parenthesized type. Who does that, anyway?
- // TODO: Reportedly gri does. Fix this to handle that too.
- return nil, ""
- }
- if len(gd.Specs) != 1 {
- return nil, ""
- }
- ts, ok := gd.Specs[0].(*ast.TypeSpec)
- if !ok {
- return nil, ""
- }
- st, ok = ts.Type.(*ast.StructType)
- if !ok {
- return nil, ""
- }
- return st, ts.Name.Name
-}
-
-// addStructFieldIDAttributes modifies the contents of buf such that
-// all struct fields of the named struct have <span id='name.Field'>
-// in them, so people can link to /#Struct.Field.
-func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructType) {
- if st.Fields == nil {
- return
- }
- // needsLink is a set of identifiers that still need to be
- // linked, where value == key, to avoid an allocation in func
- // linkedField.
- needsLink := make(map[string]string)
-
- for _, f := range st.Fields.List {
- if len(f.Names) == 0 {
- continue
- }
- fieldName := f.Names[0].Name
- needsLink[fieldName] = fieldName
- }
- var newBuf bytes.Buffer
- foreachLine(buf.Bytes(), func(line []byte) {
- if fieldName := linkedField(line, needsLink); fieldName != "" {
- fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
- delete(needsLink, fieldName)
- }
- newBuf.Write(line)
- })
- buf.Reset()
- buf.Write(newBuf.Bytes())
-}
-
-// foreachLine calls fn for each line of in, where a line includes
-// the trailing "\n", except on the last line, if it doesn't exist.
-func foreachLine(in []byte, fn func(line []byte)) {
- for len(in) > 0 {
- nl := bytes.IndexByte(in, '\n')
- if nl == -1 {
- fn(in)
- return
- }
- fn(in[:nl+1])
- in = in[nl+1:]
- }
-}
-
-// commentPrefix is the line prefix for comments after they've been HTMLified.
-var commentPrefix = []byte(`<span class="comment">// `)
-
-// linkedField determines whether the given line starts with an
-// identifier in the provided ids map (mapping from identifier to the
-// same identifier). The line can start with either an identifier or
-// an identifier in a comment. If one matches, it returns the
-// identifier that matched. Otherwise it returns the empty string.
-func linkedField(line []byte, ids map[string]string) string {
- line = bytes.TrimSpace(line)
-
- // For fields with a doc string of the
- // conventional form, we put the new span into
- // the comment instead of the field.
- // The "conventional" form is a complete sentence
- // per https://golang.org/s/style#comment-sentences like:
- //
- // // Foo is an optional Fooer to foo the foos.
- // Foo Fooer
- //
- // In this case, we want the #StructName.Foo
- // link to make the browser go to the comment
- // line "Foo is an optional Fooer" instead of
- // the "Foo Fooer" line, which could otherwise
- // obscure the docs above the browser's "fold".
- //
- // TODO: do this better, so it works for all
- // comments, including unconventional ones.
- line = bytes.TrimPrefix(line, commentPrefix)
- id := scanIdentifier(line)
- if len(id) == 0 {
- // No leading identifier. Avoid map lookup for
- // somewhat common case.
- return ""
- }
- return ids[string(id)]
-}
-
-// scanIdentifier scans a valid Go identifier off the front of v and
-// either returns a subslice of v if there's a valid identifier, or
-// returns a zero-length slice.
-func scanIdentifier(v []byte) []byte {
- var n int // number of leading bytes of v belonging to an identifier
- for {
- r, width := utf8.DecodeRune(v[n:])
- if !(isLetter(r) || n > 0 && isDigit(r)) {
- break
- }
- n += width
- }
- return v[:n]
-}
-
-func isLetter(ch rune) bool {
- return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
-}
-
-func isDigit(ch rune) bool {
- return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
-}
-
func comment_htmlFunc(comment string) string {
var buf bytes.Buffer
// TODO(gri) Provide list of words (e.g. function parameters)
diff --git a/internal/godoc/godoc_test.go b/internal/godoc/godoc_test.go
index fd65c7e..69914cb 100644
--- a/internal/godoc/godoc_test.go
+++ b/internal/godoc/godoc_test.go
@@ -250,25 +250,6 @@
return buf.String()
}
-func TestScanIdentifier(t *testing.T) {
- tests := []struct {
- in, want string
- }{
- {"foo bar", "foo"},
- {"foo/bar", "foo"},
- {" foo", ""},
- {"фоо", "фоо"},
- {"f123", "f123"},
- {"123f", ""},
- }
- for _, tt := range tests {
- got := scanIdentifier([]byte(tt.in))
- if string(got) != tt.want {
- t.Errorf("scanIdentifier(%q) = %q; want %q", tt.in, got, tt.want)
- }
- }
-}
-
func TestReplaceLeadingIndentation(t *testing.T) {
oldIndent := strings.Repeat(" ", 2)
newIndent := strings.Repeat(" ", 4)
diff --git a/internal/godoc/linkify.go b/internal/godoc/linkify.go
deleted file mode 100644
index 3f44ee6..0000000
--- a/internal/godoc/linkify.go
+++ /dev/null
@@ -1,198 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build go1.16
-// +build go1.16
-
-// This file implements LinkifyText which introduces
-// links for identifiers pointing to their declarations.
-// The approach does not cover all cases because godoc
-// doesn't have complete type information, but it's
-// reasonably good for browsing.
-
-package godoc
-
-import (
- "fmt"
- "go/ast"
- "go/doc"
- "go/token"
- "io"
- "strconv"
-)
-
-// LinkifyText HTML-escapes source text and writes it to w.
-// Identifiers that are in a "use" position (i.e., that are
-// not being declared), are wrapped with HTML links pointing
-// to the respective declaration, if possible. Comments are
-// formatted the same way as with FormatText.
-//
-func LinkifyText(w io.Writer, text []byte, n ast.Node) {
- links := linksFor(n)
-
- i := 0 // links index
- prev := "" // prev HTML tag
- linkWriter := func(w io.Writer, _ int, start bool) {
- // end tag
- if !start {
- if prev != "" {
- fmt.Fprintf(w, `</%s>`, prev)
- prev = ""
- }
- return
- }
-
- // start tag
- prev = ""
- if i < len(links) {
- switch info := links[i]; {
- case info.path != "" && info.name == "":
- // package path
- fmt.Fprintf(w, `<a href="/pkg/%s/">`, info.path)
- prev = "a"
- case info.path != "" && info.name != "":
- // qualified identifier
- fmt.Fprintf(w, `<a href="/pkg/%s/#%s">`, info.path, info.name)
- prev = "a"
- case info.path == "" && info.name != "":
- // local identifier
- if info.isVal {
- fmt.Fprintf(w, `<span id="%s">`, info.name)
- prev = "span"
- } else if ast.IsExported(info.name) {
- fmt.Fprintf(w, `<a href="#%s">`, info.name)
- prev = "a"
- }
- }
- i++
- }
- }
-
- idents := tokenSelection(text, token.IDENT)
- comments := tokenSelection(text, token.COMMENT)
- FormatSelections(w, text, linkWriter, idents, selectionTag, comments)
-}
-
-// A link describes the (HTML) link information for an identifier.
-// The zero value of a link represents "no link".
-//
-type link struct {
- path, name string // package path, identifier name
- isVal bool // identifier is defined in a const or var declaration
-}
-
-// linksFor returns the list of links for the identifiers used
-// by node in the same order as they appear in the source.
-//
-func linksFor(node ast.Node) (links []link) {
- // linkMap tracks link information for each ast.Ident node. Entries may
- // be created out of source order (for example, when we visit a parent
- // definition node). These links are appended to the returned slice when
- // their ast.Ident nodes are visited.
- linkMap := make(map[*ast.Ident]link)
-
- ast.Inspect(node, func(node ast.Node) bool {
- switch n := node.(type) {
- case *ast.Field:
- for _, n := range n.Names {
- linkMap[n] = link{}
- }
- case *ast.ImportSpec:
- if name := n.Name; name != nil {
- linkMap[name] = link{}
- }
- case *ast.ValueSpec:
- for _, n := range n.Names {
- linkMap[n] = link{name: n.Name, isVal: true}
- }
- case *ast.FuncDecl:
- linkMap[n.Name] = link{}
- case *ast.TypeSpec:
- linkMap[n.Name] = link{}
- case *ast.AssignStmt:
- // Short variable declarations only show up if we apply
- // this code to all source code (as opposed to exported
- // declarations only).
- if n.Tok == token.DEFINE {
- // Some of the lhs variables may be re-declared,
- // so technically they are not defs. We don't
- // care for now.
- for _, x := range n.Lhs {
- // Each lhs expression should be an
- // ident, but we are conservative and check.
- if n, _ := x.(*ast.Ident); n != nil {
- linkMap[n] = link{isVal: true}
- }
- }
- }
- case *ast.SelectorExpr:
- // Detect qualified identifiers of the form pkg.ident.
- // If anything fails we return true and collect individual
- // identifiers instead.
- if x, _ := n.X.(*ast.Ident); x != nil {
- // Create links only if x is a qualified identifier.
- if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
- if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
- // spec.Path.Value is the import path
- if path, err := strconv.Unquote(spec.Path.Value); err == nil {
- // Register two links, one for the package
- // and one for the qualified identifier.
- linkMap[x] = link{path: path}
- linkMap[n.Sel] = link{path: path, name: n.Sel.Name}
- }
- }
- }
- }
- case *ast.CompositeLit:
- // Detect field names within composite literals. These links should
- // be prefixed by the type name.
- fieldPath := ""
- prefix := ""
- switch typ := n.Type.(type) {
- case *ast.Ident:
- prefix = typ.Name + "."
- case *ast.SelectorExpr:
- if x, _ := typ.X.(*ast.Ident); x != nil {
- // Create links only if x is a qualified identifier.
- if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
- if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
- // spec.Path.Value is the import path
- if path, err := strconv.Unquote(spec.Path.Value); err == nil {
- // Register two links, one for the package
- // and one for the qualified identifier.
- linkMap[x] = link{path: path}
- linkMap[typ.Sel] = link{path: path, name: typ.Sel.Name}
- fieldPath = path
- prefix = typ.Sel.Name + "."
- }
- }
- }
- }
- }
- for _, e := range n.Elts {
- if kv, ok := e.(*ast.KeyValueExpr); ok {
- if k, ok := kv.Key.(*ast.Ident); ok {
- // Note: there is some syntactic ambiguity here. We cannot determine
- // if this is a struct literal or a map literal without type
- // information. We assume struct literal.
- name := prefix + k.Name
- linkMap[k] = link{path: fieldPath, name: name}
- }
- }
- }
- case *ast.Ident:
- if l, ok := linkMap[n]; ok {
- links = append(links, l)
- } else {
- l := link{name: n.Name}
- if n.Obj == nil && doc.IsPredeclared(n.Name) {
- l.path = builtinPkgPath
- }
- links = append(links, l)
- }
- }
- return true
- })
- return
-}
diff --git a/internal/godoc/server.go b/internal/godoc/server.go
index b16449c..2cf200b 100644
--- a/internal/godoc/server.go
+++ b/internal/godoc/server.go
@@ -25,10 +25,14 @@
"os"
pathpkg "path"
"path/filepath"
+ "regexp"
"sort"
+ "strconv"
"strings"
"text/template"
"time"
+
+ "golang.org/x/website/internal/texthtml"
)
// handlerServer is a migration from an old godoc http Handler type.
@@ -555,6 +559,23 @@
return
}
+var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
+
+// rangeSelection computes the Selection for a text range described by str,
+// of the form Start:End, where Start and End are decimal byte offsets. It
+// returns nil if str has no such prefix, an offset overflows int, or Start >= End.
+func rangeSelection(str string) texthtml.Selection {
+	m := selRx.FindStringSubmatch(str)
+	if len(m) == 3 { // full match plus the two capture groups
+		from, errFrom := strconv.Atoi(m[1])
+		to, errTo := strconv.Atoi(m[2])
+		if errFrom == nil && errTo == nil && from < to { // reject unparsable offsets
+			return texthtml.Spans(texthtml.Span{Start: from, End: to})
+		}
+	}
+	return nil
+}
+
func (p *Presentation) serveTextFile(w http.ResponseWriter, r *http.Request, abspath, relpath, title string) {
src, err := fs.ReadFile(p.Corpus.fs, toFS(abspath))
if err != nil {
@@ -568,19 +589,18 @@
return
}
- h := r.FormValue("h")
- s := RangeSelection(r.FormValue("s"))
+ cfg := texthtml.Config{
+ GoComments: pathpkg.Ext(abspath) == ".go",
+ Highlight: r.FormValue("h"),
+ Selection: rangeSelection(r.FormValue("s")),
+ Line: 1,
+ }
var buf bytes.Buffer
- if pathpkg.Ext(abspath) == ".go" {
- buf.WriteString("<pre>")
- formatGoSource(&buf, src, h, s)
- buf.WriteString("</pre>")
- } else {
- buf.WriteString("<pre>")
- FormatText(&buf, src, 1, false, h, s)
- buf.WriteString("</pre>")
- }
+ buf.WriteString("<pre>")
+ buf.Write(texthtml.Format(src, cfg))
+ buf.WriteString("</pre>")
+
fmt.Fprintf(&buf, `<p><a href="/%s?m=text">View as plain text</a></p>`, htmlpkg.EscapeString(relpath))
p.ServePage(w, Page{
@@ -592,49 +612,6 @@
})
}
-// formatGoSource HTML-escapes Go source text and writes it to w.
-func formatGoSource(buf *bytes.Buffer, text []byte, pattern string, selection Selection) {
- // Emit to a temp buffer so that we can add line anchors at the end.
- saved, buf := buf, new(bytes.Buffer)
-
- comments := tokenSelection(text, token.COMMENT)
- var highlights Selection
- if pattern != "" {
- highlights = regexpSelection(text, pattern)
- }
-
- FormatSelections(buf, text, nil, nil, selectionTag, comments, highlights, selection)
-
- // Now copy buf to saved, adding line anchors.
-
- // The lineSelection mechanism can't be composed with our
- // linkWriter, so we have to add line spans as another pass.
- n := 1
- for _, line := range bytes.Split(buf.Bytes(), []byte("\n")) {
- // The line numbers are inserted into the document via a CSS ::before
- // pseudo-element. This prevents them from being copied when users
- // highlight and copy text.
- // ::before is supported in 98% of browsers: https://caniuse.com/#feat=css-gencontent
- // This is also the trick Github uses to hide line numbers.
- //
- // The first tab for the code snippet needs to start in column 9, so
- // it indents a full 8 spaces, hence the two nbsp's. Otherwise the tab
- // character only indents a short amount.
- //
- // Due to rounding and font width Firefox might not treat 8 rendered
- // characters as 8 characters wide, and subsequently may treat the tab
- // character in the 9th position as moving the width from (7.5 or so) up
- // to 8. See
- // https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
- // for a fuller explanation. The solution is to add a CSS class to
- // explicitly declare the width to be 8 characters.
- fmt.Fprintf(saved, `<span id="L%d" class="ln">%6d </span>`, n, n)
- n++
- saved.Write(line)
- saved.WriteByte('\n')
- }
-}
-
func (p *Presentation) serveDirectory(w http.ResponseWriter, r *http.Request, abspath, relpath string) {
if redirect(w, r) {
return
diff --git a/internal/godoc/template.go b/internal/godoc/template.go
index 3e52453..5baa140 100644
--- a/internal/godoc/template.go
+++ b/internal/godoc/template.go
@@ -41,6 +41,8 @@
"log"
"regexp"
"strings"
+
+ "golang.org/x/website/internal/texthtml"
)
// Functions in this file panic on error, but the panic is recovered
@@ -100,7 +102,7 @@
text = strings.Replace(text, "\t", " ", -1)
var buf bytes.Buffer
// HTML-escape text and syntax-color comments like elsewhere.
- FormatText(&buf, []byte(text), -1, true, "", nil)
+ buf.Write(texthtml.Format([]byte(text), texthtml.Config{GoComments: true}))
// Include the command as a comment.
text = fmt.Sprintf("<pre><!--{{%s}}\n-->%s</pre>", command, buf.Bytes())
return text, nil
diff --git a/internal/texthtml/ast.go b/internal/texthtml/ast.go
new file mode 100644
index 0000000..76bd4bb
--- /dev/null
+++ b/internal/texthtml/ast.go
@@ -0,0 +1,298 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package texthtml
+
+import (
+ "bytes"
+ "fmt"
+ "go/ast"
+ "go/doc"
+ "go/token"
+ "strconv"
+ "unicode"
+ "unicode/utf8"
+)
+
+// A goLink describes the (HTML) link information for a Go identifier.
+// The zero value of a link represents "no link".
+type goLink struct {
+ path, name string // package path, identifier name
+ isVal bool // identifier is defined in a const or var declaration
+}
+
+func (l *goLink) tags() (start, end string) {
+ switch {
+ case l.path != "" && l.name == "":
+ // package path only: link to the package's /pkg/ page
+ return `<a href="/pkg/` + l.path + `/">`, `</a>`
+ case l.path != "" && l.name != "":
+ // qualified identifier: link to the name's anchor on the package's /pkg/ page
+ return `<a href="/pkg/` + l.path + `/#` + l.name + `">`, `</a>`
+ case l.path == "" && l.name != "":
+ // local identifier: const/var names become anchor targets, exported names link to an anchor in the same page
+ if l.isVal {
+ return `<span id="` + l.name + `">`, `</span>`
+ }
+ if ast.IsExported(l.name) {
+ return `<a href="#` + l.name + `">`, `</a>`
+ }
+ }
+ return "", "" // no link: zero goLink, or a local name that is neither a value nor exported
+}
+
+// goLinksFor returns the list of links for the identifiers used
+// by node in the same order as they appear in the source.
+func goLinksFor(node ast.Node) (links []goLink) {
+ // linkMap tracks link information for each ast.Ident node. Entries may
+ // be created out of source order (for example, when we visit a parent
+ // definition node). These links are appended to the returned slice when
+ // their ast.Ident nodes are visited.
+ linkMap := make(map[*ast.Ident]goLink)
+
+ ast.Inspect(node, func(node ast.Node) bool {
+ switch n := node.(type) {
+ case *ast.Field:
+ for _, n := range n.Names {
+ linkMap[n] = goLink{}
+ }
+ case *ast.ImportSpec:
+ if name := n.Name; name != nil {
+ linkMap[name] = goLink{}
+ }
+ case *ast.ValueSpec:
+ for _, n := range n.Names {
+ linkMap[n] = goLink{name: n.Name, isVal: true}
+ }
+ case *ast.FuncDecl:
+ linkMap[n.Name] = goLink{}
+ case *ast.TypeSpec:
+ linkMap[n.Name] = goLink{}
+ case *ast.AssignStmt:
+ // Short variable declarations only show up if we apply
+ // this code to all source code (as opposed to exported
+ // declarations only).
+ if n.Tok == token.DEFINE {
+ // Some of the lhs variables may be re-declared,
+ // so technically they are not defs. We don't
+ // care for now.
+ for _, x := range n.Lhs {
+ // Each lhs expression should be an
+ // ident, but we are conservative and check.
+ if n, _ := x.(*ast.Ident); n != nil {
+ linkMap[n] = goLink{isVal: true}
+ }
+ }
+ }
+ case *ast.SelectorExpr:
+ // Detect qualified identifiers of the form pkg.ident.
+ // If anything fails we return true and collect individual
+ // identifiers instead.
+ if x, _ := n.X.(*ast.Ident); x != nil {
+ // Create links only if x is a qualified identifier.
+ if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
+ if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
+ // spec.Path.Value is the import path
+ if path, err := strconv.Unquote(spec.Path.Value); err == nil {
+ // Register two links, one for the package
+ // and one for the qualified identifier.
+ linkMap[x] = goLink{path: path}
+ linkMap[n.Sel] = goLink{path: path, name: n.Sel.Name}
+ }
+ }
+ }
+ }
+ case *ast.CompositeLit:
+ // Detect field names within composite literals. These links should
+ // be prefixed by the type name.
+ fieldPath := ""
+ prefix := ""
+ switch typ := n.Type.(type) {
+ case *ast.Ident:
+ prefix = typ.Name + "."
+ case *ast.SelectorExpr:
+ if x, _ := typ.X.(*ast.Ident); x != nil {
+ // Create links only if x is a qualified identifier.
+ if obj := x.Obj; obj != nil && obj.Kind == ast.Pkg {
+ if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil {
+ // spec.Path.Value is the import path
+ if path, err := strconv.Unquote(spec.Path.Value); err == nil {
+ // Register two links, one for the package
+ // and one for the qualified identifier.
+ linkMap[x] = goLink{path: path}
+ linkMap[typ.Sel] = goLink{path: path, name: typ.Sel.Name}
+ fieldPath = path
+ prefix = typ.Sel.Name + "."
+ }
+ }
+ }
+ }
+ }
+ for _, e := range n.Elts {
+ if kv, ok := e.(*ast.KeyValueExpr); ok {
+ if k, ok := kv.Key.(*ast.Ident); ok {
+ // Note: there is some syntactic ambiguity here. We cannot determine
+ // if this is a struct literal or a map literal without type
+ // information. We assume struct literal.
+ name := prefix + k.Name
+ linkMap[k] = goLink{path: fieldPath, name: name}
+ }
+ }
+ }
+ case *ast.Ident:
+ if l, ok := linkMap[n]; ok {
+ links = append(links, l)
+ } else {
+ l := goLink{name: n.Name}
+ if n.Obj == nil && doc.IsPredeclared(n.Name) {
+ l.path = "builtin"
+ }
+ links = append(links, l)
+ }
+ }
+ return true
+ })
+ return
+}
+
+// postFormatAST makes any appropriate changes to the formatting of node in buf.
+// Specifically, it adds span links to each struct field, so they can be linked properly.
+// TODO(rsc): Why not do this as part of the linking above?
+func postFormatAST(buf *bytes.Buffer, node ast.Node) {
+ if st, name := isStructTypeDecl(node); st != nil {
+ addStructFieldIDAttributes(buf, name, st)
+ }
+}
+
+// isStructTypeDecl checks whether n is a struct declaration.
+// It either returns a non-nil StructType and its name, or zero values.
+func isStructTypeDecl(n ast.Node) (st *ast.StructType, name string) {
+ gd, ok := n.(*ast.GenDecl)
+ if !ok || gd.Tok != token.TYPE {
+ return nil, ""
+ }
+ if gd.Lparen > 0 {
+ // Parenthesized type. Who does that, anyway?
+ // TODO: Reportedly gri does. Fix this to handle that too.
+ return nil, ""
+ }
+ if len(gd.Specs) != 1 {
+ return nil, ""
+ }
+ ts, ok := gd.Specs[0].(*ast.TypeSpec)
+ if !ok {
+ return nil, ""
+ }
+ st, ok = ts.Type.(*ast.StructType)
+ if !ok {
+ return nil, ""
+ }
+ return st, ts.Name.Name
+}
+
+// addStructFieldIDAttributes rewrites the contents of buf so that an
+// empty <span id="name.Field"></span> precedes the first line matched by
+// linkedField for each field, so people can link to /#Struct.Field.
+func addStructFieldIDAttributes(buf *bytes.Buffer, name string, st *ast.StructType) {
+ if st.Fields == nil {
+ return
+ }
+ // needsLink is a set of identifiers that still need to be
+ // linked, where value == key, to avoid an allocation in func
+ // linkedField.
+ needsLink := make(map[string]string)
+
+ for _, f := range st.Fields.List {
+ if len(f.Names) == 0 { // embedded field: there is no field name to anchor
+ continue
+ }
+ fieldName := f.Names[0].Name // only the first name of a multi-name field (A, B T) gets an ID
+ needsLink[fieldName] = fieldName
+ }
+ var newBuf bytes.Buffer
+ foreachLine(buf.Bytes(), func(line []byte) {
+ if fieldName := linkedField(line, needsLink); fieldName != "" {
+ fmt.Fprintf(&newBuf, `<span id="%s.%s"></span>`, name, fieldName)
+ delete(needsLink, fieldName) // anchor only the first matching line for each field
+ }
+ newBuf.Write(line)
+ })
+ buf.Reset()
+ buf.Write(newBuf.Bytes())
+}
+
+// foreachLine calls fn for each line of in. Each line includes its
+// trailing "\n", except a final line that does not end in "\n".
+func foreachLine(in []byte, fn func(line []byte)) {
+ for len(in) > 0 {
+ nl := bytes.IndexByte(in, '\n')
+ if nl == -1 {
+ fn(in)
+ return
+ }
+ fn(in[:nl+1])
+ in = in[nl+1:]
+ }
+}
+
+// commentPrefix is the line prefix for comments after they've been HTMLified.
+var commentPrefix = []byte(`<span class="comment">// `)
+
+// linkedField determines whether the given line starts with an
+// identifier in the provided ids map (mapping from identifier to the
+// same identifier). The line can start with either an identifier or
+// an identifier in a comment. If one matches, it returns the
+// identifier that matched. Otherwise it returns the empty string.
+func linkedField(line []byte, ids map[string]string) string {
+ line = bytes.TrimSpace(line)
+
+ // For fields with a doc string of the
+ // conventional form, we put the new span into
+ // the comment instead of the field.
+ // The "conventional" form is a complete sentence
+ // per https://golang.org/s/style#comment-sentences like:
+ //
+ // // Foo is an optional Fooer to foo the foos.
+ // Foo Fooer
+ //
+ // In this case, we want the #StructName.Foo
+ // link to make the browser go to the comment
+ // line "Foo is an optional Fooer" instead of
+ // the "Foo Fooer" line, which could otherwise
+ // obscure the docs above the browser's "fold".
+ //
+ // TODO: do this better, so it works for all
+ // comments, including unconventional ones.
+ line = bytes.TrimPrefix(line, commentPrefix)
+ id := scanIdentifier(line)
+ if len(id) == 0 {
+ // No leading identifier. Avoid map lookup for
+ // somewhat common case.
+ return ""
+ }
+ return ids[string(id)]
+}
+
+// scanIdentifier scans a valid Go identifier off the front of v and
+// either returns a subslice of v if there's a valid identifier, or
+// returns a zero-length slice.
+func scanIdentifier(v []byte) []byte {
+ var n int // number of leading bytes of v belonging to an identifier
+ for {
+ r, width := utf8.DecodeRune(v[n:])
+ if !(isLetter(r) || n > 0 && isDigit(r)) {
+ break
+ }
+ n += width
+ }
+ return v[:n]
+}
+
+func isLetter(ch rune) bool {
+ return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
+}
+
+func isDigit(ch rune) bool {
+ return '0' <= ch && ch <= '9' || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
+}
diff --git a/internal/texthtml/texthtml.go b/internal/texthtml/texthtml.go
new file mode 100644
index 0000000..1175fe1
--- /dev/null
+++ b/internal/texthtml/texthtml.go
@@ -0,0 +1,355 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package texthtml formats text files to HTML.
+package texthtml
+
+import (
+ "bytes"
+ "fmt"
+ "go/ast"
+ "go/scanner"
+ "go/token"
+ "io"
+ "regexp"
+ "text/template"
+)
+
+// A Span describes a text span [start, end).
+// The zero value of a Span is an empty span.
+type Span struct {
+ Start, End int
+}
+
+func (s *Span) isEmpty() bool { return s.Start >= s.End }
+
+// A Selection is an "iterator" function returning a text span.
+// Repeated calls to a selection return consecutive, non-overlapping,
+// non-empty spans, followed by an infinite sequence of empty
+// spans. The first empty span marks the end of the selection.
+type Selection func() Span
+
+// A Config configures how to format text as HTML.
+type Config struct {
+ Line int // if >= 1, number lines beginning with number Line, with <span class="ln">
+ GoComments bool // mark comments in Go text with <span class="comment">
+ Highlight string // highlight matches for this regexp with <span class="highlight">; an invalid regexp highlights nothing
+ Selection Selection // mark selected spans with <span class="selection">
+ AST ast.Node // if non-nil, link identifier uses to declarations, assuming text is the formatting of AST
+}
+
+// Format formats text to HTML according to the configuration cfg.
+//
+// The text is HTML-escaped, and the spans selected through cfg (Go
+// comments, regexp highlights, cfg.Selection) are wrapped in <span> tags.
+// If cfg.AST is non-nil, identifiers are linked using goLinksFor and the
+// output is post-processed by postFormatAST. If cfg.Line > 0, every line
+// is prefixed with a line-number span, counting up from cfg.Line.
+func Format(text []byte, cfg Config) (html []byte) {
+ var comments, highlights Selection
+ if cfg.GoComments {
+ comments = tokenSelection(text, token.COMMENT)
+ }
+ if cfg.Highlight != "" {
+ highlights = regexpSelection(text, cfg.Highlight)
+ }
+
+ var buf bytes.Buffer
+ var idents Selection = Spans()
+ var goLinks []goLink
+ if cfg.AST != nil {
+ idents = tokenSelection(text, token.IDENT)
+ goLinks = goLinksFor(cfg.AST)
+ }
+
+ // The selection order matters: indexes 0-2 are the bits that pick the
+ // span class, and formatSelections treats index 3 as the identifier
+ // (Go link) selection, so idents must remain the fourth selection.
+ formatSelections(&buf, text, goLinks, comments, highlights, cfg.Selection, idents)
+
+ if cfg.AST != nil {
+ postFormatAST(&buf, cfg.AST)
+ }
+
+ if cfg.Line > 0 {
+ // Add line numbers in a separate pass.
+ // Splitting on "\n" means that output ending in a newline yields
+ // one final, empty numbered line.
+ old := buf.Bytes()
+ buf = bytes.Buffer{}
+ n := cfg.Line
+ for _, line := range bytes.Split(old, []byte("\n")) {
+ // The line numbers are inserted into the document via a CSS ::before
+ // pseudo-element. This prevents them from being copied when users
+ // highlight and copy text.
+ // ::before is supported in 98% of browsers: https://caniuse.com/#feat=css-gencontent
+ // This is also the trick Github uses to hide line numbers.
+ //
+ // The first tab for the code snippet needs to start in column 9, so
+ // it indents a full 8 spaces, hence the two nbsp's. Otherwise the tab
+ // character only indents a short amount.
+ //
+ // Due to rounding and font width Firefox might not treat 8 rendered
+ // characters as 8 characters wide, and subsequently may treat the tab
+ // character in the 9th position as moving the width from (7.5 or so) up
+ // to 8. See
+ // https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
+ // for a fuller explanation. The solution is to add a CSS class to
+ // explicitly declare the width to be 8 characters.
+ //
+ // %6d followed by two &nbsp; fills exactly those 8 columns.
+ fmt.Fprintf(&buf, `<span id="L%d" class="ln">%6d&nbsp;&nbsp;</span>`, n, n)
+ n++
+ buf.Write(line)
+ buf.WriteByte('\n')
+ }
+ }
+ return buf.Bytes()
+}
+
+// formatSelections HTML-escapes text and writes it to w. Each run of text
+// covered by the same combination of selections[0..2] is written through
+// selectionTag with that combination as a bitset. selections[3] is special:
+// at each of its span starts the tags of the next entry in goLinks are
+// opened, and at the corresponding span end they are closed.
+func formatSelections(w io.Writer, text []byte, goLinks []goLink, selections ...Selection) {
+ // compute the sequence of consecutive span changes
+ changes := newMerger(selections)
+
+ // The i'th bit in bitset indicates that the text
+ // at the current offset is covered by selections[i].
+ bitset := 0
+ lastOffs := 0
+
+ // Text spans are written in a delayed fashion
+ // such that consecutive spans belonging to the
+ // same selection can be combined (peephole optimization).
+ // last describes the last span which has not yet been written.
+ var last struct {
+ begin, end int // valid if begin < end
+ bitset int
+ }
+
+ // flush writes the last delayed text span
+ flush := func() {
+ if last.begin < last.end {
+ selectionTag(w, text[last.begin:last.end], last.bitset)
+ }
+ last.begin = last.end // invalidate last
+ }
+
+ // span runs the span [lastOffs, end) with the selection
+ // indicated by bitset through the span peephole optimizer.
+ span := func(end int) {
+ if lastOffs < end { // ignore empty spans
+ if last.end != lastOffs || last.bitset != bitset {
+ // the last span is not adjacent to or
+ // differs from the new one
+ flush()
+ // start a new span
+ last.begin = lastOffs
+ }
+ last.end = end
+ last.bitset = bitset
+ }
+ }
+
+ linkEnd := ""
+ for {
+ // get the next span change
+ index, offs, start := changes.next()
+ if index < 0 || offs > len(text) {
+ // no more span changes or the next change
+ // is past the end of the text - we're done
+ break
+ }
+
+ // format the previous selection span, determine
+ // the new selection bitset and start a new span
+ span(offs)
+ if index == 3 { // Go link: Format passes the identifier selection as selections[3]
+ flush()
+ if start {
+ if len(goLinks) > 0 {
+ start, end := goLinks[0].tags()
+ io.WriteString(w, start)
+ linkEnd = end
+ goLinks = goLinks[1:]
+ }
+ } else {
+ if linkEnd != "" {
+ io.WriteString(w, linkEnd)
+ linkEnd = ""
+ }
+ }
+ } else {
+ mask := 1 << uint(index)
+ if start {
+ bitset |= mask
+ } else {
+ bitset &^= mask
+ }
+ }
+ lastOffs = offs
+ }
+ span(len(text))
+ flush()
+}
+
+// A merger merges a slice of Selections and produces a sequence of
+// consecutive span change events through repeated next() calls.
+type merger struct {
+ selections []Selection
+ spans []Span // spans[i] is the next span of selections[i]
+}
+
+const infinity int = 2e9
+
+func newMerger(selections []Selection) *merger {
+ spans := make([]Span, len(selections))
+ for i, sel := range selections {
+ spans[i] = Span{infinity, infinity}
+ if sel != nil {
+ if seg := sel(); !seg.isEmpty() {
+ spans[i] = seg
+ }
+ }
+ }
+ return &merger{selections, spans}
+}
+
+// next returns the next span change: index specifies the Selection
+// to which the span belongs, offs is the span start or end offset
+// as determined by the start value. If there are no more span changes,
+// next returns an index value < 0.
+func (m *merger) next() (index, offs int, start bool) {
+ // find the next smallest offset where a span starts or ends
+ offs = infinity
+ index = -1
+ for i, seg := range m.spans {
+ switch {
+ case seg.Start < offs:
+ offs = seg.Start
+ index = i
+ start = true
+ case seg.End < offs:
+ offs = seg.End
+ index = i
+ start = false
+ }
+ }
+ if index < 0 {
+ // no offset found => all selections merged
+ return
+ }
+ // offset found - it's either the start or end offset but
+ // either way it is ok to consume the start offset: set it
+ // to infinity so it won't be considered in the following
+ // next call
+ m.spans[index].Start = infinity
+ if start {
+ return
+ }
+ // end offset found - consume it
+ m.spans[index].End = infinity
+ // advance to the next span for that selection
+ seg := m.selections[index]()
+ if !seg.isEmpty() {
+ m.spans[index] = seg
+ }
+ return
+}
+
+// lineSelection returns the line spans for text as a Selection.
+func lineSelection(text []byte) Selection {
+ i, j := 0, 0
+ return func() (seg Span) {
+ // find next newline, if any
+ for j < len(text) {
+ j++
+ if text[j-1] == '\n' {
+ break
+ }
+ }
+ if i < j {
+ // text[i:j] constitutes a line
+ seg = Span{i, j}
+ i = j
+ }
+ return
+ }
+}
+
+// tokenSelection returns, as a selection, the sequence of
+// consecutive occurrences of token sel in the Go src text.
+func tokenSelection(src []byte, sel token.Token) Selection {
+ var s scanner.Scanner
+ fset := token.NewFileSet()
+ file := fset.AddFile("", fset.Base(), len(src))
+ s.Init(file, src, nil, scanner.ScanComments)
+ return func() (seg Span) {
+ for {
+ pos, tok, lit := s.Scan()
+ if tok == token.EOF {
+ break
+ }
+ offs := file.Offset(pos)
+ if tok == sel {
+ seg = Span{offs, offs + len(lit)}
+ break
+ }
+ }
+ return
+ }
+}
+
+// Spans is a helper function to make a Selection from a slice of spans.
+// Empty spans are discarded.
+func Spans(spans ...Span) Selection {
+ i := 0
+ return func() Span {
+ for i < len(spans) {
+ s := spans[i]
+ i++
+ if s.Start < s.End {
+ // non-empty
+ return s
+ }
+ }
+ return Span{}
+ }
+}
+
+// regexpSelection returns the matches of the regular expression expr in text as a Selection; it is empty if expr does not compile.
+func regexpSelection(text []byte, expr string) Selection {
+ var matches [][]int
+ if rx, err := regexp.Compile(expr); err == nil {
+ matches = rx.FindAllIndex(text, -1)
+ }
+ var spans []Span
+ for _, m := range matches {
+ spans = append(spans, Span{m[0], m[1]})
+ }
+ return Spans(spans...)
+}
+
+// Span tags for all the possible selection combinations that may
+// be generated by Format. Selections are indicated by a bitset,
+// and the value of the bitset specifies the tag to be used.
+//
+// bit 0: comments
+// bit 1: highlights
+// bit 2: selections
+//
+var startTags = [][]byte{
+ /* 000 */ []byte(``),
+ /* 001 */ []byte(`<span class="comment">`),
+ /* 010 */ []byte(`<span class="highlight">`),
+ /* 011 */ []byte(`<span class="highlight-comment">`),
+ /* 100 */ []byte(`<span class="selection">`),
+ /* 101 */ []byte(`<span class="selection-comment">`),
+ /* 110 */ []byte(`<span class="selection-highlight">`),
+ /* 111 */ []byte(`<span class="selection-highlight-comment">`),
+}
+
+var endTag = []byte(`</span>`)
+
+func selectionTag(w io.Writer, text []byte, selections int) {
+ if selections < len(startTags) { // bitsets beyond the table get no tag
+ if tag := startTags[selections]; len(tag) > 0 { // bitset 0 maps to the empty tag
+ w.Write(tag)
+ template.HTMLEscape(w, text)
+ w.Write(endTag)
+ return
+ }
+ }
+ template.HTMLEscape(w, text) // untagged: write the escaped text on its own
+}