internal/texthtml/texthtml.go - website - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package texthtml formats text files to HTML.
 package texthtml

 import (
 	"bytes"
 	"fmt"
 	"golang.org/x/website/internal/backport/go/ast"
 	"golang.org/x/website/internal/backport/go/scanner"
 	"golang.org/x/website/internal/backport/go/token"
 	"io"
 	"regexp"
 	"text/template"
 )

 // Span identifies the half-open range [Start, End) of offsets into a text.
 // A Span whose Start is not below its End is empty; this includes the
 // zero value.
 type Span struct {
 	Start int // offset of the first position inside the span
 	End   int // offset just past the last position inside the span
 }

 // isEmpty reports whether s covers no text at all.
 func (s *Span) isEmpty() bool {
 	return !(s.Start < s.End)
 }

 // Selection is an iterator over text spans: every call yields the next
 // Span. By contract a Selection first produces consecutive,
 // non-overlapping, non-empty spans and afterwards empty spans forever.
 // The first empty span signals that the selection is exhausted.
 type Selection func() Span

 // A Config configures how Format renders text as HTML.
 // With the zero value, Format only HTML-escapes the text and trims
 // trailing blanks from each line.
 type Config struct {
 	Line       int       // if >= 1, number lines beginning with number Line, with <span class="ln">
 	GoComments bool      // mark comments in Go text with <span class="comment">
 	Playground bool      // number lines with <span class="number"> instead of <span class="ln"> (playground sample)
 	Highlight  string    // highlight matches for this regexp with <span class="highlight">
 	HL         string    // highlight lines that end with // HL (x/tools/present convention); replaces Highlight when both are set
 	Selection  Selection // mark selected spans with <span class="selection">
 	AST        ast.Node  // link uses to declarations, assuming text is formatting of AST
 	OldDocs    bool      // emit links to ?m=old docs
 }

 // Format renders text as HTML following cfg and returns the result.
 //
 // The selections (Go comments, highlights, the caller's selection and,
 // when cfg.AST is set, identifiers) are computed first and merged by
 // formatSelections. The output then has trailing blanks trimmed from
 // every line and, if cfg.Line > 0, each line is prefixed with a
 // numbered <span>.
 //
 // When cfg.HL is set its selection replaces the one derived from
 // cfg.Highlight, and hlSelection overwrites the "// HL..." markers in
 // text with spaces, so the caller's slice is modified.
 func Format(text []byte, cfg Config) (html []byte) {
 	var commentSel, highlightSel Selection
 	if cfg.GoComments {
 		commentSel = tokenSelection(text, token.COMMENT)
 	}
 	if cfg.Highlight != "" {
 		highlightSel = regexpSelection(text, cfg.Highlight)
 	}
 	if cfg.HL != "" {
 		// Replaces a regexp highlight chosen just above.
 		highlightSel = hlSelection(text, cfg.HL)
 	}

 	// Without an AST there are no identifiers to link.
 	identSel := Spans()
 	var links []goLink
 	if cfg.AST != nil {
 		identSel = tokenSelection(text, token.IDENT)
 		links = goLinksFor(cfg.AST)
 		if cfg.OldDocs {
 			for i := range links {
 				links[i].oldDocs = true
 			}
 		}
 	}

 	var out bytes.Buffer
 	formatSelections(&out, text, links, commentSel, highlightSel, cfg.Selection, identSel)
 	if cfg.AST != nil {
 		postFormatAST(&out, cfg.AST)
 	}
 	trimSpaces(&out)

 	if cfg.Line <= 0 {
 		return out.Bytes()
 	}

 	// Number the lines in a second pass over the formatted output.
 	// Every line is prefixed with a <span> holding its number and two
 	// &nbsp; entities. For the "ln" form that makes the prefix eight
 	// characters wide (%6d plus the two nbsp's), so a tab at the start
 	// of the code begins in column 9 and indents a full tab stop. The
 	// span carries a CSS class so that a stylesheet can declare that
 	// width explicitly; Firefox may otherwise round the rendered width
 	// to less than 8 characters, see
 	// https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
 	//
 	// NOTE: the output is split on "\n", so output ending in a newline
 	// produces one final numbered line that is empty.
 	var numbered bytes.Buffer
 	lineNo := cfg.Line
 	for _, line := range bytes.Split(out.Bytes(), []byte("\n")) {
 		if cfg.Playground {
 			fmt.Fprintf(&numbered, `<span class="number">%2d&nbsp;&nbsp;</span>`, lineNo)
 		} else {
 			fmt.Fprintf(&numbered, `<span id="L%d" class="ln">%6d&nbsp;&nbsp;</span>`, lineNo, lineNo)
 		}
 		numbered.Write(line)
 		numbered.WriteByte('\n')
 		lineNo++
 	}
 	return numbered.Bytes()
 }

 // formatSelections writes text to w as HTML, marking up the parts
 // covered by selections.
 //
 // The span changes of all selections are merged into one ordered
 // sequence. Selections 0 through 2 each contribute one bit to a bitset,
 // and every maximal run of text covered by the same bitset is written by
 // selectionTag, which picks the matching <span> from startTags.
 // Selection 3 is special: it does not affect the bitset. Instead, each
 // time one of its spans starts, the start tag of the next unused entry
 // of goLinks is written, and the matching end tag is written when that
 // span ends.
 func formatSelections(w io.Writer, text []byte, goLinks []goLink, selections ...Selection) {
 	// compute the sequence of consecutive span changes
 	changes := newMerger(selections)

 	// The i'th bit in bitset indicates that the text
 	// at the current offset is covered by selections[i].
 	bitset := 0
 	lastOffs := 0

 	// Text spans are written in a delayed fashion
 	// such that consecutive spans belonging to the
 	// same selection can be combined (peephole optimization).
 	// last describes the last span which has not yet been written.
 	var last struct {
 		begin, end int // valid if begin < end
 		bitset     int
 	}

 	// flush writes the last delayed text span
 	flush := func() {
 		if last.begin < last.end {
 			selectionTag(w, text[last.begin:last.end], last.bitset)
 		}
 		last.begin = last.end // invalidate last
 	}

 	// span runs the span [lastOffs, end) with the selection
 	// indicated by bitset through the span peephole optimizer.
 	span := func(end int) {
 		if lastOffs < end { // ignore empty spans
 			if last.end != lastOffs || last.bitset != bitset {
 				// the last span is not adjacent to or
 				// differs from the new one
 				flush()
 				// start a new span
 				last.begin = lastOffs
 			}
 			last.end = end
 			last.bitset = bitset
 		}
 	}

 	// linkEnd is the end tag of the currently open Go link, or ""
 	// if no link is open.
 	linkEnd := ""
 	for {
 		// get the next span change
 		index, offs, start := changes.next()
 		if index < 0 || offs > len(text) {
 			// no more span changes or the next change
 			// is past the end of the text - we're done
 			break
 		}

 		// format the previous selection span, determine
 		// the new selection bitset and start a new span
 		span(offs)
 		if index == 3 { // Go link: the fourth selection (Format passes the identifier selection here)
 			// Link tags are written directly to w, so any pending
 			// text must be written out first.
 			flush()
 			if start {
 				if len(goLinks) > 0 {
 					start, end := goLinks[0].tags()
 					io.WriteString(w, start)
 					linkEnd = end
 					goLinks = goLinks[1:]
 				}
 			} else {
 				if linkEnd != "" {
 					io.WriteString(w, linkEnd)
 					linkEnd = ""
 				}
 			}
 		} else {
 			// Set or clear the bit of the selection that changed.
 			mask := 1 << uint(index)
 			if start {
 				bitset |= mask
 			} else {
 				bitset &^= mask
 			}
 		}
 		lastOffs = offs
 	}
 	// Write whatever text follows the last span change.
 	span(len(text))
 	flush()
 }

 // merger combines several Selections into a single ordered stream of
 // span change events, which are retrieved one at a time with next.
 type merger struct {
 	selections []Selection // the selections being merged
 	spans      []Span      // spans[i] is the pending span of selections[i]
 }

 // infinity is the offset that stands for "no further change"; every
 // offset reported by next for a real change is smaller.
 const infinity int = 2e9

 // newMerger returns a merger over selections, primed with the first
 // span of each one. A nil selection, or one whose first span is empty,
 // starts out exhausted.
 func newMerger(selections []Selection) *merger {
 	pending := make([]Span, len(selections))
 	for i := range selections {
 		pending[i] = Span{infinity, infinity}
 		nextSpan := selections[i]
 		if nextSpan == nil {
 			continue
 		}
 		first := nextSpan()
 		if first.isEmpty() {
 			continue
 		}
 		pending[i] = first
 	}
 	return &merger{selections: selections, spans: pending}
 }

 // next reports the next span change in offset order. index identifies
 // the Selection the change belongs to, offs is the offset at which it
 // happens, and start tells whether a span starts (true) or ends (false)
 // there. When several changes share the smallest offset, the one with
 // the lowest selection index is reported first. Once all selections
 // are exhausted, next returns an index < 0.
 func (m *merger) next() (index, offs int, start bool) {
 	// Look for the smallest pending start or end offset.
 	index, offs = -1, infinity
 	for i := range m.spans {
 		if at := m.spans[i].Start; at < offs {
 			index, offs, start = i, at, true
 		} else if at := m.spans[i].End; at < offs {
 			index, offs, start = i, at, false
 		}
 	}
 	if index < 0 {
 		// Nothing pending: every selection has been merged.
 		return index, offs, start
 	}

 	// Whether a start or an end was found, the start offset is now
 	// used up; setting it to infinity keeps it out of later calls.
 	pending := &m.spans[index]
 	pending.Start = infinity
 	if start {
 		return index, offs, true
 	}

 	// An end was found: use it up as well and fetch the selection's
 	// following span, if it has one.
 	pending.End = infinity
 	if following := m.selections[index](); !following.isEmpty() {
 		*pending = following
 	}
 	return index, offs, false
 }

 // lineSelection returns a Selection that yields one span per line of
 // text. Each span includes the line's terminating newline; a final
 // line without a newline is reported as well.
 func lineSelection(text []byte) Selection {
 	begin := 0 // offset of the next line to report
 	return func() Span {
 		if begin >= len(text) {
 			return Span{}
 		}
 		// The line runs through the next newline, or to the end
 		// of the text if there is none.
 		end := len(text)
 		if nl := bytes.IndexByte(text[begin:], '\n'); nl >= 0 {
 			end = begin + nl + 1
 		}
 		line := Span{begin, end}
 		begin = end
 		return line
 	}
 }

 // tokenSelection returns a Selection over the occurrences of token sel
 // in the Go source src, in source order. The source is scanned lazily,
 // with comments reported as tokens; each call of the Selection scans
 // ahead to the next matching token.
 func tokenSelection(src []byte, sel token.Token) Selection {
 	fset := token.NewFileSet()
 	file := fset.AddFile("", fset.Base(), len(src))
 	var sc scanner.Scanner
 	sc.Init(file, src, nil, scanner.ScanComments)
 	return func() Span {
 		for pos, tok, lit := sc.Scan(); tok != token.EOF; pos, tok, lit = sc.Scan() {
 			if tok != sel {
 				continue
 			}
 			// The span covers the token's literal text.
 			begin := file.Offset(pos)
 			return Span{begin, begin + len(lit)}
 		}
 		return Span{}
 	}
 }

 // Spans returns a Selection that yields the given spans in order,
 // skipping empty ones, followed by empty spans once they are used up.
 func Spans(spans ...Span) Selection {
 	rest := spans
 	return func() Span {
 		for len(rest) > 0 {
 			head := rest[0]
 			rest = rest[1:]
 			if head.End <= head.Start {
 				// empty - skip it
 				continue
 			}
 			return head
 		}
 		return Span{}
 	}
 }

 // hlRE matches a line that ends in a "// HL..." marker. Submatch 1 is
 // the text between the line's leading white space and the marker,
 // submatch 2 is the marker together with white space in front of it,
 // and submatch 3 is the marker name: "HL" optionally followed by
 // letters, digits, or underscores.
 var hlRE = regexp.MustCompile(`(?m)\s*(.+)(\s+// (HL[a-zA-Z0-9_]*))$`)

 // hlSelection returns a Selection covering the content of every line of
 // text that ends in the marker "// " + hl. As a side effect, every
 // "// HLxxx" marker in text, whatever its name, is overwritten with
 // spaces in place.
 func hlSelection(text []byte, hl string) Selection {
 	var marked []Span
 	base := 0 // offset of the current line within text
 	for _, line := range bytes.SplitAfter(text, []byte("\n")) {
 		loc := hlRE.FindSubmatchIndex(line)
 		if loc != nil {
 			if name := line[loc[6]:loc[7]]; string(name) == hl {
 				marked = append(marked, Span{base + loc[2], base + loc[3]})
 			}
 			// Blank out the marker; line aliases text, so this
 			// changes the caller's data.
 			marker := line[loc[4]:loc[5]]
 			for i := range marker {
 				marker[i] = ' '
 			}
 		}
 		base += len(line)
 	}
 	return Spans(marked...)
 }

 // regexpSelection returns a Selection covering every match of the
 // regular expression expr in text. If expr does not compile, the
 // Selection is empty.
 func regexpSelection(text []byte, expr string) Selection {
 	rx, err := regexp.Compile(expr)
 	if err != nil {
 		// An invalid expression selects nothing.
 		return Spans()
 	}
 	var found []Span
 	for _, loc := range rx.FindAllIndex(text, -1) {
 		found = append(found, Span{loc[0], loc[1]})
 	}
 	return Spans(found...)
 }

 // startTags holds the opening <span> tag for every combination of
 // selections, indexed by the selection bitset:
 //
 // bit 0: comments
 // bit 1: highlights
 // bit 2: selections
 //
 // endTag closes any of them.
 var (
 	startTags = [][]byte{
 		0: []byte(``),                                           // 000
 		1: []byte(`<span class="comment">`),                     // 001
 		2: []byte(`<span class="highlight">`),                   // 010
 		3: []byte(`<span class="highlight-comment">`),           // 011
 		4: []byte(`<span class="selection">`),                   // 100
 		5: []byte(`<span class="selection-comment">`),           // 101
 		6: []byte(`<span class="selection-highlight">`),         // 110
 		7: []byte(`<span class="selection-highlight-comment">`), // 111
 	}

 	endTag = []byte(`</span>`)
 )

 // selectionTag writes the HTML-escaped text to w, wrapped in the <span>
 // that startTags assigns to the bitset selections. If the bitset has no
 // tag (it is 0, or beyond the table), the text is written without a
 // wrapper. Write errors are not checked.
 func selectionTag(w io.Writer, text []byte, selections int) {
 	var open []byte
 	if selections < len(startTags) {
 		open = startTags[selections]
 	}
 	if len(open) == 0 {
 		template.HTMLEscape(w, text)
 		return
 	}
 	w.Write(open)
 	template.HTMLEscape(w, text)
 	w.Write(endTag)
 }

 // trimSpaces removes trailing spaces and tabs from the end of every
 // line in buf. The buffer's contents are rewritten in place and the
 // buffer is then truncated to the shortened length.
 func trimSpaces(buf *bytes.Buffer) {
 	rest := buf.Bytes()
 	kept := rest[:0] // shares rest's storage; never overtakes the unread part
 	for len(rest) > 0 {
 		var line []byte
 		hasNewline := false
 		if nl := bytes.IndexByte(rest, '\n'); nl >= 0 {
 			line, rest, hasNewline = rest[:nl], rest[nl+1:], true
 		} else {
 			// last line, not terminated by a newline
 			line, rest = rest, nil
 		}
 		kept = append(kept, bytes.TrimRight(line, " \t")...)
 		if hasNewline {
 			kept = append(kept, '\n')
 		}
 	}
 	buf.Truncate(len(kept))
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package texthtml formats text files to HTML.
	package texthtml

	import (
	"bytes"
	"fmt"
	"golang.org/x/website/internal/backport/go/ast"
	"golang.org/x/website/internal/backport/go/scanner"
	"golang.org/x/website/internal/backport/go/token"
	"io"
	"regexp"
	"text/template"
	)

	// Span identifies the half-open range [Start, End) of offsets into a text.
	// A Span whose Start is not below its End is empty; this includes the
	// zero value.
	type Span struct {
		Start int // offset of the first position inside the span
		End   int // offset just past the last position inside the span
	}

	// isEmpty reports whether s covers no text at all.
	func (s *Span) isEmpty() bool {
		return !(s.Start < s.End)
	}

	// Selection is an iterator over text spans: every call yields the next
	// Span. By contract a Selection first produces consecutive,
	// non-overlapping, non-empty spans and afterwards empty spans forever.
	// The first empty span signals that the selection is exhausted.
	type Selection func() Span

	// A Config configures how Format renders text as HTML.
	// With the zero value, Format only HTML-escapes the text and trims
	// trailing blanks from each line.
	type Config struct {
	Line int // if >= 1, number lines beginning with number Line, with <span class="ln">
	GoComments bool // mark comments in Go text with <span class="comment">
	Playground bool // number lines with <span class="number"> instead of <span class="ln"> (playground sample)
	Highlight string // highlight matches for this regexp with <span class="highlight">
	HL string // highlight lines that end with // HL (x/tools/present convention); replaces Highlight when both are set
	Selection Selection // mark selected spans with <span class="selection">
	AST ast.Node // link uses to declarations, assuming text is formatting of AST
	OldDocs bool // emit links to ?m=old docs
	}

	// Format renders text as HTML following cfg and returns the result.
	//
	// The selections (Go comments, highlights, the caller's selection and,
	// when cfg.AST is set, identifiers) are computed first and merged by
	// formatSelections. The output then has trailing blanks trimmed from
	// every line and, if cfg.Line > 0, each line is prefixed with a
	// numbered <span>.
	//
	// When cfg.HL is set its selection replaces the one derived from
	// cfg.Highlight, and hlSelection overwrites the "// HL..." markers in
	// text with spaces, so the caller's slice is modified.
	func Format(text []byte, cfg Config) (html []byte) {
		var commentSel, highlightSel Selection
		if cfg.GoComments {
			commentSel = tokenSelection(text, token.COMMENT)
		}
		if cfg.Highlight != "" {
			highlightSel = regexpSelection(text, cfg.Highlight)
		}
		if cfg.HL != "" {
			// Replaces a regexp highlight chosen just above.
			highlightSel = hlSelection(text, cfg.HL)
		}

		// Without an AST there are no identifiers to link.
		identSel := Spans()
		var links []goLink
		if cfg.AST != nil {
			identSel = tokenSelection(text, token.IDENT)
			links = goLinksFor(cfg.AST)
			if cfg.OldDocs {
				for i := range links {
					links[i].oldDocs = true
				}
			}
		}

		var out bytes.Buffer
		formatSelections(&out, text, links, commentSel, highlightSel, cfg.Selection, identSel)
		if cfg.AST != nil {
			postFormatAST(&out, cfg.AST)
		}
		trimSpaces(&out)

		if cfg.Line <= 0 {
			return out.Bytes()
		}

		// Number the lines in a second pass over the formatted output.
		// Every line is prefixed with a <span> holding its number and two
		// &nbsp; entities, written as entities so that the padding does
		// not depend on how the literal is stored or collapsed. For the
		// "ln" form that makes the prefix eight characters wide (%6d plus
		// the two nbsp's), so a tab at the start of the code begins in
		// column 9 and indents a full tab stop. The span carries a CSS
		// class so that a stylesheet can declare that width explicitly;
		// Firefox may otherwise round the rendered width to less than 8
		// characters, see
		// https://github.com/webcompat/web-bugs/issues/17530#issuecomment-402675091
		//
		// NOTE: the output is split on "\n", so output ending in a newline
		// produces one final numbered line that is empty.
		var numbered bytes.Buffer
		lineNo := cfg.Line
		for _, line := range bytes.Split(out.Bytes(), []byte("\n")) {
			if cfg.Playground {
				fmt.Fprintf(&numbered, `<span class="number">%2d&nbsp;&nbsp;</span>`, lineNo)
			} else {
				fmt.Fprintf(&numbered, `<span id="L%d" class="ln">%6d&nbsp;&nbsp;</span>`, lineNo, lineNo)
			}
			numbered.Write(line)
			numbered.WriteByte('\n')
			lineNo++
		}
		return numbered.Bytes()
	}

	// formatSelections takes a text and writes it to w using link and span
	// writers lw and sw as follows: lw is invoked for consecutive span starts
	// and ends as specified through the links selection, and sw is invoked for
	// consecutive spans of text overlapped by the same selections as specified
	// by selections.
	func formatSelections(w io.Writer, text []byte, goLinks []goLink, selections ...Selection) {
	// compute the sequence of consecutive span changes
	changes := newMerger(selections)

	// The i'th bit in bitset indicates that the text
	// at the current offset is covered by selections[i].
	bitset := 0
	lastOffs := 0

	// Text spans are written in a delayed fashion
	// such that consecutive spans belonging to the
	// same selection can be combined (peephole optimization).
	// last describes the last span which has not yet been written.
	var last struct {
	begin, end int // valid if begin < end
	bitset int
	}

	// flush writes the last delayed text span
	flush := func() {
	if last.begin < last.end {
	selectionTag(w, text[last.begin:last.end], last.bitset)
	}
	last.begin = last.end // invalidate last
	}

	// span runs the span [lastOffs, end) with the selection
	// indicated by bitset through the span peephole optimizer.
	span := func(end int) {
	if lastOffs < end { // ignore empty spans
	if last.end != lastOffs \|\| last.bitset != bitset {
	// the last span is not adjacent to or
	// differs from the new one
	flush()
	// start a new span
	last.begin = lastOffs
	}
	last.end = end
	last.bitset = bitset
	}
	}

	linkEnd := ""
	for {
	// get the next span change
	index, offs, start := changes.next()
	if index < 0 \|\| offs > len(text) {
	// no more span changes or the next change
	// is past the end of the text - we're done
	break
	}

	// format the previous selection span, determine
	// the new selection bitset and start a new span
	span(offs)
	if index == 3 { // Go link
	flush()
	if start {
	if len(goLinks) > 0 {
	start, end := goLinks[0].tags()
	io.WriteString(w, start)
	linkEnd = end
	goLinks = goLinks[1:]
	}
	} else {
	if linkEnd != "" {
	io.WriteString(w, linkEnd)
	linkEnd = ""
	}
	}
	} else {
	mask := 1 << uint(index)
	if start {
	bitset \|= mask
	} else {
	bitset &^= mask
	}
	}
	lastOffs = offs
	}
	span(len(text))
	flush()
	}

	// merger combines several Selections into a single ordered stream of
	// span change events, which are retrieved one at a time with next.
	type merger struct {
		selections []Selection // the selections being merged
		spans      []Span      // spans[i] is the pending span of selections[i]
	}

	// infinity is the offset that stands for "no further change"; every
	// offset reported by next for a real change is smaller.
	const infinity int = 2e9

	// newMerger returns a merger over selections, primed with the first
	// span of each one. A nil selection, or one whose first span is empty,
	// starts out exhausted.
	func newMerger(selections []Selection) *merger {
		pending := make([]Span, len(selections))
		for i := range selections {
			pending[i] = Span{infinity, infinity}
			nextSpan := selections[i]
			if nextSpan == nil {
				continue
			}
			first := nextSpan()
			if first.isEmpty() {
				continue
			}
			pending[i] = first
		}
		return &merger{selections: selections, spans: pending}
	}

	// next reports the next span change in offset order. index identifies
	// the Selection the change belongs to, offs is the offset at which it
	// happens, and start tells whether a span starts (true) or ends (false)
	// there. When several changes share the smallest offset, the one with
	// the lowest selection index is reported first. Once all selections
	// are exhausted, next returns an index < 0.
	func (m *merger) next() (index, offs int, start bool) {
		// Look for the smallest pending start or end offset.
		index, offs = -1, infinity
		for i := range m.spans {
			if at := m.spans[i].Start; at < offs {
				index, offs, start = i, at, true
			} else if at := m.spans[i].End; at < offs {
				index, offs, start = i, at, false
			}
		}
		if index < 0 {
			// Nothing pending: every selection has been merged.
			return index, offs, start
		}

		// Whether a start or an end was found, the start offset is now
		// used up; setting it to infinity keeps it out of later calls.
		pending := &m.spans[index]
		pending.Start = infinity
		if start {
			return index, offs, true
		}

		// An end was found: use it up as well and fetch the selection's
		// following span, if it has one.
		pending.End = infinity
		if following := m.selections[index](); !following.isEmpty() {
			*pending = following
		}
		return index, offs, false
	}

	// lineSelection returns a Selection that yields one span per line of
	// text. Each span includes the line's terminating newline; a final
	// line without a newline is reported as well.
	func lineSelection(text []byte) Selection {
		begin := 0 // offset of the next line to report
		return func() Span {
			if begin >= len(text) {
				return Span{}
			}
			// The line runs through the next newline, or to the end
			// of the text if there is none.
			end := len(text)
			if nl := bytes.IndexByte(text[begin:], '\n'); nl >= 0 {
				end = begin + nl + 1
			}
			line := Span{begin, end}
			begin = end
			return line
		}
	}

	// tokenSelection returns a Selection over the occurrences of token sel
	// in the Go source src, in source order. The source is scanned lazily,
	// with comments reported as tokens; each call of the Selection scans
	// ahead to the next matching token.
	func tokenSelection(src []byte, sel token.Token) Selection {
		fset := token.NewFileSet()
		file := fset.AddFile("", fset.Base(), len(src))
		var sc scanner.Scanner
		sc.Init(file, src, nil, scanner.ScanComments)
		return func() Span {
			for pos, tok, lit := sc.Scan(); tok != token.EOF; pos, tok, lit = sc.Scan() {
				if tok != sel {
					continue
				}
				// The span covers the token's literal text.
				begin := file.Offset(pos)
				return Span{begin, begin + len(lit)}
			}
			return Span{}
		}
	}

	// Spans returns a Selection that yields the given spans in order,
	// skipping empty ones, followed by empty spans once they are used up.
	func Spans(spans ...Span) Selection {
		rest := spans
		return func() Span {
			for len(rest) > 0 {
				head := rest[0]
				rest = rest[1:]
				if head.End <= head.Start {
					// empty - skip it
					continue
				}
				return head
			}
			return Span{}
		}
	}

	// hlRE matches a line that ends in a "// HL..." marker. Submatch 1 is
	// the text between the line's leading white space and the marker,
	// submatch 2 is the marker together with white space in front of it,
	// and submatch 3 is the marker name: "HL" optionally followed by
	// letters, digits, or underscores. Both the leading white space and
	// the characters after "HL" are optional, so a bare "// HL" at the end
	// of an unindented line matches too.
	var hlRE = regexp.MustCompile(`(?m)\s*(.+)(\s+// (HL[a-zA-Z0-9_]*))$`)

	// hlSelection returns a Selection covering the content of every line of
	// text that ends in the marker "// " + hl. As a side effect, every
	// "// HLxxx" marker in text, whatever its name, is overwritten with
	// spaces in place.
	func hlSelection(text []byte, hl string) Selection {
		var marked []Span
		base := 0 // offset of the current line within text
		for _, line := range bytes.SplitAfter(text, []byte("\n")) {
			loc := hlRE.FindSubmatchIndex(line)
			if loc != nil {
				if name := line[loc[6]:loc[7]]; string(name) == hl {
					marked = append(marked, Span{base + loc[2], base + loc[3]})
				}
				// Blank out the marker; line aliases text, so this
				// changes the caller's data.
				marker := line[loc[4]:loc[5]]
				for i := range marker {
					marker[i] = ' '
				}
			}
			base += len(line)
		}
		return Spans(marked...)
	}

	// regexpSelection returns a Selection covering every match of the
	// regular expression expr in text. If expr does not compile, the
	// Selection is empty.
	func regexpSelection(text []byte, expr string) Selection {
		rx, err := regexp.Compile(expr)
		if err != nil {
			// An invalid expression selects nothing.
			return Spans()
		}
		var found []Span
		for _, loc := range rx.FindAllIndex(text, -1) {
			found = append(found, Span{loc[0], loc[1]})
		}
		return Spans(found...)
	}

	// startTags holds the opening <span> tag for every combination of
	// selections, indexed by the selection bitset:
	//
	// bit 0: comments
	// bit 1: highlights
	// bit 2: selections
	//
	// endTag closes any of them.
	var (
		startTags = [][]byte{
			0: []byte(``),                                           // 000
			1: []byte(`<span class="comment">`),                     // 001
			2: []byte(`<span class="highlight">`),                   // 010
			3: []byte(`<span class="highlight-comment">`),           // 011
			4: []byte(`<span class="selection">`),                   // 100
			5: []byte(`<span class="selection-comment">`),           // 101
			6: []byte(`<span class="selection-highlight">`),         // 110
			7: []byte(`<span class="selection-highlight-comment">`), // 111
		}

		endTag = []byte(`</span>`)
	)

	// selectionTag writes the HTML-escaped text to w, wrapped in the <span>
	// that startTags assigns to the bitset selections. If the bitset has no
	// tag (it is 0, or beyond the table), the text is written without a
	// wrapper. Write errors are not checked.
	func selectionTag(w io.Writer, text []byte, selections int) {
		var open []byte
		if selections < len(startTags) {
			open = startTags[selections]
		}
		if len(open) == 0 {
			template.HTMLEscape(w, text)
			return
		}
		w.Write(open)
		template.HTMLEscape(w, text)
		w.Write(endTag)
	}

	// trimSpaces removes trailing spaces and tabs from the end of every
	// line in buf. The buffer's contents are rewritten in place and the
	// buffer is then truncated to the shortened length.
	func trimSpaces(buf *bytes.Buffer) {
		rest := buf.Bytes()
		kept := rest[:0] // shares rest's storage; never overtakes the unread part
		for len(rest) > 0 {
			var line []byte
			hasNewline := false
			if nl := bytes.IndexByte(rest, '\n'); nl >= 0 {
				line, rest, hasNewline = rest[:nl], rest[nl+1:], true
			} else {
				// last line, not terminated by a newline
				line, rest = rest, nil
			}
			// A trailing byte is dropped if it is a space or a tab.
			kept = append(kept, bytes.TrimRight(line, " \t")...)
			if hasNewline {
				kept = append(kept, '\n')
			}
		}
		buf.Truncate(len(kept))
	}