Andrew Gerrand | d79f4fe | 2013-07-17 14:02:35 +1000 | [diff] [blame] | 1 | // Copyright 2011 The Go Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style |
| 3 | // license that can be found in the LICENSE file. |
| 4 | |
| 5 | // This file implements FormatSelections and FormatText. |
| 6 | // FormatText is used to HTML-format Go and non-Go source |
| 7 | // text with line numbers and highlighted sections. It is |
| 8 | // built on top of FormatSelections, a generic formatter |
| 9 | // for "selected" text. |
| 10 | |
Brad Fitzpatrick | e6ff53b | 2013-07-17 17:09:54 +1000 | [diff] [blame] | 11 | package godoc |
Andrew Gerrand | d79f4fe | 2013-07-17 14:02:35 +1000 | [diff] [blame] | 12 | |
| 13 | import ( |
| 14 | "fmt" |
| 15 | "go/scanner" |
| 16 | "go/token" |
| 17 | "io" |
| 18 | "regexp" |
| 19 | "strconv" |
| 20 | "text/template" |
| 21 | ) |
| 22 | |
| 23 | // ---------------------------------------------------------------------------- |
| 24 | // Implementation of FormatSelections |
| 25 | |
| 26 | // A Segment describes a text segment [start, end). |
| 27 | // The zero value of a Segment is a ready-to-use empty segment. |
| 28 | // |
| 29 | type Segment struct { |
| 30 | start, end int |
| 31 | } |
| 32 | |
| 33 | func (seg *Segment) isEmpty() bool { return seg.start >= seg.end } |
| 34 | |
| 35 | // A Selection is an "iterator" function returning a text segment. |
| 36 | // Repeated calls to a selection return consecutive, non-overlapping, |
| 37 | // non-empty segments, followed by an infinite sequence of empty |
| 38 | // segments. The first empty segment marks the end of the selection. |
| 39 | // |
| 40 | type Selection func() Segment |
| 41 | |
| 42 | // A LinkWriter writes some start or end "tag" to w for the text offset offs. |
| 43 | // It is called by FormatSelections at the start or end of each link segment. |
| 44 | // |
| 45 | type LinkWriter func(w io.Writer, offs int, start bool) |
| 46 | |
| 47 | // A SegmentWriter formats a text according to selections and writes it to w. |
| 48 | // The selections parameter is a bit set indicating which selections provided |
| 49 | // to FormatSelections overlap with the text segment: If the n'th bit is set |
| 50 | // in selections, the n'th selection provided to FormatSelections is overlapping |
| 51 | // with the text. |
| 52 | // |
| 53 | type SegmentWriter func(w io.Writer, text []byte, selections int) |
| 54 | |
| 55 | // FormatSelections takes a text and writes it to w using link and segment |
| 56 | // writers lw and sw as follows: lw is invoked for consecutive segment starts |
| 57 | // and ends as specified through the links selection, and sw is invoked for |
| 58 | // consecutive segments of text overlapped by the same selections as specified |
| 59 | // by selections. The link writer lw may be nil, in which case the links |
| 60 | // Selection is ignored. |
| 61 | // |
| 62 | func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) { |
| 63 | // If we have a link writer, make the links |
| 64 | // selection the last entry in selections |
| 65 | if lw != nil { |
| 66 | selections = append(selections, links) |
| 67 | } |
| 68 | |
| 69 | // compute the sequence of consecutive segment changes |
| 70 | changes := newMerger(selections) |
| 71 | |
| 72 | // The i'th bit in bitset indicates that the text |
| 73 | // at the current offset is covered by selections[i]. |
| 74 | bitset := 0 |
| 75 | lastOffs := 0 |
| 76 | |
| 77 | // Text segments are written in a delayed fashion |
| 78 | // such that consecutive segments belonging to the |
| 79 | // same selection can be combined (peephole optimization). |
| 80 | // last describes the last segment which has not yet been written. |
| 81 | var last struct { |
| 82 | begin, end int // valid if begin < end |
| 83 | bitset int |
| 84 | } |
| 85 | |
| 86 | // flush writes the last delayed text segment |
| 87 | flush := func() { |
| 88 | if last.begin < last.end { |
| 89 | sw(w, text[last.begin:last.end], last.bitset) |
| 90 | } |
| 91 | last.begin = last.end // invalidate last |
| 92 | } |
| 93 | |
| 94 | // segment runs the segment [lastOffs, end) with the selection |
| 95 | // indicated by bitset through the segment peephole optimizer. |
| 96 | segment := func(end int) { |
| 97 | if lastOffs < end { // ignore empty segments |
| 98 | if last.end != lastOffs || last.bitset != bitset { |
| 99 | // the last segment is not adjacent to or |
| 100 | // differs from the new one |
| 101 | flush() |
| 102 | // start a new segment |
| 103 | last.begin = lastOffs |
| 104 | } |
| 105 | last.end = end |
| 106 | last.bitset = bitset |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | for { |
| 111 | // get the next segment change |
| 112 | index, offs, start := changes.next() |
| 113 | if index < 0 || offs > len(text) { |
| 114 | // no more segment changes or the next change |
| 115 | // is past the end of the text - we're done |
| 116 | break |
| 117 | } |
| 118 | // determine the kind of segment change |
| 119 | if lw != nil && index == len(selections)-1 { |
| 120 | // we have a link segment change (see start of this function): |
| 121 | // format the previous selection segment, write the |
| 122 | // link tag and start a new selection segment |
| 123 | segment(offs) |
| 124 | flush() |
| 125 | lastOffs = offs |
| 126 | lw(w, offs, start) |
| 127 | } else { |
| 128 | // we have a selection change: |
| 129 | // format the previous selection segment, determine |
| 130 | // the new selection bitset and start a new segment |
| 131 | segment(offs) |
| 132 | lastOffs = offs |
| 133 | mask := 1 << uint(index) |
| 134 | if start { |
| 135 | bitset |= mask |
| 136 | } else { |
| 137 | bitset &^= mask |
| 138 | } |
| 139 | } |
| 140 | } |
| 141 | segment(len(text)) |
| 142 | flush() |
| 143 | } |
| 144 | |
| 145 | // A merger merges a slice of Selections and produces a sequence of |
| 146 | // consecutive segment change events through repeated next() calls. |
| 147 | // |
| 148 | type merger struct { |
| 149 | selections []Selection |
| 150 | segments []Segment // segments[i] is the next segment of selections[i] |
| 151 | } |
| 152 | |
| 153 | const infinity int = 2e9 |
| 154 | |
| 155 | func newMerger(selections []Selection) *merger { |
| 156 | segments := make([]Segment, len(selections)) |
| 157 | for i, sel := range selections { |
| 158 | segments[i] = Segment{infinity, infinity} |
| 159 | if sel != nil { |
| 160 | if seg := sel(); !seg.isEmpty() { |
| 161 | segments[i] = seg |
| 162 | } |
| 163 | } |
| 164 | } |
| 165 | return &merger{selections, segments} |
| 166 | } |
| 167 | |
| 168 | // next returns the next segment change: index specifies the Selection |
| 169 | // to which the segment belongs, offs is the segment start or end offset |
| 170 | // as determined by the start value. If there are no more segment changes, |
| 171 | // next returns an index value < 0. |
| 172 | // |
| 173 | func (m *merger) next() (index, offs int, start bool) { |
| 174 | // find the next smallest offset where a segment starts or ends |
| 175 | offs = infinity |
| 176 | index = -1 |
| 177 | for i, seg := range m.segments { |
| 178 | switch { |
| 179 | case seg.start < offs: |
| 180 | offs = seg.start |
| 181 | index = i |
| 182 | start = true |
| 183 | case seg.end < offs: |
| 184 | offs = seg.end |
| 185 | index = i |
| 186 | start = false |
| 187 | } |
| 188 | } |
| 189 | if index < 0 { |
| 190 | // no offset found => all selections merged |
| 191 | return |
| 192 | } |
| 193 | // offset found - it's either the start or end offset but |
| 194 | // either way it is ok to consume the start offset: set it |
| 195 | // to infinity so it won't be considered in the following |
| 196 | // next call |
| 197 | m.segments[index].start = infinity |
| 198 | if start { |
| 199 | return |
| 200 | } |
| 201 | // end offset found - consume it |
| 202 | m.segments[index].end = infinity |
| 203 | // advance to the next segment for that selection |
| 204 | seg := m.selections[index]() |
| 205 | if !seg.isEmpty() { |
| 206 | m.segments[index] = seg |
| 207 | } |
| 208 | return |
| 209 | } |
| 210 | |
| 211 | // ---------------------------------------------------------------------------- |
| 212 | // Implementation of FormatText |
| 213 | |
| 214 | // lineSelection returns the line segments for text as a Selection. |
| 215 | func lineSelection(text []byte) Selection { |
| 216 | i, j := 0, 0 |
| 217 | return func() (seg Segment) { |
| 218 | // find next newline, if any |
| 219 | for j < len(text) { |
| 220 | j++ |
| 221 | if text[j-1] == '\n' { |
| 222 | break |
| 223 | } |
| 224 | } |
| 225 | if i < j { |
| 226 | // text[i:j] constitutes a line |
| 227 | seg = Segment{i, j} |
| 228 | i = j |
| 229 | } |
| 230 | return |
| 231 | } |
| 232 | } |
| 233 | |
| 234 | // tokenSelection returns, as a selection, the sequence of |
| 235 | // consecutive occurrences of token sel in the Go src text. |
| 236 | // |
| 237 | func tokenSelection(src []byte, sel token.Token) Selection { |
| 238 | var s scanner.Scanner |
| 239 | fset := token.NewFileSet() |
| 240 | file := fset.AddFile("", fset.Base(), len(src)) |
| 241 | s.Init(file, src, nil, scanner.ScanComments) |
| 242 | return func() (seg Segment) { |
| 243 | for { |
| 244 | pos, tok, lit := s.Scan() |
| 245 | if tok == token.EOF { |
| 246 | break |
| 247 | } |
| 248 | offs := file.Offset(pos) |
| 249 | if tok == sel { |
| 250 | seg = Segment{offs, offs + len(lit)} |
| 251 | break |
| 252 | } |
| 253 | } |
| 254 | return |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | // makeSelection is a helper function to make a Selection from a slice of pairs. |
| 259 | // Pairs describing empty segments are ignored. |
| 260 | // |
| 261 | func makeSelection(matches [][]int) Selection { |
| 262 | i := 0 |
| 263 | return func() Segment { |
| 264 | for i < len(matches) { |
| 265 | m := matches[i] |
| 266 | i++ |
| 267 | if m[0] < m[1] { |
| 268 | // non-empty segment |
| 269 | return Segment{m[0], m[1]} |
| 270 | } |
| 271 | } |
| 272 | return Segment{} |
| 273 | } |
| 274 | } |
| 275 | |
| 276 | // regexpSelection computes the Selection for the regular expression expr in text. |
| 277 | func regexpSelection(text []byte, expr string) Selection { |
| 278 | var matches [][]int |
| 279 | if rx, err := regexp.Compile(expr); err == nil { |
| 280 | matches = rx.FindAllIndex(text, -1) |
| 281 | } |
| 282 | return makeSelection(matches) |
| 283 | } |
| 284 | |
| 285 | var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`) |
| 286 | |
Brad Fitzpatrick | e6ff53b | 2013-07-17 17:09:54 +1000 | [diff] [blame] | 287 | // RangeSelection computes the Selection for a text range described |
Andrew Gerrand | d79f4fe | 2013-07-17 14:02:35 +1000 | [diff] [blame] | 288 | // by the argument str; the range description must match the selRx |
| 289 | // regular expression. |
Brad Fitzpatrick | e6ff53b | 2013-07-17 17:09:54 +1000 | [diff] [blame] | 290 | func RangeSelection(str string) Selection { |
Andrew Gerrand | d79f4fe | 2013-07-17 14:02:35 +1000 | [diff] [blame] | 291 | m := selRx.FindStringSubmatch(str) |
| 292 | if len(m) >= 2 { |
| 293 | from, _ := strconv.Atoi(m[1]) |
| 294 | to, _ := strconv.Atoi(m[2]) |
| 295 | if from < to { |
| 296 | return makeSelection([][]int{{from, to}}) |
| 297 | } |
| 298 | } |
| 299 | return nil |
| 300 | } |
| 301 | |
| 302 | // Span tags for all the possible selection combinations that may |
| 303 | // be generated by FormatText. Selections are indicated by a bitset, |
| 304 | // and the value of the bitset specifies the tag to be used. |
| 305 | // |
| 306 | // bit 0: comments |
| 307 | // bit 1: highlights |
| 308 | // bit 2: selections |
| 309 | // |
| 310 | var startTags = [][]byte{ |
| 311 | /* 000 */ []byte(``), |
| 312 | /* 001 */ []byte(`<span class="comment">`), |
| 313 | /* 010 */ []byte(`<span class="highlight">`), |
| 314 | /* 011 */ []byte(`<span class="highlight-comment">`), |
| 315 | /* 100 */ []byte(`<span class="selection">`), |
| 316 | /* 101 */ []byte(`<span class="selection-comment">`), |
| 317 | /* 110 */ []byte(`<span class="selection-highlight">`), |
| 318 | /* 111 */ []byte(`<span class="selection-highlight-comment">`), |
| 319 | } |
| 320 | |
| 321 | var endTag = []byte(`</span>`) |
| 322 | |
| 323 | func selectionTag(w io.Writer, text []byte, selections int) { |
| 324 | if selections < len(startTags) { |
| 325 | if tag := startTags[selections]; len(tag) > 0 { |
| 326 | w.Write(tag) |
| 327 | template.HTMLEscape(w, text) |
| 328 | w.Write(endTag) |
| 329 | return |
| 330 | } |
| 331 | } |
| 332 | template.HTMLEscape(w, text) |
| 333 | } |
| 334 | |
| 335 | // FormatText HTML-escapes text and writes it to w. |
| 336 | // Consecutive text segments are wrapped in HTML spans (with tags as |
| 337 | // defined by startTags and endTag) as follows: |
| 338 | // |
| 339 | // - if line >= 0, line number (ln) spans are inserted before each line, |
| 340 | // starting with the value of line |
| 341 | // - if the text is Go source, comments get the "comment" span class |
| 342 | // - each occurrence of the regular expression pattern gets the "highlight" |
| 343 | // span class |
| 344 | // - text segments covered by selection get the "selection" span class |
| 345 | // |
| 346 | // Comments, highlights, and selections may overlap arbitrarily; the respective |
| 347 | // HTML span classes are specified in the startTags variable. |
| 348 | // |
| 349 | func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) { |
| 350 | var comments, highlights Selection |
| 351 | if goSource { |
| 352 | comments = tokenSelection(text, token.COMMENT) |
| 353 | } |
| 354 | if pattern != "" { |
| 355 | highlights = regexpSelection(text, pattern) |
| 356 | } |
| 357 | if line >= 0 || comments != nil || highlights != nil || selection != nil { |
| 358 | var lineTag LinkWriter |
| 359 | if line >= 0 { |
| 360 | lineTag = func(w io.Writer, _ int, start bool) { |
| 361 | if start { |
Agniva De Sarker | 7f7074d | 2019-02-02 15:50:54 +0530 | [diff] [blame] | 362 | fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>", line, line) |
Andrew Gerrand | d79f4fe | 2013-07-17 14:02:35 +1000 | [diff] [blame] | 363 | line++ |
| 364 | } |
| 365 | } |
| 366 | } |
| 367 | FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection) |
| 368 | } else { |
| 369 | template.HTMLEscape(w, text) |
| 370 | } |
| 371 | } |