blob: c9fb55bd54ef1cd4715e70442e1d987943eee0a8 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Godoc comment extraction and comment -> HTML formatting.
package doc
import (
	"go/ast"
	"io"
	"regexp"
	"strings"
	"text/template" // for HTMLEscape
	"unicode"
	"unicode/utf8"
)
// isWhitespace reports whether ch is an ASCII space, tab, newline,
// or carriage return.
func isWhitespace(ch byte) bool {
	switch ch {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}
// stripTrailingWhitespace returns s with all trailing spaces, tabs,
// newlines, and carriage returns removed. The cutset is the same set of
// bytes that isWhitespace accepts. Leading and interior white space is
// left untouched.
func stripTrailingWhitespace(s string) string {
	return strings.TrimRight(s, " \t\n\r")
}
// CommentText returns the text of comment,
// with the comment markers - //, /*, and */ - removed.
func CommentText(comment *ast.CommentGroup) string {
if comment == nil {
return ""
comments := make([]string, len(comment.List))
for i, c := range comment.List {
comments[i] = string(c.Text)
lines := make([]string, 0, 10) // most comments are less than 10 lines
for _, c := range comments {
// Remove comment markers.
// The parser has given us exactly the comment text.
switch c[1] {
case '/':
//-style comment
c = c[2:]
// Remove leading space after //, if there is one.
// TODO(gri) This appears to be necessary in isolated
// cases (bignum.RatFromString) - why?
if len(c) > 0 && c[0] == ' ' {
c = c[1:]
case '*':
/*-style comment */
c = c[2 : len(c)-2]
// Split on newlines.
cl := strings.Split(c, "\n")
// Walk lines, stripping trailing white space and adding to list.
for _, l := range cl {
lines = append(lines, stripTrailingWhitespace(l))
// Remove leading blank lines; convert runs of
// interior blank lines to a single blank line.
n := 0
for _, line := range lines {
if line != "" || n > 0 && lines[n-1] != "" {
lines[n] = line
lines = lines[0:n]
// Add final "" entry to get trailing newline from Join.
if n > 0 && lines[n-1] != "" {
lines = append(lines, "")
return strings.Join(lines, "\n")
// HTML entities written in place of the ASCII quote digraphs `` and ''.
// They are entity references rather than literal curly quotes so that the
// generated HTML is plain ASCII and does not depend on the page encoding.
var (
	ldquo = []byte("&ldquo;")
	rdquo = []byte("&rdquo;")
)

// commentEscape writes text to w with HTML special characters escaped.
// If nice is set, it also turns each `` into &ldquo; and each '' into
// &rdquo;. Errors returned by w are not reported.
func commentEscape(w io.Writer, text string, nice bool) {
	last := 0 // start of the text that has not been written yet
	if nice {
		for i := 0; i < len(text)-1; i++ {
			ch := text[i]
			if ch == text[i+1] && (ch == '`' || ch == '\'') {
				// flush the text before the digraph, then emit the entity
				template.HTMLEscape(w, []byte(text[last:i]))
				last = i + 2
				switch ch {
				case '`':
					w.Write(ldquo)
				case '\'':
					w.Write(rdquo)
				}
				i++ // loop will add one more
			}
		}
	}
	template.HTMLEscape(w, []byte(text[last:]))
}
const (
	// Regexp for Go identifiers
	identRx = `[a-zA-Z_][a-zA-Z_0-9]*` // TODO(gri) ASCII only for now - fix this

	// Regexp for URLs
	protocol = `(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero):`
	hostPart = `[a-zA-Z0-9_@\-]+`
	filePart = `[a-zA-Z0-9_?%#~&/\-+=]+`
	urlRx    = protocol + `//` + // http://
		hostPart + `([.:]` + hostPart + `)*/?` + // //www.google.com:8080/
		filePart + `([:.,]` + filePart + `)*`
)

// matchRx matches either a Go identifier (1st parenthesized sub-regexp)
// or a URL (2nd parenthesized sub-regexp).
//
// It is compiled with leftmost-longest (POSIX) semantics on purpose: with
// the default leftmost-first semantics the identifier alternative would
// always win on the protocol name ("http") and a URL would never be
// matched. With leftmost-longest matching the longer URL match is chosen,
// and the sub-regexp numbering stays as documented above.
var matchRx = regexp.MustCompilePOSIX(`(` + identRx + `)|(` + urlRx + `)`)
// HTML fragments written by emphasize and ToHTML.
// html_a and html_aq enclose the (escaped) link target of an anchor.
var (
	html_a      = []byte(`<a href="`)
	html_aq     = []byte(`">`)
	html_enda   = []byte("</a>")
	html_i      = []byte("<i>")
	html_endi   = []byte("</i>")
	html_p      = []byte("<p>\n")
	html_endp   = []byte("</p>\n")
	html_pre    = []byte("<pre>")
	html_endpre = []byte("</pre>\n")
	html_h      = []byte("<h3>")
	html_endh   = []byte("</h3>\n")
)
// Emphasize and escape a line of text for HTML. URLs are converted into links;
// if the URL also appears in the words map, the link is taken from the map (if
// the corresponding map value is the empty string, the URL is not converted
// into a link). Go identifiers that appear in the words map are italicized; if
// the corresponding map value is not the empty string, it is considered a URL
// and the word is converted into a link. If nice is set, the remaining text's
// appearance is improved where it makes sense (e.g., `` is turned into &ldquo;
// and '' into &rdquo;).
func emphasize(w io.Writer, line string, words map[string]string, nice bool) {
for {
m := matchRx.FindStringSubmatchIndex(line)
if m == nil {
// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is identRx)
// write text before match
commentEscape(w, line[0:m[0]], nice)
// analyze match
match := line[m[0]:m[1]]
url := ""
italics := false
if words != nil {
url, italics = words[string(match)]
if m[2] < 0 {
// didn't match against first parenthesized sub-regexp; must be match against urlRx
if !italics {
// no alternative URL in words list, use match instead
url = string(match)
italics = false // don't italicize URLs
// write match
if len(url) > 0 {
template.HTMLEscape(w, []byte(url))
if italics {
commentEscape(w, match, nice)
if italics {
if len(url) > 0 {
// advance
line = line[m[1]:]
commentEscape(w, line, nice)
// indentLen returns the number of leading spaces and tabs in s.
func indentLen(s string) int {
	i := 0
	for i < len(s) && (s[i] == ' ' || s[i] == '\t') {
		i++
	}
	return i
}
// isBlank reports whether s is empty or consists of a single newline.
// A line that contains only spaces or tabs is not considered blank.
func isBlank(s string) bool {
	return s == "" || s == "\n"
}
// commonPrefix returns the longest prefix that a and b have in common.
// The comparison is byte-wise.
func commonPrefix(a, b string) string {
	i := 0
	for i < len(a) && i < len(b) && a[i] == b[i] {
		i++
	}
	return a[0:i]
}
func unindent(block []string) {
if len(block) == 0 {
// compute maximum common white prefix
prefix := block[0][0:indentLen(block[0])]
for _, line := range block {
if !isBlank(line) {
prefix = commonPrefix(prefix, line[0:indentLen(line)])
n := len(prefix)
// remove
for i, line := range block {
if !isBlank(line) {
block[i] = line[n:]
// heading returns the trimmed line if it passes as a section heading;
// otherwise it returns the empty string.
//
// A heading starts with an uppercase letter, ends in a letter or digit,
// contains no punctuation other than an apostrophe, and uses the
// apostrophe only in a possessive "'s" that ends a word.
func heading(line string) string {
	line = strings.TrimSpace(line)
	if len(line) == 0 {
		return ""
	}

	// a heading must start with an uppercase letter
	r, _ := utf8.DecodeRuneInString(line)
	if !unicode.IsLetter(r) || !unicode.IsUpper(r) {
		return ""
	}

	// it must end in a letter or digit:
	r, _ = utf8.DecodeLastRuneInString(line)
	if !unicode.IsLetter(r) && !unicode.IsDigit(r) {
		return ""
	}

	// exclude lines with illegal characters
	if strings.ContainsAny(line, ",.;:!?+*/=()[]{}_^°&§~%#@<\">\\") {
		return ""
	}

	// allow "'" for possessive "'s" only
	for b := line; ; {
		i := strings.IndexRune(b, '\'')
		if i < 0 {
			break // no more apostrophes to check
		}
		if i+1 >= len(b) || b[i+1] != 's' || (i+2 < len(b) && b[i+2] != ' ') {
			return "" // not followed by "s "
		}
		b = b[i+2:]
	}

	return line
}
// Convert comment text to formatted HTML.
// The comment was prepared by DocReader,
// so it is known not to have leading, trailing blank lines
// nor to have trailing spaces at the end of lines.
// The comment markers have already been removed.
// Turn each run of multiple \n into </p><p>.
// Turn each run of indented lines into a <pre> block without indent.
// Enclose headings with header tags.
// URLs in the comment text are converted into links; if the URL also appears
// in the words map, the link is taken from the map (if the corresponding map
// value is the empty string, the URL is not converted into a link).
// Go identifiers that appear in the words map are italicized; if the corresponding
// map value is not the empty string, it is considered a URL and the word is converted
// into a link.
func ToHTML(w io.Writer, text string, words map[string]string) {
inpara := false
lastWasBlank := false
lastWasHeading := false
close := func() {
if inpara {
inpara = false
open := func() {
if !inpara {
inpara = true
lines := strings.SplitAfter(text, "\n")
for i := 0; i < len(lines); {
line := lines[i]
if isBlank(line) {
// close paragraph
lastWasBlank = true
if indentLen(line) > 0 {
// close paragraph
// count indented or blank lines
j := i + 1
for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
// but not trailing blank lines
for j > i && isBlank(lines[j-1]) {
block := lines[i:j]
i = j
// put those lines in a pre block
for _, line := range block {
emphasize(w, line, nil, false) // no nice text formatting
lastWasHeading = false
if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
// current line is non-blank, sourounded by blank lines
// and the next non-blank line is not indented: this
// might be a heading.
if head := heading(line); head != "" {
commentEscape(w, head, true) // nice text formatting
i += 2
lastWasHeading = true
// open paragraph
lastWasBlank = false
lastWasHeading = false
emphasize(w, lines[i], words, true) // nice text formatting