blob: e8d844c4915e5168f1103b6e8bf7c743f29ed5df [file] [log] [blame]
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package comment
import (
"sort"
"strings"
"unicode"
"unicode/utf8"
)
// Doc is the parsed representation of a single Go doc comment.
type Doc struct {
	// Content lists the comment's block-level elements, in order.
	Content []Block

	// Links lists the link definitions collected from the comment.
	Links []*LinkDef
}
// LinkDef is one link definition, associating link text with a URL.
type LinkDef struct {
	Text string // link text
	URL  string // link target URL
	Used bool   // set when the comment refers to this definition
}
// Block is implemented by the block-level elements of a doc comment:
// [*Code], [*Heading], [*List], and [*Paragraph].
type Block interface {
	// block is an unexported marker method, so only types
	// declared in this package can implement Block.
	block()
}
// Heading is a heading within a doc comment.
type Heading struct {
	Text []Text // heading text
}

// block marks *Heading as a Block.
func (*Heading) block() {}
// List is a numbered or bullet list.
// A list is never empty: len(Items) > 0.
// Every Items[i].Number is a non-empty string in a numbered list
// and an empty string in a bullet list.
type List struct {
	// Items holds the items of the list.
	Items []*ListItem

	// ForceBlankBefore requests a blank line ahead of the list
	// when the comment is reformatted, regardless of the usual
	// conditions. See the BlankBefore method.
	//
	// The comment parser sets ForceBlankBefore on every list that
	// follows a blank line, so that printing keeps the blank line.
	ForceBlankBefore bool

	// ForceBlankBetween requests blank lines between list items
	// when the comment is reformatted, regardless of the usual
	// conditions. See the BlankBetween method.
	//
	// The comment parser sets ForceBlankBetween on every list that
	// has a blank line between any two of its items, so that
	// printing keeps the blank lines.
	ForceBlankBetween bool
}

// block marks *List as a Block.
func (*List) block() {}
// BlankBefore reports whether a reformatted comment should put
// a blank line ahead of the list.
// By default the answer matches [List.BlankBetween]: a list whose
// items are separated by blank lines is itself preceded by one.
// Setting [List].ForceBlankBefore forces the answer to true.
func (l *List) BlankBefore() bool {
	if l.ForceBlankBefore {
		return true
	}
	return l.BlankBetween()
}
// BlankBetween reports whether a reformatted comment should put
// a blank line between each pair of list items.
// By default that is the case when any item's Content does not
// consist of exactly one block (for example, an item made of
// several paragraphs, which are themselves separated by blank lines).
// Setting [List].ForceBlankBetween forces the answer to true.
func (l *List) BlankBetween() bool {
	blank := l.ForceBlankBetween
	// Stop at the first item that is not a single block.
	for i := 0; !blank && i < len(l.Items); i++ {
		blank = len(l.Items[i].Content) != 1
	}
	return blank
}
// ListItem is one item of a numbered or bullet list.
type ListItem struct {
	// Number is a decimal string ("1", "2", ...) in a numbered list
	// and the empty string in a bullet list.
	Number string

	// Content holds the blocks that make up the item.
	// Restrictions in the parser and printer currently require
	// each element of Content to be a *Paragraph.
	Content []Block
}
// Paragraph is one paragraph of text.
type Paragraph struct {
	Text []Text // paragraph text
}

// block marks *Paragraph as a Block.
func (*Paragraph) block() {}
// Code is a block of preformatted text.
type Code struct {
	// Text holds the preformatted text. It may span several lines;
	// every line, including the last, ends with a newline character.
	// Text is never empty and neither starts nor ends with a blank line.
	Text string
}

// block marks *Code as a Block.
func (*Code) block() {}
// Text is implemented by the text-level elements of a doc comment:
// [Plain], [Italic], [*Link], and [*DocLink].
type Text interface {
	// text is an unexported marker method, so only types
	// declared in this package can implement Text.
	text()
}
// Plain is a string that is rendered as ordinary, non-italic text.
type Plain string

// text marks Plain as a Text.
func (Plain) text() {}
// Italic is a string that is rendered in italics.
type Italic string

// text marks Italic as a Text.
func (Italic) text() {}
// Link is a link to a specific URL.
type Link struct {
	Auto bool   // true for an automatic (implicit) link, such as a literal URL
	Text []Text // link text
	URL  string // link target URL
}

// text marks *Link as a Text.
func (*Link) text() {}
// DocLink is a link to the documentation of a Go package or symbol.
type DocLink struct {
	Text []Text // link text

	// ImportPath, Recv, and Name together name the link target.
	// The non-empty fields occur in these combinations:
	//   - ImportPath: another package
	//   - ImportPath, Name: a const, func, type, or var in another package
	//   - ImportPath, Recv, Name: a method in another package
	//   - Name: a const, func, type, or var in this package
	//   - Recv, Name: a method in this package
	ImportPath string // import path
	Recv       string // receiver type name for methods, with no pointer star
	Name       string // name of the const, func, type, var, or method
}

// text marks *DocLink as a Text.
func (*DocLink) text() {}
// Parser parses doc comments.
// Its fields may be set before Parse is called to adjust
// how parsing is carried out.
type Parser struct {
	// Words maps Go identifier words to link targets.
	// A word present in the map is italicized; when its value
	// Words[w] is non-empty the word is also linked, with
	// Words[w] as the target.
	// Words corresponds to the [go/doc.ToHTML] words parameter.
	Words map[string]string

	// LookupPackage maps a package name to an import path.
	//
	// When LookupPackage(name) returns ok == true, [name]
	// (or [name.Sym] or [name.Sym.Method]) is treated as a
	// documentation link to importPath's package docs.
	// Returning "", true is allowed and means that name
	// refers to the current package.
	//
	// When LookupPackage(name) returns ok == false,
	// [name] (or [name.Sym] or [name.Sym.Method]) is not treated
	// as a documentation link, unless name is the full (but
	// single-element) import path of a standard library package,
	// as in [math] or [io.Reader].
	// LookupPackage is still consulted first for such names,
	// so that an import of a different package with the same
	// package name can take precedence.
	//
	// A nil LookupPackage behaves like a function that
	// always returns "", false.
	LookupPackage func(name string) (importPath string, ok bool)

	// LookupSym reports whether the current package has a symbol
	// or method with the given name.
	//
	// When LookupSym("", "Name") returns true, [Name] is treated
	// as a documentation link for a const, func, type, or var.
	//
	// Likewise, when LookupSym("Recv", "Name") returns true,
	// [Recv.Name] is treated as a documentation link for
	// method Name of type Recv.
	//
	// A nil LookupSym behaves like a function that
	// always returns false.
	LookupSym func(recv, name string) (ok bool)
}
// parseDoc holds the state used while parsing one doc comment.
type parseDoc struct {
	*Parser // configuration supplied by the caller
	*Doc    // result being built

	links     map[string]*LinkDef          // link definitions keyed by link text
	lines     []string                     // comment lines, as returned by unindent
	lookupSym func(recv, name string) bool // Parse always sets this to a non-nil function
}
// lookupPkg resolves the package part of a doc link such as
// [pkg], [pkg.Name], or [pkg.Recv.Name] to an import path.
//
// A pkg containing a slash is taken to be a complete import path:
// it is returned with ok = true if it is a valid import path
// and rejected otherwise.
//
// A pkg without a slash is most likely a plain package name such as
// "rand" (rather than "crypto/rand" or "math/rand"). d.LookupPackage,
// when set, lets the caller resolve such names against the imports
// of the surrounding package.
//
// The two cases overlap for single-element standard library paths
// such as "math", which are complete import paths without a slash.
// d.LookupPackage is asked first, in case some other package is
// imported under that name; if it declines, DefaultLookupPackage
// decides.
func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) {
	switch {
	case strings.Contains(pkg, "/"):
		// A slash means pkg is treated as a full import path.
		if !validImportPath(pkg) {
			return "", false
		}
		return pkg, true
	case d.LookupPackage != nil:
		// The caller's resolver gets the first try.
		if resolved, found := d.LookupPackage(pkg); found {
			return resolved, true
		}
	}
	return DefaultLookupPackage(pkg)
}
// isStdPkg reports whether path is an element of stdPkgs.
// It uses binary search, so it relies on stdPkgs (defined elsewhere)
// being sorted in ascending order.
func isStdPkg(path string) bool {
	// TODO(rsc): Use sort.Find once we don't have to worry about
	// copying this code into older Go environments.
	n := len(stdPkgs)
	pos := sort.Search(n, func(k int) bool { return path <= stdPkgs[k] })
	if pos == n {
		return false
	}
	return stdPkgs[pos] == path
}
// DefaultLookupPackage is the default package lookup function,
// used when [Parser].LookupPackage is nil.
// It recognizes the names of standard library packages whose
// import path is a single element, such as math, which could
// not be named otherwise.
//
// Note that the go/doc package provides a more sophisticated
// lookup based on the imports used in the current package.
func DefaultLookupPackage(name string) (importPath string, ok bool) {
	if !isStdPkg(name) {
		return "", false
	}
	return name, true
}
// Parse parses the doc comment text and returns it in *Doc form.
// Comment markers (/* // and */) must already have been removed
// from the text.
func (p *Parser) Parse(text string) *Doc {
	lines := unindent(strings.Split(text, "\n"))
	state := &parseDoc{
		Parser: p,
		Doc:    new(Doc),
		links:  make(map[string]*LinkDef),
		lines:  lines,
	}
	if p.LookupSym != nil {
		state.lookupSym = p.LookupSym
	} else {
		state.lookupSym = func(recv, name string) bool { return false }
	}

	// Pass 1: split the lines into blocks and gather link definitions.
	// All text is stored as a single Plain element for now.
	var last span
	for _, sp := range parseSpans(lines) {
		var blk Block
		switch sp.kind {
		case spanPara:
			blk = state.paragraph(lines[sp.start:sp.end])
		case spanHeading:
			blk = state.heading(lines[sp.start])
		case spanOldHeading:
			blk = state.oldHeading(lines[sp.start])
		case spanCode:
			blk = state.code(lines[sp.start:sp.end])
		case spanList:
			// A gap between the previous span and this one means
			// the list was preceded by a blank line.
			blk = state.list(lines[sp.start:sp.end], last.end < sp.start)
		default:
			panic("go/doc/comment: internal error: unknown span kind")
		}
		if blk != nil {
			state.Content = append(state.Content, blk)
		}
		last = sp
	}

	// Pass 2: with every link definition known, expand the Plain
	// text of paragraphs and list items into its final form.
	expand := func(para *Paragraph) {
		para.Text = state.parseLinkedText(string(para.Text[0].(Plain)))
	}
	for _, blk := range state.Content {
		switch v := blk.(type) {
		case *Paragraph:
			expand(v)
		case *List:
			for _, item := range v.Items {
				for _, sub := range item.Content {
					expand(sub.(*Paragraph))
				}
			}
		}
	}

	return state.Doc
}
// span describes one run of comment lines, lines[start:end],
// together with the kind of content identified for it
// (code, heading, paragraph, and so on).
type span struct {
	start int      // index of the first line of the span
	end   int      // index just past the last line of the span
	kind  spanKind // what the lines were identified as
}
// spanKind identifies what kind of content a span holds.
// The zero value is not a valid kind.
type spanKind int

const (
	spanCode spanKind = iota + 1
	spanHeading
	spanList
	spanOldHeading
	spanPara
)
// parseSpans partitions lines into spans, each a run of consecutive
// lines identified as a code block, list, heading, old-style heading,
// or paragraph. Blank lines between spans are skipped and belong to
// no span; code and list spans may contain blank lines in their interior.
//
// Blank lines are detected by comparison with "", so whitespace-only
// lines must already have been replaced by empty strings
// (Parse passes the output of unindent).
func parseSpans(lines []string) []span {
var spans []span
// The loop may process a line twice: once as unindented
// and again forced indented. So the maximum expected
// number of iterations is 2*len(lines). The repeating logic
// can be subtle, though, and to protect against introduction
// of infinite loops in future changes, we watch to see that
// we are not looping too much. A panic is better than a
// quiet infinite loop.
watchdog := 2 * len(lines)
i := 0
// Lines with index below forceIndent are treated as indented
// even if they are not; see the heuristics in the unindented case below.
forceIndent := 0
Spans:
for {
// Skip blank lines.
for i < len(lines) && lines[i] == "" {
i++
}
if i >= len(lines) {
break
}
if watchdog--; watchdog < 0 {
panic("go/doc/comment: internal error: not making progress")
}
var kind spanKind
start := i
end := i
if i < forceIndent || indented(lines[i]) {
// Indented (or force indented).
// Ends before next unindented. (Blank lines are OK.)
// If this is an unindented list that we are heuristically treating as indented,
// then accept unindented list item lines up to the first blank line.
// The heuristic is disabled at blank lines to contain its effect
// to non-gofmt'ed sections of the comment.
unindentedListOK := isList(lines[i]) && i < forceIndent
i++
for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) {
if lines[i] == "" {
unindentedListOK = false
}
i++
}
// Drop trailing blank lines.
end = i
for end > start && lines[end-1] == "" {
end--
}
// If indented lines are followed (without a blank line)
// by an unindented line starting with a closing brace,
// take that one line too. This fixes the common mistake
// of pasting in something like
//
// func main() {
// fmt.Println("hello, world")
// }
//
// and forgetting to indent it.
// The heuristic will never trigger on a gofmt'ed comment,
// because any gofmt'ed code block or list would be
// followed by a blank line or end of comment.
if end < len(lines) && strings.HasPrefix(lines[end], "}") {
end++
}
// The first line decides whether the indented run is a list or code.
if isList(lines[start]) {
kind = spanList
} else {
kind = spanCode
}
} else {
// Unindented. Ends at next blank or indented line.
i++
for i < len(lines) && lines[i] != "" && !indented(lines[i]) {
i++
}
end = i
// If unindented lines are followed (without a blank line)
// by an indented line that would start a code block,
// check whether the final unindented lines
// should be left for the indented section.
// This can happen for the common mistakes of
// unindented code or unindented lists.
// The heuristic will never trigger on a gofmt'ed comment,
// because any gofmt'ed code block would have a blank line
// preceding it after the unindented lines.
if i < len(lines) && lines[i] != "" && !isList(lines[i]) {
switch {
case isList(lines[i-1]):
// If the final unindented line looks like a list item,
// this may be the first indented line wrap of
// a mistakenly unindented list.
// Leave all the unindented list items.
forceIndent = end
end--
for end > start && isList(lines[end-1]) {
end--
}
case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`):
// If the final unindented line ended in { or \
// it is probably the start of a misindented code block.
// Give the user a single line fix.
// Often that's enough; if not, the user can fix the others themselves.
forceIndent = end
end--
}
// If every unindented line was handed to the indented section,
// no span is left here: restart at the same line, which is
// now below forceIndent and so takes the indented branch.
if start == end && forceIndent > start {
i = start
continue Spans
}
}
// Span is either paragraph or heading.
// Only a single-line span can be a heading.
if end-start == 1 && isHeading(lines[start]) {
kind = spanHeading
} else if end-start == 1 && isOldHeading(lines[start], lines, start) {
kind = spanOldHeading
} else {
kind = spanPara
}
}
spans = append(spans, span{start, end, kind})
// Resume at end rather than i: end may have been moved back
// by the heuristics above, or forward by the closing-brace case.
i = end
}
return spans
}
// indented reports whether line begins with a space or a tab.
// An empty line is not indented.
func indented(line string) bool {
	if len(line) == 0 {
		return false
	}
	switch line[0] {
	case ' ', '\t':
		return true
	}
	return false
}
// unindent returns a copy of lines from which the space/tab prefix
// common to all non-blank lines has been removed.
// Leading and trailing blank lines are dropped, and every line that
// consists only of white space is replaced by the empty string.
// If nothing but blank lines is passed in, unindent returns nil.
func unindent(lines []string) []string {
	// Narrow [lo, hi) until it excludes blank lines at both ends.
	lo, hi := 0, len(lines)
	for lo < hi && isBlank(lines[lo]) {
		lo++
	}
	for hi > lo && isBlank(lines[hi-1]) {
		hi--
	}
	if lo == hi {
		return nil
	}
	lines = lines[lo:hi]

	// Find the indentation shared by all non-blank lines.
	indent := leadingSpace(lines[0])
	for _, ln := range lines[1:] {
		if isBlank(ln) {
			continue
		}
		indent = commonPrefix(indent, leadingSpace(ln))
	}

	// Strip it, turning whitespace-only lines into empty lines.
	trimmed := make([]string, 0, len(lines))
	for _, ln := range lines {
		ln = strings.TrimPrefix(ln, indent)
		if strings.TrimSpace(ln) == "" {
			ln = ""
		}
		trimmed = append(trimmed, ln)
	}

	// Whitespace-only lines at either end are empty by now; drop them too.
	for len(trimmed) > 0 && trimmed[0] == "" {
		trimmed = trimmed[1:]
	}
	for len(trimmed) > 0 && trimmed[len(trimmed)-1] == "" {
		trimmed = trimmed[:len(trimmed)-1]
	}
	return trimmed
}
// isBlank reports whether s is a blank line:
// either the empty string or a lone newline character.
func isBlank(s string) bool {
	switch s {
	case "", "\n":
		return true
	}
	return false
}
// commonPrefix returns the longest string that is a prefix of
// both a and b, comparing byte by byte.
func commonPrefix(a, b string) string {
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	for k := 0; k < limit; k++ {
		if a[k] != b[k] {
			return a[:k]
		}
	}
	return a[:limit]
}
// leadingSpace returns the longest prefix of s that is made up
// only of spaces and tabs.
func leadingSpace(s string) string {
	for k := 0; k < len(s); k++ {
		if c := s[k]; c != ' ' && c != '\t' {
			return s[:k]
		}
	}
	// Every byte of s is a space or a tab.
	return s
}
// isOldHeading reports whether line is an old-style section heading.
// line is all[off]; the surrounding lines in all are examined too.
func isOldHeading(line string, all []string, off int) bool {
	// The line must follow a blank line ...
	if off <= 0 || all[off-1] != "" {
		return false
	}
	// ... and be followed by a blank line and then an unindented line.
	if off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" {
		return false
	}

	line = strings.TrimSpace(line)

	// The first character must be an uppercase letter.
	first, _ := utf8.DecodeRuneInString(line)
	if !(unicode.IsLetter(first) && unicode.IsUpper(first)) {
		return false
	}

	// The last character must be a letter or a digit.
	last, _ := utf8.DecodeLastRuneInString(line)
	if !(unicode.IsLetter(last) || unicode.IsDigit(last)) {
		return false
	}

	// Reject lines containing illegal characters. "()," are allowed.
	if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") {
		return false
	}

	// tailsOK reports whether the text following every occurrence
	// of sep in line is accepted by accept.
	tailsOK := func(sep string, accept func(tail string) bool) bool {
		rest := line
		for {
			_, tail, found := strings.Cut(rest, sep)
			if !found {
				return true
			}
			if !accept(tail) {
				return false
			}
			rest = tail
		}
	}

	// "'" is allowed only in a possessive "'s" at the end of a word.
	possessive := func(tail string) bool {
		return tail == "s" || strings.HasPrefix(tail, "s ")
	}
	if !tailsOK("'", possessive) {
		return false
	}

	// "." is allowed only when it is followed by a non-space.
	tight := func(tail string) bool {
		return tail != "" && !strings.HasPrefix(tail, " ")
	}
	return tailsOK(".", tight)
}
// oldHeading builds the *Heading for an old-style section heading line.
// The heading text is the line with surrounding white space removed.
func (d *parseDoc) oldHeading(line string) Block {
	title := strings.TrimSpace(line)
	return &Heading{Text: []Text{Plain(title)}}
}
// isHeading reports whether line is a new-style section heading:
// a '#' in the first column, then a space or tab, then some text.
func isHeading(line string) bool {
	if len(line) < 2 || line[0] != '#' {
		return false
	}
	if sep := line[1]; sep != ' ' && sep != '\t' {
		return false
	}
	// A '#' followed only by white space has no heading text.
	return strings.TrimSpace(line) != "#"
}
// heading builds the *Heading for a new-style section heading line.
// The heading text is what follows the first byte of the line (the '#'),
// with surrounding white space removed.
func (d *parseDoc) heading(line string) Block {
	title := strings.TrimSpace(line[1:])
	return &Heading{Text: []Text{Plain(title)}}
}
// code builds a code block from lines, after removing
// their common indentation.
func (d *parseDoc) code(lines []string) *Code {
	// The extra empty element makes Join end the text with a newline.
	body := append(unindent(lines), "")
	return &Code{Text: strings.Join(body, "\n")}
}
// paragraph builds a paragraph block from lines.
// If every line is a link definition, paragraph records the
// definitions in d and returns nil instead.
func (d *parseDoc) paragraph(lines []string) Block {
	var defs []*LinkDef
	for _, ln := range lines {
		def, ok := parseLink(ln)
		if !ok {
			// At least one ordinary line: the whole span is plain text.
			return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}}
		}
		defs = append(defs, def)
	}

	// Every line was a link definition.
	for _, def := range defs {
		d.Links = append(d.Links, def)
		// The first definition of a given text wins.
		if d.links[def.Text] == nil {
			d.links[def.Text] = def
		}
	}
	return nil
}
// parseLink parses one link definition line of the form
//
//	[text]: url
//
// It returns the definition and true if the line is well formed,
// or nil and false otherwise.
func parseLink(line string) (*LinkDef, bool) {
	if !strings.HasPrefix(line, "[") {
		return nil, false
	}
	closeAt := strings.Index(line, "]:")
	if closeAt < 0 {
		return nil, false
	}
	// "]:" must be followed by a space or tab and at least one more byte.
	urlAt := closeAt + 3
	if urlAt >= len(line) {
		return nil, false
	}
	if sep := line[closeAt+2]; sep != ' ' && sep != '\t' {
		return nil, false
	}

	target := strings.TrimSpace(line[urlAt:])
	scheme, _, found := strings.Cut(target, "://")
	if !found || !isScheme(scheme) {
		return nil, false
	}

	// The line has the right shape and a recognized scheme://.
	// That is all that is required here; the bytes after the ://
	// are not checked as strictly as they are when URLs are
	// picked out of running text.
	return &LinkDef{Text: line[1:closeAt], URL: target}, true
}
// list builds a list from the indented lines. forceBlankBefore
// becomes the value of the List's ForceBlankBefore field.
func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List {
	// The first line decides whether this is a numbered or bullet list.
	firstNum, _, _ := listMarker(lines[0])
	numbered := firstNum != ""

	result := &List{ForceBlankBefore: forceBlankBefore}
	var (
		cur     *ListItem // item currently being filled, if any
		pending []string  // lines of the paragraph being collected
	)
	// endPara turns the pending lines into a paragraph of cur.
	endPara := func() {
		if cur != nil {
			if para := d.paragraph(pending); para != nil {
				cur.Content = append(cur.Content, para)
			}
		}
		pending = nil
	}

	for _, ln := range lines {
		// Only a marker of the same style as the first one starts a new item.
		if n, rest, ok := listMarker(ln); ok && (n != "") == numbered {
			endPara()
			cur = &ListItem{Number: n}
			result.Items = append(result.Items, cur)
			ln = rest
		}
		ln = strings.TrimSpace(ln)
		if ln == "" {
			// A blank line inside the list ends the current paragraph.
			result.ForceBlankBetween = true
			endPara()
			continue
		}
		pending = append(pending, ln)
	}
	endPara()
	return result
}
// listMarker tries to parse line, ignoring surrounding white space,
// as starting with a list marker: one of • * + - for a bullet,
// or decimal digits followed by '.' or ')' for a number.
// On success it returns the number ("" for a bullet),
// the text after the marker, and ok == true.
// Otherwise it returns "", "", false.
func listMarker(line string) (num, rest string, ok bool) {
	line = strings.TrimSpace(line)
	if line == "" {
		return "", "", false
	}

	switch r, size := utf8.DecodeRuneInString(line); {
	case r == '•' || r == '*' || r == '+' || r == '-':
		rest = line[size:]
	case '0' <= line[0] && line[0] <= '9':
		digits := 1
		for digits < len(line) && '0' <= line[digits] && line[digits] <= '9' {
			digits++
		}
		if digits >= len(line) {
			return "", "", false
		}
		if term := line[digits]; term != '.' && term != ')' {
			return "", "", false
		}
		num, rest = line[:digits], line[digits+1:]
	default:
		return "", "", false
	}

	// The marker must be followed by white space and then some text.
	if !indented(rest) || strings.TrimSpace(rest) == "" {
		return "", "", false
	}
	return num, rest, true
}
// isList reports whether line could be the first line of a list,
// that is, whether it starts with a list marker once any
// indentation is ignored.
// (Checking that the line is indented, where that matters,
// is left to the caller.)
func isList(line string) bool {
	_, _, found := listMarker(line)
	return found
}
// parseLinkedText parses text that is allowed to contain explicit links,
// such as [math.Sin] or [Go home page], into a slice of Text items.
//
// A “pkg” is only assumed to be a full import path if it starts with
// a domain name (a path element with a dot) or is one of the packages
// from the standard library (“[os]”, “[encoding/json]”, and so on).
// To avoid problems with maps, generics, and array types, doc links
// must be both preceded and followed by punctuation, spaces, tabs,
// or the start or end of a line. An example problem would be treating
// map[ast.Expr]TypeAndValue as containing a link.
//
// Bracketed text is first looked up among the comment's link definitions
// (with newlines and tabs treated as spaces) and otherwise tried as a
// doc link. Everything else is handed to parseText with auto-linking on.
func (d *parseDoc) parseLinkedText(text string) []Text {
	var out []Text
	wrote := 0 // text[:wrote] has already been emitted to out
	flush := func(i int) {
		if wrote < i {
			out = d.parseText(out, text[wrote:i], true)
			wrote = i
		}
	}

	start := -1    // index of the most recent unmatched '[', or -1
	var buf []byte // text[start+1:i] with newlines and tabs replaced by spaces
	for i := 0; i < len(text); i++ {
		c := text[i]
		if c == '\n' || c == '\t' {
			c = ' '
		}
		switch c {
		case '[':
			// A new '[' replaces any earlier unmatched one, so the
			// lookup key must restart here as well. Otherwise the key
			// would still contain the text after the earlier '[' and
			// would disagree with the link text text[start+1:i].
			start = i
			buf = buf[:0]
		case ']':
			if start >= 0 {
				if def, ok := d.links[string(buf)]; ok {
					def.Used = true
					flush(start)
					out = append(out, &Link{
						Text: d.parseText(nil, text[start+1:i], false),
						URL:  def.URL,
					})
					wrote = i + 1
				} else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok {
					flush(start)
					link.Text = d.parseText(nil, text[start+1:i], false)
					out = append(out, link)
					wrote = i + 1
				}
			}
			start = -1
			buf = buf[:0]
		}
		// Collect the bytes after the '[' (but not the '[' itself).
		if start >= 0 && i != start {
			buf = append(buf, c)
		}
	}

	flush(len(text))
	return out
}
// docLink tries to interpret text, which was found between [ and ],
// as a doc link. It returns the DocLink and ok == true on success,
// or nil, false otherwise.
// before and after are the text ahead of the [ and following the ].
// A doc link must be preceded and followed by punctuation, a space,
// a tab, a newline, or the start or end of the text.
// A single leading "*" in text is ignored.
func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) {
	// boundary reports whether r may sit directly next to a doc link.
	boundary := func(r rune) bool {
		return unicode.IsPunct(r) || r == ' ' || r == '\t' || r == '\n'
	}
	if before != "" {
		if r, _ := utf8.DecodeLastRuneInString(before); !boundary(r) {
			return nil, false
		}
	}
	if after != "" {
		if r, _ := utf8.DecodeRuneInString(after); !boundary(r) {
			return nil, false
		}
	}

	text = strings.TrimPrefix(text, "*")

	// Peel off Name, and then Recv, from the right-hand end.
	// Whatever is left over is the package.
	pkg, name, named := splitDocName(text)
	recv := ""
	if named {
		pkg, recv, _ = splitDocName(pkg)
	}

	if pkg == "" {
		// No package: the symbol must exist in the current package.
		if !d.lookupSym(recv, name) {
			return nil, false
		}
	} else {
		resolved, found := d.lookupPkg(pkg)
		if !found {
			return nil, false
		}
		pkg = resolved
	}
	return &DocLink{ImportPath: pkg, Recv: recv, Name: name}, true
}
// splitDocName splits a capitalized Go identifier off the end of text.
// If text has the form before.Name, it returns before, Name, true.
// If text as a whole is such a Name, it returns "", Name, true.
// Otherwise it returns text, "", false.
func splitDocName(text string) (before, name string, foundDot bool) {
	dot := strings.LastIndex(text, ".")
	// With no dot, dot is -1 and the candidate is all of text.
	candidate := text[dot+1:]
	if !isName(candidate) {
		return text, "", false
	}
	if dot < 0 {
		return "", candidate, true
	}
	return text[:dot], candidate, true
}
// parseText parses s as text and returns the result of appending
// those parsed Text elements to out.
// parseText does not handle explicit links like [math.Sin] or [Go home page]:
// those are handled by parseLinkedText.
// If autoLink is true, then parseText recognizes URLs and words from d.Words
// and converts those to links as appropriate.
//
// In all cases parseText rewrites `` to “ and '' to ”, except that
// a run of three or more backquotes is left untouched.
func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text {
	var w strings.Builder // plain text collected since the last element was emitted
	wrote := 0            // s[:wrote] has already been copied to w or emitted
	writeUntil := func(i int) {
		w.WriteString(s[wrote:i])
		wrote = i
	}
	flush := func(i int) {
		writeUntil(i)
		if w.Len() > 0 {
			out = append(out, Plain(w.String()))
			w.Reset()
		}
	}
	for i := 0; i < len(s); {
		t := s[i:]
		if autoLink {
			if url, ok := autoURL(t); ok {
				flush(i)
				// Note: The old comment parser would look up the URL in words
				// and replace the target with words[URL] if it was non-empty.
				// That would allow creating links that display as one URL but
				// when clicked go to a different URL. Not sure what the point
				// of that is, so we're not doing that lookup here.
				out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url})
				i += len(url)
				wrote = i
				continue
			}
			if id, ok := ident(t); ok {
				url, italics := d.Words[id]
				if !italics {
					// Not a known word: skip the whole identifier so that
					// no later position inside it is examined.
					i += len(id)
					continue
				}
				flush(i)
				if url == "" {
					out = append(out, Italic(id))
				} else {
					out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url})
				}
				i += len(id)
				wrote = i
				continue
			}
		}
		switch {
		case strings.HasPrefix(t, "``"):
			if len(t) >= 3 && t[2] == '`' {
				// Do not convert `` inside ```, in case people are mistakenly writing Markdown.
				// Skip the entire run of backquotes. i is an index into s,
				// not into t, so the scan must look at s[i].
				i += 3
				for i < len(s) && s[i] == '`' {
					i++
				}
				break
			}
			writeUntil(i)
			w.WriteRune('“')
			i += 2
			wrote = i
		case strings.HasPrefix(t, "''"):
			writeUntil(i)
			w.WriteRune('”')
			i += 2
			wrote = i
		default:
			i++
		}
	}
	flush(len(s))
	return out
}
// autoURL checks whether s begins with a URL that should be hyperlinked.
// If so, it returns that URL, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
// The caller should skip the first len(url) bytes of s
// before processing further.
func autoURL(s string) (url string, ok bool) {
	// Locate the "://". This is the fast path for rejecting non-URLs,
	// which matters because autoURL is called at every position
	// in the string. The shortest possible URL is ftp://x, 7 bytes,
	// and a recognized scheme puts the colon at index 3 through 6.
	if len(s) < 7 {
		return "", false
	}
	colon := -1
	for k := 3; k <= 6; k++ {
		if s[k] == ':' {
			colon = k
			break
		}
	}
	if colon < 0 {
		return "", false
	}
	if colon+3 > len(s) || s[colon:colon+3] != "://" {
		return "", false
	}
	if !isScheme(s[:colon]) {
		return "", false
	}

	// Scan the host part. It must have at least one byte,
	// and must start and end in non-punctuation.
	pos := colon + 3
	if pos >= len(s) {
		return "", false
	}
	if first := s[pos]; !isHost(first) || isPunct(first) {
		return "", false
	}
	hostEnd := pos + 1
	for pos = hostEnd; pos < len(s) && isHost(s[pos]); pos++ {
		if !isPunct(s[pos]) {
			hostEnd = pos + 1
		}
	}

	// From here on a URL (scheme://host) is definitely returned.
	// What remains is to find the longest path that can be added.
	// Heuristics abound.
	// Parens, braces, and brackets are allowed,
	// but only if they match (#5043, #22285).
	// .,:;?! are allowed in the path but not at its end,
	// to avoid end-of-sentence punctuation (#18139, #16565).
	var closers []byte // closing brackets still expected, innermost last
	urlEnd := hostEnd
scan:
	for pos = hostEnd; pos < len(s); pos++ {
		c := s[pos]
		if isPunct(c) {
			// Kept only if a later path byte extends urlEnd past it.
			continue
		}
		if !isPath(c) {
			break
		}
		switch c {
		case '(':
			closers = append(closers, ')')
		case '{':
			closers = append(closers, '}')
		case '[':
			closers = append(closers, ']')
		case ')', '}', ']':
			top := len(closers) - 1
			if top < 0 || closers[top] != c {
				break scan
			}
			closers = closers[:top]
		}
		// The URL may end only where all brackets are balanced.
		if len(closers) == 0 {
			urlEnd = pos + 1
		}
	}

	return s[:urlEnd], true
}
// isScheme reports whether s is a recognized URL scheme.
// The schemes listed here are 3 to 6 bytes long. If a scheme of
// any other length is added, the fast path at the top of autoURL
// needs updating too.
func isScheme(s string) bool {
	for _, known := range [...]string{
		"file",
		"ftp",
		"gopher",
		"http",
		"https",
		"mailto",
		"nntp",
	} {
		if s == known {
			return true
		}
	}
	return false
}
// isHost reports whether c is a byte that can appear in a URL host,
// like www.example.com or user@[::1]:8080
func isHost(c byte) bool {
	// mask is a 128-bit bitmap with a 1 bit for every allowed byte.
	// It is split into two 64-bit halves so that c can be tested
	// with a shift and an and. Bytes >= 128 are never allowed.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'_' |
		1<<'@' |
		1<<'-' |
		1<<'.' |
		1<<'[' |
		1<<']' |
		1<<':'
	const (
		lo uint64 = mask & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = mask >> 64         // bits for bytes 64-127
	)

	switch {
	case c < 64:
		return lo>>c&1 != 0
	case c < 128:
		return hi>>(c-64)&1 != 0
	}
	return false
}
// isPunct reports whether c is a punctuation byte that can appear
// inside a path but not at the end.
func isPunct(c byte) bool {
	// mask is a 128-bit bitmap with a 1 bit for every allowed byte.
	// It is split into two 64-bit halves so that c can be tested
	// with a shift and an and. Bytes >= 128 are never allowed.
	const mask = 0 |
		1<<'.' |
		1<<',' |
		1<<':' |
		1<<';' |
		1<<'?' |
		1<<'!'
	const (
		lo uint64 = mask & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = mask >> 64         // bits for bytes 64-127
	)

	switch {
	case c < 64:
		return lo>>c&1 != 0
	case c < 128:
		return hi>>(c-64)&1 != 0
	}
	return false
}
// isPath reports whether c is a (non-punctuation) path byte.
func isPath(c byte) bool {
	// mask is a 128-bit bitmap with a 1 bit for every allowed byte.
	// It is split into two 64-bit halves so that c can be tested
	// with a shift and an and. Bytes >= 128 are never allowed.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'$' |
		1<<'\'' |
		1<<'(' |
		1<<')' |
		1<<'*' |
		1<<'+' |
		1<<'&' |
		1<<'#' |
		1<<'=' |
		1<<'@' |
		1<<'~' |
		1<<'_' |
		1<<'/' |
		1<<'-' |
		1<<'[' |
		1<<']' |
		1<<'{' |
		1<<'}' |
		1<<'%'
	const (
		lo uint64 = mask & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = mask >> 64         // bits for bytes 64-127
	)

	switch {
	case c < 64:
		return lo>>c&1 != 0
	case c < 128:
		return hi>>(c-64)&1 != 0
	}
	return false
}
// isName reports whether s is a capitalized Go identifier (like Name):
// s must be an identifier in its entirety, and its first
// character must be an uppercase letter.
func isName(s string) bool {
	if id, ok := ident(s); !ok || id != s {
		return false
	}
	first, _ := utf8.DecodeRuneInString(s)
	return unicode.IsUpper(first)
}
// ident checks whether s begins with a Go identifier.
// If so, it returns that identifier, which is a prefix of s, and ok == true.
// Otherwise it returns "", false.
// The caller should skip the first len(id) bytes of s
// before processing further.
func ident(s string) (id string, ok bool) {
	// Scan [\pL_][\pL_0-9]*
	end := 0
scan:
	for end < len(s) {
		c := s[end]
		switch {
		case c >= utf8.RuneSelf:
			// Non-ASCII: accept any Unicode letter.
			r, width := utf8.DecodeRuneInString(s[end:])
			if !unicode.IsLetter(r) {
				break scan
			}
			end += width
		case !isIdentASCII(c), end == 0 && '0' <= c && c <= '9':
			// Not an identifier byte, or a digit in first position.
			break scan
		default:
			end++
		}
	}
	return s[:end], end > 0
}
// isIdentASCII reports whether c is an ASCII identifier byte:
// a letter, a digit, or an underscore.
func isIdentASCII(c byte) bool {
	// mask is a 128-bit bitmap with a 1 bit for every allowed byte.
	// It is split into two 64-bit halves so that c can be tested
	// with a shift and an and. Bytes >= 128 are never allowed.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'_'
	const (
		lo uint64 = mask & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = mask >> 64         // bits for bytes 64-127
	)

	switch {
	case c < 64:
		return lo>>c&1 != 0
	case c < 128:
		return hi>>(c-64)&1 != 0
	}
	return false
}
// validImportPath reports whether path is a valid import path.
// It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath.
func validImportPath(path string) bool {
	if path == "" || !utf8.ValidString(path) {
		return false
	}
	// No leading dash, no trailing slash, no empty element.
	if path[0] == '-' || path[len(path)-1] == '/' || strings.Contains(path, "//") {
		return false
	}
	// Every slash-separated element must be valid by itself.
	for _, elem := range strings.Split(path, "/") {
		if !validImportPathElem(elem) {
			return false
		}
	}
	return true
}
// validImportPathElem reports whether elem is a valid import path element:
// it must be non-empty, must neither start nor end with a dot,
// and must consist only of bytes accepted by importPathOK.
func validImportPathElem(elem string) bool {
	n := len(elem)
	if n == 0 {
		return false
	}
	if elem[0] == '.' || elem[n-1] == '.' {
		return false
	}
	for _, c := range []byte(elem) {
		if !importPathOK(c) {
			return false
		}
	}
	return true
}
// importPathOK reports whether c is a byte allowed in an import
// path element: an ASCII letter or digit, or one of - . ~ _ +
func importPathOK(c byte) bool {
	// mask is a 128-bit bitmap with a 1 bit for every allowed byte.
	// It is split into two 64-bit halves so that c can be tested
	// with a shift and an and. Bytes >= 128 are never allowed.
	const mask = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'-' |
		1<<'.' |
		1<<'~' |
		1<<'_' |
		1<<'+'
	const (
		lo uint64 = mask & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = mask >> 64         // bits for bytes 64-127
	)

	switch {
	case c < 64:
		return lo>>c&1 != 0
	case c < 128:
		return hi>>(c-64)&1 != 0
	}
	return false
}