| // Copyright 2022 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package comment |
| |
| import ( |
| "sort" |
| "strings" |
| "unicode" |
| "unicode/utf8" |
| ) |
| |
| // A Doc is a parsed Go doc comment. |
| type Doc struct { |
| // Content is the sequence of content blocks in the comment. |
| Content []Block |
| |
| // Links is the link definitions in the comment. |
| Links []*LinkDef |
| } |
| |
// A LinkDef is one link definition, written in a comment
// as a line of the form "[Text]: URL".
type LinkDef struct {
	Text string // text that appears between the brackets
	URL  string // URL that the text links to
	Used bool   // reports whether the comment refers to this definition
}
| |
// A Block is a block-level element of a doc comment.
// The concrete types are [*Code], [*Heading], [*List], and [*Paragraph].
type Block interface {
	// block is unexported so that only this package can define Blocks.
	block()
}
| |
| // A Heading is a doc comment heading. |
| type Heading struct { |
| Text []Text // the heading text |
| } |
| |
| func (*Heading) block() {} |
| |
| // A List is a numbered or bullet list. |
| // Lists are always non-empty: len(Items) > 0. |
| // In a numbered list, every Items[i].Number is a non-empty string. |
| // In a bullet list, every Items[i].Number is an empty string. |
| type List struct { |
| // Items is the list items. |
| Items []*ListItem |
| |
| // ForceBlankBefore indicates that the list must be |
| // preceded by a blank line when reformatting the comment, |
| // overriding the usual conditions. See the BlankBefore method. |
| // |
| // The comment parser sets ForceBlankBefore for any list |
| // that is preceded by a blank line, to make sure |
| // the blank line is preserved when printing. |
| ForceBlankBefore bool |
| |
| // ForceBlankBetween indicates that list items must be |
| // separated by blank lines when reformatting the comment, |
| // overriding the usual conditions. See the BlankBetween method. |
| // |
| // The comment parser sets ForceBlankBetween for any list |
| // that has a blank line between any two of its items, to make sure |
| // the blank lines are preserved when printing. |
| ForceBlankBetween bool |
| } |
| |
| func (*List) block() {} |
| |
| // BlankBefore reports whether a reformatting of the comment |
| // should include a blank line before the list. |
| // The default rule is the same as for [BlankBetween]: |
| // if the list item content contains any blank lines |
| // (meaning at least one item has multiple paragraphs) |
| // then the list itself must be preceded by a blank line. |
| // A preceding blank line can be forced by setting [List].ForceBlankBefore. |
| func (l *List) BlankBefore() bool { |
| return l.ForceBlankBefore || l.BlankBetween() |
| } |
| |
| // BlankBetween reports whether a reformatting of the comment |
| // should include a blank line between each pair of list items. |
| // The default rule is that if the list item content contains any blank lines |
| // (meaning at least one item has multiple paragraphs) |
| // then list items must themselves be separated by blank lines. |
| // Blank line separators can be forced by setting [List].ForceBlankBetween. |
| func (l *List) BlankBetween() bool { |
| if l.ForceBlankBetween { |
| return true |
| } |
| for _, item := range l.Items { |
| if len(item.Content) != 1 { |
| // Unreachable for parsed comments today, |
| // since the only way to get multiple item.Content |
| // is multiple paragraphs, which must have been |
| // separated by a blank line. |
| return true |
| } |
| } |
| return false |
| } |
| |
| // A ListItem is a single item in a numbered or bullet list. |
| type ListItem struct { |
| // Number is a decimal string in a numbered list |
| // or an empty string in a bullet list. |
| Number string // "1", "2", ...; "" for bullet list |
| |
| // Content is the list content. |
| // Currently, restrictions in the parser and printer |
| // require every element of Content to be a *Paragraph. |
| Content []Block // Content of this item. |
| } |
| |
| // A Paragraph is a paragraph of text. |
| type Paragraph struct { |
| Text []Text |
| } |
| |
| func (*Paragraph) block() {} |
| |
// A Code is a block of preformatted code.
type Code struct {
	// Text holds the preformatted text. It may span several lines,
	// and every line, including the last, ends with a newline.
	// Text is never empty and never begins or ends with a blank line.
	Text string
}

// block marks *Code as a Block.
func (*Code) block() {}
| |
// A Text is a text-level element of a doc comment.
// The concrete types are [Plain], [Italic], [*Link], and [*DocLink].
type Text interface {
	// text is unexported so that only this package can define Texts.
	text()
}
| |
// A Plain is a string that is rendered as ordinary,
// non-italicized text.
type Plain string

// text marks Plain as a Text.
func (Plain) text() {}
| |
// An Italic is a string that is rendered as italicized text.
type Italic string

// text marks Italic as a Text.
func (Italic) text() {}
| |
| // A Link is a link to a specific URL. |
| type Link struct { |
| Auto bool // is this an automatic (implicit) link of a literal URL? |
| Text []Text // text of link |
| URL string // target URL of link |
| } |
| |
| func (*Link) text() {} |
| |
| // A DocLink is a link to documentation for a Go package or symbol. |
| type DocLink struct { |
| Text []Text // text of link |
| |
| // ImportPath, Recv, and Name identify the Go package or symbol |
| // that is the link target. The potential combinations of |
| // non-empty fields are: |
| // - ImportPath: a link to another package |
| // - ImportPath, Name: a link to a const, func, type, or var in another package |
| // - ImportPath, Recv, Name: a link to a method in another package |
| // - Name: a link to a const, func, type, or var in this package |
| // - Recv, Name: a link to a method in this package |
| ImportPath string // import path |
| Recv string // receiver type, without any pointer star, for methods |
| Name string // const, func, type, var, or method name |
| } |
| |
| func (*DocLink) text() {} |
| |
// A Parser parses doc comments.
// Its fields may be set before calling Parse to customize
// the details of how comments are parsed.
type Parser struct {
	// Words maps Go identifier words to link targets.
	// Every word w present in the map is italicized.
	// When Words[w] is the empty string that is all that happens;
	// otherwise the word is also linked, with Words[w] as the target.
	// Words corresponds to the words parameter of [go/doc.ToHTML].
	Words map[string]string

	// LookupPackage resolves a package name to an import path.
	//
	// When LookupPackage(name) returns ok == true, [name]
	// (or [name.Sym] or [name.Sym.Method]) is treated as a
	// documentation link to the package docs of importPath.
	// Returning "", true is valid and means that name refers
	// to the current package.
	//
	// When LookupPackage(name) returns ok == false, [name]
	// (or [name.Sym] or [name.Sym.Method]) is not treated as a
	// documentation link, with one exception: name may be the full
	// (but single-element) import path of a standard library
	// package, as in [math] or [io.Reader].
	// LookupPackage is still called for such names, so that it can
	// resolve them to imports of other packages that happen to
	// have the same package name.
	//
	// A nil LookupPackage behaves like a function that
	// always returns "", false.
	LookupPackage func(name string) (importPath string, ok bool)

	// LookupSym reports whether a symbol name or method name
	// exists in the current package.
	//
	// When LookupSym("", "Name") returns true, [Name] is treated as
	// a documentation link for a const, func, type, or var.
	//
	// Likewise, when LookupSym("Recv", "Name") returns true,
	// [Recv.Name] is treated as a documentation link for the
	// method Name of type Recv.
	//
	// A nil LookupSym behaves like a function that always
	// returns false.
	LookupSym func(recv, name string) (ok bool)
}
| |
| // parseDoc is parsing state for a single doc comment. |
| type parseDoc struct { |
| *Parser |
| *Doc |
| links map[string]*LinkDef |
| lines []string |
| lookupSym func(recv, name string) bool |
| } |
| |
| // lookupPkg is called to look up the pkg in [pkg], [pkg.Name], and [pkg.Name.Recv]. |
| // If pkg has a slash, it is assumed to be the full import path and is returned with ok = true. |
| // |
| // Otherwise, pkg is probably a simple package name like "rand" (not "crypto/rand" or "math/rand"). |
| // d.LookupPackage provides a way for the caller to allow resolving such names with reference |
| // to the imports in the surrounding package. |
| // |
| // There is one collision between these two cases: single-element standard library names |
| // like "math" are full import paths but don't contain slashes. We let d.LookupPackage have |
| // the first chance to resolve it, in case there's a different package imported as math, |
| // and otherwise we refer to a built-in list of single-element standard library package names. |
| func (d *parseDoc) lookupPkg(pkg string) (importPath string, ok bool) { |
| if strings.Contains(pkg, "/") { // assume a full import path |
| if validImportPath(pkg) { |
| return pkg, true |
| } |
| return "", false |
| } |
| if d.LookupPackage != nil { |
| // Give LookupPackage a chance. |
| if path, ok := d.LookupPackage(pkg); ok { |
| return path, true |
| } |
| } |
| return DefaultLookupPackage(pkg) |
| } |
| |
| func isStdPkg(path string) bool { |
| // TODO(rsc): Use sort.Find once we don't have to worry about |
| // copying this code into older Go environments. |
| i := sort.Search(len(stdPkgs), func(i int) bool { return stdPkgs[i] >= path }) |
| return i < len(stdPkgs) && stdPkgs[i] == path |
| } |
| |
| // DefaultLookupPackage is the default package lookup |
| // function, used when [Parser].LookupPackage is nil. |
| // It recognizes names of the packages from the standard |
| // library with single-element import paths, such as math, |
| // which would otherwise be impossible to name. |
| // |
| // Note that the go/doc package provides a more sophisticated |
| // lookup based on the imports used in the current package. |
| func DefaultLookupPackage(name string) (importPath string, ok bool) { |
| if isStdPkg(name) { |
| return name, true |
| } |
| return "", false |
| } |
| |
| // Parse parses the doc comment text and returns the *Doc form. |
| // Comment markers (/* // and */) in the text must have already been removed. |
| func (p *Parser) Parse(text string) *Doc { |
| lines := unindent(strings.Split(text, "\n")) |
| d := &parseDoc{ |
| Parser: p, |
| Doc: new(Doc), |
| links: make(map[string]*LinkDef), |
| lines: lines, |
| lookupSym: func(recv, name string) bool { return false }, |
| } |
| if p.LookupSym != nil { |
| d.lookupSym = p.LookupSym |
| } |
| |
| // First pass: break into block structure and collect known links. |
| // The text is all recorded as Plain for now. |
| var prev span |
| for _, s := range parseSpans(lines) { |
| var b Block |
| switch s.kind { |
| default: |
| panic("go/doc/comment: internal error: unknown span kind") |
| case spanList: |
| b = d.list(lines[s.start:s.end], prev.end < s.start) |
| case spanCode: |
| b = d.code(lines[s.start:s.end]) |
| case spanOldHeading: |
| b = d.oldHeading(lines[s.start]) |
| case spanHeading: |
| b = d.heading(lines[s.start]) |
| case spanPara: |
| b = d.paragraph(lines[s.start:s.end]) |
| } |
| if b != nil { |
| d.Content = append(d.Content, b) |
| } |
| prev = s |
| } |
| |
| // Second pass: interpret all the Plain text now that we know the links. |
| for _, b := range d.Content { |
| switch b := b.(type) { |
| case *Paragraph: |
| b.Text = d.parseLinkedText(string(b.Text[0].(Plain))) |
| case *List: |
| for _, i := range b.Items { |
| for _, c := range i.Content { |
| p := c.(*Paragraph) |
| p.Text = d.parseLinkedText(string(p.Text[0].(Plain))) |
| } |
| } |
| } |
| } |
| |
| return d.Doc |
| } |
| |
| // A span represents a single span of comment lines (lines[start:end]) |
| // of an identified kind (code, heading, paragraph, and so on). |
| type span struct { |
| start int |
| end int |
| kind spanKind |
| } |
| |
// A spanKind says what kind of content a span holds.
type spanKind int

// The span kinds. Numbering starts at 1, leaving the
// zero spanKind unused.
const (
	spanCode spanKind = iota + 1
	spanHeading
	spanList
	spanOldHeading
	spanPara
)
| |
// parseSpans splits lines into a sequence of spans, each classified
// as a list, code block, heading, old-style heading, or paragraph.
// Blank lines between spans are skipped and belong to no span.
//
// Indented lines form code or list spans; unindented lines form
// paragraph or heading spans. A few heuristics let unindented lines
// be treated as indented, to repair common formatting mistakes.
func parseSpans(lines []string) []span {
	var spans []span

	// The loop may process a line twice: once as unindented
	// and again forced indented. So the maximum expected
	// number of iterations is 2*len(lines). The repeating logic
	// can be subtle, though, and to protect against introduction
	// of infinite loops in future changes, we watch to see that
	// we are not looping too much. A panic is better than a
	// quiet infinite loop.
	watchdog := 2 * len(lines)

	i := 0
	// Lines before index forceIndent are treated as indented
	// even if they are not.
	forceIndent := 0
Spans:
	for {
		// Skip blank lines.
		for i < len(lines) && lines[i] == "" {
			i++
		}
		if i >= len(lines) {
			break
		}
		if watchdog--; watchdog < 0 {
			panic("go/doc/comment: internal error: not making progress")
		}

		var kind spanKind
		start := i
		end := i
		if i < forceIndent || indented(lines[i]) {
			// Indented (or force indented).
			// Ends before next unindented. (Blank lines are OK.)
			// If this is an unindented list that we are heuristically treating as indented,
			// then accept unindented list item lines up to the first blank lines.
			// The heuristic is disabled at blank lines to contain its effect
			// to non-gofmt'ed sections of the comment.
			unindentedListOK := isList(lines[i]) && i < forceIndent
			i++
			for i < len(lines) && (lines[i] == "" || i < forceIndent || indented(lines[i]) || (unindentedListOK && isList(lines[i]))) {
				if lines[i] == "" {
					unindentedListOK = false
				}
				i++
			}

			// Drop trailing blank lines.
			end = i
			for end > start && lines[end-1] == "" {
				end--
			}

			// If indented lines are followed (without a blank line)
			// by an unindented line beginning with a closing brace,
			// take that one line too. This fixes the common mistake
			// of pasting in something like
			//
			//	func main() {
			//		fmt.Println("hello, world")
			//	}
			//
			// and forgetting to indent it.
			// The heuristic will never trigger on a gofmt'ed comment,
			// because any gofmt'ed code block or list would be
			// followed by a blank line or end of comment.
			if end < len(lines) && strings.HasPrefix(lines[end], "}") {
				end++
			}

			// The first line decides whether the span is a list or code.
			if isList(lines[start]) {
				kind = spanList
			} else {
				kind = spanCode
			}
		} else {
			// Unindented. Ends at next blank or indented line.
			i++
			for i < len(lines) && lines[i] != "" && !indented(lines[i]) {
				i++
			}
			end = i

			// If unindented lines are followed (without a blank line)
			// by an indented line that would start a code block,
			// check whether the final unindented lines
			// should be left for the indented section.
			// This can happen for the common mistakes of
			// unindented code or unindented lists.
			// The heuristic will never trigger on a gofmt'ed comment,
			// because any gofmt'ed code block would have a blank line
			// preceding it after the unindented lines.
			if i < len(lines) && lines[i] != "" && !isList(lines[i]) {
				switch {
				case isList(lines[i-1]):
					// If the final unindented line looks like a list item,
					// this may be the first indented line wrap of
					// a mistakenly unindented list.
					// Leave all the unindented list items.
					forceIndent = end
					end--
					for end > start && isList(lines[end-1]) {
						end--
					}

				case strings.HasSuffix(lines[i-1], "{") || strings.HasSuffix(lines[i-1], `\`):
					// If the final unindented line ended in { or \
					// it is probably the start of a misindented code block.
					// Give the user a single line fix.
					// Often that's enough; if not, the user can fix the others themselves.
					forceIndent = end
					end--
				}

				// Nothing is left for an unindented span: rescan from
				// start, where the lines are now force indented.
				if start == end && forceIndent > start {
					i = start
					continue Spans
				}
			}

			// Span is either paragraph or heading.
			if end-start == 1 && isHeading(lines[start]) {
				kind = spanHeading
			} else if end-start == 1 && isOldHeading(lines[start], lines, start) {
				kind = spanOldHeading
			} else {
				kind = spanPara
			}
		}

		spans = append(spans, span{start, end, kind})
		i = end
	}

	return spans
}
| |
// indented reports whether line begins with a space or a tab.
// The empty line is not indented.
func indented(line string) bool {
	if len(line) == 0 {
		return false
	}
	switch line[0] {
	case ' ', '\t':
		return true
	}
	return false
}
| |
| // unindent removes any common space/tab prefix |
| // from each line in lines, returning a copy of lines in which |
| // those prefixes have been trimmed from each line. |
| // It also replaces any lines containing only spaces with blank lines (empty strings). |
| func unindent(lines []string) []string { |
| // Trim leading and trailing blank lines. |
| for len(lines) > 0 && isBlank(lines[0]) { |
| lines = lines[1:] |
| } |
| for len(lines) > 0 && isBlank(lines[len(lines)-1]) { |
| lines = lines[:len(lines)-1] |
| } |
| if len(lines) == 0 { |
| return nil |
| } |
| |
| // Compute and remove common indentation. |
| prefix := leadingSpace(lines[0]) |
| for _, line := range lines[1:] { |
| if !isBlank(line) { |
| prefix = commonPrefix(prefix, leadingSpace(line)) |
| } |
| } |
| |
| out := make([]string, len(lines)) |
| for i, line := range lines { |
| line = strings.TrimPrefix(line, prefix) |
| if strings.TrimSpace(line) == "" { |
| line = "" |
| } |
| out[i] = line |
| } |
| for len(out) > 0 && out[0] == "" { |
| out = out[1:] |
| } |
| for len(out) > 0 && out[len(out)-1] == "" { |
| out = out[:len(out)-1] |
| } |
| return out |
| } |
| |
// isBlank reports whether s is a blank line:
// either empty or a lone newline character.
func isBlank(s string) bool {
	return s == "" || s == "\n"
}
| |
// commonPrefix returns the longest string that is
// a prefix of both a and b.
func commonPrefix(a, b string) string {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for k := 0; k < n; k++ {
		if a[k] != b[k] {
			return a[:k]
		}
	}
	return a[:n]
}
| |
// leadingSpace returns the run of spaces and tabs
// at the beginning of s.
func leadingSpace(s string) string {
	rest := strings.TrimLeft(s, " \t")
	return s[:len(s)-len(rest)]
}
| |
| // isOldHeading reports whether line is an old-style section heading. |
| // line is all[off]. |
| func isOldHeading(line string, all []string, off int) bool { |
| if off <= 0 || all[off-1] != "" || off+2 >= len(all) || all[off+1] != "" || leadingSpace(all[off+2]) != "" { |
| return false |
| } |
| |
| line = strings.TrimSpace(line) |
| |
| // a heading must start with an uppercase letter |
| r, _ := utf8.DecodeRuneInString(line) |
| if !unicode.IsLetter(r) || !unicode.IsUpper(r) { |
| return false |
| } |
| |
| // it must end in a letter or digit: |
| r, _ = utf8.DecodeLastRuneInString(line) |
| if !unicode.IsLetter(r) && !unicode.IsDigit(r) { |
| return false |
| } |
| |
| // exclude lines with illegal characters. we allow "()," |
| if strings.ContainsAny(line, ";:!?+*/=[]{}_^°&§~%#@<\">\\") { |
| return false |
| } |
| |
| // allow "'" for possessive "'s" only |
| for b := line; ; { |
| var ok bool |
| if _, b, ok = strings.Cut(b, "'"); !ok { |
| break |
| } |
| if b != "s" && !strings.HasPrefix(b, "s ") { |
| return false // ' not followed by s and then end-of-word |
| } |
| } |
| |
| // allow "." when followed by non-space |
| for b := line; ; { |
| var ok bool |
| if _, b, ok = strings.Cut(b, "."); !ok { |
| break |
| } |
| if b == "" || strings.HasPrefix(b, " ") { |
| return false // not followed by non-space |
| } |
| } |
| |
| return true |
| } |
| |
| // oldHeading returns the *Heading for the given old-style section heading line. |
| func (d *parseDoc) oldHeading(line string) Block { |
| return &Heading{Text: []Text{Plain(strings.TrimSpace(line))}} |
| } |
| |
// isHeading reports whether line is a new-style section heading:
// a '#' followed by a space or tab and then some non-space text.
func isHeading(line string) bool {
	if len(line) < 2 || line[0] != '#' {
		return false
	}
	if line[1] != ' ' && line[1] != '\t' {
		return false
	}
	// Reject a marker that has no text after it.
	return strings.TrimSpace(line) != "#"
}
| |
| // heading returns the *Heading for the given new-style section heading line. |
| func (d *parseDoc) heading(line string) Block { |
| return &Heading{Text: []Text{Plain(strings.TrimSpace(line[1:]))}} |
| } |
| |
| // code returns a code block built from the lines. |
| func (d *parseDoc) code(lines []string) *Code { |
| body := unindent(lines) |
| body = append(body, "") // to get final \n from Join |
| return &Code{Text: strings.Join(body, "\n")} |
| } |
| |
| // paragraph returns a paragraph block built from the lines. |
| // If the lines are link definitions, paragraph adds them to d and returns nil. |
| func (d *parseDoc) paragraph(lines []string) Block { |
| // Is this a block of known links? Handle. |
| var defs []*LinkDef |
| for _, line := range lines { |
| def, ok := parseLink(line) |
| if !ok { |
| goto NoDefs |
| } |
| defs = append(defs, def) |
| } |
| for _, def := range defs { |
| d.Links = append(d.Links, def) |
| if d.links[def.Text] == nil { |
| d.links[def.Text] = def |
| } |
| } |
| return nil |
| NoDefs: |
| |
| return &Paragraph{Text: []Text{Plain(strings.Join(lines, "\n"))}} |
| } |
| |
| // parseLink parses a single link definition line: |
| // |
| // [text]: url |
| // |
| // It returns the link definition and whether the line was well formed. |
| func parseLink(line string) (*LinkDef, bool) { |
| if line == "" || line[0] != '[' { |
| return nil, false |
| } |
| i := strings.Index(line, "]:") |
| if i < 0 || i+3 >= len(line) || (line[i+2] != ' ' && line[i+2] != '\t') { |
| return nil, false |
| } |
| |
| text := line[1:i] |
| url := strings.TrimSpace(line[i+3:]) |
| j := strings.Index(url, "://") |
| if j < 0 || !isScheme(url[:j]) { |
| return nil, false |
| } |
| |
| // Line has right form and has valid scheme://. |
| // That's good enough for us - we are not as picky |
| // about the characters beyond the :// as we are |
| // when extracting inline URLs from text. |
| return &LinkDef{Text: text, URL: url}, true |
| } |
| |
| // list returns a list built from the indented lines, |
| // using forceBlankBefore as the value of the List's ForceBlankBefore field. |
| func (d *parseDoc) list(lines []string, forceBlankBefore bool) *List { |
| num, _, _ := listMarker(lines[0]) |
| var ( |
| list *List = &List{ForceBlankBefore: forceBlankBefore} |
| item *ListItem |
| text []string |
| ) |
| flush := func() { |
| if item != nil { |
| if para := d.paragraph(text); para != nil { |
| item.Content = append(item.Content, para) |
| } |
| } |
| text = nil |
| } |
| |
| for _, line := range lines { |
| if n, after, ok := listMarker(line); ok && (n != "") == (num != "") { |
| // start new list item |
| flush() |
| |
| item = &ListItem{Number: n} |
| list.Items = append(list.Items, item) |
| line = after |
| } |
| line = strings.TrimSpace(line) |
| if line == "" { |
| list.ForceBlankBetween = true |
| flush() |
| continue |
| } |
| text = append(text, strings.TrimSpace(line)) |
| } |
| flush() |
| return list |
| } |
| |
// listMarker tries to parse line as beginning with a list marker,
// ignoring surrounding white space.
// On success it returns the marker's number ("" for a bullet),
// the remainder of the line, and ok == true.
// On failure it returns "", "", false.
//
// A marker is one of the bullets • * + - or a run of decimal
// digits followed by '.' or ')'. It must be followed by a space
// or tab and then some non-space text.
func listMarker(line string) (num, rest string, ok bool) {
	line = strings.TrimSpace(line)
	if line == "" {
		return "", "", false
	}

	switch r, size := utf8.DecodeRuneInString(line); {
	case r == '•' || r == '*' || r == '+' || r == '-':
		rest = line[size:]

	case '0' <= r && r <= '9':
		end := 1
		for end < len(line) && '0' <= line[end] && line[end] <= '9' {
			end++
		}
		if end == len(line) || (line[end] != '.' && line[end] != ')') {
			return "", "", false
		}
		num, rest = line[:end], line[end+1:]

	default:
		return "", "", false
	}

	// The marker must be separated from the item text by white space.
	if rest == "" || (rest[0] != ' ' && rest[0] != '\t') {
		return "", "", false
	}
	if strings.TrimSpace(rest) == "" {
		return "", "", false
	}
	return num, rest, true
}
| |
| // isList reports whether the line is the first line of a list, |
| // meaning starts with a list marker after any indentation. |
| // (The caller is responsible for checking the line is indented, as appropriate.) |
| func isList(line string) bool { |
| _, _, ok := listMarker(line) |
| return ok |
| } |
| |
| // parseLinkedText parses text that is allowed to contain explicit links, |
| // such as [math.Sin] or [Go home page], into a slice of Text items. |
| // |
| // A “pkg” is only assumed to be a full import path if it starts with |
| // a domain name (a path element with a dot) or is one of the packages |
| // from the standard library (“[os]”, “[encoding/json]”, and so on). |
| // To avoid problems with maps, generics, and array types, doc links |
| // must be both preceded and followed by punctuation, spaces, tabs, |
| // or the start or end of a line. An example problem would be treating |
| // map[ast.Expr]TypeAndValue as containing a link. |
| func (d *parseDoc) parseLinkedText(text string) []Text { |
| var out []Text |
| wrote := 0 |
| flush := func(i int) { |
| if wrote < i { |
| out = d.parseText(out, text[wrote:i], true) |
| wrote = i |
| } |
| } |
| |
| start := -1 |
| var buf []byte |
| for i := 0; i < len(text); i++ { |
| c := text[i] |
| if c == '\n' || c == '\t' { |
| c = ' ' |
| } |
| switch c { |
| case '[': |
| start = i |
| case ']': |
| if start >= 0 { |
| if def, ok := d.links[string(buf)]; ok { |
| def.Used = true |
| flush(start) |
| out = append(out, &Link{ |
| Text: d.parseText(nil, text[start+1:i], false), |
| URL: def.URL, |
| }) |
| wrote = i + 1 |
| } else if link, ok := d.docLink(text[start+1:i], text[:start], text[i+1:]); ok { |
| flush(start) |
| link.Text = d.parseText(nil, text[start+1:i], false) |
| out = append(out, link) |
| wrote = i + 1 |
| } |
| } |
| start = -1 |
| buf = buf[:0] |
| } |
| if start >= 0 && i != start { |
| buf = append(buf, c) |
| } |
| } |
| |
| flush(len(text)) |
| return out |
| } |
| |
| // docLink parses text, which was found inside [ ] brackets, |
| // as a doc link if possible, returning the DocLink and ok == true |
| // or else nil, false. |
| // The before and after strings are the text before the [ and after the ] |
| // on the same line. Doc links must be preceded and followed by |
| // punctuation, spaces, tabs, or the start or end of a line. |
| func (d *parseDoc) docLink(text, before, after string) (link *DocLink, ok bool) { |
| if before != "" { |
| r, _ := utf8.DecodeLastRuneInString(before) |
| if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { |
| return nil, false |
| } |
| } |
| if after != "" { |
| r, _ := utf8.DecodeRuneInString(after) |
| if !unicode.IsPunct(r) && r != ' ' && r != '\t' && r != '\n' { |
| return nil, false |
| } |
| } |
| if strings.HasPrefix(text, "*") { |
| text = text[1:] |
| } |
| pkg, name, ok := splitDocName(text) |
| var recv string |
| if ok { |
| pkg, recv, _ = splitDocName(pkg) |
| } |
| if pkg != "" { |
| if pkg, ok = d.lookupPkg(pkg); !ok { |
| return nil, false |
| } |
| } else { |
| if ok = d.lookupSym(recv, name); !ok { |
| return nil, false |
| } |
| } |
| link = &DocLink{ |
| ImportPath: pkg, |
| Recv: recv, |
| Name: name, |
| } |
| return link, true |
| } |
| |
| // If text is of the form before.Name, where Name is a capitalized Go identifier, |
| // then splitDocName returns before, name, true. |
| // Otherwise it returns text, "", false. |
| func splitDocName(text string) (before, name string, foundDot bool) { |
| i := strings.LastIndex(text, ".") |
| name = text[i+1:] |
| if !isName(name) { |
| return text, "", false |
| } |
| if i >= 0 { |
| before = text[:i] |
| } |
| return before, name, true |
| } |
| |
| // parseText parses s as text and returns the result of appending |
| // those parsed Text elements to out. |
| // parseText does not handle explicit links like [math.Sin] or [Go home page]: |
| // those are handled by parseLinkedText. |
| // If autoLink is true, then parseText recognizes URLs and words from d.Words |
| // and converts those to links as appropriate. |
| func (d *parseDoc) parseText(out []Text, s string, autoLink bool) []Text { |
| var w strings.Builder |
| wrote := 0 |
| writeUntil := func(i int) { |
| w.WriteString(s[wrote:i]) |
| wrote = i |
| } |
| flush := func(i int) { |
| writeUntil(i) |
| if w.Len() > 0 { |
| out = append(out, Plain(w.String())) |
| w.Reset() |
| } |
| } |
| for i := 0; i < len(s); { |
| t := s[i:] |
| if autoLink { |
| if url, ok := autoURL(t); ok { |
| flush(i) |
| // Note: The old comment parser would look up the URL in words |
| // and replace the target with words[URL] if it was non-empty. |
| // That would allow creating links that display as one URL but |
| // when clicked go to a different URL. Not sure what the point |
| // of that is, so we're not doing that lookup here. |
| out = append(out, &Link{Auto: true, Text: []Text{Plain(url)}, URL: url}) |
| i += len(url) |
| wrote = i |
| continue |
| } |
| if id, ok := ident(t); ok { |
| url, italics := d.Words[id] |
| if !italics { |
| i += len(id) |
| continue |
| } |
| flush(i) |
| if url == "" { |
| out = append(out, Italic(id)) |
| } else { |
| out = append(out, &Link{Auto: true, Text: []Text{Italic(id)}, URL: url}) |
| } |
| i += len(id) |
| wrote = i |
| continue |
| } |
| } |
| switch { |
| case strings.HasPrefix(t, "``"): |
| if len(t) >= 3 && t[2] == '`' { |
| // Do not convert `` inside ```, in case people are mistakenly writing Markdown. |
| i += 3 |
| for i < len(t) && t[i] == '`' { |
| i++ |
| } |
| break |
| } |
| writeUntil(i) |
| w.WriteRune('“') |
| i += 2 |
| wrote = i |
| case strings.HasPrefix(t, "''"): |
| writeUntil(i) |
| w.WriteRune('”') |
| i += 2 |
| wrote = i |
| default: |
| i++ |
| } |
| } |
| flush(len(s)) |
| return out |
| } |
| |
| // autoURL checks whether s begins with a URL that should be hyperlinked. |
| // If so, it returns the URL, which is a prefix of s, and ok == true. |
| // Otherwise it returns "", false. |
| // The caller should skip over the first len(url) bytes of s |
| // before further processing. |
| func autoURL(s string) (url string, ok bool) { |
| // Find the ://. Fast path to pick off non-URL, |
| // since we call this at every position in the string. |
| // The shortest possible URL is ftp://x, 7 bytes. |
| var i int |
| switch { |
| case len(s) < 7: |
| return "", false |
| case s[3] == ':': |
| i = 3 |
| case s[4] == ':': |
| i = 4 |
| case s[5] == ':': |
| i = 5 |
| case s[6] == ':': |
| i = 6 |
| default: |
| return "", false |
| } |
| if i+3 > len(s) || s[i:i+3] != "://" { |
| return "", false |
| } |
| |
| // Check valid scheme. |
| if !isScheme(s[:i]) { |
| return "", false |
| } |
| |
| // Scan host part. Must have at least one byte, |
| // and must start and end in non-punctuation. |
| i += 3 |
| if i >= len(s) || !isHost(s[i]) || isPunct(s[i]) { |
| return "", false |
| } |
| i++ |
| end := i |
| for i < len(s) && isHost(s[i]) { |
| if !isPunct(s[i]) { |
| end = i + 1 |
| } |
| i++ |
| } |
| i = end |
| |
| // At this point we are definitely returning a URL (scheme://host). |
| // We just have to find the longest path we can add to it. |
| // Heuristics abound. |
| // We allow parens, braces, and brackets, |
| // but only if they match (#5043, #22285). |
| // We allow .,:;?! in the path but not at the end, |
| // to avoid end-of-sentence punctuation (#18139, #16565). |
| stk := []byte{} |
| end = i |
| Path: |
| for ; i < len(s); i++ { |
| if isPunct(s[i]) { |
| continue |
| } |
| if !isPath(s[i]) { |
| break |
| } |
| switch s[i] { |
| case '(': |
| stk = append(stk, ')') |
| case '{': |
| stk = append(stk, '}') |
| case '[': |
| stk = append(stk, ']') |
| case ')', '}', ']': |
| if len(stk) == 0 || stk[len(stk)-1] != s[i] { |
| break Path |
| } |
| stk = stk[:len(stk)-1] |
| } |
| if len(stk) == 0 { |
| end = i + 1 |
| } |
| } |
| |
| return s[:end], true |
| } |
| |
// isScheme reports whether s is one of the URL schemes that autoURL links.
// Every scheme listed here is 3 to 6 bytes long; autoURL's fast path
// only looks for the scheme-terminating colon at byte offsets 3 through 6,
// so adding a scheme of any other length requires updating that fast path too.
func isScheme(s string) bool {
	return s == "file" ||
		s == "ftp" ||
		s == "gopher" ||
		s == "http" ||
		s == "https" ||
		s == "mailto" ||
		s == "nntp"
}
| |
// isHost reports whether c is a byte that can appear in a URL host,
// like www.example.com or user@[::1]:8080.
// The accepted bytes are ASCII letters, ASCII digits, and _ @ - . [ ] :
func isHost(c byte) bool {
	// allowed is a 128-bit bitmap with bit b set when byte b is accepted.
	// It is split into two 64-bit words so that membership is a single
	// shift and mask on the word covering c.
	const (
		letters = 1<<26 - 1 // 26 consecutive one bits
		digits  = 1<<10 - 1 // 10 consecutive one bits

		allowed = letters<<'A' | letters<<'a' | digits<<'0' |
			1<<'_' | 1<<'@' | 1<<'-' | 1<<'.' | 1<<'[' | 1<<']' | 1<<':'

		lo uint64 = allowed & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = allowed >> 64         // bits for bytes 64-127
	)

	switch {
	case c < 64:
		return lo>>c&1 != 0
	case c < 128:
		return hi>>(c-64)&1 != 0
	}
	// Bytes 128 and above are never host bytes.
	return false
}
| |
// isPunct reports whether c is one of the punctuation bytes . , : ; ? !
// which may appear inside a URL path but not as its final byte.
func isPunct(c byte) bool {
	switch c {
	case '.', ',', ':', ';', '?', '!':
		return true
	}
	return false
}
| |
// isPath reports whether c is a (non-punctuation) path byte:
// an ASCII letter, an ASCII digit, or one of
// $ ' ( ) * + & # = @ ~ _ / - [ ] { } %
func isPath(c byte) bool {
	// allowed is a 128-bit bitmap with bit b set when byte b is accepted.
	const allowed = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'$' | 1<<'\'' | 1<<'(' | 1<<')' | 1<<'*' | 1<<'+' |
		1<<'&' | 1<<'#' | 1<<'=' | 1<<'@' | 1<<'~' | 1<<'_' |
		1<<'/' | 1<<'-' | 1<<'[' | 1<<']' | 1<<'{' | 1<<'}' |
		1<<'%'

	// The bitmap is consulted as two 64-bit words.
	const (
		lo uint64 = allowed & (1<<64 - 1) // bits for bytes 0-63
		hi uint64 = allowed >> 64         // bits for bytes 64-127
	)

	// Pick the word that holds c's bit and the bit's index within it.
	word, bit := lo, c
	if c >= 64 {
		word, bit = hi, c-64
	}
	// Bytes 128 and above have no bit in the map.
	return c < 128 && word>>bit&1 == 1
}
| |
| // isName reports whether s is a capitalized Go identifier (like Name). |
| func isName(s string) bool { |
| t, ok := ident(s) |
| if !ok || t != s { |
| return false |
| } |
| r, _ := utf8.DecodeRuneInString(s) |
| return unicode.IsUpper(r) |
| } |
| |
| // ident checks whether s begins with a Go identifier. |
| // If so, it returns the identifier, which is a prefix of s, and ok == true. |
| // Otherwise it returns "", false. |
| // The caller should skip over the first len(id) bytes of s |
| // before further processing. |
| func ident(s string) (id string, ok bool) { |
| // Scan [\pL_][\pL_0-9]* |
| n := 0 |
| for n < len(s) { |
| if c := s[n]; c < utf8.RuneSelf { |
| if isIdentASCII(c) && (n > 0 || c < '0' || c > '9') { |
| n++ |
| continue |
| } |
| break |
| } |
| r, nr := utf8.DecodeRuneInString(s[n:]) |
| if unicode.IsLetter(r) { |
| n += nr |
| continue |
| } |
| break |
| } |
| return s[:n], n > 0 |
| } |
| |
// isIdentASCII reports whether c is an ASCII identifier byte:
// an ASCII letter, an ASCII digit, or an underscore.
func isIdentASCII(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z',
		'A' <= c && c <= 'Z',
		'0' <= c && c <= '9',
		c == '_':
		return true
	}
	return false
}
| |
| // validImportPath reports whether path is a valid import path. |
| // It is a lightly edited copy of golang.org/x/mod/module.CheckImportPath. |
| func validImportPath(path string) bool { |
| if !utf8.ValidString(path) { |
| return false |
| } |
| if path == "" { |
| return false |
| } |
| if path[0] == '-' { |
| return false |
| } |
| if strings.Contains(path, "//") { |
| return false |
| } |
| if path[len(path)-1] == '/' { |
| return false |
| } |
| elemStart := 0 |
| for i, r := range path { |
| if r == '/' { |
| if !validImportPathElem(path[elemStart:i]) { |
| return false |
| } |
| elemStart = i + 1 |
| } |
| } |
| return validImportPathElem(path[elemStart:]) |
| } |
| |
| func validImportPathElem(elem string) bool { |
| if elem == "" || elem[0] == '.' || elem[len(elem)-1] == '.' { |
| return false |
| } |
| for i := 0; i < len(elem); i++ { |
| if !importPathOK(elem[i]) { |
| return false |
| } |
| } |
| return true |
| } |
| |
// importPathOK reports whether c is a byte allowed in an import path element:
// an ASCII letter, an ASCII digit, or one of - . ~ _ +
func importPathOK(c byte) bool {
	// allowed is a 128-bit bitmap with bit b set when byte b is accepted.
	const allowed = 0 |
		(1<<26-1)<<'A' |
		(1<<26-1)<<'a' |
		(1<<10-1)<<'0' |
		1<<'-' | 1<<'.' | 1<<'~' | 1<<'_' | 1<<'+'

	// The bitmap is consulted as two 64-bit words.
	const (
		lowHalf  uint64 = allowed & (1<<64 - 1) // bits for bytes 0-63
		highHalf uint64 = allowed >> 64         // bits for bytes 64-127
	)

	if c < 64 {
		return lowHalf&(1<<c) != 0
	}
	// For c >= 128 the shift count c-64 is at least 64, so the
	// shifted bit is zero and the result is false.
	return highHalf&(1<<(c-64)) != 0
}