| // Copyright 2017 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package render |
| |
| import ( |
| "bytes" |
| "fmt" |
| "go/ast" |
| "go/printer" |
| "go/scanner" |
| "go/token" |
| "html/template" |
| "io" |
| "regexp" |
| "strconv" |
| "strings" |
| |
| "golang.org/x/pkgsite/internal/fetch/internal/doc" |
| ) |
| |
| /* |
| This logic is responsible for converting documentation comments and AST nodes |
| into formatted HTML. This relies on identifierResolver.toHTML to do the work |
| of converting words into links. |
| */ |
| |
| // TODO(golang.org/issue/17056): Support hiding deprecated declarations. |
| |
const (
	// Regexp for URLs.
	// Match any ".,:;?!" within path, but not at end (see #18139, #16565).
	// This excludes some rare yet valid URLs ending in common punctuation
	// in order to allow sentences ending in URLs.
	//
	// The pattern is the concatenation of the three parts below, so a match
	// always contains the literal "://".
	urlRx = protoPart + `://` + hostPart + pathPart

	// Protocol (e.g. "http").
	protoPart = `(https?|s?ftps?|file|gopher|mailto|nntp)`
	// Host (e.g. "www.example.com" or "[::1]:8080").
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// Optional path, query, fragment (e.g. "/path/index.html?q=foo#bar").
	// Each repetition is a run of zero or more punctuation characters
	// followed by exactly one non-punctuation character, which is what
	// keeps trailing punctuation out of the match.
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// Regexp for Go identifiers: a Unicode letter or underscore followed by
	// any number of Unicode letters, underscores or ASCII digits.
	identRx = `[\pL_][\pL_0-9]*`
	// Regexp for an identifier optionally followed by dot-separated
	// identifiers (e.g. "io.EOF").
	qualIdentRx = identRx + `(\.` + identRx + `)*`
)
| |
var (
	// matchRx matches either a URL or a (possibly qualified) Go identifier.
	// The URL alternative is listed first in the pattern.
	matchRx = regexp.MustCompile(urlRx + `|` + qualIdentRx)
	// badAnchorRx matches any single character that is not an ASCII letter
	// or digit. declHTML replaces each such character in a heading title
	// with "_" to build the heading's anchor ID.
	badAnchorRx = regexp.MustCompile(`[^a-zA-Z0-9]`)
)
| |
| func (r *Renderer) declHTML(doc string, decl ast.Decl) (out struct{ Doc, Decl template.HTML }) { |
| dids := newDeclIDs(decl) |
| idr := &identifierResolver{r.pids, dids, r.packageURL} |
| if doc != "" { |
| var b bytes.Buffer |
| for _, blk := range docToBlocks(doc) { |
| switch blk := blk.(type) { |
| case *paragraph: |
| b.WriteString("<p>\n") |
| for _, line := range blk.lines { |
| r.formatLineHTML(&b, line, idr) |
| b.WriteString("\n") |
| } |
| b.WriteString("</p>\n") |
| case *preformat: |
| b.WriteString("<pre>\n") |
| for _, line := range blk.lines { |
| r.formatLineHTML(&b, line, nil) |
| b.WriteString("\n") |
| } |
| b.WriteString("</pre>\n") |
| case *heading: |
| id := badAnchorRx.ReplaceAllString(blk.title, "_") |
| b.WriteString(`<h3 id="hdr-` + id + `">`) |
| b.WriteString(template.HTMLEscapeString(blk.title)) |
| if !r.disablePermalinks { |
| b.WriteString(` <a href="#hdr-` + id + `">¶</a>`) |
| } |
| b.WriteString("</h3>\n") |
| } |
| } |
| out.Doc = template.HTML(b.String()) |
| } |
| if decl != nil { |
| var b bytes.Buffer |
| b.WriteString("<pre>\n") |
| r.formatDeclHTML(&b, decl, idr) |
| b.WriteString("</pre>\n") |
| out.Decl = template.HTML(b.String()) |
| } |
| return out |
| } |
| |
| func (r *Renderer) codeHTML(code interface{}) template.HTML { |
| // TODO: Should we perform hotlinking for comments and code? |
| if code == nil { |
| return "" |
| } |
| |
| var b bytes.Buffer |
| p := printer.Config{Mode: printer.UseSpaces | printer.TabIndent, Tabwidth: 4} |
| p.Fprint(&b, r.fset, code) |
| src := b.String() |
| |
| // If code is an *ast.BlockStmt, then trim the braces. |
| var indent string |
| if len(src) >= 4 && strings.HasPrefix(src, "{\n") && strings.HasSuffix(src, "\n}") { |
| src = strings.Trim(src[2:len(src)-2], "\n") |
| indent = src[:indentLength(src)] |
| if len(indent) > 0 { |
| src = strings.TrimPrefix(src, indent) // handle remaining indents later |
| } |
| } |
| |
| // Scan through the source code, adding comment spans for comments, |
| // and stripping the trailing example output. |
| var bb bytes.Buffer |
| var lastOffset int // last src offset copied to output buffer |
| var outputOffset int // index in output buffer of output comment |
| var s scanner.Scanner |
| fset := token.NewFileSet() |
| file := fset.AddFile("", fset.Base(), len(src)) |
| s.Init(file, []byte(src), nil, scanner.ScanComments) |
| bb.WriteString("<pre>\n") |
| indent = "\n" + indent // prepend newline for easier search-and-replace. |
| scan: |
| for { |
| p, tok, lit := s.Scan() |
| offset := file.Offset(p) // current offset into source file |
| prev := src[lastOffset:offset] |
| prev = strings.Replace(prev, indent, "\n", -1) |
| bb.WriteString(template.HTMLEscapeString(prev)) |
| lastOffset = offset |
| switch tok { |
| case token.EOF: |
| break scan |
| case token.COMMENT: |
| if exampleOutputRx.MatchString(lit) && outputOffset == 0 { |
| outputOffset = bb.Len() |
| } |
| bb.WriteString(`<span class="comment">`) |
| lit = strings.Replace(lit, indent, "\n", -1) |
| bb.WriteString(template.HTMLEscapeString(lit)) |
| bb.WriteString(`</span>`) |
| lastOffset += len(lit) |
| case token.STRING: |
| // Avoid replacing indents in multi-line string literals. |
| outputOffset = 0 |
| bb.WriteString(template.HTMLEscapeString(lit)) |
| lastOffset += len(lit) |
| default: |
| outputOffset = 0 |
| } |
| } |
| |
| if outputOffset > 0 { |
| bb.Truncate(outputOffset) |
| } |
| for bb.Len() > 0 && bb.Bytes()[bb.Len()-1] == '\n' { |
| bb.Truncate(bb.Len() - 1) // trim trailing newlines |
| } |
| bb.WriteByte('\n') |
| bb.WriteString("</pre>\n") |
| return template.HTML(bb.String()) |
| } |
| |
| // formatLineHTML formats the line as HTML-annotated text. |
| // URLs and Go identifiers are linked to corresponding declarations. |
| func (r *Renderer) formatLineHTML(w io.Writer, line string, idr *identifierResolver) { |
| var lastChar, nextChar byte |
| var numQuotes int |
| for len(line) > 0 { |
| m0, m1 := len(line), len(line) |
| if m := matchRx.FindStringIndex(line); m != nil { |
| m0, m1 = m[0], m[1] |
| } |
| if m0 > 0 { |
| nonWord := line[:m0] |
| io.WriteString(w, template.HTMLEscapeString(nonWord)) |
| lastChar = nonWord[len(nonWord)-1] |
| numQuotes += countQuotes(nonWord) |
| } |
| if m1 > m0 { |
| word := line[m0:m1] |
| nextChar = 0 |
| if m1 < len(line) { |
| nextChar = line[m1] |
| } |
| |
| // Reduce false-positives by having a list of allowed |
| // characters preceding and succeeding an identifier. |
| // Also, forbid ID linking within unbalanced quotes on same line. |
| validPrefix := strings.IndexByte("\x00 \t()[]*\n", lastChar) >= 0 |
| validSuffix := strings.IndexByte("\x00 \t()[]:;,.'\n", nextChar) >= 0 |
| forbidLinking := !validPrefix || !validSuffix || numQuotes%2 != 0 |
| |
| // TODO: Should we provide hotlinks for related packages? |
| |
| switch { |
| case strings.Contains(word, "://"): |
| // Forbid closing brackets without prior opening brackets. |
| // See https://golang.org/issue/22285. |
| if i := strings.IndexByte(word, ')'); i >= 0 && i < strings.IndexByte(word, '(') { |
| m1 = m0 + i |
| word = line[m0:m1] |
| } |
| if i := strings.IndexByte(word, ']'); i >= 0 && i < strings.IndexByte(word, '[') { |
| m1 = m0 + i |
| word = line[m0:m1] |
| } |
| |
| // Require balanced pairs of parentheses. |
| // See https://golang.org/issue/5043. |
| for i := 0; strings.Count(word, "(") != strings.Count(word, ")") && i < 10; i++ { |
| m1 = strings.LastIndexAny(line[:m1], "()") |
| word = line[m0:m1] |
| } |
| for i := 0; strings.Count(word, "[") != strings.Count(word, "]") && i < 10; i++ { |
| m1 = strings.LastIndexAny(line[:m1], "[]") |
| word = line[m0:m1] |
| } |
| |
| word := template.HTMLEscapeString(word) |
| fmt.Fprintf(w, `<a href="%s">%s</a>`, word, word) |
| case !forbidLinking && !r.disableHotlinking && idr != nil: // && numQuotes%2 == 0: |
| io.WriteString(w, idr.toHTML(word)) |
| default: |
| io.WriteString(w, template.HTMLEscapeString(word)) |
| } |
| numQuotes += countQuotes(word) |
| } |
| line = line[m1:] |
| } |
| } |
| |
// countQuotes reports how many quote characters s contains. Both the ASCII
// double quote and the curly double quotes “ and ” are counted.
func countQuotes(s string) int {
	total := 0
	for _, q := range []string{`"`, "“", "”"} {
		total += strings.Count(s, q)
	}
	return total
}
| |
| // formatDeclHTML formats the decl as HTML-annotated source code for the |
| // provided decl. Type identifiers are linked to corresponding declarations. |
| func (r *Renderer) formatDeclHTML(w io.Writer, decl ast.Decl, idr *identifierResolver) { |
| // Generate all anchor points and links for the given decl. |
| anchorPointsMap := generateAnchorPoints(decl) |
| anchorLinksMap := generateAnchorLinks(idr, decl) |
| |
| // Convert the maps (keyed by *ast.Ident) to slices of idKinds or URLs. |
| // |
| // This relies on the ast.Inspect and scanner.Scanner both |
| // visiting *ast.Ident and token.IDENT nodes in the same order. |
| var anchorPoints []idKind |
| var anchorLinks []string |
| ast.Inspect(decl, func(node ast.Node) bool { |
| if id, ok := node.(*ast.Ident); ok { |
| anchorPoints = append(anchorPoints, anchorPointsMap[id]) |
| anchorLinks = append(anchorLinks, anchorLinksMap[id]) |
| } |
| return true |
| }) |
| |
| // Trim large string literals and slices. |
| v := &declVisitor{} |
| ast.Walk(v, decl) |
| |
| // Format decl as Go source code file. |
| var b bytes.Buffer |
| p := printer.Config{Mode: printer.UseSpaces | printer.TabIndent, Tabwidth: 4} |
| p.Fprint(&b, r.fset, &printer.CommentedNode{Node: decl, Comments: v.Comments}) |
| src := b.Bytes() |
| fset := token.NewFileSet() |
| file := fset.AddFile("", fset.Base(), b.Len()) |
| |
| // anchorLines is a list of anchor IDs that should be placed for each line. |
| // lineTypes is a list of the type (e.g., comment or code) of each line. |
| type lineType byte |
| const codeType, commentType lineType = 1 << 0, 1 << 1 // may OR together |
| numLines := bytes.Count(src, []byte("\n")) + 1 |
| anchorLines := make([][]idKind, numLines) |
| lineTypes := make([]lineType, numLines) |
| |
| // Scan through the source code, appropriately annotating it with HTML spans |
| // for comments, and HTML links and anchors for relevant identifiers. |
| var bb bytes.Buffer // temporary output buffer |
| var idIdx int // current index in anchorPoints and anchorLinks |
| var lastOffset int // last src offset copied to output buffer |
| var s scanner.Scanner |
| s.Init(file, src, nil, scanner.ScanComments) |
| scan: |
| for { |
| p, tok, lit := s.Scan() |
| line := file.Line(p) - 1 // current 0-indexed line number |
| offset := file.Offset(p) // current offset into source file |
| tokType := codeType // current token type (assume source code) |
| |
| template.HTMLEscape(&bb, src[lastOffset:offset]) |
| lastOffset = offset |
| switch tok { |
| case token.EOF: |
| break scan |
| case token.COMMENT: |
| tokType = commentType |
| bb.WriteString(`<span class="comment">`) |
| r.formatLineHTML(&bb, lit, idr) |
| bb.WriteString(`</span>`) |
| lastOffset += len(lit) |
| case token.IDENT: |
| if idIdx < len(anchorPoints) && anchorPoints[idIdx].id != "" { |
| anchorLines[line] = append(anchorLines[line], anchorPoints[idIdx]) |
| } |
| if idIdx < len(anchorLinks) && anchorLinks[idIdx] != "" { |
| u := template.HTMLEscapeString(anchorLinks[idIdx]) |
| s := template.HTMLEscapeString(lit) |
| fmt.Fprintf(&bb, `<a href="%s">%s</a>`, u, s) |
| lastOffset += len(lit) |
| } |
| idIdx++ |
| } |
| for i := strings.Count(strings.TrimSuffix(lit, "\n"), "\n"); i >= 0; i-- { |
| lineTypes[line+i] |= tokType |
| } |
| } |
| |
| // Move anchor points up to the start of a comment |
| // if the next line has no anchors. |
| for i := range anchorLines { |
| if i+1 == len(anchorLines) || len(anchorLines[i+1]) == 0 { |
| j := i |
| for j > 0 && lineTypes[j-1] == commentType { |
| j-- |
| } |
| anchorLines[i], anchorLines[j] = anchorLines[j], anchorLines[i] |
| } |
| } |
| |
| // Emit anchor IDs and data-kind attributes for each relevant line. |
| for _, iks := range anchorLines { |
| for _, ik := range iks { |
| // Attributes for types and functions are handled in the template |
| // that generates the full documentation HTML. |
| if ik.kind == "function" || ik.kind == "type" { |
| continue |
| } |
| // Top-level methods are handled in the template, but interface methods |
| // are handled here. |
| if fd, ok := decl.(*ast.FuncDecl); ok && fd.Recv != nil { |
| continue |
| } |
| fmt.Fprintf(w, `<span id="%s" data-kind="%s"></span>`, |
| template.HTMLEscapeString(ik.id), ik.kind) |
| } |
| b, _ := bb.ReadBytes('\n') |
| w.Write(b) // write remainder of line (contains newline) |
| } |
| } |
| |
// declVisitor is used to walk over the AST and trim large string
// literals and arrays before package documentation is rendered.
// Comments are added to Comments to indicate that a part of the
// original code is not displayed.
type declVisitor struct {
	// Comments collects one comment group per trimmed node, positioned at
	// the trimmed literal. formatDeclHTML hands them to the printer through
	// printer.CommentedNode.
	Comments []*ast.CommentGroup
}
| |
| // Visit implements ast.Visitor. |
| func (v *declVisitor) Visit(n ast.Node) ast.Visitor { |
| switch n := n.(type) { |
| case *ast.BasicLit: |
| if n.Kind == token.STRING && len(n.Value) > 128 { |
| v.Comments = append(v.Comments, |
| &ast.CommentGroup{List: []*ast.Comment{{ |
| Slash: n.Pos(), |
| Text: stringBasicLitSize(n.Value), |
| }}}) |
| n.Value = `""` |
| } |
| case *ast.CompositeLit: |
| if len(n.Elts) > 100 { |
| v.Comments = append(v.Comments, |
| &ast.CommentGroup{List: []*ast.Comment{{ |
| Slash: n.Lbrace, |
| Text: fmt.Sprintf("/* %d elements not displayed */", len(n.Elts)), |
| }}}) |
| n.Elts = n.Elts[:0] |
| } |
| } |
| return v |
| } |
| |
// stringBasicLitSize returns the placeholder comment text used in place of a
// trimmed string literal. The text reports the number of bytes in the
// unquoted value of s, or, if s cannot be unquoted, the length of the
// literal itself marked as invalid.
//
// See noder.basicLit and syntax.StringLit cases in cmd/compile/internal/gc/noder.go.
func stringBasicLitSize(s string) string {
	lit := s
	if strings.HasPrefix(lit, "`") {
		// Carriage returns are stripped from raw string literals.
		lit = strings.ReplaceAll(lit, "\r", "")
	}

	unquoted, err := strconv.Unquote(lit)
	if err == nil {
		return fmt.Sprintf("/* %d byte string literal not displayed */", len(unquoted))
	}
	return fmt.Sprintf("/* invalid %d byte string literal not displayed */", len(lit))
}
| |
// An idKind pairs an anchor ID with the kind of identifier being anchored.
// The valid kinds are: "constant", "variable", "type", "function", "method" and "field".
type idKind struct {
	id   string // anchor ID, e.g. "Name" or "Type.Member"
	kind string // one of the kinds listed above
}
| |
| // generateAnchorPoints returns a mapping of *ast.Ident objects to the |
| // qualified ID that should be set as an anchor point, as well as the kind |
| // of identifer, used in the data-kind attribute. |
| func generateAnchorPoints(decl ast.Decl) map[*ast.Ident]idKind { |
| m := map[*ast.Ident]idKind{} |
| switch decl := decl.(type) { |
| case *ast.GenDecl: |
| for _, sp := range decl.Specs { |
| switch decl.Tok { |
| case token.CONST, token.VAR: |
| kind := "constant" |
| if decl.Tok == token.VAR { |
| kind = "variable" |
| } |
| for _, name := range sp.(*ast.ValueSpec).Names { |
| m[name] = idKind{name.Name, kind} |
| } |
| case token.TYPE: |
| ts := sp.(*ast.TypeSpec) |
| m[ts.Name] = idKind{ts.Name.Name, "type"} |
| |
| var fs []*ast.Field |
| var kind string |
| switch tx := ts.Type.(type) { |
| case *ast.StructType: |
| fs = tx.Fields.List |
| kind = "field" |
| case *ast.InterfaceType: |
| fs = tx.Methods.List |
| kind = "method" |
| } |
| for _, f := range fs { |
| for _, id := range f.Names { |
| m[id] = idKind{ts.Name.String() + "." + id.String(), kind} |
| } |
| // if f.Names == nil, we have an embedded struct field or embedded |
| // interface. |
| // |
| // Don't generate anchor points for embedded interfaces. They |
| // aren't interesting in and of themselves; they just represent an |
| // additional list of methods added to the interface. |
| // |
| // Do generate anchor points for embedded fields: they are |
| // interesting, because their names can be used in selector |
| // expressions and struct literals. |
| if f.Names == nil && kind == "field" { |
| // The name of an embedded field is the type name. |
| typeName, id := nodeName(f.Type) |
| typeName = typeName[strings.LastIndexByte(typeName, '.')+1:] |
| m[id] = idKind{ts.Name.String() + "." + typeName, kind} |
| } |
| } |
| } |
| } |
| case *ast.FuncDecl: |
| anchorID := decl.Name.Name |
| kind := "function" |
| if decl.Recv != nil && len(decl.Recv.List) > 0 { |
| recvName, _ := nodeName(decl.Recv.List[0].Type) |
| recvName = recvName[strings.LastIndexByte(recvName, '.')+1:] |
| anchorID = recvName + "." + anchorID |
| kind = "method" |
| } |
| m[decl.Name] = idKind{anchorID, kind} |
| } |
| return m |
| } |
| |
| // generateAnchorLinks returns a mapping of *ast.Ident objects to the URL |
| // that the identifier should link to. |
| func generateAnchorLinks(idr *identifierResolver, decl ast.Decl) map[*ast.Ident]string { |
| m := map[*ast.Ident]string{} |
| ignore := map[ast.Node]bool{} |
| ast.Inspect(decl, func(node ast.Node) bool { |
| if ignore[node] { |
| return false |
| } |
| switch node := node.(type) { |
| case *ast.SelectorExpr: |
| // Package qualified identifier (e.g., "io.EOF"). |
| if prefix, _ := node.X.(*ast.Ident); prefix != nil { |
| if obj := prefix.Obj; obj != nil && obj.Kind == ast.Pkg { |
| if spec, _ := obj.Decl.(*ast.ImportSpec); spec != nil { |
| if path, err := strconv.Unquote(spec.Path.Value); err == nil { |
| // Register two links, one for the package |
| // and one for the qualified identifier. |
| m[prefix] = idr.toURL(path, "") |
| m[node.Sel] = idr.toURL(path, node.Sel.Name) |
| return false |
| } |
| } |
| } |
| } |
| case *ast.Ident: |
| if node.Obj == nil && doc.IsPredeclared(node.Name) { |
| m[node] = idr.toURL("builtin", node.Name) |
| } else if node.Obj != nil && idr.topLevelDecls[node.Obj.Decl] { |
| m[node] = "#" + node.Name |
| } |
| case *ast.FuncDecl: |
| ignore[node.Name] = true // E.g., "func NoLink() int" |
| case *ast.TypeSpec: |
| ignore[node.Name] = true // E.g., "type NoLink int" |
| case *ast.ValueSpec: |
| for _, n := range node.Names { |
| ignore[n] = true // E.g., "var NoLink1, NoLink2 int" |
| } |
| case *ast.AssignStmt: |
| for _, n := range node.Lhs { |
| ignore[n] = true // E.g., "NoLink1, NoLink2 := 0, 1" |
| } |
| } |
| return true |
| }) |
| return m |
| } |