blob: ae717cc3df73b58258efdcbd332fb279d340ea72 [file] [log] [blame]
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package godoc
import (
"bytes"
"context"
"encoding/gob"
"fmt"
"go/ast"
"go/token"
"io"
"sort"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/godoc/codec"
)
// The encoding type identifies the encoding being used, to distinguish them
// when reading from the DB. It is a fixed-length prefix of the encoded data;
// DecodePackage inspects the first encodingTypeLen bytes to choose a decoder.
const (
encodingTypeLen = 4 // all encoding types must be this many bytes
gobEncodingType = "AST1" // data with this prefix is decoded by gobDecodePackage
fastEncodingType = "AST2" // written by fastEncode; decoded by fastDecodePackage
)
// ErrInvalidEncodingType is returned when the data to DecodePackage has an
// invalid encoding type: either the data is shorter than encodingTypeLen
// bytes, or its leading bytes match neither gobEncodingType nor
// fastEncodingType.
var ErrInvalidEncodingType = fmt.Errorf("want initial bytes to be %q or %q but they aren't", gobEncodingType, fastEncodingType)
// init registers the go/ast types listed below with gob. AST fields are often
// interface-typed, and gob can only decode a concrete value stored in an
// interface variable if that concrete type has been registered.
func init() {
	astTypes := []interface{}{
		&ast.ArrayType{},
		&ast.AssignStmt{},
		&ast.BadDecl{},
		&ast.BadExpr{},
		&ast.BadStmt{},
		&ast.BasicLit{},
		&ast.BinaryExpr{},
		&ast.BlockStmt{},
		&ast.BranchStmt{},
		&ast.CallExpr{},
		&ast.CaseClause{},
		&ast.ChanType{},
		&ast.CommClause{},
		&ast.CommentGroup{},
		&ast.Comment{},
		&ast.CompositeLit{},
		&ast.DeclStmt{},
		&ast.DeferStmt{},
		&ast.Ellipsis{},
		&ast.EmptyStmt{},
		&ast.ExprStmt{},
		&ast.FieldList{},
		&ast.Field{},
		&ast.ForStmt{},
		&ast.FuncDecl{},
		&ast.FuncLit{},
		&ast.FuncType{},
		&ast.GenDecl{},
		&ast.GoStmt{},
		&ast.Ident{},
		&ast.IfStmt{},
		&ast.ImportSpec{},
		&ast.IncDecStmt{},
		&ast.IndexExpr{},
		&ast.InterfaceType{},
		&ast.KeyValueExpr{},
		&ast.LabeledStmt{},
		&ast.MapType{},
		&ast.ParenExpr{},
		&ast.RangeStmt{},
		&ast.ReturnStmt{},
		&ast.Scope{},
		&ast.SelectStmt{},
		&ast.SelectorExpr{},
		&ast.SendStmt{},
		&ast.SliceExpr{},
		&ast.StarExpr{},
		&ast.StructType{},
		&ast.SwitchStmt{},
		&ast.TypeAssertExpr{},
		&ast.TypeSpec{},
		&ast.TypeSwitchStmt{},
		&ast.UnaryExpr{},
		&ast.ValueSpec{},
	}
	// Register in list order, one concrete type at a time.
	for i := range astTypes {
		gob.Register(astTypes[i])
	}
}
// Encode encodes a Package into a byte slice.
// During its operation, Encode modifies the AST,
// but it restores it to a state suitable for
// rendering before it returns.
//
// The work is delegated to fastEncode, so the returned bytes begin with the
// fastEncodingType prefix. ctx is not used by this method. A pointer to the
// returned error is handed to derrors.Wrap with the label
// "godoc.Package.Encode()".
//
// NOTE(review): the AST modification described above is not visible in this
// method or in fastEncode; confirm whether it still applies.
func (p *Package) Encode(ctx context.Context) (_ []byte, err error) {
defer derrors.Wrap(&err, "godoc.Package.Encode()")
return p.fastEncode()
}
// DecodePackage decodes a byte slice produced by Package.Encode into a
// Package.
//
// The first encodingTypeLen bytes of data select the decoder: gobEncodingType
// selects gobDecodePackage and fastEncodingType selects fastDecodePackage. The
// remaining bytes are passed to that decoder. If data is too short to hold an
// encoding type, or the type is not recognized, ErrInvalidEncodingType is
// returned.
func DecodePackage(data []byte) (_ *Package, err error) {
	defer derrors.Wrap(&err, "DecodePackage()")

	if len(data) < encodingTypeLen {
		return nil, ErrInvalidEncodingType
	}
	prefix, payload := string(data[:encodingTypeLen]), data[encodingTypeLen:]
	if prefix == gobEncodingType {
		return gobDecodePackage(payload)
	}
	if prefix == fastEncodingType {
		return fastDecodePackage(payload)
	}
	return nil, ErrInvalidEncodingType
}
// gobDecodePackage decodes data, the bytes that follow the gobEncodingType
// prefix, into a Package. The gob stream holds the token.FileSet first,
// followed by the package contents. After decoding, fixupObjects is applied to
// every file of the package.
func gobDecodePackage(data []byte) (_ *Package, err error) {
	decoder := gob.NewDecoder(bytes.NewReader(data))
	pkg := &Package{Fset: token.NewFileSet()}

	// The FileSet and the package body are consecutive values in one stream,
	// so they must be read in this order from the same decoder.
	if err = pkg.Fset.Read(decoder.Decode); err != nil {
		return nil, err
	}
	if err = decoder.Decode(&pkg.encPackage); err != nil {
		return nil, err
	}
	for _, file := range pkg.Files {
		fixupObjects(file)
	}
	return pkg, nil
}
// removeCycles removes cycles from f. There are two sources of cycles
// in an ast.File: Scopes and Objects. Also, some Idents are shared.
//
// removeCycles removes all Scopes, since doc generation doesn't use them. Doc
// generation does use Objects, and it needs object identity to be preserved
// (see internal/doc/example.go). It also needs the Object.Decl field, to create
// anchor links (see dochtml/internal/render/idents.go). The Object.Decl field
// is responsible for cycles. Doc generation doesn't need the Data or Type
// fields of Object.
//
// We need to break the cycles, and preserve Object identity when decoding. For
// an example of the latter, if ast.Idents A and B both pointed to the same
// Object, gob would write them as two separate objects, and decoding would
// preserve that. (See TestObjectIdentity for a small example of this sort of
// sharing.)
//
// We solve both problems by assigning numbers to Decls and Objects. We first
// walk through the AST to assign the numbers, then walk it again to put the
// numbers into Ident.Objs. We take advantage of the fact that the Data and Decl
// fields are of type interface{}, storing the object number into Data and the
// Decl number into Decl.
//
// The AST includes a list of unresolved Idents, which are shared with Idents
// in the tree itself. We assign these numbers as well, and store the numbers
// in a separate field of File.
//
// On return, f.AST.Unresolved and f.AST.Scope.Objects are nil; their contents
// are represented by f.UnresolvedNums and f.ScopeItems instead. f.AST.Scope
// must be non-nil, because its Objects map is read and then cleared.
//
// removeCycles panics if the same relevant Decl node is visited twice, or if an
// Ident's Object was not assigned a number in the first pass.
func removeCycles(f *File) {
// First pass: assign every Decl, Spec and Ident a number.
// Since these aren't shared and Inspect is deterministic,
// this walk will produce the same sequence of Decls after encoding/decoding.
// Also assign a unique number to each Object we find in an Ident.
// Objects may be shared; traversing the decoded AST would not
// produce the same sequence. So we store their numbers separately.
//
// Relevant Decls and Idents share one numbering (declNums), assigned in
// visit order; fixupObjects rebuilds the same sequence with the same test.
declNums := map[interface{}]int{}
objNums := map[*ast.Object]int{}
ast.Inspect(f.AST, func(n ast.Node) bool {
if isRelevantDecl(n) {
if _, ok := declNums[n]; ok {
panic(fmt.Sprintf("duplicate decl %+v", n))
}
declNums[n] = len(declNums)
} else if id, ok := n.(*ast.Ident); ok {
declNums[id] = len(declNums) // remember Idents for Unresolved list.
if id.Obj != nil {
// Number each distinct Object once, in order of first appearance.
if _, ok := objNums[id.Obj]; !ok {
objNums[id.Obj] = len(objNums)
}
}
}
return true
})
// Second pass: put the numbers into Ident.Objs.
// The Decl field gets a number from the declNums map, or -1
// if it's not a relevant Decl that was seen in the first pass.
// The Data field gets a number from the objNums map. (This destroys
// whatever might be in the Data field, but doc generation doesn't care.)
ast.Inspect(f.AST, func(n ast.Node) bool {
id, ok := n.(*ast.Ident)
if !ok || id.Obj == nil {
return true
}
// A Decl that already holds an int marks an Object rewritten earlier in
// this pass (Objects can be shared by several Idents).
if _, ok := id.Obj.Decl.(int); ok { // seen this object already
return true
}
id.Obj.Type = nil // Not needed for doc gen.
id.Obj.Data, ok = objNums[id.Obj]
if !ok {
panic(fmt.Sprintf("no number for Object %v", id.Obj))
}
if d, ok := declNums[id.Obj.Decl]; ok {
id.Obj.Decl = d
} else {
// We may not have seen this Ident's Decl because the definition was
// removed from the AST, even though references remain. For example,
// an exported var initialized to a call of an unexported function.
// Ignore those by setting the Decl field to -1.
id.Obj.Decl = -1
}
return true
})
// Replace the unresolved identifiers with their numbers.
f.UnresolvedNums = nil
for _, id := range f.AST.Unresolved {
// If we can't find an identifier, assume it was in a part of the AST
// deleted by removeUnusedASTNodes, and ignore it.
if num, ok := declNums[id]; ok {
f.UnresolvedNums = append(f.UnresolvedNums, num)
}
}
f.AST.Unresolved = nil
// Remember only those scope items that have been assigned a number; the others
// are not relevant to doc (unexported functions, for instance).
// After the second pass, a numbered Object is one whose Data field holds an int.
f.ScopeItems = nil
for name, obj := range f.AST.Scope.Objects {
if num, ok := obj.Data.(int); ok {
f.ScopeItems = append(f.ScopeItems, scopeItem{name, num})
}
}
// Sort for deterministic encoding. (Map iteration order is not stable.)
sort.Slice(f.ScopeItems, func(i, j int) bool {
return f.ScopeItems[i].Name < f.ScopeItems[j].Name
})
f.AST.Scope.Objects = nil
}
// fixupObjects re-establishes the original Object and Decl relationships of the
// File.
//
// f is the result of Encode, which uses removeCycles (see above) to modify
// ast.Objects so that they are uniquely identified by their Data field, and
// refer to their Decl via a number in the Decl field. fixupObjects uses those
// values to reconstruct the same set of relationships.
//
// It also rebuilds f.AST.Unresolved from f.UnresolvedNums and
// f.AST.Scope.Objects from f.ScopeItems, then clears both of those File fields.
// f.AST.Scope must be non-nil.
//
// fixupObjects panics if an Object number is larger than the count of Objects
// seen so far, and its unchecked type assertions and slice indexing will panic
// if the stored numbers do not have the form removeCycles produces.
func fixupObjects(f *File) {
// First pass: reconstruct the numbers of every Decl and Ident.
// A node's index in decls is its number: removeCycles numbers the same set
// of nodes (Idents and relevant Decls) in the same ast.Inspect order.
var decls []ast.Node
ast.Inspect(f.AST, func(n ast.Node) bool {
if _, ok := n.(*ast.Ident); ok || isRelevantDecl(n) {
decls = append(decls, n)
}
return true
})
// Second pass: replace the numbers in Ident.Objs with the right Nodes.
// objs[i] is the canonical Object for object number i.
var objs []*ast.Object
ast.Inspect(f.AST, func(n ast.Node) bool {
id, ok := n.(*ast.Ident)
if !ok || id.Obj == nil {
return true
}
obj := id.Obj
if obj.Data == nil {
// We've seen this object already.
// Possible if fixing up without serializing/deserializing, because
// Objects are still shared in that case.
// Do nothing.
return true
}
num := obj.Data.(int)
switch {
case num < len(objs):
// We've seen this Object before.
// Point this Ident at the first copy so identity is shared again.
id.Obj = objs[num]
case num == len(objs):
// A new object; fix it up and remember it.
if obj.Decl != nil {
num := obj.Decl.(int)
// A negative Decl number (removeCycles stores -1) means there is no
// Decl node to restore; the number is left in place.
if num >= 0 {
obj.Decl = decls[num]
}
}
objs = append(objs, obj)
case num > len(objs):
panic("n > len(objs); shouldn't happen")
}
return true
})
// Fix up unresolved identifiers.
f.AST.Unresolved = make([]*ast.Ident, len(f.UnresolvedNums))
for i, num := range f.UnresolvedNums {
f.AST.Unresolved[i] = decls[num].(*ast.Ident)
}
f.UnresolvedNums = nil
// Fix up file scope objects.
f.AST.Scope.Objects = map[string]*ast.Object{}
for _, item := range f.ScopeItems {
f.AST.Scope.Objects[item.Name] = objs[item.Num]
}
f.ScopeItems = nil
}
// isRelevantDecl reports whether n is a Node for a declaration relevant to
// documentation: a function or general declaration, or a value, type or
// import spec. Any other value, including nil, is not relevant.
func isRelevantDecl(n interface{}) bool {
	switch n.(type) {
	case *ast.FuncDecl, *ast.GenDecl:
		// Top-level declarations.
		return true
	case *ast.ValueSpec, *ast.TypeSpec, *ast.ImportSpec:
		// Specs nested inside a GenDecl.
		return true
	}
	return false
}
// fastEncode encodes p with the codec package. The result is the
// fastEncodingType prefix followed by two codec values: the gob-encoded
// token.FileSet (as a []byte, see fsetToBytes) and then the package contents.
// fastDecodePackage reads the values back in the same order.
func (p *Package) fastEncode() (_ []byte, err error) {
	defer derrors.Wrap(&err, "godoc.Package.FastEncode()")

	var out bytes.Buffer
	io.WriteString(&out, fastEncodingType)

	encoder := codec.NewEncoder()
	fileSetBytes, err := fsetToBytes(p.Fset)
	if err != nil {
		return nil, err
	}
	// Order matters: the FileSet bytes come first, then the package body.
	for _, value := range []interface{}{fileSetBytes, &p.encPackage} {
		if err := encoder.Encode(value); err != nil {
			return nil, err
		}
	}
	out.Write(encoder.Bytes())
	return out.Bytes(), nil
}
// fastDecodePackage decodes data, the bytes that follow the fastEncodingType
// prefix, into a Package.
//
// data is expected to hold two codec values in order: a []byte containing the
// gob-encoded token.FileSet (see fsetFromBytes), then a *encPackage. An error
// is returned if either value fails to decode, has an unexpected type, or if
// the decoded *encPackage is nil.
func fastDecodePackage(data []byte) (_ *Package, err error) {
	defer derrors.Wrap(&err, "FastDecodePackage()")

	dec := codec.NewDecoder(data)

	x, err := dec.Decode()
	if err != nil {
		return nil, err
	}
	fsetBytes, ok := x.([]byte)
	if !ok {
		// Format x, not fsetBytes: the result of a failed assertion always has
		// the wanted static type, which would hide what was actually decoded.
		return nil, fmt.Errorf("first decoded value is %T, wanted []byte", x)
	}
	fset, err := fsetFromBytes(fsetBytes)
	if err != nil {
		return nil, err
	}

	x, err = dec.Decode()
	if err != nil {
		return nil, err
	}
	ep, ok := x.(*encPackage)
	if !ok {
		// As above, report the dynamic type of the decoded value.
		return nil, fmt.Errorf("second decoded value is %T, wanted *encPackage", x)
	}
	if ep == nil {
		// A typed nil pointer passes the assertion; fail with an error rather
		// than panicking on the dereference below.
		return nil, fmt.Errorf("second decoded value is a nil *encPackage")
	}
	return &Package{
		Fset:       fset,
		encPackage: *ep,
	}, nil
}
// fsetToBytes serializes fset with gob and returns the resulting bytes.
//
// token.FileSet uses some unexported types in its encoding, so we can't use
// our own codec for it. Instead we use gob and encode the resulting bytes.
func fsetToBytes(fset *token.FileSet) ([]byte, error) {
	out := new(bytes.Buffer)
	if err := fset.Write(gob.NewEncoder(out).Encode); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// fsetFromBytes is the inverse of fsetToBytes: it gob-decodes data into a new
// token.FileSet. If decoding fails, it returns a nil FileSet and the error.
func fsetFromBytes(data []byte) (*token.FileSet, error) {
	result := token.NewFileSet()
	decode := gob.NewDecoder(bytes.NewReader(data)).Decode
	if err := result.Read(decode); err != nil {
		return nil, err
	}
	return result, nil
}
//go:generate go run gen_ast.go
// TypesToGenerate is used by the gen program to generate encodings for
// unexported types. It currently contains a single *encPackage value.
var TypesToGenerate = []interface{}{&encPackage{}}