internal/godoc: fix AST encoding
I was mistaken about documentation rendering not needing the Decl
field of ast.Objects: it does.
That means we have to be cleverer about encoding the AST, preserving
not just object identity but the Decl fields as well.
Also added a test to verify that the generated doc is the same.
Change-Id: Ia8af831437261d081a412423160c7ce0436d2c5e
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/258637
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/godoc/encode.go b/internal/godoc/encode.go
index 004221d..7894003 100644
--- a/internal/godoc/encode.go
+++ b/internal/godoc/encode.go
@@ -7,6 +7,7 @@
import (
"bytes"
"encoding/gob"
+ "errors"
"fmt"
"go/ast"
"go/token"
@@ -78,6 +79,10 @@
func (p *Package) Encode() (_ []byte, err error) {
defer derrors.Wrap(&err, "godoc.Package.Encode()")
+ if p.renderCalled {
+ return nil, errors.New("can't Encode after Render")
+ }
+
for _, f := range p.Files {
removeCycles(f.AST)
}
@@ -125,59 +130,132 @@
//
// removeCycles removes all Scopes, since doc generation doesn't use them. Doc
// generation does use Objects, and it needs object identity to be preserved
-// (see internal/fetch/internal/doc/example.go). But it doesn't need the Decl,
-// Data or Type fields of ast.Object, which are responsible for cycles.
+// (see internal/doc/example.go). It also needs the Object.Decl field, to create
+// anchor links (see dochtml/internal/render/idents.go). The Object.Decl field
+// is responsible for cycles. Doc generation It doesn't need the Data or Type
+// fields of Object.
//
-// If we just nulled out those three fields, there would be no cycles, but we
-// wouldn't preserve Object identity when we decoded. For example, if ast.Idents
-// A and B both pointed to the same Object, gob would write them as two separate
-// objects, and decoding would preserve that. (See TestObjectIdentity for
-// a small example of this sort of sharing.)
+// We need to break the cycles, and preserve Object identity when decoding. For
+// an example of the latter, if ast.Idents A and B both pointed to the same
+// Object, gob would write them as two separate objects, and decoding would
+// preserve that. (See TestObjectIdentity for a small example of this sort of
+// sharing.)
//
-// So after nulling out those fields, we place a unique integer into the Decl
-// field if one isn't there already. (Decl would never normally hold an int.)
-// That serves to give a unique label to each object, which decoding can use
-// to reconstruct the original set of relationships.
+// We solve both problems by assigning numbers to Decls and Objects. We first
+// walk through the AST to assign the numbers, then walk it again to put the
+// numbers into Ident.Objs. We take advantage of the fact that the Data and Decl
+// fields are of type interface{}, storing the object number into Data and the
+// Decl number into Decl.
func removeCycles(f *ast.File) {
- next := 0
+ f.Scope.Objects = nil // doc doesn't use scopes
+
+ // First pass: assign every Decl and Spec a number.
+ // Since these aren't shared and Inspect is deterministic,
+ // this walk will produce the same sequence of Decls after encoding/decoding.
+ // Also assign a unique number to each Object we find in an Ident.
+ // Objects may be shared; traversing the decoded AST would not
+ // produce the same sequence. So we store their numbers separately.
+ declNums := map[interface{}]int{}
+ objNums := map[*ast.Object]int{}
ast.Inspect(f, func(n ast.Node) bool {
- switch n := n.(type) {
- case *ast.File:
- n.Scope.Objects = nil // doc doesn't use scopes
- case *ast.Ident:
- if n.Obj != nil {
- if _, ok := n.Obj.Decl.(int); !ok {
- n.Obj.Data = nil
- n.Obj.Type = nil
- n.Obj.Decl = next
- next++
- }
+ if isRelevantDecl(n) {
+ if _, ok := declNums[n]; ok {
+ panic(fmt.Sprintf("duplicate decl %+v", n))
}
+ declNums[n] = len(declNums)
+ } else if id, ok := n.(*ast.Ident); ok && id.Obj != nil {
+ if _, ok := objNums[id.Obj]; !ok {
+ objNums[id.Obj] = len(objNums)
+ }
+ }
+ return true
+ })
+
+ // Second pass: put the numbers into Ident.Objs.
+ // The Decl field gets a number from the declNums map, or nil
+ // if it's not a relevant Decl.
+ // The Data field gets a number from the objNums map. (This destroys
+ // whatever might be in the Data field, but doc generation doesn't care.)
+ ast.Inspect(f, func(n ast.Node) bool {
+ id, ok := n.(*ast.Ident)
+ if !ok || id.Obj == nil {
+ return true
+ }
+ if _, ok := id.Obj.Decl.(int); ok { // seen this object already
+ return true
+ }
+ id.Obj.Type = nil // Not needed for doc gen.
+ id.Obj.Data, ok = objNums[id.Obj]
+ if !ok {
+ panic(fmt.Sprintf("no number for Object %v", id.Obj))
+ }
+ d, ok := declNums[id.Obj.Decl]
+ if !ok && isRelevantDecl(id.Obj.Decl) {
+ panic(fmt.Sprintf("no number for Decl %v", id.Obj.Decl))
+ }
+ id.Obj.Decl = d
+ return true
+ })
+}
+
+// fixupObjects re-establishes the original Object and Decl relationships of the
+// ast.File f.
+//
+// f is the result of EncodeASTFiles, which uses removeCycles (see above) to
+// modify ast.Objects so that they are uniquely identified by their Data field,
+// and refer to their Decl via a number in the Decl field. fixupObjects uses
+// those values to reconstruct the same set of relationships.
+func fixupObjects(f *ast.File) {
+ // First pass: reconstruct the numbers of every Decl.
+ var decls []ast.Node
+ ast.Inspect(f, func(n ast.Node) bool {
+ if isRelevantDecl(n) {
+ decls = append(decls, n)
+ }
+ return true
+ })
+
+ // Second pass: replace the numbers in Ident.Objs with the right Nodes.
+ var objs []*ast.Object
+ ast.Inspect(f, func(n ast.Node) bool {
+ id, ok := n.(*ast.Ident)
+ if !ok || id.Obj == nil {
+ return true
+ }
+ obj := id.Obj
+ if obj.Data == nil {
+ // We've seen this object already.
+ // Possible if fixing up without serializing/deserializing, because
+ // Objects are still shared in that case.
+ // Do nothing.
+ return true
+ }
+ num := obj.Data.(int)
+ switch {
+ case num < len(objs):
+ // We've seen this Object before.
+ id.Obj = objs[num]
+ case num == len(objs):
+ // A new object; fix it up and remember it.
+ obj.Data = nil
+ if obj.Decl != nil {
+ obj.Decl = decls[obj.Decl.(int)]
+ }
+ objs = append(objs, obj)
+ case num > len(objs):
+ panic("n > len(objs); shouldn't happen")
}
return true
})
}
-// fixupObjects re-establishes the original object relationships of the ast.File f.
-//
-// f is the result of EncodeASTFiles, which uses removeCycles (see above) to
-// modify ast.Objects so that they are uniquely identified by their Decl field.
-// fixupObjects uses that value to reconstruct the same set of relationships.
-func fixupObjects(f *ast.File) {
- objs := map[int]*ast.Object{}
- ast.Inspect(f, func(n ast.Node) bool {
- if id, ok := n.(*ast.Ident); ok {
- if id.Obj != nil {
- n := id.Obj.Decl.(int)
- if obj := objs[n]; obj != nil {
- // If we've seen object n before, then id.Obj should be the same object.
- id.Obj = obj
- } else {
- // If we haven't seen object n before, then remember it.
- objs[n] = id.Obj
- }
- }
- }
+// isRelevantDecl reports whether n is a Node for a declaration relevant to
+// documentation.
+func isRelevantDecl(n interface{}) bool {
+ switch n.(type) {
+ case *ast.FuncDecl, *ast.GenDecl, *ast.ValueSpec, *ast.TypeSpec, *ast.ImportSpec:
return true
- })
+ default:
+ return false
+ }
}
diff --git a/internal/godoc/godoc.go b/internal/godoc/godoc.go
index a262b5a..fac3e4d 100644
--- a/internal/godoc/godoc.go
+++ b/internal/godoc/godoc.go
@@ -21,6 +21,7 @@
type Package struct {
Fset *token.FileSet
gobPackage
+ renderCalled bool
}
type gobPackage struct { // fields that can be directly gob-encoded
diff --git a/internal/godoc/render.go b/internal/godoc/render.go
index 1199efe..b223d30 100644
--- a/internal/godoc/render.go
+++ b/internal/godoc/render.go
@@ -41,6 +41,8 @@
// This is mostly copied from internal/fetch/fetch.go.
defer derrors.Wrap(&err, "godoc.Package.Render(%q, %q, %q, %q, %q)", modInfo.ModulePath, modInfo.ResolvedVersion, innerPath, goos, goarch)
+ p.renderCalled = true
+
importPath := path.Join(modInfo.ModulePath, innerPath)
if modInfo.ModulePath == stdlib.ModulePath {
importPath = innerPath
diff --git a/internal/godoc/render_test.go b/internal/godoc/render_test.go
index d0c355f..fd59b4c 100644
--- a/internal/godoc/render_test.go
+++ b/internal/godoc/render_test.go
@@ -63,4 +63,20 @@
t.Fatal(err)
}
check(p)
+
+ // Verify that encoding then decoding generates the same doc.
+ // We can't re-use p to encode because it's been rendered.
+ p, err = packageForDir(filepath.Join("testdata", "p"), false)
+ if err != nil {
+ t.Fatal(err)
+ }
+ bytes, err := p.Encode()
+ if err != nil {
+ t.Fatal(err)
+ }
+ p2, err := DecodePackage(bytes)
+ if err != nil {
+ t.Fatal(err)
+ }
+ check(p2)
}