internal/godoc: fix AST encoding

I was mistaken about documentation rendering not needing the Decl
field of ast.Objects: it does.

That means we have to be cleverer about encoding the AST, preserving
not just object identity but the Decl fields as well.

Also added a test to verify that the generated doc is the same.

Change-Id: Ia8af831437261d081a412423160c7ce0436d2c5e
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/258637
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/godoc/encode.go b/internal/godoc/encode.go
index 004221d..7894003 100644
--- a/internal/godoc/encode.go
+++ b/internal/godoc/encode.go
@@ -7,6 +7,7 @@
 import (
 	"bytes"
 	"encoding/gob"
+	"errors"
 	"fmt"
 	"go/ast"
 	"go/token"
@@ -78,6 +79,10 @@
 func (p *Package) Encode() (_ []byte, err error) {
 	defer derrors.Wrap(&err, "godoc.Package.Encode()")
 
+	if p.renderCalled {
+		return nil, errors.New("can't Encode after Render")
+	}
+
 	for _, f := range p.Files {
 		removeCycles(f.AST)
 	}
@@ -125,59 +130,132 @@
 //
 // removeCycles removes all Scopes, since doc generation doesn't use them. Doc
 // generation does use Objects, and it needs object identity to be preserved
-// (see internal/fetch/internal/doc/example.go). But it doesn't need the Decl,
-// Data or Type fields of ast.Object, which are responsible for cycles.
+// (see internal/doc/example.go). It also needs the Object.Decl field, to create
+// anchor links (see dochtml/internal/render/idents.go). The Object.Decl field
+// is responsible for cycles. Doc generation It doesn't need the Data or Type
+// fields of Object.
 //
-// If we just nulled out those three fields, there would be no cycles, but we
-// wouldn't preserve Object identity when we decoded. For example, if ast.Idents
-// A and B both pointed to the same Object, gob would write them as two separate
-// objects, and decoding would preserve that. (See TestObjectIdentity for
-// a small example of this sort of sharing.)
+// We need to break the cycles, and preserve Object identity when decoding. For
+// an example of the latter, if ast.Idents A and B both pointed to the same
+// Object, gob would write them as two separate objects, and decoding would
+// preserve that. (See TestObjectIdentity for a small example of this sort of
+// sharing.)
 //
-// So after nulling out those fields, we place a unique integer into the Decl
-// field if one isn't there already. (Decl would never normally hold an int.)
-// That serves to give a unique label to each object, which decoding can use
-// to reconstruct the original set of relationships.
+// We solve both problems by assigning numbers to Decls and Objects. We first
+// walk through the AST to assign the numbers, then walk it again to put the
+// numbers into Ident.Objs. We take advantage of the fact that the Data and Decl
+// fields are of type interface{}, storing the object number into Data and the
+// Decl number into Decl.
 func removeCycles(f *ast.File) {
-	next := 0
+	f.Scope.Objects = nil // doc doesn't use scopes
+
+	// First pass: assign every Decl and Spec a number.
+	// Since these aren't shared and Inspect is deterministic,
+	// this walk will produce the same sequence of Decls after encoding/decoding.
+	// Also assign a unique number to each Object we find in an Ident.
+	// Objects may be shared; traversing the decoded AST would not
+	// produce the same sequence. So we store their numbers separately.
+	declNums := map[interface{}]int{}
+	objNums := map[*ast.Object]int{}
 	ast.Inspect(f, func(n ast.Node) bool {
-		switch n := n.(type) {
-		case *ast.File:
-			n.Scope.Objects = nil // doc doesn't use scopes
-		case *ast.Ident:
-			if n.Obj != nil {
-				if _, ok := n.Obj.Decl.(int); !ok {
-					n.Obj.Data = nil
-					n.Obj.Type = nil
-					n.Obj.Decl = next
-					next++
-				}
+		if isRelevantDecl(n) {
+			if _, ok := declNums[n]; ok {
+				panic(fmt.Sprintf("duplicate decl %+v", n))
 			}
+			declNums[n] = len(declNums)
+		} else if id, ok := n.(*ast.Ident); ok && id.Obj != nil {
+			if _, ok := objNums[id.Obj]; !ok {
+				objNums[id.Obj] = len(objNums)
+			}
+		}
+		return true
+	})
+
+	// Second pass: put the numbers into Ident.Objs.
+	// The Decl field gets a number from the declNums map, or nil
+	// if it's not a relevant Decl.
+	// The Data field gets a number from the objNums map. (This destroys
+	// whatever might be in the Data field, but doc generation doesn't care.)
+	ast.Inspect(f, func(n ast.Node) bool {
+		id, ok := n.(*ast.Ident)
+		if !ok || id.Obj == nil {
+			return true
+		}
+		if _, ok := id.Obj.Decl.(int); ok { // seen this object already
+			return true
+		}
+		id.Obj.Type = nil // Not needed for doc gen.
+		id.Obj.Data, ok = objNums[id.Obj]
+		if !ok {
+			panic(fmt.Sprintf("no number for Object %v", id.Obj))
+		}
+		d, ok := declNums[id.Obj.Decl]
+		if !ok && isRelevantDecl(id.Obj.Decl) {
+			panic(fmt.Sprintf("no number for Decl %v", id.Obj.Decl))
+		}
+		id.Obj.Decl = d
+		return true
+	})
+}
+
+// fixupObjects re-establishes the original Object and Decl relationships of the
+// ast.File f.
+//
+// f is the result of EncodeASTFiles, which uses removeCycles (see above) to
+// modify ast.Objects so that they are uniquely identified by their Data field,
+// and refer to their Decl via a number in the Decl field. fixupObjects uses
+// those values to reconstruct the same set of relationships.
+func fixupObjects(f *ast.File) {
+	// First pass: reconstruct the numbers of every Decl.
+	var decls []ast.Node
+	ast.Inspect(f, func(n ast.Node) bool {
+		if isRelevantDecl(n) {
+			decls = append(decls, n)
+		}
+		return true
+	})
+
+	// Second pass: replace the numbers in Ident.Objs with the right Nodes.
+	var objs []*ast.Object
+	ast.Inspect(f, func(n ast.Node) bool {
+		id, ok := n.(*ast.Ident)
+		if !ok || id.Obj == nil {
+			return true
+		}
+		obj := id.Obj
+		if obj.Data == nil {
+			// We've seen this object already.
+			// Possible if fixing up without serializing/deserializing, because
+			// Objects are still shared in that case.
+			// Do nothing.
+			return true
+		}
+		num := obj.Data.(int)
+		switch {
+		case num < len(objs):
+			// We've seen this Object before.
+			id.Obj = objs[num]
+		case num == len(objs):
+			// A new object; fix it up and remember it.
+			obj.Data = nil
+			if obj.Decl != nil {
+				obj.Decl = decls[obj.Decl.(int)]
+			}
+			objs = append(objs, obj)
+		case num > len(objs):
+			panic("n > len(objs); shouldn't happen")
 		}
 		return true
 	})
 }
 
-// fixupObjects re-establishes the original object relationships of the ast.File f.
-//
-// f is the result of EncodeASTFiles, which uses removeCycles (see above) to
-// modify ast.Objects so that they are uniquely identified by their Decl field.
-// fixupObjects uses that value to reconstruct the same set of relationships.
-func fixupObjects(f *ast.File) {
-	objs := map[int]*ast.Object{}
-	ast.Inspect(f, func(n ast.Node) bool {
-		if id, ok := n.(*ast.Ident); ok {
-			if id.Obj != nil {
-				n := id.Obj.Decl.(int)
-				if obj := objs[n]; obj != nil {
-					// If we've seen object n before, then id.Obj should be the same object.
-					id.Obj = obj
-				} else {
-					// If we haven't seen object n before, then remember it.
-					objs[n] = id.Obj
-				}
-			}
-		}
+// isRelevantDecl reports whether n is a Node for a declaration relevant to
+// documentation.
+func isRelevantDecl(n interface{}) bool {
+	switch n.(type) {
+	case *ast.FuncDecl, *ast.GenDecl, *ast.ValueSpec, *ast.TypeSpec, *ast.ImportSpec:
 		return true
-	})
+	default:
+		return false
+	}
 }
diff --git a/internal/godoc/godoc.go b/internal/godoc/godoc.go
index a262b5a..fac3e4d 100644
--- a/internal/godoc/godoc.go
+++ b/internal/godoc/godoc.go
@@ -21,6 +21,7 @@
 type Package struct {
 	Fset *token.FileSet
 	gobPackage
+	renderCalled bool
 }
 
 type gobPackage struct { // fields that can be directly gob-encoded
diff --git a/internal/godoc/render.go b/internal/godoc/render.go
index 1199efe..b223d30 100644
--- a/internal/godoc/render.go
+++ b/internal/godoc/render.go
@@ -41,6 +41,8 @@
 	// This is mostly copied from internal/fetch/fetch.go.
 	defer derrors.Wrap(&err, "godoc.Package.Render(%q, %q, %q, %q, %q)", modInfo.ModulePath, modInfo.ResolvedVersion, innerPath, goos, goarch)
 
+	p.renderCalled = true
+
 	importPath := path.Join(modInfo.ModulePath, innerPath)
 	if modInfo.ModulePath == stdlib.ModulePath {
 		importPath = innerPath
diff --git a/internal/godoc/render_test.go b/internal/godoc/render_test.go
index d0c355f..fd59b4c 100644
--- a/internal/godoc/render_test.go
+++ b/internal/godoc/render_test.go
@@ -63,4 +63,20 @@
 		t.Fatal(err)
 	}
 	check(p)
+
+	// Verify that encoding then decoding generates the same doc.
+	// We can't re-use p to encode because it's been rendered.
+	p, err = packageForDir(filepath.Join("testdata", "p"), false)
+	if err != nil {
+		t.Fatal(err)
+	}
+	bytes, err := p.Encode()
+	if err != nil {
+		t.Fatal(err)
+	}
+	p2, err := DecodePackage(bytes)
+	if err != nil {
+		t.Fatal(err)
+	}
+	check(p2)
 }