internal/godoc: Package and File types

Add Package and File types to the godoc package, so that all the
information for doc rendering can be in one place.

Move the existing contents of godoc.go to parse.go (pure code motion).

This is the first, partial step towards refactoring doc generation.

Change-Id: Id9891877f53ff6e1545490e65f79c2bc0119d471
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/258418
Reviewed-by: Julie Qiu <julie@golang.org>
Trust: Jonathan Amsterdam <jba@google.com>
diff --git a/internal/fetch/load.go b/internal/fetch/load.go
index 7966c50..eb9ce91 100644
--- a/internal/fetch/load.go
+++ b/internal/fetch/load.go
@@ -112,14 +112,17 @@
 	if err != nil {
 		return nil, err
 	}
+	docPkg := godoc.NewPackage(fset)
 	var allGoFiles []*ast.File
 	for _, pf := range goFiles {
 		if experiment.IsActive(ctx, internal.ExperimentRemoveUnusedAST) {
+			removeNodes := true
 			// Don't strip the seemingly unexported functions from the builtin package;
 			// they are actually Go builtins like make, new, etc.
 			if !(modulePath == stdlib.ModulePath && innerPath == "builtin") {
-				godoc.RemoveUnusedASTNodes(pf)
+				removeNodes = false
 			}
+			docPkg.AddFile(pf, removeNodes)
 		}
 		allGoFiles = append(allGoFiles, pf)
 	}
@@ -133,7 +136,7 @@
 	}
 	var src []byte
 	if experiment.IsActive(ctx, internal.ExperimentInsertPackageSource) {
-		src, err = godoc.EncodeASTFiles(fset, allGoFiles)
+		src, err = docPkg.Encode()
 		if err != nil {
 			return nil, err
 		}
diff --git a/internal/godoc/encode.go b/internal/godoc/encode.go
index 05d55c8..4efdd74 100644
--- a/internal/godoc/encode.go
+++ b/internal/godoc/encode.go
@@ -11,6 +11,8 @@
 	"go/ast"
 	"go/token"
 	"io"
+
+	"golang.org/x/pkgsite/internal/derrors"
 )
 
 // encodingType identifies the encoding being used, in case
@@ -68,50 +70,44 @@
 	}
 }
 
-// Encode fset and files into a byte slice.
-func EncodeASTFiles(fset *token.FileSet, files []*ast.File) ([]byte, error) {
+// Encode encodes a Package into a byte slice.
+func (p *Package) Encode() (_ []byte, err error) {
+	defer derrors.Wrap(&err, "godoc.Package.Encode()")
+
 	var buf bytes.Buffer
 	io.WriteString(&buf, encodingType)
 	enc := gob.NewEncoder(&buf)
 	// Encode the fset using the Write method it provides.
-	if err := fset.Write(enc.Encode); err != nil {
+	if err := p.Fset.Write(enc.Encode); err != nil {
 		return nil, err
 	}
-	// Encode each file.
-	for _, f := range files {
-		removeCycles(f)
-		if err := enc.Encode(f); err != nil {
-			return nil, err
-		}
+	if err := enc.Encode(p.Files); err != nil {
+		return nil, err
 	}
 	return buf.Bytes(), nil
 }
 
-// Decode a byte slice encoded with EncodeASTFiles into a FileSet and a list of files.
-func DecodeASTFiles(data []byte) (*token.FileSet, []*ast.File, error) {
+// DecodPackage decodes a byte slice encoded with Package.Encode into a Package.
+func DecodePackage(data []byte) (_ *Package, err error) {
+	defer derrors.Wrap(&err, "DecodePackage()")
+
 	le := len(encodingType)
 	if len(data) < le || string(data[:le]) != encodingType {
-		return nil, nil, fmt.Errorf("want initial bytes to be %q but they aren't", encodingType)
+		return nil, fmt.Errorf("want initial bytes to be %q but they aren't", encodingType)
 	}
 	dec := gob.NewDecoder(bytes.NewReader(data[le:]))
-	fset := token.NewFileSet()
-	if err := fset.Read(dec.Decode); err != nil {
-		return nil, nil, err
+	p := &Package{}
+	p.Fset = token.NewFileSet()
+	if err := p.Fset.Read(dec.Decode); err != nil {
+		return nil, err
 	}
-	var files []*ast.File
-	for {
-		var f *ast.File
-		err := dec.Decode(&f)
-		if err == io.EOF {
-			break
-		}
-		if err != nil {
-			return nil, nil, err
-		}
-		fixupObjects(f)
-		files = append(files, f)
+	if err := dec.Decode(&p.Files); err != nil {
+		return nil, err
 	}
-	return fset, files, nil
+	for _, f := range p.Files {
+		fixupObjects(f.AST)
+	}
+	return p, nil
 }
 
 // removeCycles removes cycles from f. There are two sources of cycles
diff --git a/internal/godoc/encode_test.go b/internal/godoc/encode_test.go
index 8154a3c..1433cd9 100644
--- a/internal/godoc/encode_test.go
+++ b/internal/godoc/encode_test.go
@@ -6,6 +6,7 @@
 
 import (
 	"bytes"
+	"fmt"
 	"go/ast"
 	"go/parser"
 	"go/token"
@@ -14,27 +15,26 @@
 
 func TestEncodeDecodeASTFiles(t *testing.T) {
 	// Verify that we can encode and decode the Go files in this directory.
-	files, fset, err := astFilesForDir(".")
+	p, err := packageForDir(".", true)
 	if err != nil {
 		t.Fatal(err)
 	}
 
-	data, err := EncodeASTFiles(fset, files)
+	data, err := p.Encode()
 	if err != nil {
 		t.Fatal(err)
 	}
-	gotFset, gotFiles, err := DecodeASTFiles(data)
+	p2, err := DecodePackage(data)
 	if err != nil {
 		t.Fatal(err)
 	}
-	data2, err := EncodeASTFiles(gotFset, gotFiles)
+	data2, err := p2.Encode()
 	if err != nil {
 		t.Fatal(err)
 	}
 	if !bytes.Equal(data, data2) {
 		t.Fatal("datas unequal")
 	}
-
 }
 
 func TestObjectIdentity(t *testing.T) {
@@ -63,64 +63,58 @@
 	}
 	compareObjs(f)
 
-	data, err := EncodeASTFiles(fset, []*ast.File{f})
+	p := NewPackage(fset)
+	p.AddFile(f, false)
+	data, err := p.Encode()
 	if err != nil {
 		t.Fatal(err)
 	}
-	_, files, err := DecodeASTFiles(data)
+	p, err = DecodePackage(data)
 	if err != nil {
 		t.Fatal(err)
 	}
-	compareObjs(files[0])
+	compareObjs(p.Files[0].AST)
+}
+
+func packageForDir(dir string, removeNodes bool) (*Package, error) {
+	fset := token.NewFileSet()
+	pkgs, err := parser.ParseDir(fset, dir, nil, parser.ParseComments)
+	if err != nil {
+		return nil, err
+	}
+	p := NewPackage(fset)
+	for _, pkg := range pkgs {
+		for _, f := range pkg.Files {
+			p.AddFile(f, removeNodes)
+		}
+	}
+	return p, nil
 }
 
 // Compare the time to decode AST files with and without
 // removing parts of the AST not relevant to documentation.
 //
 // Run on a cloudtop 9/29/2020:
-// - data size is 3x smaller
-// - decode time is 3.5x faster
+// - data size is 3.5x smaller
+// - decode time is 4.5x faster
 func BenchmarkRemovingAST(b *testing.B) {
-	files, fset, err := astFilesForDir(".")
-	if err != nil {
-		b.Fatal(err)
-	}
-
-	run := func(name string) {
-		data, err := EncodeASTFiles(fset, files)
-		if err != nil {
-			b.Fatal(err)
-		}
-		b.Logf("len(data) = %d", len(data))
-		b.Run(name, func(b *testing.B) {
+	for _, removeNodes := range []bool{false, true} {
+		b.Run(fmt.Sprintf("removeNodes=%t", removeNodes), func(b *testing.B) {
+			p, err := packageForDir(".", removeNodes)
+			if err != nil {
+				b.Fatal(err)
+			}
+			data, err := p.Encode()
+			if err != nil {
+				b.Fatal(err)
+			}
+			b.Logf("len(data) = %d", len(data))
+			b.ResetTimer()
 			for i := 0; i < b.N; i++ {
-				_, _, err := DecodeASTFiles(data)
-				if err != nil {
+				if _, err := DecodePackage(data); err != nil {
 					b.Fatal(err)
 				}
 			}
 		})
 	}
-
-	run("not removed")
-
-	for _, f := range files {
-		RemoveUnusedASTNodes(f)
-	}
-	run("removed")
-}
-
-func astFilesForDir(dir string) ([]*ast.File, *token.FileSet, error) {
-	fset := token.NewFileSet()
-	pkgs, err := parser.ParseDir(fset, dir, nil, parser.ParseComments)
-	if err != nil {
-		return nil, nil, err
-	}
-	var files []*ast.File
-	for _, p := range pkgs {
-		for _, f := range p.Files {
-			files = append(files, f)
-		}
-	}
-	return files, fset, nil
 }
diff --git a/internal/godoc/godoc.go b/internal/godoc/godoc.go
index b3cb983..78c7ac6 100644
--- a/internal/godoc/godoc.go
+++ b/internal/godoc/godoc.go
@@ -2,84 +2,47 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package godoc returns a section of the dochtml, based on the template from
-// internal/fetch/dochtml.
-//
-// This package is intended to aid frontend development for the unit page,
-// and is a temporary solution for prototyping the new designs. It is not
-// intended for use in production.
+// Package godoc is for rendering Go documentation.
 package godoc
 
 import (
-	"fmt"
 	"go/ast"
-	"html"
-	"regexp"
-
-	"github.com/google/safehtml"
-	"github.com/google/safehtml/uncheckedconversions"
-	"golang.org/x/pkgsite/internal/derrors"
-	"golang.org/x/pkgsite/internal/fetch/dochtml"
+	"go/token"
 )
 
-// SectionType is a section of the docHTML.
-type SectionType uint32
-
-const (
-	// SidenavSection is the section of the docHTML starting at
-	// dochtml.IdentifierSidenav and ending at the last </nav> tag in the
-	// docHTML. This is inclusive of SideNavMobile.
-	SidenavSection SectionType = iota
-	// SidenavMobileSection is the section of the docHTML starting at
-	// dochtml.IdentifierMobileNavStart and ending at the last </nav> tag in the
-	// docHTML.
-	SidenavMobileSection
-	// BodySection is the section of the docHTML starting at
-	// dochtml.IdentifierBody and ending at the last </div> tag in the
-	// docHTML.
-	BodySection
-)
-
-// Parse return the section of docHTML specified by section. It is expected that
-// docHTML was generated using the template in internal/fetch/dochtml.
-func Parse(docHTML safehtml.HTML, section SectionType) (_ safehtml.HTML, err error) {
-	defer derrors.Wrap(&err, "Parse(docHTML, %q)", section)
-	switch section {
-	case SidenavSection:
-		return findHTML(docHTML, dochtml.IdentifierSidenavStart)
-	case SidenavMobileSection:
-		return findHTML(docHTML, dochtml.IdentifierSidenavMobileStart)
-	case BodySection:
-		return findHTML(docHTML, dochtml.IdentifierBodyStart)
-	default:
-		return safehtml.HTML{}, derrors.NotFound
-	}
+// A package contains everything needed to render Go documentation for a package.
+type Package struct {
+	Fset  *token.FileSet
+	Files []*File
 }
 
-func findHTML(docHTML safehtml.HTML, identifier string) (_ safehtml.HTML, err error) {
-	defer derrors.Wrap(&err, "findHTML(%q)", identifier)
-	var closeTag string
-	switch identifier {
-	case dochtml.IdentifierBodyStart:
-		closeTag = dochtml.IdentifierBodyEnd
-	default:
-		closeTag = dochtml.IdentifierSidenavEnd
-	}
-
-	// The regex is greedy, so it will capture the last matching closeTag in
-	// the docHTML.
-	//
-	// For the sidenav, this will capture both the mobile and main
-	// sidenav sections. For the body, this will capture all content up the
-	// last </div> tag in the HTML.
-	reg := fmt.Sprintf("(%s(.|\n)*%s)", identifier, closeTag)
-	b := regexp.MustCompile(reg).FindString(html.UnescapeString(docHTML.String()))
-	return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(string(b)), nil
+// A File contains everything needed about a source file to render documentation.
+type File struct {
+	Name string // full file pathname relative to zip content directory
+	AST  *ast.File
 }
 
-// RemoveUnusedASTNodes removes parts of the AST not needed for documentation.
+// NewPackage returns a new Package with the given fset.
+func NewPackage(fset *token.FileSet) *Package {
+	return &Package{Fset: fset}
+}
+
+// AddFile adds a file to the Package. After it returns, the contents of the ast.File
+// are unsuitable for anything other than the methods of this package.
+func (p *Package) AddFile(f *ast.File, removeNodes bool) {
+	if removeNodes {
+		removeUnusedASTNodes(f)
+	}
+	removeCycles(f)
+	p.Files = append(p.Files, &File{
+		Name: p.Fset.Position(f.Package).Filename,
+		AST:  f,
+	})
+}
+
+// removeUnusedASTNodes removes parts of the AST not needed for documentation.
 // It doesn't remove unexported consts, vars or types, although it probably could.
-func RemoveUnusedASTNodes(pf *ast.File) {
+func removeUnusedASTNodes(pf *ast.File) {
 	var decls []ast.Decl
 	for _, d := range pf.Decls {
 		if f, ok := d.(*ast.FuncDecl); ok {
diff --git a/internal/godoc/godoc_test.go b/internal/godoc/godoc_test.go
index 738eaa0..876f96d 100644
--- a/internal/godoc/godoc_test.go
+++ b/internal/godoc/godoc_test.go
@@ -12,44 +12,8 @@
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
-	"github.com/google/safehtml"
 )
 
-func TestParse(t *testing.T) {
-	for _, test := range []struct {
-		name, want string
-		section    SectionType
-	}{
-		{
-			name:    "sidenav",
-			section: SidenavSection,
-			want:    quoteSidenav,
-		},
-		{
-			name:    "sidenav-mobile",
-			section: SidenavMobileSection,
-			want:    quoteSidenavMobile,
-		},
-		{
-			name:    "body",
-			section: BodySection,
-			want:    quoteBody,
-		},
-	} {
-		{
-			t.Run(test.name, func(t *testing.T) {
-				got, err := Parse(safehtml.HTMLEscaped(quoteDocHTML), test.section)
-				if err != nil {
-					t.Fatal(err)
-				}
-				if diff := cmp.Diff(test.want, got.String()); diff != "" {
-					t.Errorf("mismatch (-want +got):\n%s", diff)
-				}
-			})
-		}
-	}
-}
-
 func TestRemoveUnusedASTNodes(t *testing.T) {
 	const file = `
 // Package-level comment.
@@ -132,7 +96,7 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	RemoveUnusedASTNodes(astFile)
+	removeUnusedASTNodes(astFile)
 	var buf bytes.Buffer
 	if err := format.Node(&buf, fset, astFile); err != nil {
 		t.Fatal(err)
diff --git a/internal/godoc/parse.go b/internal/godoc/parse.go
new file mode 100644
index 0000000..d2e4f04
--- /dev/null
+++ b/internal/godoc/parse.go
@@ -0,0 +1,75 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// The functions in this file are intended to aid frontend development for the
+// unit page, and is a temporary solution for prototyping the new designs. It is
+// not intended for use in production.
+
+package godoc
+
+import (
+	"fmt"
+	"html"
+	"regexp"
+
+	"github.com/google/safehtml"
+	"github.com/google/safehtml/uncheckedconversions"
+	"golang.org/x/pkgsite/internal/derrors"
+	"golang.org/x/pkgsite/internal/fetch/dochtml"
+)
+
+// SectionType is a section of the docHTML.
+type SectionType uint32
+
+const (
+	// SidenavSection is the section of the docHTML starting at
+	// dochtml.IdentifierSidenav and ending at the last </nav> tag in the
+	// docHTML. This is inclusive of SideNavMobile.
+	SidenavSection SectionType = iota
+	// SidenavMobileSection is the section of the docHTML starting at
+	// dochtml.IdentifierMobileNavStart and ending at the last </nav> tag in the
+	// docHTML.
+	SidenavMobileSection
+	// BodySection is the section of the docHTML starting at
+	// dochtml.IdentifierBody and ending at the last </div> tag in the
+	// docHTML.
+	BodySection
+)
+
+// Parse return the section of docHTML specified by section. It is expected that
+// docHTML was generated using the template in internal/fetch/dochtml.
+func Parse(docHTML safehtml.HTML, section SectionType) (_ safehtml.HTML, err error) {
+	defer derrors.Wrap(&err, "Parse(docHTML, %q)", section)
+	switch section {
+	case SidenavSection:
+		return findHTML(docHTML, dochtml.IdentifierSidenavStart)
+	case SidenavMobileSection:
+		return findHTML(docHTML, dochtml.IdentifierSidenavMobileStart)
+	case BodySection:
+		return findHTML(docHTML, dochtml.IdentifierBodyStart)
+	default:
+		return safehtml.HTML{}, derrors.NotFound
+	}
+}
+
+func findHTML(docHTML safehtml.HTML, identifier string) (_ safehtml.HTML, err error) {
+	defer derrors.Wrap(&err, "findHTML(%q)", identifier)
+	var closeTag string
+	switch identifier {
+	case dochtml.IdentifierBodyStart:
+		closeTag = dochtml.IdentifierBodyEnd
+	default:
+		closeTag = dochtml.IdentifierSidenavEnd
+	}
+
+	// The regex is greedy, so it will capture the last matching closeTag in
+	// the docHTML.
+	//
+	// For the sidenav, this will capture both the mobile and main
+	// sidenav sections. For the body, this will capture all content up the
+	// last </div> tag in the HTML.
+	reg := fmt.Sprintf("(%s(.|\n)*%s)", identifier, closeTag)
+	b := regexp.MustCompile(reg).FindString(html.UnescapeString(docHTML.String()))
+	return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(string(b)), nil
+}
diff --git a/internal/godoc/parse_test.go b/internal/godoc/parse_test.go
new file mode 100644
index 0000000..8ddfab2
--- /dev/null
+++ b/internal/godoc/parse_test.go
@@ -0,0 +1,47 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package godoc
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/safehtml"
+)
+
+func TestParse(t *testing.T) {
+	for _, test := range []struct {
+		name, want string
+		section    SectionType
+	}{
+		{
+			name:    "sidenav",
+			section: SidenavSection,
+			want:    quoteSidenav,
+		},
+		{
+			name:    "sidenav-mobile",
+			section: SidenavMobileSection,
+			want:    quoteSidenavMobile,
+		},
+		{
+			name:    "body",
+			section: BodySection,
+			want:    quoteBody,
+		},
+	} {
+		{
+			t.Run(test.name, func(t *testing.T) {
+				got, err := Parse(safehtml.HTMLEscaped(quoteDocHTML), test.section)
+				if err != nil {
+					t.Fatal(err)
+				}
+				if diff := cmp.Diff(test.want, got.String()); diff != "" {
+					t.Errorf("mismatch (-want +got):\n%s", diff)
+				}
+			})
+		}
+	}
+}
diff --git a/internal/unit.go b/internal/unit.go
index c9b5f7f..d916530 100644
--- a/internal/unit.go
+++ b/internal/unit.go
@@ -66,7 +66,7 @@
 	GOARCH   string
 	Synopsis string
 	HTML     safehtml.HTML
-	Source   []byte // encoded ast.Files; see fetch.EncodeASTFiles
+	Source   []byte // encoded ast.Files; see godoc.Package.Encode
 }
 
 // Readme is a README at the specified filepath.