internal/fetch: read zip files only once

Now that we visit all the build contexts every time, pull out the
reading of zip contents so it happens once, instead of for each build
context.

Change-Id: I65ee0f8fb947dfe8ddcaf6de854577899bcdfad6
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/279796
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Jamal Carvalho <jamal@golang.org>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/fetch/load.go b/internal/fetch/load.go
index be5b7ad..638331e 100644
--- a/internal/fetch/load.go
+++ b/internal/fetch/load.go
@@ -66,8 +66,19 @@
 	ctx, span := trace.StartSpan(ctx, "fetch.loadPackage")
 	defer span.End()
 	var pkgs []*goPackage
+	// Make a map with all the zip file contents.
+	files := make(map[string][]byte)
+	for _, f := range zipGoFiles {
+		_, name := path.Split(f.Name)
+		b, err := readZipFile(f, MaxFileSize)
+		if err != nil {
+			return nil, err
+		}
+		files[name] = b
+	}
+
 	for _, env := range goEnvs {
-		pkg, err := loadPackageWithBuildContext(ctx, env.GOOS, env.GOARCH, zipGoFiles, innerPath, sourceInfo, modInfo)
+		pkg, err := loadPackageWithBuildContext(ctx, env.GOOS, env.GOARCH, files, innerPath, sourceInfo, modInfo)
 		if err != nil && !errors.Is(err, godoc.ErrTooLarge) && !errors.Is(err, derrors.NotFound) {
 			return nil, err
 		}
@@ -126,12 +137,12 @@
 // or all .go files have been excluded by constraints.
 // A *BadPackageError error is returned if the directory
 // contains .go files but do not make up a valid package.
-func loadPackageWithBuildContext(ctx context.Context, goos, goarch string, zipGoFiles []*zip.File, innerPath string, sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (_ *goPackage, err error) {
+func loadPackageWithBuildContext(ctx context.Context, goos, goarch string, files map[string][]byte, innerPath string, sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (_ *goPackage, err error) {
 	modulePath := modInfo.ModulePath
-	defer derrors.Wrap(&err, "loadPackageWithBuildContext(%q, %q, zipGoFiles, %q, %q, %+v)",
+	defer derrors.Wrap(&err, "loadPackageWithBuildContext(%q, %q, files, %q, %q, %+v)",
 		goos, goarch, innerPath, modulePath, sourceInfo)
 
-	packageName, goFiles, fset, err := loadFilesWithBuildContext(innerPath, goos, goarch, zipGoFiles)
+	packageName, goFiles, fset, err := loadFilesWithBuildContext(innerPath, goos, goarch, files)
 	if err != nil {
 		return nil, err
 	}
@@ -180,9 +191,9 @@
 // and goarch in the zip. It returns the package name as it occurs in the
 // source, a map of the ASTs of all the Go files, and the token.FileSet used for
 // parsing.
-func loadFilesWithBuildContext(innerPath, goos, goarch string, zipGoFiles []*zip.File) (pkgName string, fileMap map[string]*ast.File, _ *token.FileSet, _ error) {
+func loadFilesWithBuildContext(innerPath, goos, goarch string, allFiles map[string][]byte) (pkgName string, fileMap map[string]*ast.File, _ *token.FileSet, _ error) {
 	// Apply build constraints to get a map from matching file names to their contents.
-	files, err := matchingFiles(goos, goarch, zipGoFiles)
+	files, err := matchingFiles(goos, goarch, allFiles)
 	if err != nil {
 		return "", nil, nil, err
 	}
@@ -232,18 +243,8 @@
 
 // matchingFiles returns a map from file names to their contents, read from zipGoFiles.
 // It includes only those files that match the build context determined by goos and goarch.
-func matchingFiles(goos, goarch string, zipGoFiles []*zip.File) (files map[string][]byte, err error) {
+func matchingFiles(goos, goarch string, allFiles map[string][]byte) (matchedFiles map[string][]byte, err error) {
 	defer derrors.Wrap(&err, "matchingFiles(%q, %q, zipGoFiles)", goos, goarch)
-	// Populate the map with all the zip files.
-	files = make(map[string][]byte)
-	for _, f := range zipGoFiles {
-		_, name := path.Split(f.Name)
-		b, err := readZipFile(f, MaxFileSize)
-		if err != nil {
-			return nil, err
-		}
-		files[name] = b
-	}
 
 	// bctx is used to make decisions about which of the .go files are included
 	// by build constraints.
@@ -256,7 +257,7 @@
 
 		JoinPath: path.Join,
 		OpenFile: func(name string) (io.ReadCloser, error) {
-			return ioutil.NopCloser(bytes.NewReader(files[name])), nil
+			return ioutil.NopCloser(bytes.NewReader(allFiles[name])), nil
 		},
 
 		// If left nil, the default implementations of these read from disk,
@@ -270,17 +271,21 @@
 		ReadDir:       func(string) ([]os.FileInfo, error) { panic("internal error: unexpected call to ReadDir") },
 	}
 
-	for name := range files {
+	// Copy the input map so we don't modify it.
+	matchedFiles = map[string][]byte{}
+	for n, c := range allFiles {
+		matchedFiles[n] = c
+	}
+	for name := range allFiles {
 		match, err := bctx.MatchFile(".", name) // This will access the file we just added to files map above.
 		if err != nil {
 			return nil, &BadPackageError{Err: fmt.Errorf(`bctx.MatchFile(".", %q): %w`, name, err)}
 		}
 		if !match {
-			// Excluded by build context.
-			delete(files, name)
+			delete(matchedFiles, name)
 		}
 	}
-	return files, nil
+	return matchedFiles, nil
 }
 
 // readZipFile decompresses zip file f and returns its uncompressed contents.
diff --git a/internal/fetch/load_test.go b/internal/fetch/load_test.go
index 166a369..21bfbd3 100644
--- a/internal/fetch/load_test.go
+++ b/internal/fetch/load_test.go
@@ -5,8 +5,6 @@
 package fetch
 
 import (
-	"archive/zip"
-	"bytes"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
@@ -28,15 +26,15 @@
 		type Value int`
 
 	plainContents := map[string]string{
-		"README.md":      "THIS IS A README",
-		"LICENSE.md":     testhelper.MITLicense,
-		"plain/plain.go": plainGoBody,
+		"README.md":  "THIS IS A README",
+		"LICENSE.md": testhelper.MITLicense,
+		"plain.go":   plainGoBody,
 	}
 
 	jsContents := map[string]string{
 		"README.md":  "THIS IS A README",
 		"LICENSE.md": testhelper.MITLicense,
-		"js/js.go":   jsGoBody,
+		"js.go":      jsGoBody,
 	}
 	for _, test := range []struct {
 		name         string
@@ -80,15 +78,11 @@
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
-			data, err := testhelper.ZipContents(test.contents)
-			if err != nil {
-				t.Fatal(err)
+			files := map[string][]byte{}
+			for n, c := range test.contents {
+				files[n] = []byte(c)
 			}
-			r, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
-			if err != nil {
-				t.Fatal(err)
-			}
-			got, err := matchingFiles(test.goos, test.goarch, r.File)
+			got, err := matchingFiles(test.goos, test.goarch, files)
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/internal/fetch/package.go b/internal/fetch/package.go
index 8acee3b..314bef3 100644
--- a/internal/fetch/package.go
+++ b/internal/fetch/package.go
@@ -108,7 +108,7 @@
 	// only after we're sure this phase passed without errors.
 	for _, f := range r.File {
 		if f.Mode().IsDir() {
-			// While "go mod download" will never put a directory in a zip, any can serve their
+			// While "go mod download" will never put a directory in a zip, anyone can serve their
 			// own zips. Example: go.felesatra.moe/binpack@v0.1.0.
 			// Directory entries are harmless, so we just ignore them.
 			continue