internal/fetch: use fs.FS

Rewrite fetch code to use fs.FS instead of zip.Reader.

For golang/go#47834

Change-Id: Iefdd16b367218690c4e5bea2a4688bea10a94be1
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/343952
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index b691e31..e7847eb 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -96,8 +96,9 @@
 	Info(ctx context.Context, path, version string) (*proxy.VersionInfo, error)
 	// Mod returns the contents of the module's go.mod file.
 	Mod(ctx context.Context, path, version string) ([]byte, error)
-	// Zip returns a reader for the module's zip file.
-	Zip(ctx context.Context, path, version string) (*zip.Reader, error)
+	// FS returns an FS for the module's contents. The FS should match the format
+	// of a module zip file.
+	FS(ctx context.Context, path, version string) (fs.FS, error)
 	// ZipSize returns the approximate size of the zip file in bytes.
 	// It is used only for load-shedding.
 	ZipSize(ctx context.Context, path, version string) (int64, error)
@@ -215,19 +216,23 @@
 	}
 	startFetchInfo(fi)
 
-	var zipReader *zip.Reader
+	var fsys fs.FS
 	if fr.ModulePath == stdlib.ModulePath {
-		var resolvedVersion string
-		zipReader, resolvedVersion, commitTime, err = stdlib.Zip(fr.RequestedVersion)
+		var (
+			resolvedVersion string
+			zr              *zip.Reader
+		)
+		zr, resolvedVersion, commitTime, err = stdlib.Zip(fr.RequestedVersion)
 		if err != nil {
 			return fi, err
 		}
+		fsys = zr
 		// If the requested version is a branch name like "master" or "main", we cannot
 		// determine the right resolved version until we start working with the repo.
 		fr.ResolvedVersion = resolvedVersion
 		fi.Version = resolvedVersion
 	} else {
-		zipReader, err = mg.Zip(ctx, fr.ModulePath, fr.ResolvedVersion)
+		fsys, err = mg.FS(ctx, fr.ModulePath, fr.ResolvedVersion)
 		if err != nil {
 			return fi, err
 		}
@@ -239,7 +244,7 @@
 	if fr.ModulePath == stdlib.ModulePath {
 		fr.HasGoMod = true
 	} else {
-		fr.HasGoMod = hasGoModFile(zipReader, fr.ModulePath, fr.ResolvedVersion)
+		fr.HasGoMod = hasGoModFile(fsys, fr.ModulePath, fr.ResolvedVersion)
 	}
 
 	// getGoModPath may return a non-empty goModPath even if the error is
@@ -255,7 +260,7 @@
 	// see if this is a fork. The intent is to avoid processing certain known
 	// large modules, not to find every fork.
 	if !fr.HasGoMod {
-		contentsDir, err := fs.Sub(zipReader, fr.ModulePath+"@"+fr.ResolvedVersion)
+		contentsDir, err := fs.Sub(fsys, fr.ModulePath+"@"+fr.ResolvedVersion)
 		if err != nil {
 			return fi, err
 		}
@@ -268,8 +273,7 @@
 		}
 	}
 
-	mod, pvs, err := processZipFile(ctx, fr.ModulePath, fr.ResolvedVersion, fr.RequestedVersion,
-		commitTime, zipReader, sourceClient)
+	mod, pvs, err := processModuleContents(ctx, fr.ModulePath, fr.ResolvedVersion, fr.RequestedVersion, commitTime, fsys, sourceClient)
 	if err != nil {
 		return fi, err
 	}
@@ -335,9 +339,9 @@
 	return goModPath, goModBytes, nil
 }
 
-// processZipFile extracts information from the module version zip.
-func processZipFile(ctx context.Context, modulePath, resolvedVersion, requestedVersion string,
-	commitTime time.Time, zipReader *zip.Reader, sourceClient *source.Client) (_ *internal.Module, _ []*internal.PackageVersionState, err error) {
+// processModuleContents extracts information from the module filesystem.
+func processModuleContents(ctx context.Context, modulePath, resolvedVersion, requestedVersion string,
+	commitTime time.Time, fsys fs.FS, sourceClient *source.Client) (_ *internal.Module, _ []*internal.PackageVersionState, err error) {
 	defer derrors.Wrap(&err, "processZipFile(%q, %q)", modulePath, resolvedVersion)
 
 	ctx, span := trace.StartSpan(ctx, "fetch.processZipFile")
@@ -351,21 +355,21 @@
 	if err != nil {
 		log.Infof(ctx, "error getting source info: %v", err)
 	}
-	readmes, err := extractReadmesFromZip(modulePath, resolvedVersion, zipReader)
+	readmes, err := extractReadmes(modulePath, resolvedVersion, fsys)
 	if err != nil {
-		return nil, nil, fmt.Errorf("extractReadmesFromZip(%q, %q, zipReader): %v", modulePath, resolvedVersion, err)
+		return nil, nil, err
 	}
 	logf := func(format string, args ...interface{}) {
 		log.Infof(ctx, format, args...)
 	}
-	d := licenses.NewDetector(modulePath, v, zipReader, logf)
+	d := licenses.NewDetectorFS(modulePath, v, fsys, logf)
 	allLicenses := d.AllLicenses()
-	packages, packageVersionStates, err := extractPackagesFromZip(ctx, modulePath, resolvedVersion, requestedVersion, zipReader, d, sourceInfo)
+	packages, packageVersionStates, err := extractPackages(ctx, modulePath, resolvedVersion, requestedVersion, fsys, d, sourceInfo)
 	if errors.Is(err, ErrModuleContainsNoPackages) || errors.Is(err, errMalformedZip) {
 		return nil, nil, fmt.Errorf("%v: %w", err.Error(), derrors.BadModule)
 	}
 	if err != nil {
-		return nil, nil, fmt.Errorf("extractPackagesFromZip(%q, %q, zipReader, %v): %v", modulePath, resolvedVersion, allLicenses, err)
+		return nil, nil, err
 	}
 	return &internal.Module{
 		ModuleInfo: internal.ModuleInfo{
@@ -381,8 +385,9 @@
 	}, packageVersionStates, nil
 }
 
-func hasGoModFile(zr *zip.Reader, m, v string) bool {
-	return zipFile(zr, path.Join(moduleVersionDir(m, v), "go.mod")) != nil
+func hasGoModFile(fsys fs.FS, m, v string) bool {
+	info, err := fs.Stat(fsys, path.Join(moduleVersionDir(m, v), "go.mod"))
+	return err == nil && !info.IsDir()
 }
 
 // processGoModFile populates mod with information extracted from the contents of the go.mod file.
@@ -421,17 +426,6 @@
 	return fmt.Sprintf("%s@%s", modulePath, version)
 }
 
-// zipFile returns the file in r whose name matches the given name, or nil
-// if there isn't one.
-func zipFile(r *zip.Reader, name string) *zip.File {
-	for _, f := range r.File {
-		if f.Name == name {
-			return f
-		}
-	}
-	return nil
-}
-
 type FetchInfo struct {
 	ModulePath string
 	Version    string
diff --git a/internal/fetch/fetchlocal.go b/internal/fetch/fetchlocal.go
index f22bb64..b7ffcbd 100644
--- a/internal/fetch/fetchlocal.go
+++ b/internal/fetch/fetchlocal.go
@@ -11,6 +11,7 @@
 	"errors"
 	"fmt"
 	"io"
+	"io/fs"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -86,8 +87,8 @@
 	return data, err
 }
 
-// Zip returns a reader for the module's zip file.
-func (g *directoryModuleGetter) Zip(ctx context.Context, path, version string) (*zip.Reader, error) {
+// FS returns an fs.FS for the module.
+func (g *directoryModuleGetter) FS(ctx context.Context, path, version string) (fs.FS, error) {
 	if err := g.checkPath(path); err != nil {
 		return nil, err
 	}
diff --git a/internal/fetch/fs.go b/internal/fetch/fs.go
index 7624119..6a78897 100644
--- a/internal/fetch/fs.go
+++ b/internal/fetch/fs.go
@@ -11,6 +11,7 @@
 	"encoding/json"
 	"errors"
 	"fmt"
+	"io/fs"
 	"io/ioutil"
 	"os"
 	"path/filepath"
@@ -55,9 +56,9 @@
 	return g.readFile(path, version, "mod")
 }
 
-// Zip returns a reader for the module's zip file.
-func (g *fsModuleGetter) Zip(ctx context.Context, path, version string) (_ *zip.Reader, err error) {
-	defer derrors.Wrap(&err, "fsModuleGetter.Zip(%q, %q)", path, version)
+// FS returns an FS for the module's zip file.
+func (g *fsModuleGetter) FS(ctx context.Context, path, version string) (_ fs.FS, err error) {
+	defer derrors.Wrap(&err, "fsModuleGetter.FS(%q, %q)", path, version)
 
 	data, err := g.readFile(path, version, "zip")
 	if err != nil {
diff --git a/internal/fetch/fs_test.go b/internal/fetch/fs_test.go
index c4e3988..060103f 100644
--- a/internal/fetch/fs_test.go
+++ b/internal/fetch/fs_test.go
@@ -72,31 +72,24 @@
 			t.Errorf("got %q, want %q", got, want)
 		}
 	})
-	t.Run("zip", func(t *testing.T) {
-		zr, err := g.Zip(ctx, modulePath, version)
+	t.Run("fs", func(t *testing.T) {
+		fsys, err := g.FS(ctx, modulePath, version)
 		if err != nil {
 			t.Fatal(err)
 		}
 		// Just check that the go.mod file is there and has the right contents.
-		goModPath := fmt.Sprintf("%s@%s/go.mod", modulePath, version)
-		for _, f := range zr.File {
-			if f.Name == goModPath {
-				f, err := f.Open()
-				if err != nil {
-					t.Fatal(err)
-				}
-				defer f.Close()
-				got, err := ioutil.ReadAll(f)
-				if err != nil {
-					t.Fatal(err)
-				}
-				want := []byte(goMod)
-				if !cmp.Equal(got, want) {
-					t.Errorf("got %q, want %q", got, want)
-				}
-				return
-			}
+		f, err := fsys.Open(fmt.Sprintf("%s@%s/go.mod", modulePath, version))
+		if err != nil {
+			t.Fatal(err)
 		}
-		t.Fatal("go.mod not found")
+		defer f.Close()
+		got, err := ioutil.ReadAll(f)
+		if err != nil {
+			t.Fatal(err)
+		}
+		want := []byte(goMod)
+		if !cmp.Equal(got, want) {
+			t.Errorf("got %q, want %q", got, want)
+		}
 	})
 }
diff --git a/internal/fetch/getters.go b/internal/fetch/getters.go
new file mode 100644
index 0000000..8b05250
--- /dev/null
+++ b/internal/fetch/getters.go
@@ -0,0 +1,42 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package fetch
+
+import (
+	"context"
+	"io/fs"
+
+	"golang.org/x/pkgsite/internal/proxy"
+)
+
+type proxyModuleGetter struct {
+	prox *proxy.Client
+}
+
+func NewProxyModuleGetter(p *proxy.Client) ModuleGetter {
+	return &proxyModuleGetter{p}
+}
+
+// Info returns basic information about the module.
+func (g *proxyModuleGetter) Info(ctx context.Context, path, version string) (*proxy.VersionInfo, error) {
+	return g.prox.Info(ctx, path, version)
+}
+
+// Mod returns the contents of the module's go.mod file.
+func (g *proxyModuleGetter) Mod(ctx context.Context, path, version string) ([]byte, error) {
+	return g.prox.Mod(ctx, path, version)
+}
+
+// FS returns an FS for the module's contents. The FS should match the format
+// of a module zip file.
+func (g *proxyModuleGetter) FS(ctx context.Context, path, version string) (fs.FS, error) {
+	return g.prox.Zip(ctx, path, version)
+}
+
+// ZipSize returns the approximate size of the zip file in bytes.
+// It is used only for load-shedding.
+func (g *proxyModuleGetter) ZipSize(ctx context.Context, path, version string) (int64, error) {
+	return g.prox.ZipSize(ctx, path, version)
+}
diff --git a/internal/fetch/helper_test.go b/internal/fetch/helper_test.go
index 871ee8f..db49a89 100644
--- a/internal/fetch/helper_test.go
+++ b/internal/fetch/helper_test.go
@@ -148,7 +148,7 @@
 		Files:      mod.Files,
 	}})
 	defer teardownProxy()
-	got := FetchModule(ctx, modulePath, fetchVersion, proxyClient, source.NewClientForTesting())
+	got := FetchModule(ctx, modulePath, fetchVersion, NewProxyModuleGetter(proxyClient), source.NewClientForTesting())
 	if !withLicenseDetector {
 		return got, nil
 	}
diff --git a/internal/fetch/load.go b/internal/fetch/load.go
index a64554a..ac91739 100644
--- a/internal/fetch/load.go
+++ b/internal/fetch/load.go
@@ -6,7 +6,6 @@
 package fetch
 
 import (
-	"archive/zip"
 	"bytes"
 	"context"
 	"errors"
@@ -16,6 +15,7 @@
 	"go/parser"
 	"go/token"
 	"io"
+	"io/fs"
 	"io/ioutil"
 	"math"
 	"net/http"
@@ -53,7 +53,7 @@
 //
 // If a package is fine except that its documentation is too large, loadPackage
 // returns a goPackage whose err field is a non-nil error with godoc.ErrTooLarge in its chain.
-func loadPackage(ctx context.Context, zipGoFiles []*zip.File, innerPath string,
+func loadPackage(ctx context.Context, fsys fs.FS, goFilePaths []string, innerPath string,
 	sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (_ *goPackage, err error) {
 	defer derrors.Wrap(&err, "loadPackage(ctx, zipGoFiles, %q, sourceInfo, modInfo)", innerPath)
 	ctx, span := trace.StartSpan(ctx, "fetch.loadPackage")
@@ -61,9 +61,9 @@
 
 	// Make a map with all the zip file contents.
 	files := make(map[string][]byte)
-	for _, f := range zipGoFiles {
-		_, name := path.Split(f.Name)
-		b, err := readZipFile(f, MaxFileSize)
+	for _, p := range goFilePaths {
+		_, name := path.Split(p)
+		b, err := readFSFile(fsys, p, MaxFileSize)
 		if err != nil {
 			return nil, err
 		}
@@ -348,27 +348,15 @@
 	return matchedFiles, nil
 }
 
-// readZipFile decompresses zip file f and returns its uncompressed contents.
-// The caller can check f.UncompressedSize64 before calling readZipFile to
-// get the expected uncompressed size of f.
-//
-// limit is the maximum number of bytes to read.
-func readZipFile(f *zip.File, limit int64) (_ []byte, err error) {
-	defer derrors.Add(&err, "readZipFile(%q)", f.Name)
-
-	r, err := f.Open()
+// readFSFile reads up to limit bytes from path in fsys.
+func readFSFile(fsys fs.FS, path string, limit int64) (_ []byte, err error) {
+	defer derrors.Add(&err, "readFSFile(%q)", path)
+	f, err := fsys.Open(path)
 	if err != nil {
-		return nil, fmt.Errorf("f.Open(): %v", err)
+		return nil, err
 	}
-	b, err := ioutil.ReadAll(io.LimitReader(r, limit))
-	if err != nil {
-		r.Close()
-		return nil, fmt.Errorf("ioutil.ReadAll(r): %v", err)
-	}
-	if err := r.Close(); err != nil {
-		return nil, fmt.Errorf("closing: %v", err)
-	}
-	return b, nil
+	defer f.Close()
+	return ioutil.ReadAll(io.LimitReader(f, limit))
 }
 
 // mib is the number of bytes in a mebibyte (Mi).
diff --git a/internal/fetch/package.go b/internal/fetch/package.go
index e357674..e194b05 100644
--- a/internal/fetch/package.go
+++ b/internal/fetch/package.go
@@ -6,10 +6,10 @@
 package fetch
 
 import (
-	"archive/zip"
 	"context"
 	"errors"
 	"fmt"
+	"io/fs"
 	"path"
 	"runtime/debug"
 	"strings"
@@ -40,7 +40,8 @@
 	err    error                     // non-fatal error when loading the package (e.g. documentation is too large)
 }
 
-// extractPackagesFromZip returns a slice of packages from the module zip r.
+// extractPackages returns a slice of packages from a filesystem arranged like a
+// module zip.
 // It matches against the given licenses to determine the subset of licenses
 // that applies to each package.
 // The second return value says whether any packages are "incomplete," meaning
@@ -49,9 +50,9 @@
 // * a maximum file size (MaxFileSize)
 // * the particular set of build contexts we consider (goEnvs)
 // * whether the import path is valid.
-func extractPackagesFromZip(ctx context.Context, modulePath, resolvedVersion, requestedVersion string, r *zip.Reader, d *licenses.Detector, sourceInfo *source.Info) (_ []*goPackage, _ []*internal.PackageVersionState, err error) {
-	defer derrors.Wrap(&err, "extractPackagesFromZip(ctx, %q, %q, r, d)", modulePath, resolvedVersion)
-	ctx, span := trace.StartSpan(ctx, "fetch.extractPackagesFromZip")
+func extractPackages(ctx context.Context, modulePath, resolvedVersion, requestedVersion string, fsys fs.FS, d *licenses.Detector, sourceInfo *source.Info) (_ []*goPackage, _ []*internal.PackageVersionState, err error) {
+	defer derrors.Wrap(&err, "extractPackages(ctx, %q, %q, r, d)", modulePath, resolvedVersion)
+	ctx, span := trace.StartSpan(ctx, "fetch.extractPackages")
 	defer span.End()
 	defer func() {
 		if e := recover(); e != nil {
@@ -85,8 +86,8 @@
 		// to be populated during phase 1 and used during phase 2.
 		//
 		// The map key is the directory path, with the modulePrefix trimmed.
-		// The map value is a slice of all .go files, and no other files.
-		dirs = make(map[string][]*zip.File)
+		// The map value is a slice of all .go file paths, and no other files.
+		dirs = make(map[string][]string)
 
 		// modInfo contains all the module information a package in the module
 		// needs to render its documentation, to be populated during phase 1
@@ -110,31 +111,31 @@
 	// that can be detected by looking at metadata alone.
 	// We'll be looking at file contents starting with phase 2 only,
 	// only after we're sure this phase passed without errors.
-	for _, f := range r.File {
-		if f.Mode().IsDir() {
-			// While "go mod download" will never put a directory in a zip, anyone can serve their
-			// own zips. Example: go.felesatra.moe/binpack@v0.1.0.
-			// Directory entries are harmless, so we just ignore them.
-			continue
+	err = fs.WalkDir(fsys, ".", func(pathname string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
 		}
-		if !strings.HasPrefix(f.Name, modulePrefix) {
+		if d.IsDir() {
+			// Skip directories.
+			return nil
+		}
+		if !strings.HasPrefix(pathname, modulePrefix) {
 			// Well-formed module zips have all files under modulePrefix.
-			return nil, nil, fmt.Errorf("expected file to have prefix %q; got = %q: %w",
-				modulePrefix, f.Name, errMalformedZip)
+			return fmt.Errorf("expected file to have prefix %q; got = %q: %w", modulePrefix, pathname, errMalformedZip)
 		}
-		innerPath := path.Dir(f.Name[len(modulePrefix):])
+		innerPath := path.Dir(pathname[len(modulePrefix):])
 		if incompleteDirs[innerPath] {
 			// We already know this directory cannot be processed, so skip.
-			continue
+			return nil
 		}
 		importPath := path.Join(modulePath, innerPath)
 		if ignoredByGoTool(importPath) || isVendored(importPath) {
 			// File is in a directory we're not looking to process at this time, so skip it.
-			continue
+			return nil
 		}
-		if !strings.HasSuffix(f.Name, ".go") {
+		if !strings.HasSuffix(pathname, ".go") {
 			// We care about .go files only.
-			continue
+			return nil
 		}
 		// It's possible to have a Go package in a directory that does not result in a valid import path.
 		// That package cannot be imported, but that may be fine if it's a main package, intended to built
@@ -150,13 +151,17 @@
 				Status:      derrors.ToStatus(derrors.PackageBadImportPath),
 				Error:       err.Error(),
 			})
-			continue
+			return nil
 		}
-		if f.UncompressedSize64 > MaxFileSize {
+		info, err := d.Info()
+		if err != nil {
+			return err
+		}
+		if info.Size() > MaxFileSize {
 			incompleteDirs[innerPath] = true
 			status := derrors.ToStatus(derrors.PackageMaxFileSizeLimitExceeded)
 			err := fmt.Sprintf("Unable to process %s: file size %d exceeds max limit %d",
-				f.Name, f.UncompressedSize64, MaxFileSize)
+				pathname, info.Size(), MaxFileSize)
 			packageVersionStates = append(packageVersionStates, &internal.PackageVersionState{
 				ModulePath:  modulePath,
 				PackagePath: importPath,
@@ -164,13 +169,18 @@
 				Status:      status,
 				Error:       err,
 			})
-			continue
+			return nil
 		}
-		dirs[innerPath] = append(dirs[innerPath], f)
+		dirs[innerPath] = append(dirs[innerPath], pathname)
 		if len(dirs) > maxPackagesPerModule {
-			return nil, nil, fmt.Errorf("%d packages found in %q; exceeds limit %d for maxPackagePerModule", len(dirs), modulePath, maxPackagesPerModule)
+			return fmt.Errorf("%d packages found in %q; exceeds limit %d for maxPackagePerModule", len(dirs), modulePath, maxPackagesPerModule)
 		}
+		return nil
+	})
+	if err != nil {
+		return nil, nil, err
 	}
+
 	for pkgName := range dirs {
 		modInfo.ModulePackages[path.Join(modulePath, pkgName)] = true
 	}
@@ -191,7 +201,7 @@
 			status error
 			errMsg string
 		)
-		pkg, err := loadPackage(ctx, goFiles, innerPath, sourceInfo, modInfo)
+		pkg, err := loadPackage(ctx, fsys, goFiles, innerPath, sourceInfo, modInfo)
 		if bpe := (*BadPackageError)(nil); errors.As(err, &bpe) {
 			incompleteDirs[innerPath] = true
 			status = derrors.PackageInvalidContents
diff --git a/internal/fetch/readme.go b/internal/fetch/readme.go
index 395f825..e300b29 100644
--- a/internal/fetch/readme.go
+++ b/internal/fetch/readme.go
@@ -6,8 +6,9 @@
 package fetch
 
 import (
-	"archive/zip"
+	"errors"
 	"fmt"
+	"io/fs"
 	"path"
 	"strings"
 
@@ -15,32 +16,40 @@
 	"golang.org/x/pkgsite/internal/derrors"
 )
 
-// extractReadmesFromZip returns the file path and contents of all files from r
+// extractReadmes returns the file path and contents of all files from fsys
 // that are README files.
-func extractReadmesFromZip(modulePath, resolvedVersion string, r *zip.Reader) (_ []*internal.Readme, err error) {
-	defer derrors.Wrap(&err, "extractReadmesFromZip(ctx, %q, %q, r)", modulePath, resolvedVersion)
+func extractReadmes(modulePath, resolvedVersion string, fsys fs.FS) (_ []*internal.Readme, err error) {
+	defer derrors.Wrap(&err, "extractReadmes(ctx, %q, %q, r)", modulePath, resolvedVersion)
 
 	// The key is the README directory. Since we only store one README file per
 	// directory, we use this below to prioritize READMEs in markdown.
 	readmes := map[string]*internal.Readme{}
-	for _, zipFile := range r.File {
-		if isReadme(zipFile.Name) {
-			if zipFile.UncompressedSize64 > MaxFileSize {
-				return nil, fmt.Errorf("file size %d exceeds max limit %d", zipFile.UncompressedSize64, MaxFileSize)
-			}
-			c, err := readZipFile(zipFile, MaxFileSize)
+	mvdir := moduleVersionDir(modulePath, resolvedVersion)
+	err = fs.WalkDir(fsys, mvdir, func(pathname string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if isReadme(pathname) {
+			info, err := d.Info()
 			if err != nil {
-				return nil, err
+				return err
+			}
+			if info.Size() > MaxFileSize {
+				return fmt.Errorf("file size %d exceeds max limit %d", info.Size(), MaxFileSize)
+			}
+			c, err := readFSFile(fsys, pathname, MaxFileSize)
+			if err != nil {
+				return err
 			}
 
-			f := strings.TrimPrefix(zipFile.Name, moduleVersionDir(modulePath, resolvedVersion)+"/")
+			f := strings.TrimPrefix(pathname, mvdir+"/")
 			key := path.Dir(f)
 			if r, ok := readmes[key]; ok {
 				// Prefer READMEs written in markdown, since we style these on
 				// the frontend.
 				ext := path.Ext(r.Filepath)
 				if ext == ".md" || ext == ".markdown" {
-					continue
+					return nil
 				}
 			}
 			readmes[key] = &internal.Readme{
@@ -48,8 +57,11 @@
 				Contents: string(c),
 			}
 		}
+		return nil
+	})
+	if err != nil && !errors.Is(err, fs.ErrNotExist) { // we can get NotExist on an empty FS
+		return nil, err
 	}
-
 	var rs []*internal.Readme
 	for _, r := range readmes {
 		rs = append(rs, r)
diff --git a/internal/fetch/readme_test.go b/internal/fetch/readme_test.go
index 714d575..ee57fee 100644
--- a/internal/fetch/readme_test.go
+++ b/internal/fetch/readme_test.go
@@ -16,7 +16,7 @@
 	"golang.org/x/pkgsite/internal/stdlib"
 )
 
-func TestExtractReadmesFromZip(t *testing.T) {
+func TestExtractReadmes(t *testing.T) {
 	stdlib.UseTestData = true
 
 	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
@@ -101,7 +101,7 @@
 				}
 			}
 
-			got, err := extractReadmesFromZip(test.modulePath, test.version, reader)
+			got, err := extractReadmes(test.modulePath, test.version, reader)
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/internal/frontend/fetch.go b/internal/frontend/fetch.go
index f3a63dc..c468f61 100644
--- a/internal/frontend/fetch.go
+++ b/internal/frontend/fetch.go
@@ -556,7 +556,7 @@
 		derrors.Wrap(&err, "FetchAndUpdateState(%q, %q)", modulePath, requestedVersion)
 	}()
 
-	fr := fetch.FetchModule(ctx, modulePath, requestedVersion, proxyClient, sourceClient)
+	fr := fetch.FetchModule(ctx, modulePath, requestedVersion, fetch.NewProxyModuleGetter(proxyClient), sourceClient)
 	defer fr.Defer()
 	if fr.Error == nil {
 		// Only attempt to insert the module into module_version_states if the
diff --git a/internal/proxydatasource/datasource.go b/internal/proxydatasource/datasource.go
index 5e412bf..43ba301 100644
--- a/internal/proxydatasource/datasource.go
+++ b/internal/proxydatasource/datasource.go
@@ -95,7 +95,7 @@
 	if e, ok := ds.versionCache[key]; ok {
 		return e.module, e.err
 	}
-	res := fetch.FetchModule(ctx, modulePath, version, ds.proxyClient, ds.sourceClient)
+	res := fetch.FetchModule(ctx, modulePath, version, fetch.NewProxyModuleGetter(ds.proxyClient), ds.sourceClient)
 	defer res.Defer()
 	m := res.Module
 	if m != nil {
diff --git a/internal/testing/integration/frontend_test.go b/internal/testing/integration/frontend_test.go
index 1183d4e..c64bbb6 100644
--- a/internal/testing/integration/frontend_test.go
+++ b/internal/testing/integration/frontend_test.go
@@ -99,7 +99,7 @@
 
 func fetchAndInsertModule(ctx context.Context, t *testing.T, tm *proxytest.Module, proxyClient *proxy.Client) {
 	sourceClient := source.NewClient(1 * time.Second)
-	res := fetch.FetchModule(ctx, tm.ModulePath, tm.Version, proxyClient, sourceClient)
+	res := fetch.FetchModule(ctx, tm.ModulePath, tm.Version, fetch.NewProxyModuleGetter(proxyClient), sourceClient)
 	defer res.Defer()
 	if res.Error != nil {
 		t.Fatal(res.Error)
diff --git a/internal/worker/fetch.go b/internal/worker/fetch.go
index 6dd9983..d1cf479 100644
--- a/internal/worker/fetch.go
+++ b/internal/worker/fetch.go
@@ -188,6 +188,7 @@
 		return ft
 	}
 
+	proxyGetter := fetch.NewProxyModuleGetter(f.ProxyClient)
 	// Fetch the module, and the current @main and @master version of this module.
 	// The @main and @master version will be used to update the version_map
 	// target if applicable.
@@ -196,7 +197,7 @@
 	go func() {
 		defer wg.Done()
 		start := time.Now()
-		fr := fetch.FetchModule(ctx, modulePath, requestedVersion, f.ProxyClient, f.SourceClient)
+		fr := fetch.FetchModule(ctx, modulePath, requestedVersion, proxyGetter, f.SourceClient)
 		if fr == nil {
 			panic("fetch.FetchModule should never return a nil FetchResult")
 		}
@@ -210,7 +211,7 @@
 	go func() {
 		defer wg.Done()
 		if !f.ProxyClient.FetchDisabled() {
-			main = resolvedVersion(ctx, modulePath, internal.MainVersion, f.ProxyClient)
+			main = resolvedVersion(ctx, modulePath, internal.MainVersion, proxyGetter)
 		}
 	}()
 	var master string
@@ -218,7 +219,7 @@
 	go func() {
 		defer wg.Done()
 		if !f.ProxyClient.FetchDisabled() {
-			master = resolvedVersion(ctx, modulePath, internal.MasterVersion, f.ProxyClient)
+			master = resolvedVersion(ctx, modulePath, internal.MasterVersion, proxyGetter)
 		}
 	}()
 	wg.Wait()
@@ -298,15 +299,15 @@
 	return nil
 }
 
-func resolvedVersion(ctx context.Context, modulePath, requestedVersion string, proxyClient *proxy.Client) string {
+func resolvedVersion(ctx context.Context, modulePath, requestedVersion string, getter fetch.ModuleGetter) string {
 	if modulePath == stdlib.ModulePath && requestedVersion == internal.MainVersion {
 		return ""
 	}
-	info, err := fetch.GetInfo(ctx, modulePath, requestedVersion, proxyClient)
+	info, err := fetch.GetInfo(ctx, modulePath, requestedVersion, getter)
 	if err != nil {
 		if !errors.Is(err, derrors.NotFound) {
 			// If an error occurs, log it as a warning and insert the module as normal.
-			log.Warningf(ctx, "fetch.GetInfo(ctx, %q, %q, f.ProxyClient, false): %v", modulePath, requestedVersion, err)
+			log.Warningf(ctx, "fetch.GetInfo(ctx, %v, %q, getter, false): %v", modulePath, requestedVersion, err)
 		}
 		return ""
 	}