internal/fetch: split FetchModule in two

First we get a LazyModule that essentially just contains the UnitMetas
for each of the units in the module, and then we process each Unit as
needed so that FetchDataSource doesn't need to process all the Units
when it just needs one. To construct a FetchResult, FetchModule will
now get a LazyModule and then compute all the Units.

A compromise to get this to work is that FetchDataSource will no
longer populate Synopsis, IsRedistributable and Licenses on
Subdirectories to avoid computing all those fields for all units when
returning a single unit.

MainVersion and MasterVersion are removed from FetchResult because
they were only set on the internal/worker.FetchTask struct that embeds
a FetchResult.
 
Change-Id: Ia0db850ae570d421712ec484ee8b7815a779128e
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/557818
Run-TryBot: Michael Matloob <matloob@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
kokoro-CI: kokoro <noreply+kokoro@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
diff --git a/internal/fetch/fetch.go b/internal/fetch/fetch.go
index f91e059..80f4edd 100644
--- a/internal/fetch/fetch.go
+++ b/internal/fetch/fetch.go
@@ -12,11 +12,11 @@
 	"io/fs"
 	"net/http"
 	"strings"
-	"time"
 
 	"golang.org/x/mod/modfile"
 	"golang.org/x/pkgsite/internal"
 	"golang.org/x/pkgsite/internal/derrors"
+	"golang.org/x/pkgsite/internal/godoc"
 	"golang.org/x/pkgsite/internal/licenses"
 	"golang.org/x/pkgsite/internal/log"
 	"golang.org/x/pkgsite/internal/proxy"
@@ -30,8 +30,6 @@
 	ModulePath       string
 	RequestedVersion string
 	ResolvedVersion  string
-	MainVersion      string
-	MasterVersion    string
 	// HasGoMod says whether the zip contain a go.mod file. If Module (below) is non-nil, then
 	// Module.HasGoMod will be the same value. But HasGoMod will be populated even if Module is nil
 	// because there were problems with it, as long as we can download and read the zip.
@@ -43,35 +41,54 @@
 	PackageVersionStates []*internal.PackageVersionState
 }
 
+// A LazyModule contains the information needed to compute a FetchResult,
+// but has only done enough work to compute the UnitMetas in the module.
+// It provides a Unit method to compute a single unit or a fetchResult
+// method to compute the whole FetchResult.
+type LazyModule struct {
+	internal.ModuleInfo
+	UnitMetas        []*internal.UnitMeta
+	goModPath        string
+	requestedVersion string
+	failedPackages   []*internal.PackageVersionState
+	licenseDetector  *licenses.Detector
+	contentDir       fs.FS
+	godocModInfo     *godoc.ModuleInfo
+	Error            error
+}
+
 // FetchModule queries the proxy or the Go repo for the requested module
 // version, downloads the module zip, and processes the contents to return an
 // *internal.Module and related information.
 //
 // Even if err is non-nil, the result may contain useful information, like the go.mod path.
 func FetchModule(ctx context.Context, modulePath, requestedVersion string, mg ModuleGetter) (fr *FetchResult) {
-	fr = &FetchResult{
-		ModulePath:       modulePath,
-		RequestedVersion: requestedVersion,
-	}
-	defer derrors.Wrap(&fr.Error, "FetchModule(%q, %q)", modulePath, requestedVersion)
-
-	err := fetchModule(ctx, fr, mg)
-	fr.Error = err
-	if err != nil {
-		fr.Status = derrors.ToStatus(fr.Error)
-	}
-	if fr.Status == 0 {
-		fr.Status = http.StatusOK
-	}
-	return fr
+	lm := FetchLazyModule(ctx, modulePath, requestedVersion, mg)
+	return lm.fetchResult(ctx)
 }
 
-func fetchModule(ctx context.Context, fr *FetchResult, mg ModuleGetter) error {
-	info, err := GetInfo(ctx, fr.ModulePath, fr.RequestedVersion, mg)
+// FetchLazyModule queries the proxy or the Go repo for the requested module
+// version, downloads the module zip, and does just enough processing to produce
+// UnitMetas for all the units in the module. The full units are computed as needed.
+func FetchLazyModule(ctx context.Context, modulePath, requestedVersion string, mg ModuleGetter) *LazyModule {
+	lm, err := fetchLazyModule(ctx, modulePath, requestedVersion, mg)
 	if err != nil {
-		return err
+		lm.Error = err
 	}
-	fr.ResolvedVersion = info.Version
+	return lm
+}
+
+func fetchLazyModule(ctx context.Context, modulePath, requestedVersion string, mg ModuleGetter) (*LazyModule, error) {
+	lm := &LazyModule{
+		requestedVersion: requestedVersion,
+	}
+	lm.ModuleInfo.ModulePath = modulePath
+
+	info, err := GetInfo(ctx, modulePath, requestedVersion, mg)
+	if err != nil {
+		return lm, err
+	}
+	lm.ModuleInfo.Version = info.Version
 	commitTime := info.Time
 
 	var contentDir fs.FS
@@ -80,35 +97,37 @@
 		// Special behavior for stdlibZipModuleGetter because its info doesn't actually
 		// give us the true resolved version.
 		var resolvedVersion string
-		contentDir, resolvedVersion, commitTime, err = stdlib.ContentDir(ctx, fr.RequestedVersion)
+		contentDir, resolvedVersion, commitTime, err = stdlib.ContentDir(ctx, requestedVersion)
 		if err != nil {
-			return err
+			return lm, err
 		}
 		// If the requested version is a branch name like "master" or "main", we cannot
 		// determine the right resolved version until we start working with the repo.
-		fr.ResolvedVersion = resolvedVersion
+		lm.ModuleInfo.Version = resolvedVersion
 	default:
-		contentDir, err = mg.ContentDir(ctx, fr.ModulePath, fr.ResolvedVersion)
+		contentDir, err = mg.ContentDir(ctx, modulePath, lm.ModuleInfo.Version)
 		if err != nil {
-			return err
+			return lm, err
 		}
 	}
+	lm.ModuleInfo.CommitTime = commitTime
+	lm.contentDir = contentDir
 
-	// Set fr.HasGoMod as early as possible, because the go command uses it to
+	// Set HasGoMod as early as possible, because the go command uses it to
 	// decide the latest version in some cases (see fetchRawLatestVersion in
 	// this package) and all it requires is a valid zip.
-	if fr.ModulePath == stdlib.ModulePath {
-		fr.HasGoMod = true
+	if modulePath == stdlib.ModulePath {
+		lm.ModuleInfo.HasGoMod = true
 	} else {
-		fr.HasGoMod = hasGoModFile(contentDir)
+		lm.ModuleInfo.HasGoMod = hasGoModFile(contentDir)
 	}
 
 	// getGoModPath may return a non-empty goModPath even if the error is
 	// non-nil, if the module version is an alternative module.
 	var goModBytes []byte
-	fr.GoModPath, goModBytes, err = getGoModPath(ctx, fr.ModulePath, fr.ResolvedVersion, mg)
+	lm.goModPath, goModBytes, err = getGoModPath(ctx, modulePath, lm.ModuleInfo.Version, mg)
 	if err != nil {
-		return err
+		return lm, err
 	}
 
 	// If there is no go.mod file in the zip, try other ways to detect
@@ -117,37 +136,131 @@
 	// 2. Compare the zip signature to a list of known ones to see if this is a
 	//    fork. The intent is to avoid processing certain known large modules, not
 	//    to find every fork.
-	if !fr.HasGoMod {
-		if modPath := knownAlternativeFor(fr.ModulePath); modPath != "" {
-			return fmt.Errorf("known alternative to %s: %w", modPath, derrors.AlternativeModule)
+	if !lm.ModuleInfo.HasGoMod {
+		if modPath := knownAlternativeFor(modulePath); modPath != "" {
+			return lm, fmt.Errorf("known alternative to %s: %w", modPath, derrors.AlternativeModule)
 		}
-		forkedModule, err := forkedFrom(contentDir, fr.ModulePath, fr.ResolvedVersion)
+		forkedModule, err := forkedFrom(contentDir, modulePath, lm.ModuleInfo.Version)
 		if err != nil {
-			return err
+			return lm, err
 		}
 		if forkedModule != "" {
-			return fmt.Errorf("forked from %s: %w", forkedModule, derrors.AlternativeModule)
+			return lm, fmt.Errorf("forked from %s: %w", forkedModule, derrors.AlternativeModule)
 		}
 	}
 
-	mod, pvs, err := processModuleContents(ctx, fr.ModulePath, fr.ResolvedVersion, fr.RequestedVersion, commitTime, contentDir, mg)
-	if err != nil {
-		return err
-	}
-	mod.HasGoMod = fr.HasGoMod
-	if goModBytes != nil {
-		if err := processGoModFile(goModBytes, mod); err != nil {
-			return fmt.Errorf("%v: %w", err.Error(), derrors.BadModule)
+	// populate the rest of lm.ModuleInfo before calling extractUnitMetas with it.
+	v := lm.ModuleInfo.Version // version to use for SourceInfo and licenses.NewDetectorFS
+	if _, ok := mg.(*stdlibZipModuleGetter); ok {
+		if modulePath == stdlib.ModulePath && stdlib.SupportedBranches[requestedVersion] {
+			v = requestedVersion
 		}
 	}
-	fr.Module = mod
-	fr.PackageVersionStates = pvs
+	lm.ModuleInfo.SourceInfo, err = mg.SourceInfo(ctx, modulePath, v)
+	if err != nil {
+		log.Infof(ctx, "error getting source info: %v", err)
+	}
+	logf := func(format string, args ...any) {
+		log.Infof(ctx, format, args...)
+	}
+	lm.licenseDetector = licenses.NewDetectorFS(modulePath, v, contentDir, logf)
+	lm.ModuleInfo.IsRedistributable = lm.licenseDetector.ModuleIsRedistributable()
+	lm.UnitMetas, lm.godocModInfo, lm.failedPackages, err = extractUnitMetas(ctx, lm.ModuleInfo, contentDir)
+	if err != nil {
+		return lm, err
+	}
+	if goModBytes != nil {
+		if err := processGoModFile(goModBytes, &lm.ModuleInfo); err != nil {
+			return lm, err
+		}
+	}
+
+	return lm, nil
+}
+
+func (lm *LazyModule) Unit(ctx context.Context, path string) (*internal.Unit, error) {
+	var unitMeta *internal.UnitMeta
+	for _, um := range lm.UnitMetas {
+		if um.Path == path {
+			unitMeta = um
+		}
+	}
+	u, _, err := lm.unit(ctx, unitMeta)
+	if err == nil && u == nil {
+		return nil, fmt.Errorf("unit %v does not exist in module", path)
+	}
+	return u, err
+}
+
+// unit returns the Unit for the given UnitMeta. It also returns a PackageVersionState representing
+// the state of the work of computing the Unit after the LazyModule was computed. PackageVersionStates
+// representing packages that failed while the LazyModule was computed are set on the LazyModule.
+func (lm *LazyModule) unit(ctx context.Context, unitMeta *internal.UnitMeta) (*internal.Unit, *internal.PackageVersionState, error) {
+	readme, err := extractReadme(lm.ModulePath, unitMeta.Path, lm.ModuleInfo.Version, lm.contentDir)
+	if err != nil {
+		return nil, nil, err
+	}
+	pkg, pvs, err := extractPackage(ctx, lm.ModulePath, unitMeta.Path, lm.contentDir, lm.licenseDetector, lm.SourceInfo, lm.godocModInfo)
+	if err != nil || (pvs != nil && pvs.Status != 200) {
+		// pvs can be non-nil even if err is non-nil.
+		return nil, pvs, err
+	}
+
+	u := moduleUnit(lm.ModulePath, unitMeta, pkg, readme, lm.licenseDetector)
+	return u, pvs, nil
+}
+
+func (lm *LazyModule) fetchResult(ctx context.Context) *FetchResult {
+	fr := &FetchResult{
+		ModulePath:       lm.ModulePath,
+		RequestedVersion: lm.requestedVersion,
+		ResolvedVersion:  lm.ModuleInfo.Version,
+		Module: &internal.Module{
+			ModuleInfo: lm.ModuleInfo,
+		},
+		HasGoMod:  lm.HasGoMod,
+		GoModPath: lm.goModPath,
+	}
+	if lm.Error != nil {
+		fr.Error = lm.Error
+		fr.Status = derrors.ToStatus(lm.Error)
+		if fr.Status == 0 {
+			fr.Status = http.StatusOK
+		}
+		return fr
+	}
+	fr.Module.Licenses = lm.licenseDetector.AllLicenses()
+	// We need to set HasGoMod here rather than on the ModuleInfo when
+	// it's created because the ModuleInfo that goes on the units shouldn't
+	// have HasGoMod set on it.
+	packageVersionStates := append([]*internal.PackageVersionState{}, lm.failedPackages...)
+	for _, um := range lm.UnitMetas {
+		unit, pvs, err := lm.unit(ctx, um)
+		if err != nil {
+			fr.Error = err
+		}
+		if pvs != nil {
+			packageVersionStates = append(packageVersionStates, pvs)
+		}
+		if unit == nil {
+			// No unit was produced but we still had a useful pvs.
+			continue
+		}
+		fr.Module.Units = append(fr.Module.Units, unit)
+	}
+	if fr.Error != nil {
+		fr.Status = derrors.ToStatus(fr.Error)
+	}
+	if fr.Status == 0 {
+		fr.Status = http.StatusOK
+	}
+	fr.PackageVersionStates = packageVersionStates
 	for _, state := range fr.PackageVersionStates {
 		if state.Status != http.StatusOK {
 			fr.Status = derrors.ToStatus(derrors.HasIncompletePackages)
 		}
 	}
-	return nil
+	return fr
 }
 
 // GetInfo returns the result of a request to the proxy .info endpoint. If
@@ -178,53 +291,24 @@
 	return goModPath, goModBytes, nil
 }
 
-// processModuleContents extracts information from the module filesystem.
-func processModuleContents(ctx context.Context, modulePath, resolvedVersion, requestedVersion string,
-	commitTime time.Time, contentDir fs.FS, mg ModuleGetter) (_ *internal.Module, _ []*internal.PackageVersionState, err error) {
-	defer derrors.Wrap(&err, "processModuleContents(%q, %q)", modulePath, resolvedVersion)
+// extractUnitMetas extracts UnitMeta information from the module filesystem. It also returns
+// the godoc.ModuleInfo and the PackageVersionStates reported by extractPackageMetas.
+func extractUnitMetas(ctx context.Context, minfo internal.ModuleInfo,
+	contentDir fs.FS) (unitMetas []*internal.UnitMeta, _ *godoc.ModuleInfo, _ []*internal.PackageVersionState, err error) {
+	defer derrors.Wrap(&err, "extractUnitMetas(%q, %q)", minfo.ModulePath, minfo.Version)
 
-	ctx, span := trace.StartSpan(ctx, "fetch.processModuleContents")
+	ctx, span := trace.StartSpan(ctx, "fetch.extractUnitMetas")
 	defer span.End()
 
-	v := resolvedVersion
-	if _, ok := mg.(*stdlibZipModuleGetter); ok {
-		if modulePath == stdlib.ModulePath && stdlib.SupportedBranches[requestedVersion] {
-			v = requestedVersion
-		}
-	}
-	sourceInfo, err := mg.SourceInfo(ctx, modulePath, v)
-	if err != nil {
-		log.Infof(ctx, "error getting source info: %v", err)
-	}
-	readmes, err := extractReadmes(modulePath, resolvedVersion, contentDir)
-	if err != nil {
-		return nil, nil, err
-	}
-	logf := func(format string, args ...any) {
-		log.Infof(ctx, format, args...)
-	}
-	d := licenses.NewDetectorFS(modulePath, v, contentDir, logf)
-	allLicenses := d.AllLicenses()
-	packages, packageVersionStates, err := extractPackages(ctx, modulePath, resolvedVersion, contentDir, d, sourceInfo)
+	packageMetas, godocModInfo, failedMetaPackages, err := extractPackageMetas(ctx, minfo.ModulePath, minfo.Version, contentDir)
 	if errors.Is(err, ErrModuleContainsNoPackages) {
-		return nil, nil, fmt.Errorf("%v: %w", err.Error(), derrors.BadModule)
+		return nil, nil, nil, fmt.Errorf("%v: %w", err.Error(), derrors.BadModule)
 	}
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, nil, err
 	}
-	minfo := internal.ModuleInfo{
-		ModulePath:        modulePath,
-		Version:           resolvedVersion,
-		CommitTime:        commitTime,
-		IsRedistributable: d.ModuleIsRedistributable(),
-		SourceInfo:        sourceInfo,
-		// HasGoMod is populated by the caller.
-	}
-	return &internal.Module{
-		ModuleInfo: minfo,
-		Licenses:   allLicenses,
-		Units:      moduleUnits(modulePath, minfo, packages, readmes, d),
-	}, packageVersionStates, nil
+
+	return moduleUnitMetas(minfo, packageMetas), godocModInfo, failedMetaPackages, nil
 }
 
 func hasGoModFile(contentDir fs.FS) bool {
@@ -233,7 +317,7 @@
 }
 
 // processGoModFile populates mod with information extracted from the contents of the go.mod file.
-func processGoModFile(goModBytes []byte, mod *internal.Module) (err error) {
+func processGoModFile(goModBytes []byte, mod *internal.ModuleInfo) (err error) {
 	defer derrors.Wrap(&err, "processGoModFile")
 
 	mf, err := modfile.Parse("go.mod", goModBytes, nil)
diff --git a/internal/fetch/helper_test.go b/internal/fetch/helper_test.go
index 2d92041..1574228 100644
--- a/internal/fetch/helper_test.go
+++ b/internal/fetch/helper_test.go
@@ -58,6 +58,7 @@
 				ModulePath:        fr.Module.ModulePath,
 				Version:           fr.Module.Version,
 				IsRedistributable: fr.Module.IsRedistributable,
+				HasGoMod:          fr.Module.HasGoMod,
 			},
 			Path: u.Path,
 			Name: u.Name,
diff --git a/internal/fetch/load.go b/internal/fetch/load.go
index 97105a3..664193a 100644
--- a/internal/fetch/load.go
+++ b/internal/fetch/load.go
@@ -182,6 +182,74 @@
 	return pkg, nil
 }
 
+// loadPackageMeta loads only the parts of a package that are needed to produce a
+// packageMeta.
+func loadPackageMeta(ctx context.Context, contentDir fs.FS, goFilePaths []string, innerPath string, modInfo *godoc.ModuleInfo) (_ *packageMeta, err error) {
+	defer derrors.Wrap(&err, "loadPackageMeta(ctx, zipGoFiles, %q, sourceInfo, modInfo)", innerPath)
+
+	// Make a map with all the zip file contents.
+	files := make(map[string][]byte)
+	for _, p := range goFilePaths {
+		name := path.Base(p)
+		b, err := readFSFile(contentDir, p, MaxFileSize)
+		if err != nil {
+			return nil, err
+		}
+		files[name] = b
+	}
+
+	modulePath := modInfo.ModulePath
+	importPath := path.Join(modulePath, innerPath)
+	if modulePath == stdlib.ModulePath {
+		importPath = innerPath
+	}
+
+	var pkg *packageMeta
+	// Try to load the package name for each build context. We're okay
+	// as long as all the build contexts for which loadPackageName succeeds
+	// agree on the package name.
+	// TODO(matloob): See if we can rewrite this so each file needs to be loaded
+	// only once. What we probably want to do is map each file to the package name
+	// in the file and then run the logic in loadPackageName on the collection of
+	// package name values.
+	for _, bc := range internal.BuildContexts {
+		mfiles, err := matchingFiles(bc.GOOS, bc.GOARCH, files)
+		if err != nil {
+			return nil, err
+		}
+		name, err := loadPackageName(innerPath, mfiles)
+		switch {
+		case errors.Is(err, derrors.NotFound):
+			// No package for this build context.
+			continue
+		case errors.As(err, new(*BadPackageError)):
+			// This build context was bad, but maybe others aren't.
+			continue
+		case err != nil:
+			// Serious error. Fail.
+			return nil, err
+		default:
+			// No error.
+			if pkg == nil {
+				pkg = &packageMeta{
+					path: importPath,
+					name: name,
+				}
+			}
+			// All the build contexts should use the same package name. Although
+			// it's technically legal for different build tags to result in different
+			// package names, it's not something we support.
+			if name != pkg.name {
+				return nil, &BadPackageError{
+					Err: fmt.Errorf("more than one package name (%q and %q)", pkg.name, name),
+				}
+			}
+		}
+	}
+
+	return pkg, nil
+}
+
 // mapKeyForFiles generates a value that corresponds to the given set of file
 // names and can be used as a map key.
 // It assumes the filenames do not contain spaces.
@@ -297,6 +365,47 @@
 	return packageName, goFiles, fset, nil
 }
 
+// loadPackageName returns the package name from the files as it occurs in the source.
+// If there are no non-test Go files, it returns a NotFound error.
+func loadPackageName(innerPath string, files map[string][]byte) (pkgName string, _ error) {
+	// Parse only the package clause of each non-test .go file.
+	var (
+		fset            = token.NewFileSet()
+		numNonTestFiles int
+		packageName     string
+		packageNameFile string // Name of file where packageName came from.
+	)
+	for name, b := range files {
+		if strings.HasSuffix(name, "_test.go") {
+			continue
+		}
+		pf, err := parser.ParseFile(fset, name, b, parser.PackageClauseOnly)
+		if err != nil {
+			if pf == nil {
+				return "", fmt.Errorf("internal error: the source couldn't be read: %v", err)
+			}
+			return "", &BadPackageError{Err: err}
+		}
+		numNonTestFiles++
+		if numNonTestFiles == 1 {
+			packageName = pf.Name.Name
+			packageNameFile = name
+		} else if pf.Name.Name != packageName {
+			return "", &BadPackageError{Err: &build.MultiplePackageError{
+				Dir:      innerPath,
+				Packages: []string{packageName, pf.Name.Name},
+				Files:    []string{packageNameFile, name},
+			}}
+		}
+	}
+	if numNonTestFiles == 0 {
+		// This directory doesn't contain a package, or at least not one
+		// that matches this build context.
+		return "", derrors.NotFound
+	}
+	return packageName, nil
+}
+
 // matchingFiles returns a map from file names to their contents, read from zipGoFiles.
 // It includes only those files that match the build context determined by goos and goarch.
 func matchingFiles(goos, goarch string, allFiles map[string][]byte) (matchedFiles map[string][]byte, err error) {
diff --git a/internal/fetch/package.go b/internal/fetch/package.go
index a172be8..9db9ec8 100644
--- a/internal/fetch/package.go
+++ b/internal/fetch/package.go
@@ -41,8 +41,108 @@
 	err    error                     // non-fatal error when loading the package (e.g. documentation is too large)
 }
 
-// extractPackages returns a slice of packages from a filesystem arranged like a
-// module zip.
+// rel returns the relative path from the modulePath to the pkgPath
+// returning "." if they're the same.
+func rel(pkgPath, modulePath string) string {
+	suff := internal.Suffix(pkgPath, modulePath)
+	if suff == "" {
+		return "."
+	}
+	return suff
+}
+
+// extractPackage returns a package from a filesystem arranged like a module zip.
+// It matches against the given licenses to determine the subset of licenses
+// that applies to the package.
+// It returns a packageVersionState representing the status of doing the work
+// of computing the package after the UnitMeta was computed. The packageVersionState
+// of a package for which no UnitMeta could be produced is reported by extractPackageMetas.
+func extractPackage(ctx context.Context, modulePath, pkgPath string, contentDir fs.FS, d *licenses.Detector, sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (*goPackage, *internal.PackageVersionState, error) {
+	innerPath := rel(pkgPath, modulePath)
+	f, err := contentDir.Open(innerPath)
+	if err != nil {
+		return nil, nil, err
+	}
+	dir, ok := f.(fs.ReadDirFile)
+	if !ok {
+		return nil, nil, fmt.Errorf("file is not a directory")
+	}
+	entries, err := dir.ReadDir(0)
+	if err != nil {
+		panic(err)
+	}
+	var goFiles []string
+	for _, e := range entries {
+		if e.IsDir() {
+			continue
+		}
+		if !strings.HasSuffix(e.Name(), ".go") {
+			// We care about .go files only.
+			continue
+		}
+		goFiles = append(goFiles, path.Join(innerPath, e.Name()))
+	}
+	if len(goFiles) == 0 {
+		// This is a unit but not a package, so return a nil package
+		// for it.
+		return nil, nil, nil
+	}
+
+	var (
+		status error
+		errMsg string
+	)
+	pkg, err := loadPackage(ctx, contentDir, goFiles, innerPath, sourceInfo, modInfo)
+	if bpe := (*BadPackageError)(nil); errors.As(err, &bpe) {
+		log.Infof(ctx, "Error loading %s: %v", innerPath, err)
+		status = derrors.PackageInvalidContents
+		errMsg = err.Error()
+	} else if err != nil {
+		return nil, nil, fmt.Errorf("unexpected error loading package: %v", err)
+	}
+
+	if pkg == nil {
+		// No package.
+		if len(goFiles) > 0 {
+			// There were go files, but no build contexts matched them.
+			status = derrors.PackageBuildContextNotSupported
+		}
+	} else {
+		if errors.Is(pkg.err, godoc.ErrTooLarge) {
+			status = derrors.PackageDocumentationHTMLTooLarge
+			errMsg = pkg.err.Error()
+		} else if pkg.err != nil {
+			// ErrTooLarge is the only valid value of pkg.err.
+			return nil, nil, fmt.Errorf("bad package error for %s: %v", pkg.path, pkg.err)
+		}
+		if d != nil { //  should only be nil for tests
+			isRedist, lics := d.PackageInfo(innerPath)
+			pkg.isRedistributable = isRedist
+			for _, l := range lics {
+				pkg.licenseMeta = append(pkg.licenseMeta, l.Metadata)
+			}
+		}
+	}
+
+	pvs := &internal.PackageVersionState{
+		ModulePath:  modulePath,
+		PackagePath: pkgPath,
+		Version:     modInfo.ResolvedVersion,
+		Status:      derrors.ToStatus(status),
+		Error:       errMsg,
+	}
+
+	return pkg, pvs, nil
+}
+
+type packageMeta struct {
+	path string
+	name string
+}
+
+// extractPackageMetas returns a slice of packageMetas containing only the information
+// needed to produce a UnitMeta from a filesystem arranged like a module zip. extractPackage
+// does the work to complete the package with all the information needed for a Unit.
 // It matches against the given licenses to determine the subset of licenses
 // that applies to each package.
 // The second return value says whether any packages are "incomplete," meaning
@@ -51,7 +151,7 @@
 // * a maximum file size (MaxFileSize)
 // * the particular set of build contexts we consider (goEnvs)
 // * whether the import path is valid.
-func extractPackages(ctx context.Context, modulePath, resolvedVersion string, contentDir fs.FS, d *licenses.Detector, sourceInfo *source.Info) (_ []*goPackage, _ []*internal.PackageVersionState, err error) {
+func extractPackageMetas(ctx context.Context, modulePath, resolvedVersion string, contentDir fs.FS) (_ []*packageMeta, _ *godoc.ModuleInfo, _ []*internal.PackageVersionState, err error) {
 	defer derrors.Wrap(&err, "extractPackages(ctx, %q, %q, r, d)", modulePath, resolvedVersion)
 	ctx, span := trace.StartSpan(ctx, "fetch.extractPackages")
 	defer span.End()
@@ -167,10 +267,10 @@
 		return nil
 	})
 	if errors.Is(err, fs.ErrNotExist) {
-		return nil, nil, fmt.Errorf("no files: %w", ErrModuleContainsNoPackages)
+		return nil, nil, nil, fmt.Errorf("no files: %w", ErrModuleContainsNoPackages)
 	}
 	if err != nil {
-		return nil, nil, err
+		return nil, nil, nil, err
 	}
 
 	for pkgName := range dirs {
@@ -181,12 +281,13 @@
 	// If we got this far, the file metadata was okay.
 	// Start reading the file contents now to extract information
 	// about Go packages.
-	var pkgs []*goPackage
-	var mu sync.Mutex // gards pkgs, incompleteDirs, packageVersionStates
+	var pkgs []*packageMeta
+	var mu sync.Mutex // guards pkgs, incompleteDirs, packageVersionStates
 	var errgroup errgroup.Group
 	for innerPath, goFiles := range dirs {
 		innerPath, goFiles := innerPath, goFiles
 		errgroup.Go(func() error {
+			var addedPackage bool
 			mu.Lock()
 			incomplete := incompleteDirs[innerPath]
 			mu.Unlock()
@@ -200,7 +301,7 @@
 				status error
 				errMsg string
 			)
-			pkg, err := loadPackage(ctx, contentDir, goFiles, innerPath, sourceInfo, modInfo)
+			pkg, err := loadPackageMeta(ctx, contentDir, goFiles, innerPath, modInfo)
 			if bpe := (*BadPackageError)(nil); errors.As(err, &bpe) {
 				log.Infof(ctx, "Error loading %s: %v", innerPath, err)
 				mu.Lock()
@@ -223,48 +324,36 @@
 				}
 				pkgPath = path.Join(modulePath, innerPath)
 			} else {
-				if errors.Is(pkg.err, godoc.ErrTooLarge) {
-					status = derrors.PackageDocumentationHTMLTooLarge
-					errMsg = pkg.err.Error()
-				} else if pkg.err != nil {
-					// ErrTooLarge is the only valid value of pkg.err.
-					return fmt.Errorf("bad package error for %s: %v", pkg.path, pkg.err)
-				}
-				if d != nil { //  should only be nil for tests
-					isRedist, lics := d.PackageInfo(innerPath)
-					pkg.isRedistributable = isRedist
-					for _, l := range lics {
-						pkg.licenseMeta = append(pkg.licenseMeta, l.Metadata)
-					}
-				}
-
 				mu.Lock()
 				pkgs = append(pkgs, pkg)
 				mu.Unlock()
+				addedPackage = true
 				pkgPath = pkg.path
 			}
-			mu.Lock()
-			packageVersionStates = append(packageVersionStates, &internal.PackageVersionState{
-				ModulePath:  modulePath,
-				PackagePath: pkgPath,
-				Version:     resolvedVersion,
-				Status:      derrors.ToStatus(status),
-				Error:       errMsg,
-			})
-			mu.Unlock()
+			if !addedPackage {
+				mu.Lock()
+				packageVersionStates = append(packageVersionStates, &internal.PackageVersionState{
+					ModulePath:  modulePath,
+					PackagePath: pkgPath,
+					Version:     resolvedVersion,
+					Status:      derrors.ToStatus(status),
+					Error:       errMsg,
+				})
+				mu.Unlock()
+			}
 
 			return nil
 		})
 	}
 
 	if err := errgroup.Wait(); err != nil {
-		return nil, nil, err
+		return nil, nil, nil, err
 	}
 
 	if len(pkgs) == 0 {
-		return nil, packageVersionStates, ErrModuleContainsNoPackages
+		return nil, nil, packageVersionStates, ErrModuleContainsNoPackages
 	}
-	return pkgs, packageVersionStates, nil
+	return pkgs, modInfo, packageVersionStates, nil
 }
 
 // ignoredByGoTool reports whether the given import path corresponds
diff --git a/internal/fetch/readme.go b/internal/fetch/readme.go
index 1823e13..e38bdb3 100644
--- a/internal/fetch/readme.go
+++ b/internal/fetch/readme.go
@@ -6,7 +6,6 @@
 package fetch
 
 import (
-	"errors"
 	"fmt"
 	"io/fs"
 	"os"
@@ -17,48 +16,6 @@
 	"golang.org/x/pkgsite/internal/derrors"
 )
 
-// extractReadmes returns the file path and contents of all files from r
-// that are README files.
-func extractReadmes(modulePath, resolvedVersion string, contentDir fs.FS) (_ []*internal.Readme, err error) {
-	defer derrors.Wrap(&err, "extractReadmes(ctx, %q, %q, r)", modulePath, resolvedVersion)
-
-	var readmes []*internal.Readme
-	err = fs.WalkDir(contentDir, ".", func(pathname string, d fs.DirEntry, err error) error {
-		if err != nil {
-			return err
-		}
-
-		if !d.IsDir() {
-			return nil
-		}
-
-		readme, err := extractReadme(modulePath, path.Join(modulePath, pathname), resolvedVersion, contentDir)
-		if err != nil {
-			return err
-		}
-		if readme == nil {
-			// no readme for the directory
-			return nil
-		}
-		readmes = append(readmes, readme)
-		return nil
-	})
-	if err != nil && !errors.Is(err, fs.ErrNotExist) { // we can get NotExist on an empty FS {
-		return nil, err
-	}
-	return readmes, nil
-}
-
-// rel returns the relative path from the modulePath to the pkgPath
-// returning "." if they're the same.
-func rel(pkgPath, modulePath string) string {
-	suff := internal.Suffix(pkgPath, modulePath)
-	if suff == "" {
-		return "."
-	}
-	return suff
-}
-
 // extractReadme returns the file path and contents the unit's README,
 // if there is one. dir is the directory path prefixed with the modulePath.
 func extractReadme(modulePath, dir, resolvedVersion string, contentDir fs.FS) (_ *internal.Readme, err error) {
diff --git a/internal/fetch/readme_test.go b/internal/fetch/readme_test.go
index a4c4400..e037cda 100644
--- a/internal/fetch/readme_test.go
+++ b/internal/fetch/readme_test.go
@@ -8,7 +8,6 @@
 	"context"
 	"errors"
 	"io/fs"
-	"sort"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
@@ -19,70 +18,63 @@
 	"golang.org/x/pkgsite/internal/testenv"
 )
 
-func TestExtractReadmes(t *testing.T) {
+func TestExtractReadme(t *testing.T) {
 	testenv.MustHaveExecPath(t, "git")
 	defer stdlib.WithTestData()()
 
 	ctx := context.Background()
 
-	sortReadmes := func(readmes []*internal.Readme) {
-		sort.Slice(readmes, func(i, j int) bool {
-			return readmes[i].Filepath < readmes[j].Filepath
-		})
-	}
-
 	for _, test := range []struct {
-		name, modulePath, version string
-		files                     map[string]string
-		want                      []*internal.Readme
+		name, modulePath, pkgPath, version string
+		files                              map[string]string
+		want                               *internal.Readme
 	}{
 		{
 			name:       "README at root and README in unit and README in _",
 			modulePath: stdlib.ModulePath,
+			pkgPath:    "cmd/pprof",
 			version:    "v1.12.5",
-			want: []*internal.Readme{
-				{
-					Filepath: "cmd/pprof/README",
-					Contents: "This directory is the copy of Google's pprof shipped as part of the Go distribution.\n",
-				},
+			want: &internal.Readme{
+				Filepath: "cmd/pprof/README",
+				Contents: "This directory is the copy of Google's pprof shipped as part of the Go distribution.\n",
 			},
 		},
 		{
 			name:       "directory start with _",
 			modulePath: "github.com/my/module",
+			pkgPath:    "github.com/my/module/_foo",
 			version:    "v1.0.0",
 			files: map[string]string{
 				"_foo/README.md": "README",
 			},
+			want: nil,
 		},
 		{
 			name:       "prefer README.md",
 			modulePath: "github.com/my/module",
+			pkgPath:    "github.com/my/module/foo",
 			version:    "v1.0.0",
 			files: map[string]string{
 				"foo/README":    "README",
 				"foo/README.md": "README",
 			},
-			want: []*internal.Readme{
-				{
-					Filepath: "foo/README.md",
-					Contents: "README",
-				},
+			want: &internal.Readme{
+				Filepath: "foo/README.md",
+				Contents: "README",
 			},
 		},
 		{
 			name:       "prefer readme.markdown",
 			modulePath: "github.com/my/module",
+			pkgPath:    "github.com/my/module/foo",
 			version:    "v1.0.0",
 			files: map[string]string{
 				"foo/README.markdown": "README",
 				"foo/readme.rst":      "README",
 			},
-			want: []*internal.Readme{
-				{
-					Filepath: "foo/README.markdown",
-					Contents: "README",
-				},
+			want: &internal.Readme{
+				Filepath: "foo/README.markdown",
+				Contents: "README",
 			},
 		},
 		{
@@ -98,6 +90,7 @@
 			files: map[string]string{
 				"foo/README/bar": "README",
 			},
+			want: nil,
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
@@ -123,13 +116,11 @@
 					t.Fatal(err)
 				}
 			}
-			got, err := extractReadmes(test.modulePath, test.version, contentDir)
+			got, err := extractReadme(test.modulePath, test.pkgPath, test.version, contentDir)
 			if err != nil {
 				t.Fatal(err)
 			}
 
-			sortReadmes(test.want)
-			sortReadmes(got)
 			if diff := cmp.Diff(test.want, got); diff != "" {
 				t.Errorf("mismatch (-want +got):\n%s", diff)
 			}
@@ -142,6 +133,7 @@
 
 	var (
 		modulePath = "github.com/my/module"
+		pkgPath    = "github.com/my/module/foo"
 		version    = "v1.0.0"
 		files      = map[string]string{
 			"foo/README.md": string(make([]byte, MaxFileSize+100)),
@@ -162,7 +154,7 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	got, err := extractReadmes(modulePath, version, contentDir)
+	got, err := extractReadme(modulePath, pkgPath, version, contentDir)
 	if err == nil {
 		t.Fatalf("want error, got %v", cmp.Diff([]*internal.Readme{}, got))
 	}
diff --git a/internal/fetch/unit.go b/internal/fetch/unit.go
index 9963cda..e0c9c6f 100644
--- a/internal/fetch/unit.go
+++ b/internal/fetch/unit.go
@@ -13,71 +13,70 @@
 	"golang.org/x/pkgsite/internal/stdlib"
 )
 
-// moduleUnits returns all of the units in a given module, along
-// with the contents for those units.
-func moduleUnits(modulePath string, minfo internal.ModuleInfo,
-	pkgs []*goPackage,
-	readmes []*internal.Readme,
-	d *licenses.Detector) []*internal.Unit {
-	pkgLookup := map[string]*goPackage{}
+// moduleUnit builds the internal.Unit for unitMeta: licenses and redistributability from d, the readme
+// when readme is non-nil, and name, imports, documentation and sorted build contexts when pkg is non-nil.
+func moduleUnit(modulePath string, unitMeta *internal.UnitMeta,
+	pkg *goPackage,
+	readme *internal.Readme,
+	d *licenses.Detector) *internal.Unit {
+	// License lookup key: internal.Suffix of the unit path, except for std, where the unit path itself is used.
+	suffix := internal.Suffix(unitMeta.Path, modulePath)
+	if modulePath == stdlib.ModulePath {
+		suffix = unitMeta.Path
+	}
+	isRedist, lics := d.PackageInfo(suffix)
+	var meta []*licenses.Metadata
+	for _, l := range lics {
+		meta = append(meta, l.Metadata)
+	}
+	unit := &internal.Unit{
+		UnitMeta:          *unitMeta,
+		Licenses:          meta,
+		IsRedistributable: isRedist,
+	}
+	if readme != nil {
+		unit.Readme = readme
+	}
+	if pkg != nil {
+		unit.Name = pkg.name
+		unit.Imports = pkg.imports
+		unit.Documentation = pkg.docs
+		var bcs []internal.BuildContext
+		for _, d := range unit.Documentation { // NOTE: d here shadows the *licenses.Detector parameter d
+			bcs = append(bcs, internal.BuildContext{GOOS: d.GOOS, GOARCH: d.GOARCH})
+		}
+		sort.Slice(bcs, func(i, j int) bool {
+			return internal.CompareBuildContexts(bcs[i], bcs[j]) < 0
+		})
+		unit.BuildContexts = bcs
+	}
+	return unit
+}
+
+// moduleUnitMetas returns UnitMetas for all the units in a given module.
+func moduleUnitMetas(minfo internal.ModuleInfo, pkgs []*packageMeta) []*internal.UnitMeta {
+	pkgLookup := map[string]*packageMeta{}
 	for _, pkg := range pkgs {
 		pkgLookup[pkg.path] = pkg
 	}
-	dirPaths := unitPaths(modulePath, pkgs)
+	dirPaths := unitPaths(minfo.ModulePath, pkgs)
 
-	readmeLookup := map[string]*internal.Readme{}
-	for _, readme := range readmes {
-		if path.Dir(readme.Filepath) == "." {
-			readmeLookup[modulePath] = readme
-		} else if modulePath == stdlib.ModulePath {
-			readmeLookup[path.Dir(readme.Filepath)] = readme
-		} else {
-			readmeLookup[path.Join(modulePath, path.Dir(readme.Filepath))] = readme
-		}
-	}
-
-	var units []*internal.Unit
+	var ums []*internal.UnitMeta
 	for _, dirPath := range dirPaths {
-		suffix := internal.Suffix(dirPath, modulePath)
-		if modulePath == stdlib.ModulePath {
-			suffix = dirPath
-		}
-		isRedist, lics := d.PackageInfo(suffix)
-		var meta []*licenses.Metadata
-		for _, l := range lics {
-			meta = append(meta, l.Metadata)
-		}
-		dir := &internal.Unit{
-			UnitMeta: internal.UnitMeta{
-				ModuleInfo: minfo,
-				Path:       dirPath,
-			},
-			Licenses:          meta,
-			IsRedistributable: isRedist,
-		}
-		if r, ok := readmeLookup[dirPath]; ok {
-			dir.Readme = r
+		um := &internal.UnitMeta{
+			ModuleInfo: minfo,
+			Path:       dirPath,
 		}
 		if pkg, ok := pkgLookup[dirPath]; ok {
-			dir.Name = pkg.name
-			dir.Imports = pkg.imports
-			dir.Documentation = pkg.docs
-			var bcs []internal.BuildContext
-			for _, d := range dir.Documentation {
-				bcs = append(bcs, internal.BuildContext{GOOS: d.GOOS, GOARCH: d.GOARCH})
-			}
-			sort.Slice(bcs, func(i, j int) bool {
-				return internal.CompareBuildContexts(bcs[i], bcs[j]) < 0
-			})
-			dir.BuildContexts = bcs
+			um.Name = pkg.name
 		}
-		units = append(units, dir)
+		ums = append(ums, um)
 	}
-	return units
+	return ums
 }
 
 // unitPaths returns the paths for all the units in a module.
-func unitPaths(modulePath string, packages []*goPackage) []string {
+func unitPaths(modulePath string, packageMetas []*packageMeta) []string {
 	shouldContinue := func(p string) bool {
 		if modulePath == stdlib.ModulePath {
 			return p != "."
@@ -86,7 +85,7 @@
 	}
 
 	pathSet := map[string]bool{modulePath: true}
-	for _, p := range packages {
+	for _, p := range packageMetas {
 		for p := p.path; shouldContinue(p); p = path.Dir(p) {
 			pathSet[p] = true
 		}
diff --git a/internal/fetch/unit_test.go b/internal/fetch/unit_test.go
index 359ac2f..a4a1130 100644
--- a/internal/fetch/unit_test.go
+++ b/internal/fetch/unit_test.go
@@ -10,9 +10,7 @@
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
-	"golang.org/x/pkgsite/internal"
 	"golang.org/x/pkgsite/internal/stdlib"
-	"golang.org/x/pkgsite/internal/testing/sample"
 )
 
 func TestDirectoryPaths(t *testing.T) {
@@ -58,9 +56,9 @@
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
-			var packages []*goPackage
+			var packages []*packageMeta
 			for _, suffix := range test.packageSuffixes {
-				packages = append(packages, samplePackage(test.modulePath, suffix))
+				packages = append(packages, samplePackageMeta(test.modulePath, suffix))
 			}
 			got := unitPaths(test.modulePath, packages)
 			sort.Strings(got)
@@ -79,20 +77,11 @@
 // is the concatenation of modulePath and suffix.
 //
 // The package name is last component of the package path.
-func samplePackage(modulePath, suffix string) *goPackage {
+func samplePackageMeta(modulePath, suffix string) *packageMeta {
 	p := constructFullPath(modulePath, suffix)
-	return &goPackage{
-		name:              path.Base(p),
-		path:              p,
-		v1path:            internal.V1Path(p, modulePath),
-		isRedistributable: true,
-		licenseMeta:       sample.LicenseMetadata(),
-		imports:           sample.Imports(),
-		docs: []*internal.Documentation{{
-			GOOS:     sample.GOOS,
-			GOARCH:   sample.GOARCH,
-			Synopsis: sample.Doc.Synopsis,
-		}},
+	return &packageMeta{
+		name: path.Base(p),
+		path: p,
 	}
 }
 
diff --git a/internal/fetchdatasource/fetchdatasource.go b/internal/fetchdatasource/fetchdatasource.go
index f8ae0d2..44be189 100644
--- a/internal/fetchdatasource/fetchdatasource.go
+++ b/internal/fetchdatasource/fetchdatasource.go
@@ -60,14 +60,14 @@
 // cacheEntry holds a fetched module or an error, if the fetch failed.
 type cacheEntry struct {
 	g      fetch.ModuleGetter
-	module *internal.Module
+	module *fetch.LazyModule
 	err    error
 }
 
 const maxCachedModules = 100
 
 // cacheGet returns information from the cache if it is present, and (nil, nil) otherwise.
-func (ds *FetchDataSource) cacheGet(path, version string) (fetch.ModuleGetter, *internal.Module, error) {
+func (ds *FetchDataSource) cacheGet(path, version string) (fetch.ModuleGetter, *fetch.LazyModule, error) {
 	// Look for an exact match first, then use LocalVersion, as for a
 	// directory-based or GOPATH-mode module.
 	for _, v := range []string{version, fetch.LocalVersion} {
@@ -79,13 +79,13 @@
 }
 
 // cachePut puts information into the cache.
-func (ds *FetchDataSource) cachePut(g fetch.ModuleGetter, path, version string, m *internal.Module, err error) {
+func (ds *FetchDataSource) cachePut(g fetch.ModuleGetter, path, version string, m *fetch.LazyModule, err error) {
 	ds.cache.Put(internal.Modver{Path: path, Version: version}, cacheEntry{g, m, err})
 }
 
 // getModule gets the module at the given path and version. It first checks the
 // cache, and if it isn't there it then tries to fetch it.
-func (ds *FetchDataSource) getModule(ctx context.Context, modulePath, vers string) (_ *internal.Module, err error) {
+func (ds *FetchDataSource) getModule(ctx context.Context, modulePath, vers string) (_ *fetch.LazyModule, err error) {
 	defer derrors.Wrap(&err, "FetchDataSource.getModule(%q, %q)", modulePath, vers)
 
 	g, mod, err := ds.cacheGet(modulePath, vers)
@@ -121,13 +121,6 @@
 			lmv.PopulateModuleInfo(&m.ModuleInfo)
 		}
 	}
-	// Populate unit subdirectories. When we use a database, this only happens when we read
-	// a unit from the DB.
-	if m != nil {
-		for _, u := range m.Units {
-			ds.populateUnitSubdirectories(u, m)
-		}
-	}
 
 	// Cache both successes and failures, but not cancellations.
 	if !errors.Is(err, context.Canceled) {
@@ -143,47 +136,35 @@
 
 // fetch fetches a module using the configured ModuleGetters.
 // It tries each getter in turn until it finds one that has the module.
-func (ds *FetchDataSource) fetch(ctx context.Context, modulePath, version string) (_ *internal.Module, g fetch.ModuleGetter, err error) {
+func (ds *FetchDataSource) fetch(ctx context.Context, modulePath, version string) (_ *fetch.LazyModule, g fetch.ModuleGetter, err error) {
 	log.Infof(ctx, "FetchDataSource: fetching %s@%s", modulePath, version)
 	start := time.Now()
 	defer func() {
 		log.Infof(ctx, "FetchDataSource: fetched %s@%s using %T in %s with error %v", modulePath, version, g, time.Since(start), err)
 	}()
 	for _, g := range ds.opts.Getters {
-		fr := fetch.FetchModule(ctx, modulePath, version, g)
-		if fr.Error == nil {
-			m := fr.Module
+		m := fetch.FetchLazyModule(ctx, modulePath, version, g)
+		if m.Error == nil {
 			if ds.opts.BypassLicenseCheck {
 				m.IsRedistributable = true
-				for _, unit := range m.Units {
-					unit.IsRedistributable = true
-				}
-			} else {
-				m.RemoveNonRedistributableData()
 			}
 			return m, g, nil
 		}
-		if !errors.Is(fr.Error, derrors.NotFound) {
-			return nil, g, fr.Error
+		if !errors.Is(m.Error, derrors.NotFound) {
+			return nil, g, m.Error
 		}
 	}
 	return nil, nil, fmt.Errorf("%s@%s: %w", modulePath, version, derrors.NotFound)
 }
 
-func (ds *FetchDataSource) populateUnitSubdirectories(u *internal.Unit, m *internal.Module) {
+func (ds *FetchDataSource) populateUnitSubdirectories(u *internal.Unit, m *fetch.LazyModule) {
 	p := u.Path + "/"
-	for _, u2 := range m.Units {
+	for _, u2 := range m.UnitMetas {
 		if strings.HasPrefix(u2.Path, p) || u.Path == "std" {
-			var syn string
-			if len(u2.Documentation) > 0 {
-				syn = u2.Documentation[0].Synopsis
-			}
 			u.Subdirectories = append(u.Subdirectories, &internal.PackageMeta{
-				Path:              u2.Path,
-				Name:              u2.Name,
-				Synopsis:          syn,
-				IsRedistributable: u2.IsRedistributable,
-				Licenses:          u2.Licenses,
+				Path: u2.Path,
+				Name: u2.Name,
+				// Synopsis, IsRedistributable, and Licenses are not populated from FetchDataSource.
 			})
 		}
 	}
@@ -191,7 +172,7 @@
 
 // findModule finds the module with longest module path containing the given
 // package path. It returns an error if no module is found.
-func (ds *FetchDataSource) findModule(ctx context.Context, pkgPath, modulePath, version string) (_ *internal.Module, err error) {
+func (ds *FetchDataSource) findModule(ctx context.Context, pkgPath, modulePath, version string) (_ *fetch.LazyModule, err error) {
 	defer derrors.Wrap(&err, "FetchDataSource.findModule(%q, %q, %q)", pkgPath, modulePath, version)
 
 	if modulePath != internal.UnknownModulePath {
@@ -218,16 +199,7 @@
 	if err != nil {
 		return nil, err
 	}
-	um := &internal.UnitMeta{
-		Path:       path,
-		ModuleInfo: module.ModuleInfo,
-	}
-	u := findUnit(module, path)
-	if u == nil {
-		return nil, derrors.NotFound
-	}
-	um.Name = u.Name
-	return um, nil
+	return findUnitMeta(module, path)
 }
 
 // GetUnit returns information about a unit. Both the module path and package
@@ -239,7 +211,7 @@
 	if err != nil {
 		return nil, err
 	}
-	u := findUnit(m, um.Path)
+	u, err := ds.findUnit(ctx, m, um.Path)
 	if u == nil {
 		return nil, fmt.Errorf("import path %s not found in module %s: %w", um.Path, um.ModulePath, derrors.NotFound)
 	}
@@ -256,13 +228,27 @@
 }
 
 // findUnit returns the unit with the given path in m, or nil if none.
-func findUnit(m *internal.Module, path string) *internal.Unit {
-	for _, u := range m.Units {
-		if u.Path == path {
-			return u
+func (ds *FetchDataSource) findUnit(ctx context.Context, m *fetch.LazyModule, path string) (*internal.Unit, error) {
+	unit, err := m.Unit(ctx, path)
+	if err != nil {
+		return nil, err
+	}
+	ds.populateUnitSubdirectories(unit, m) // must follow the error check: it dereferences unit, which is unusable on failure
+	if ds.opts.BypassLicenseCheck {
+		unit.IsRedistributable = true
+	} else {
+		unit.RemoveNonRedistributableData()
+	}
+	return unit, nil
+}
+
+func findUnitMeta(m *fetch.LazyModule, path string) (*internal.UnitMeta, error) {
+	for _, um := range m.UnitMetas {
+		if um.Path == path {
+			return um, nil
 		}
 	}
-	return nil
+	return nil, derrors.NotFound
 }
 
 // matchingDoc returns the Documentation that matches the given build context
diff --git a/internal/fetchdatasource/fetchdatasource_test.go b/internal/fetchdatasource/fetchdatasource_test.go
index 53bca65..05cb121 100644
--- a/internal/fetchdatasource/fetchdatasource_test.go
+++ b/internal/fetchdatasource/fetchdatasource_test.go
@@ -560,13 +560,13 @@
 
 func TestCache(t *testing.T) {
 	ds := Options{}.New()
-	m1 := &internal.Module{}
+	m1 := &fetch.LazyModule{}
 	ds.cachePut(nil, "m1", fetch.LocalVersion, m1, nil)
 	ds.cachePut(nil, "m2", "v1.0.0", nil, derrors.NotFound)
 
 	for _, test := range []struct {
 		path, version string
-		wantm         *internal.Module
+		wantm         *fetch.LazyModule
 		wante         error
 	}{
 		{"m1", fetch.LocalVersion, m1, nil},
diff --git a/internal/worker/fetch.go b/internal/worker/fetch.go
index 28bfcc8..2c7e048 100644
--- a/internal/worker/fetch.go
+++ b/internal/worker/fetch.go
@@ -90,7 +90,9 @@
 // fetchTask represents the result of a fetch task that was processed.
 type fetchTask struct {
 	fetch.FetchResult
-	timings map[string]time.Duration
+	MainVersion   string
+	MasterVersion string
+	timings       map[string]time.Duration
 }
 
 // A Fetcher holds state for fetching modules.