cmd/go: add per-package indexing for modules outside mod cache

Packages outside the module cache, including the standard library, will be
indexed individually rather than as a whole module.

For #52876

Change-Id: I142dad6a790e9e8eb4dc6430a588fbfa86552e49
Reviewed-on: https://go-review.googlesource.com/c/go/+/413815
Reviewed-by: Michael Matloob <matloob@golang.org>
Run-TryBot: Michael Matloob <matloob@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/src/cmd/go/internal/load/pkg.go b/src/cmd/go/internal/load/pkg.go
index 1a7b9d2..95a06a3 100644
--- a/src/cmd/go/internal/load/pkg.go
+++ b/src/cmd/go/internal/load/pkg.go
@@ -878,8 +878,8 @@
 				buildMode = build.ImportComment
 			}
 			if modroot := modload.PackageModRoot(ctx, r.path); modroot != "" {
-				if mi, err := modindex.Get(modroot); err == nil {
-					data.p, data.err = mi.Import(cfg.BuildContext, mi.RelPath(r.dir), buildMode)
+				if rp, err := modindex.GetPackage(modroot, r.dir); err == nil {
+					data.p, data.err = rp.Import(cfg.BuildContext, buildMode)
 					goto Happy
 				} else if !errors.Is(err, modindex.ErrNotIndexed) {
 					base.Fatalf("go: %v", err)
diff --git a/src/cmd/go/internal/modindex/index_format.txt b/src/cmd/go/internal/modindex/index_format.txt
index 3768eea..c74b1d4 100644
--- a/src/cmd/go/internal/modindex/index_format.txt
+++ b/src/cmd/go/internal/modindex/index_format.txt
@@ -7,6 +7,8 @@
 is null-terminated. String offsets are relative to the start of the string table.
 Bools are written as uint32s: 0 for false and 1 for true.
 
+The following is the format for a full module:
+
 “go index v0\n”
 str uint32 - offset of string table
 n uint32 - number of packages
@@ -40,7 +42,16 @@
 			position - file, offset, line, column - uint32
 [string table]
 
-// parseError struct
+The following is the format for a single indexed package:
+
+“go index v0\n”
+str uint32 - offset of string table
+for the single RawPackage:
+    [same RawPackage format as above]
+[string table]
+
+The following is the definition of the JSON-serialized parseError struct:
+
 type parseError struct {
 	ErrorList *scanner.ErrorList // non-nil if the error was an ErrorList, nil otherwise
 	ErrorString string // non-empty for all other cases
diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go
index ea1ebb0..65a1ecf 100644
--- a/src/cmd/go/internal/modindex/read.go
+++ b/src/cmd/go/internal/modindex/read.go
@@ -23,11 +23,13 @@
 	"sort"
 	"strings"
 	"sync"
+	"time"
 	"unsafe"
 
 	"cmd/go/internal/base"
 	"cmd/go/internal/cache"
 	"cmd/go/internal/cfg"
+	"cmd/go/internal/fsys"
 	"cmd/go/internal/imports"
 	"cmd/go/internal/par"
 	"cmd/go/internal/str"
@@ -39,20 +41,16 @@
 // module index.
 var enabled bool = godebug.Get("goindex") != "0"
 
-// ModuleIndex represents and encoded module index file. It is used to
+// Module represents an encoded module index file. It is used to
 // do the equivalent of build.Import of packages in the module and answer other
 // questions based on the index file's data.
-type ModuleIndex struct {
+type Module struct {
 	modroot      string
 	od           offsetDecoder
 	packages     map[string]int // offsets of each package
 	packagePaths []string       // paths to package directories relative to modroot; these are the keys of packages
 }
 
-var fcache par.Cache
-
-var salt = godebug.Get("goindexsalt")
-
 // moduleHash returns an ActionID corresponding to the state of the module
 // located at filesystem path modroot.
 func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) {
@@ -75,7 +73,45 @@
 	}
 
 	h := cache.NewHash("moduleIndex")
-	fmt.Fprintf(h, "module index %s %s %s %v\n", runtime.Version(), salt, indexVersion, modroot)
+	fmt.Fprintf(h, "module index %s %s %v\n", runtime.Version(), indexVersion, modroot)
+	return h.Sum(), nil
+}
+
+const modTimeCutoff = 2 * time.Second
+
+// dirHash returns an ActionID corresponding to the state of the package
+// located at filesystem path pkgdir.
+func dirHash(pkgdir string) (cache.ActionID, error) {
+	h := cache.NewHash("moduleIndex")
+	fmt.Fprintf(h, "package %s %s %v\n", runtime.Version(), indexVersion, pkgdir)
+	entries, err := fsys.ReadDir(pkgdir)
+	if err != nil {
+		// pkgdir might not be a directory. give up on hashing.
+		return cache.ActionID{}, ErrNotIndexed
+	}
+	cutoff := time.Now().Add(-modTimeCutoff)
+	for _, info := range entries {
+		if info.IsDir() {
+			continue
+		}
+
+		if !info.Mode().IsRegular() {
+			return cache.ActionID{}, ErrNotIndexed
+		}
+		// To avoid problems for very recent files where a new
+		// write might not change the mtime due to file system
+		// mtime precision, reject caching if a file was read that
+		// is less than modTimeCutoff old.
+		//
+		// This is the same strategy used for hashing test inputs.
+		// See hashOpen in cmd/go/internal/test/test.go for the
+		// corresponding code.
+		if info.ModTime().After(cutoff) {
+			return cache.ActionID{}, ErrNotIndexed
+		}
+
+		fmt.Fprintf(h, "file %v %v %v\n", info.Name(), info.ModTime(), info.Size())
+	}
 	return h.Sum(), nil
 }
 
@@ -83,31 +119,61 @@
 
 var ErrNotIndexed = errors.New("not in module index")
 
-// Get returns the ModuleIndex for the module rooted at modroot.
+var (
+	errDisabled           = fmt.Errorf("%w: module indexing disabled", ErrNotIndexed)
+	errNotFromModuleCache = fmt.Errorf("%w: not from module cache", ErrNotIndexed)
+)
+
+// GetPackage returns the IndexPackage for the package at the given path.
 // It will return ErrNotIndexed if the directory should be read without
 // using the index, for instance because the index is disabled, or the packgae
 // is not in a module.
-func Get(modroot string) (*ModuleIndex, error) {
-	if !enabled || cache.DefaultDir() == "off" || cfg.BuildMod == "vendor" {
-		return nil, ErrNotIndexed
+func GetPackage(modroot, pkgdir string) (*IndexPackage, error) {
+	mi, err := GetModule(modroot)
+	if err == nil {
+		return mi.Package(relPath(pkgdir, modroot)), nil
 	}
-	if modroot == "" {
-		panic("modindex.Get called with empty modroot")
+	if !errors.Is(err, errNotFromModuleCache) {
+		return nil, err
 	}
-	modroot = filepath.Clean(modroot)
-	isModCache := str.HasFilePathPrefix(modroot, cfg.GOMODCACHE)
-	return openIndex(modroot, isModCache)
+	return openIndexPackage(modroot, pkgdir)
 }
 
-// openIndex returns the module index for modPath.
+// GetModule returns the Module for the given modroot.
+// It will return ErrNotIndexed if the directory should be read without
+// using the index, for instance because the index is disabled, or the package
+// is not in a module.
+func GetModule(modroot string) (*Module, error) {
+	if !enabled || cache.DefaultDir() == "off" {
+		return nil, errDisabled
+	}
+	if modroot == "" {
+		panic("modindex.GetPackage called with empty modroot")
+	}
+	if cfg.BuildMod == "vendor" {
+		// Even if the main module is in the module cache,
+		// its vendored dependencies are not loaded from their
+		// usual cached locations.
+		return nil, errNotFromModuleCache
+	}
+	modroot = filepath.Clean(modroot)
+	if !str.HasFilePathPrefix(modroot, cfg.GOMODCACHE) {
+		return nil, errNotFromModuleCache
+	}
+	return openIndexModule(modroot, true)
+}
+
+var mcache par.Cache
+
+// openIndexModule returns the module index for modPath.
 // It will return ErrNotIndexed if the module can not be read
 // using the index because it contains symlinks.
-func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) {
+func openIndexModule(modroot string, ismodcache bool) (*Module, error) {
 	type result struct {
-		mi  *ModuleIndex
+		mi  *Module
 		err error
 	}
-	r := fcache.Do(modroot, func() any {
+	r := mcache.Do(modroot, func() any {
 		id, err := moduleHash(modroot, ismodcache)
 		if err != nil {
 			return result{nil, err}
@@ -133,8 +199,38 @@
 	return r.mi, r.err
 }
 
-// fromBytes returns a *ModuleIndex given the encoded representation.
-func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) {
+var pcache par.Cache
+
+func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
+	type result struct {
+		pkg *IndexPackage
+		err error
+	}
+	r := pcache.Do(pkgdir, func() any {
+		id, err := dirHash(pkgdir)
+		if err != nil {
+			return result{nil, err}
+		}
+		data, _, err := cache.Default().GetMmap(id)
+		if err != nil {
+			// Couldn't read from index. Assume we couldn't read from
+			// the index because the package hasn't been indexed yet.
+			data = indexPackage(modroot, pkgdir)
+			if err = cache.Default().PutBytes(id, data); err != nil {
+				return result{nil, err}
+			}
+		}
+		pkg, err := packageFromBytes(modroot, data)
+		if err != nil {
+			return result{nil, err}
+		}
+		return result{pkg, nil}
+	}).(result)
+	return r.pkg, r.err
+}
+
+// fromBytes returns a *Module given the encoded representation.
+func fromBytes(moddir string, data []byte) (mi *Module, err error) {
 	if !enabled {
 		panic("use of index")
 	}
@@ -184,7 +280,7 @@
 		packages[packagePaths[i]] = packageOffsets[i]
 	}
 
-	return &ModuleIndex{
+	return &Module{
 		moddir,
 		offsetDecoder{data, st},
 		packages,
@@ -192,21 +288,60 @@
 	}, nil
 }
 
+// packageFromBytes returns a *IndexPackage given the encoded representation.
+func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) {
+	if !enabled {
+		panic("use of package index when not enabled")
+	}
+
+	// SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
+	// that all its errors satisfy this interface, we'll only check for these errors so that
+	// we don't suppress panics that could have been produced from other sources.
+	type addrer interface {
+		Addr() uintptr
+	}
+
+	// set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
+	// in case it's mmapped (the common case).
+	old := debug.SetPanicOnFault(true)
+	defer func() {
+		debug.SetPanicOnFault(old)
+		if e := recover(); e != nil {
+			if _, ok := e.(addrer); ok {
+				// This panic was almost certainly caused by SetPanicOnFault.
+				err = fmt.Errorf("error reading module index: %v", e)
+				return
+			}
+			// The panic was likely not caused by SetPanicOnFault.
+			panic(e)
+		}
+	}()
+
+	gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
+	if string(gotVersion) != indexVersion {
+		return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+	}
+	stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
+	st := newStringTable(data[stringTableOffset:])
+	d := &decoder{unread, st}
+	p = decodePackage(d, offsetDecoder{data, st})
+	p.modroot = modroot
+	return p, nil
+}
+
 // Returns a list of directory paths, relative to the modroot, for
 // packages contained in the module index.
-func (mi *ModuleIndex) Packages() []string {
+func (mi *Module) Packages() []string {
 	return mi.packagePaths
 }
 
-// RelPath returns the path relative to the module's root.
-func (mi *ModuleIndex) RelPath(path string) string {
-	return str.TrimFilePathPrefix(filepath.Clean(path), mi.modroot) // mi.modroot is already clean
+// relPath returns the path relative to the module's root.
+func relPath(path, modroot string) string {
+	return str.TrimFilePathPrefix(filepath.Clean(path), filepath.Clean(modroot))
 }
 
-// ImportPackage is the equivalent of build.Import given the information in ModuleIndex.
-func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.ImportMode) (p *build.Package, err error) {
-	rp := mi.indexPackage(relpath)
-
+// Import is the equivalent of build.Import given the information in IndexPackage.
+func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) {
 	defer func() {
 		if e := recover(); e != nil {
 			err = fmt.Errorf("error reading module index: %v", e)
@@ -218,7 +353,7 @@
 	p = &build.Package{}
 
 	p.ImportPath = "."
-	p.Dir = filepath.Join(mi.modroot, rp.dir)
+	p.Dir = filepath.Join(rp.modroot, rp.dir)
 
 	var pkgerr error
 	switch ctxt.Compiler {
@@ -236,7 +371,7 @@
 	inTestdata := func(sub string) bool {
 		return strings.Contains(sub, "/testdata/") || strings.HasSuffix(sub, "/testdata") || str.HasPathPrefix(sub, "testdata")
 	}
-	if !inTestdata(relpath) {
+	if !inTestdata(rp.dir) {
 		// In build.go, p.Root should only be set in the non-local-import case, or in
 		// GOROOT or GOPATH. Since module mode only calls Import with path set to "."
 		// and the module index doesn't apply outside modules, the GOROOT case is
@@ -248,8 +383,8 @@
 		if ctxt.GOROOT != "" && str.HasFilePathPrefix(p.Dir, cfg.GOROOTsrc) && p.Dir != cfg.GOROOTsrc {
 			p.Root = ctxt.GOROOT
 			p.Goroot = true
-			modprefix := str.TrimFilePathPrefix(mi.modroot, cfg.GOROOTsrc)
-			p.ImportPath = relpath
+			modprefix := str.TrimFilePathPrefix(rp.modroot, cfg.GOROOTsrc)
+			p.ImportPath = rp.dir
 			if modprefix != "" {
 				p.ImportPath = filepath.Join(modprefix, p.ImportPath)
 			}
@@ -521,20 +656,21 @@
 		reldir = str.TrimFilePathPrefix(reldir, "cmd")
 		modroot = filepath.Join(modroot, "cmd")
 	}
-	mod, err := Get(modroot)
-	if err != nil {
+	if _, err := GetPackage(modroot, filepath.Join(modroot, reldir)); err == nil {
+		// Note that goroot.IsStandardPackage doesn't check that the directory
+		// actually contains any Go files, merely that it exists. GetPackage
+		// returning a nil error is enough for us to know the directory exists.
+		return true
+	} else if errors.Is(err, ErrNotIndexed) {
+		// Fall back because package isn't indexable. (Probably because
+		// a file was modified recently)
 		return goroot.IsStandardPackage(goroot_, compiler, path)
 	}
-
-	pkgs := mod.Packages()
-	i := sort.SearchStrings(pkgs, reldir)
-	return i != len(pkgs) && pkgs[i] == reldir
+	return false
 }
 
 // IsDirWithGoFiles is the equivalent of fsys.IsDirWithGoFiles using the information in the index.
-func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) {
-	rp := mi.indexPackage(relpath)
-
+func (rp *IndexPackage) IsDirWithGoFiles() (_ bool, err error) {
 	defer func() {
 		if e := recover(); e != nil {
 			err = fmt.Errorf("error reading module index: %v", e)
@@ -549,9 +685,7 @@
 }
 
 // ScanDir implements imports.ScanDir using the information in the index.
-func (mi *ModuleIndex) ScanDir(path string, tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) {
-	rp := mi.indexPackage(path)
-
+func (rp *IndexPackage) ScanDir(tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) {
 	// TODO(matloob) dir should eventually be relative to indexed directory
 	// TODO(matloob): skip reading raw package and jump straight to data we need?
 
@@ -639,20 +773,22 @@
 	return true
 }
 
-// index package holds the information needed to access information in the
-// index about a package.
-type indexPackage struct {
+// IndexPackage holds the information needed to access information in the
+// index needed to load a package in a specific directory.
+type IndexPackage struct {
 	error error
 	dir   string // directory of the package relative to the modroot
 
+	modroot string
+
 	// Source files
 	sourceFiles []*sourceFile
 }
 
 var errCannotFindPackage = errors.New("cannot find package")
 
-// indexPackage returns an indexPackage constructed using the information in the ModuleIndex.
-func (mi *ModuleIndex) indexPackage(path string) *indexPackage {
+// Package returns an IndexPackage constructed using the information in the Module.
+func (mi *Module) Package(path string) *IndexPackage {
 	defer func() {
 		if e := recover(); e != nil {
 			base.Fatalf("error reading module index: %v", e)
@@ -660,12 +796,18 @@
 	}()
 	offset, ok := mi.packages[path]
 	if !ok {
-		return &indexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
+		return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
 	}
 
 	// TODO(matloob): do we want to lock on the module index?
 	d := mi.od.decoderAt(offset)
-	rp := new(indexPackage)
+	p := decodePackage(d, mi.od)
+	p.modroot = mi.modroot
+	return p
+}
+
+func decodePackage(d *decoder, od offsetDecoder) *IndexPackage {
+	rp := new(IndexPackage)
 	if errstr := d.string(); errstr != "" {
 		rp.error = errors.New(errstr)
 	}
@@ -675,7 +817,7 @@
 	for i := uint32(0); i < numSourceFiles; i++ {
 		offset := d.uint32()
 		rp.sourceFiles[i] = &sourceFile{
-			od: mi.od.offsetDecoderAt(offset),
+			od: od.offsetDecoderAt(offset),
 		}
 	}
 	return rp
diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go
index d1f73dbb53..eb84bf8 100644
--- a/src/cmd/go/internal/modindex/scan.go
+++ b/src/cmd/go/internal/modindex/scan.go
@@ -65,7 +65,15 @@
 	if err != nil {
 		return nil, err
 	}
-	return encodeModule(packages), nil
+	return encodeModuleBytes(packages), nil
+}
+
+// indexPackage indexes the package at the given directory and returns its
+// encoded representation. The package is located by its path relative to
+// modroot.
+func indexPackage(modroot, pkgdir string) []byte {
+	p := importRaw(modroot, relPath(pkgdir, modroot))
+	return encodePackageBytes(p)
 }
 
 // rawPackage holds the information from each package that's needed to
diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go
index 0c3123a..3408248 100644
--- a/src/cmd/go/internal/modindex/write.go
+++ b/src/cmd/go/internal/modindex/write.go
@@ -11,9 +11,9 @@
 
 const indexVersion = "go index v0"
 
-// encodeModule produces the encoded representation of the module index.
-// encodeModule may modify the packages slice.
-func encodeModule(packages []*rawPackage) []byte {
+// encodeModuleBytes produces the encoded representation of the module index.
+// encodeModuleBytes may modify the packages slice.
+func encodeModuleBytes(packages []*rawPackage) []byte {
 	e := newEncoder()
 	e.Bytes([]byte(indexVersion))
 	e.Bytes([]byte{'\n'})
@@ -39,6 +39,18 @@
 	return e.b
 }
 
+func encodePackageBytes(p *rawPackage) []byte {
+	e := newEncoder()
+	e.Bytes([]byte(indexVersion))
+	e.Bytes([]byte{'\n'})
+	stringTableOffsetPos := e.Pos() // fill this at the end
+	e.Uint32(0)                     // string table offset
+	encodePackage(e, p)
+	e.IntAt(e.Pos(), stringTableOffsetPos)
+	e.Bytes(e.stringTable)
+	return e.b
+}
+
 func encodePackage(e *encoder, p *rawPackage) {
 	e.String(p.error)
 	e.String(p.dir)
diff --git a/src/cmd/go/internal/modload/import.go b/src/cmd/go/internal/modload/import.go
index f7810ca5..f2c7592 100644
--- a/src/cmd/go/internal/modload/import.go
+++ b/src/cmd/go/internal/modload/import.go
@@ -657,11 +657,11 @@
 	// We don't care about build tags, not even "+build ignore".
 	// We're just looking for a plausible directory.
 	res := haveGoFilesCache.Do(dir, func() any {
-		// modindex.Get will return ErrNotIndexed for any directories which
+		// modindex.GetPackage will return ErrNotIndexed for any directories which
 		// are reached through a symlink, so that they will be handled by
 		// fsys.IsDirWithGoFiles below.
-		if mi, err := modindex.Get(mdir); err == nil {
-			isDirWithGoFiles, err := mi.IsDirWithGoFiles(mi.RelPath(dir))
+		if ip, err := modindex.GetPackage(mdir, dir); err == nil {
+			isDirWithGoFiles, err := ip.IsDirWithGoFiles()
 			return goFilesEntry{isDirWithGoFiles, err}
 		} else if !errors.Is(err, modindex.ErrNotIndexed) {
 			return goFilesEntry{err: err}
diff --git a/src/cmd/go/internal/modload/load.go b/src/cmd/go/internal/modload/load.go
index b2c3ba2..ba85dc2 100644
--- a/src/cmd/go/internal/modload/load.go
+++ b/src/cmd/go/internal/modload/load.go
@@ -2102,8 +2102,8 @@
 // may see these legacy imports. We drop them so that the module
 // search does not look for modules to try to satisfy them.
 func scanDir(modroot string, dir string, tags map[string]bool) (imports_, testImports []string, err error) {
-	if mi, mierr := modindex.Get(modroot); mierr == nil {
-		imports_, testImports, err = mi.ScanDir(mi.RelPath(dir), tags)
+	if ip, mierr := modindex.GetPackage(modroot, dir); mierr == nil {
+		imports_, testImports, err = ip.ScanDir(tags)
 		goto Happy
 	} else if !errors.Is(mierr, modindex.ErrNotIndexed) {
 		return nil, nil, mierr
diff --git a/src/cmd/go/internal/modload/search.go b/src/cmd/go/internal/modload/search.go
index d9d7711..856390a 100644
--- a/src/cmd/go/internal/modload/search.go
+++ b/src/cmd/go/internal/modload/search.go
@@ -195,7 +195,7 @@
 			}
 			modPrefix = mod.Path
 		}
-		if mi, err := modindex.Get(root); err == nil {
+		if mi, err := modindex.GetModule(root); err == nil {
 			walkFromIndex(mi, modPrefix, isMatch, treeCanMatch, tags, have, addPkg)
 			continue
 		} else if !errors.Is(err, modindex.ErrNotIndexed) {
@@ -213,9 +213,9 @@
 }
 
 // walkFromIndex matches packages in a module using the module index. modroot
-// is the module's root directory on disk, index is the ModuleIndex for the
+// is the module's root directory on disk, index is the modindex.Module for the
 // module, and importPathRoot is the module's path prefix.
-func walkFromIndex(index *modindex.ModuleIndex, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
+func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
 loopPackages:
 	for _, reldir := range index.Packages() {
 		// Avoid .foo, _foo, and testdata subdirectory trees.
@@ -252,7 +252,7 @@
 		if !have[name] {
 			have[name] = true
 			if isMatch(name) {
-				if _, _, err := index.ScanDir(reldir, tags); err != imports.ErrNoGo {
+				if _, _, err := index.Package(reldir).ScanDir(tags); err != imports.ErrNoGo {
 					addPkg(name)
 				}
 			}