cmd/go: add per-package indexing for modules outside mod cache
Packages outside the module cache including the standard library will be
indexed individually rather than as a whole module.
For #52876
Change-Id: I142dad6a790e9e8eb4dc6430a588fbfa86552e49
Reviewed-on: https://go-review.googlesource.com/c/go/+/413815
Reviewed-by: Michael Matloob <matloob@golang.org>
Run-TryBot: Michael Matloob <matloob@golang.org>
Reviewed-by: Bryan Mills <bcmills@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
diff --git a/src/cmd/go/internal/load/pkg.go b/src/cmd/go/internal/load/pkg.go
index 1a7b9d2..95a06a3 100644
--- a/src/cmd/go/internal/load/pkg.go
+++ b/src/cmd/go/internal/load/pkg.go
@@ -878,8 +878,8 @@
buildMode = build.ImportComment
}
if modroot := modload.PackageModRoot(ctx, r.path); modroot != "" {
- if mi, err := modindex.Get(modroot); err == nil {
- data.p, data.err = mi.Import(cfg.BuildContext, mi.RelPath(r.dir), buildMode)
+ if rp, err := modindex.GetPackage(modroot, r.dir); err == nil {
+ data.p, data.err = rp.Import(cfg.BuildContext, buildMode)
goto Happy
} else if !errors.Is(err, modindex.ErrNotIndexed) {
base.Fatalf("go: %v", err)
diff --git a/src/cmd/go/internal/modindex/index_format.txt b/src/cmd/go/internal/modindex/index_format.txt
index 3768eea..c74b1d4 100644
--- a/src/cmd/go/internal/modindex/index_format.txt
+++ b/src/cmd/go/internal/modindex/index_format.txt
@@ -7,6 +7,8 @@
is null-terminated. String offsets are relative to the start of the string table.
Bools are written as uint32s: 0 for false and 1 for true.
+The following is the format for a full module:
+
“go index v0\n”
str uint32 - offset of string table
n uint32 - number of packages
@@ -40,7 +42,16 @@
position - file, offset, line, column - uint32
[string table]
-// parseError struct
+The following is the format for a single indexed package:
+
+“go index v0\n”
+str uint32 - offset of string table
+for the single RawPackage:
+ [same RawPackage format as above]
+[string table]
+
+The following is the definition of the json-serialized parseError struct:
+
type parseError struct {
ErrorList *scanner.ErrorList // non-nil if the error was an ErrorList, nil otherwise
ErrorString string // non-empty for all other cases
diff --git a/src/cmd/go/internal/modindex/read.go b/src/cmd/go/internal/modindex/read.go
index ea1ebb0..65a1ecf 100644
--- a/src/cmd/go/internal/modindex/read.go
+++ b/src/cmd/go/internal/modindex/read.go
@@ -23,11 +23,13 @@
"sort"
"strings"
"sync"
+ "time"
"unsafe"
"cmd/go/internal/base"
"cmd/go/internal/cache"
"cmd/go/internal/cfg"
+ "cmd/go/internal/fsys"
"cmd/go/internal/imports"
"cmd/go/internal/par"
"cmd/go/internal/str"
@@ -39,20 +41,16 @@
// module index.
var enabled bool = godebug.Get("goindex") != "0"
-// ModuleIndex represents and encoded module index file. It is used to
+// Module represents and encoded module index file. It is used to
// do the equivalent of build.Import of packages in the module and answer other
// questions based on the index file's data.
-type ModuleIndex struct {
+type Module struct {
modroot string
od offsetDecoder
packages map[string]int // offsets of each package
packagePaths []string // paths to package directories relative to modroot; these are the keys of packages
}
-var fcache par.Cache
-
-var salt = godebug.Get("goindexsalt")
-
// moduleHash returns an ActionID corresponding to the state of the module
// located at filesystem path modroot.
func moduleHash(modroot string, ismodcache bool) (cache.ActionID, error) {
@@ -75,7 +73,45 @@
}
h := cache.NewHash("moduleIndex")
- fmt.Fprintf(h, "module index %s %s %s %v\n", runtime.Version(), salt, indexVersion, modroot)
+ fmt.Fprintf(h, "module index %s %s %v\n", runtime.Version(), indexVersion, modroot)
+ return h.Sum(), nil
+}
+
+const modTimeCutoff = 2 * time.Second
+
+// dirHash returns an ActionID corresponding to the state of the package
+// located at filesystem path pkgdir.
+func dirHash(pkgdir string) (cache.ActionID, error) {
+ h := cache.NewHash("moduleIndex")
+ fmt.Fprintf(h, "package %s %s %v\n", runtime.Version(), indexVersion, pkgdir)
+ entries, err := fsys.ReadDir(pkgdir)
+ if err != nil {
+ // pkgdir might not be a directory. give up on hashing.
+ return cache.ActionID{}, ErrNotIndexed
+ }
+ cutoff := time.Now().Add(-modTimeCutoff)
+ for _, info := range entries {
+ if info.IsDir() {
+ continue
+ }
+
+ if !info.Mode().IsRegular() {
+ return cache.ActionID{}, ErrNotIndexed
+ }
+ // To avoid problems for very recent files where a new
+ // write might not change the mtime due to file system
+ // mtime precision, reject caching if a file was read that
+ // is less than modTimeCutoff old.
+ //
+ // This is the same strategy used for hashing test inputs.
+ // See hashOpen in cmd/go/internal/test/test.go for the
+ // corresponding code.
+ if info.ModTime().After(cutoff) {
+ return cache.ActionID{}, ErrNotIndexed
+ }
+
+ fmt.Fprintf(h, "file %v %v %v\n", info.Name(), info.ModTime(), info.Size())
+ }
return h.Sum(), nil
}
@@ -83,31 +119,61 @@
var ErrNotIndexed = errors.New("not in module index")
-// Get returns the ModuleIndex for the module rooted at modroot.
+var (
+ errDisabled = fmt.Errorf("%w: module indexing disabled", ErrNotIndexed)
+ errNotFromModuleCache = fmt.Errorf("%w: not from module cache", ErrNotIndexed)
+)
+
+// GetPackage returns the IndexPackage for the package at the given path.
// It will return ErrNotIndexed if the directory should be read without
// using the index, for instance because the index is disabled, or the packgae
// is not in a module.
-func Get(modroot string) (*ModuleIndex, error) {
- if !enabled || cache.DefaultDir() == "off" || cfg.BuildMod == "vendor" {
- return nil, ErrNotIndexed
+func GetPackage(modroot, pkgdir string) (*IndexPackage, error) {
+ mi, err := GetModule(modroot)
+ if err == nil {
+ return mi.Package(relPath(pkgdir, modroot)), nil
}
- if modroot == "" {
- panic("modindex.Get called with empty modroot")
+ if !errors.Is(err, errNotFromModuleCache) {
+ return nil, err
}
- modroot = filepath.Clean(modroot)
- isModCache := str.HasFilePathPrefix(modroot, cfg.GOMODCACHE)
- return openIndex(modroot, isModCache)
+ return openIndexPackage(modroot, pkgdir)
}
-// openIndex returns the module index for modPath.
+// GetModule returns the Module for the given modroot.
+// It will return ErrNotIndexed if the directory should be read without
+// using the index, for instance because the index is disabled, or the packgae
+// is not in a module.
+func GetModule(modroot string) (*Module, error) {
+ if !enabled || cache.DefaultDir() == "off" {
+ return nil, errDisabled
+ }
+ if modroot == "" {
+ panic("modindex.GetPackage called with empty modroot")
+ }
+ if cfg.BuildMod == "vendor" {
+ // Even if the main module is in the module cache,
+ // its vendored dependencies are not loaded from their
+ // usual cached locations.
+ return nil, errNotFromModuleCache
+ }
+ modroot = filepath.Clean(modroot)
+ if !str.HasFilePathPrefix(modroot, cfg.GOMODCACHE) {
+ return nil, errNotFromModuleCache
+ }
+ return openIndexModule(modroot, true)
+}
+
+var mcache par.Cache
+
+// openIndexModule returns the module index for modPath.
// It will return ErrNotIndexed if the module can not be read
// using the index because it contains symlinks.
-func openIndex(modroot string, ismodcache bool) (*ModuleIndex, error) {
+func openIndexModule(modroot string, ismodcache bool) (*Module, error) {
type result struct {
- mi *ModuleIndex
+ mi *Module
err error
}
- r := fcache.Do(modroot, func() any {
+ r := mcache.Do(modroot, func() any {
id, err := moduleHash(modroot, ismodcache)
if err != nil {
return result{nil, err}
@@ -133,8 +199,38 @@
return r.mi, r.err
}
-// fromBytes returns a *ModuleIndex given the encoded representation.
-func fromBytes(moddir string, data []byte) (mi *ModuleIndex, err error) {
+var pcache par.Cache
+
+func openIndexPackage(modroot, pkgdir string) (*IndexPackage, error) {
+ type result struct {
+ pkg *IndexPackage
+ err error
+ }
+ r := pcache.Do(pkgdir, func() any {
+ id, err := dirHash(pkgdir)
+ if err != nil {
+ return result{nil, err}
+ }
+ data, _, err := cache.Default().GetMmap(id)
+ if err != nil {
+ // Couldn't read from index. Assume we couldn't read from
+ // the index because the package hasn't been indexed yet.
+ data = indexPackage(modroot, pkgdir)
+ if err = cache.Default().PutBytes(id, data); err != nil {
+ return result{nil, err}
+ }
+ }
+ pkg, err := packageFromBytes(modroot, data)
+ if err != nil {
+ return result{nil, err}
+ }
+ return result{pkg, nil}
+ }).(result)
+ return r.pkg, r.err
+}
+
+// fromBytes returns a *Module given the encoded representation.
+func fromBytes(moddir string, data []byte) (mi *Module, err error) {
if !enabled {
panic("use of index")
}
@@ -184,7 +280,7 @@
packages[packagePaths[i]] = packageOffsets[i]
}
- return &ModuleIndex{
+ return &Module{
moddir,
offsetDecoder{data, st},
packages,
@@ -192,21 +288,60 @@
}, nil
}
+// packageFromBytes returns a *IndexPackage given the encoded representation.
+func packageFromBytes(modroot string, data []byte) (p *IndexPackage, err error) {
+ if !enabled {
+ panic("use of package index when not enabled")
+ }
+
+ // SetPanicOnFault's errors _may_ satisfy this interface. Even though it's not guaranteed
+ // that all its errors satisfy this interface, we'll only check for these errors so that
+ // we don't suppress panics that could have been produced from other sources.
+ type addrer interface {
+ Addr() uintptr
+ }
+
+ // set PanicOnFault to true so that we can catch errors on the initial reads of the slice,
+ // in case it's mmapped (the common case).
+ old := debug.SetPanicOnFault(true)
+ defer func() {
+ debug.SetPanicOnFault(old)
+ if e := recover(); e != nil {
+ if _, ok := e.(addrer); ok {
+ // This panic was almost certainly caused by SetPanicOnFault.
+ err = fmt.Errorf("error reading module index: %v", e)
+ return
+ }
+ // The panic was likely not caused by SetPanicOnFault.
+ panic(e)
+ }
+ }()
+
+ gotVersion, unread, _ := bytes.Cut(data, []byte{'\n'})
+ if string(gotVersion) != indexVersion {
+ return nil, fmt.Errorf("bad index version string: %q", gotVersion)
+ }
+ stringTableOffset, unread := binary.LittleEndian.Uint32(unread[:4]), unread[4:]
+ st := newStringTable(data[stringTableOffset:])
+ d := &decoder{unread, st}
+ p = decodePackage(d, offsetDecoder{data, st})
+ p.modroot = modroot
+ return p, nil
+}
+
// Returns a list of directory paths, relative to the modroot, for
// packages contained in the module index.
-func (mi *ModuleIndex) Packages() []string {
+func (mi *Module) Packages() []string {
return mi.packagePaths
}
-// RelPath returns the path relative to the module's root.
-func (mi *ModuleIndex) RelPath(path string) string {
- return str.TrimFilePathPrefix(filepath.Clean(path), mi.modroot) // mi.modroot is already clean
+// relPath returns the path relative to the module's root.
+func relPath(path, modroot string) string {
+ return str.TrimFilePathPrefix(filepath.Clean(path), filepath.Clean(modroot))
}
-// ImportPackage is the equivalent of build.Import given the information in ModuleIndex.
-func (mi *ModuleIndex) Import(bctxt build.Context, relpath string, mode build.ImportMode) (p *build.Package, err error) {
- rp := mi.indexPackage(relpath)
-
+// Import is the equivalent of build.Import given the information in Module.
+func (rp *IndexPackage) Import(bctxt build.Context, mode build.ImportMode) (p *build.Package, err error) {
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("error reading module index: %v", e)
@@ -218,7 +353,7 @@
p = &build.Package{}
p.ImportPath = "."
- p.Dir = filepath.Join(mi.modroot, rp.dir)
+ p.Dir = filepath.Join(rp.modroot, rp.dir)
var pkgerr error
switch ctxt.Compiler {
@@ -236,7 +371,7 @@
inTestdata := func(sub string) bool {
return strings.Contains(sub, "/testdata/") || strings.HasSuffix(sub, "/testdata") || str.HasPathPrefix(sub, "testdata")
}
- if !inTestdata(relpath) {
+ if !inTestdata(rp.dir) {
// In build.go, p.Root should only be set in the non-local-import case, or in
// GOROOT or GOPATH. Since module mode only calls Import with path set to "."
// and the module index doesn't apply outside modules, the GOROOT case is
@@ -248,8 +383,8 @@
if ctxt.GOROOT != "" && str.HasFilePathPrefix(p.Dir, cfg.GOROOTsrc) && p.Dir != cfg.GOROOTsrc {
p.Root = ctxt.GOROOT
p.Goroot = true
- modprefix := str.TrimFilePathPrefix(mi.modroot, cfg.GOROOTsrc)
- p.ImportPath = relpath
+ modprefix := str.TrimFilePathPrefix(rp.modroot, cfg.GOROOTsrc)
+ p.ImportPath = rp.dir
if modprefix != "" {
p.ImportPath = filepath.Join(modprefix, p.ImportPath)
}
@@ -521,20 +656,21 @@
reldir = str.TrimFilePathPrefix(reldir, "cmd")
modroot = filepath.Join(modroot, "cmd")
}
- mod, err := Get(modroot)
- if err != nil {
+ if _, err := GetPackage(modroot, filepath.Join(modroot, reldir)); err == nil {
+ // Note that goroot.IsStandardPackage doesn't check that the directory
+ // actually contains any go files-- merely that it exists. GetPackage
+ // returning a nil error is enough for us to know the directory exists.
+ return true
+ } else if errors.Is(err, ErrNotIndexed) {
+ // Fall back because package isn't indexable. (Probably because
+ // a file was modified recently)
return goroot.IsStandardPackage(goroot_, compiler, path)
}
-
- pkgs := mod.Packages()
- i := sort.SearchStrings(pkgs, reldir)
- return i != len(pkgs) && pkgs[i] == reldir
+ return false
}
// IsDirWithGoFiles is the equivalent of fsys.IsDirWithGoFiles using the information in the index.
-func (mi *ModuleIndex) IsDirWithGoFiles(relpath string) (_ bool, err error) {
- rp := mi.indexPackage(relpath)
-
+func (rp *IndexPackage) IsDirWithGoFiles() (_ bool, err error) {
defer func() {
if e := recover(); e != nil {
err = fmt.Errorf("error reading module index: %v", e)
@@ -549,9 +685,7 @@
}
// ScanDir implements imports.ScanDir using the information in the index.
-func (mi *ModuleIndex) ScanDir(path string, tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) {
- rp := mi.indexPackage(path)
-
+func (rp *IndexPackage) ScanDir(tags map[string]bool) (sortedImports []string, sortedTestImports []string, err error) {
// TODO(matloob) dir should eventually be relative to indexed directory
// TODO(matloob): skip reading raw package and jump straight to data we need?
@@ -639,20 +773,22 @@
return true
}
-// index package holds the information needed to access information in the
-// index about a package.
-type indexPackage struct {
+// IndexPackage holds the information needed to access information in the
+// index needed to load a package in a specific directory.
+type IndexPackage struct {
error error
dir string // directory of the package relative to the modroot
+ modroot string
+
// Source files
sourceFiles []*sourceFile
}
var errCannotFindPackage = errors.New("cannot find package")
-// indexPackage returns an indexPackage constructed using the information in the ModuleIndex.
-func (mi *ModuleIndex) indexPackage(path string) *indexPackage {
+// Package returns an IndexPackage constructed using the information in the Module.
+func (mi *Module) Package(path string) *IndexPackage {
defer func() {
if e := recover(); e != nil {
base.Fatalf("error reading module index: %v", e)
@@ -660,12 +796,18 @@
}()
offset, ok := mi.packages[path]
if !ok {
- return &indexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
+ return &IndexPackage{error: fmt.Errorf("%w %q in:\n\t%s", errCannotFindPackage, path, filepath.Join(mi.modroot, path))}
}
// TODO(matloob): do we want to lock on the module index?
d := mi.od.decoderAt(offset)
- rp := new(indexPackage)
+ p := decodePackage(d, mi.od)
+ p.modroot = mi.modroot
+ return p
+}
+
+func decodePackage(d *decoder, od offsetDecoder) *IndexPackage {
+ rp := new(IndexPackage)
if errstr := d.string(); errstr != "" {
rp.error = errors.New(errstr)
}
@@ -675,7 +817,7 @@
for i := uint32(0); i < numSourceFiles; i++ {
offset := d.uint32()
rp.sourceFiles[i] = &sourceFile{
- od: mi.od.offsetDecoderAt(offset),
+ od: od.offsetDecoderAt(offset),
}
}
return rp
diff --git a/src/cmd/go/internal/modindex/scan.go b/src/cmd/go/internal/modindex/scan.go
index d1f73dbb53..eb84bf8 100644
--- a/src/cmd/go/internal/modindex/scan.go
+++ b/src/cmd/go/internal/modindex/scan.go
@@ -65,7 +65,15 @@
if err != nil {
return nil, err
}
- return encodeModule(packages), nil
+ return encodeModuleBytes(packages), nil
+}
+
+// indexModule indexes the package at the given directory and returns its
+// encoded representation. It returns ErrNotIndexed if the package can't
+// be indexed.
+func indexPackage(modroot, pkgdir string) []byte {
+ p := importRaw(modroot, relPath(pkgdir, modroot))
+ return encodePackageBytes(p)
}
// rawPackage holds the information from each package that's needed to
diff --git a/src/cmd/go/internal/modindex/write.go b/src/cmd/go/internal/modindex/write.go
index 0c3123a..3408248 100644
--- a/src/cmd/go/internal/modindex/write.go
+++ b/src/cmd/go/internal/modindex/write.go
@@ -11,9 +11,9 @@
const indexVersion = "go index v0"
-// encodeModule produces the encoded representation of the module index.
-// encodeModule may modify the packages slice.
-func encodeModule(packages []*rawPackage) []byte {
+// encodeModuleBytes produces the encoded representation of the module index.
+// encodeModuleBytes may modify the packages slice.
+func encodeModuleBytes(packages []*rawPackage) []byte {
e := newEncoder()
e.Bytes([]byte(indexVersion))
e.Bytes([]byte{'\n'})
@@ -39,6 +39,18 @@
return e.b
}
+func encodePackageBytes(p *rawPackage) []byte {
+ e := newEncoder()
+ e.Bytes([]byte(indexVersion))
+ e.Bytes([]byte{'\n'})
+ stringTableOffsetPos := e.Pos() // fill this at the end
+ e.Uint32(0) // string table offset
+ encodePackage(e, p)
+ e.IntAt(e.Pos(), stringTableOffsetPos)
+ e.Bytes(e.stringTable)
+ return e.b
+}
+
func encodePackage(e *encoder, p *rawPackage) {
e.String(p.error)
e.String(p.dir)
diff --git a/src/cmd/go/internal/modload/import.go b/src/cmd/go/internal/modload/import.go
index f7810ca5..f2c7592 100644
--- a/src/cmd/go/internal/modload/import.go
+++ b/src/cmd/go/internal/modload/import.go
@@ -657,11 +657,11 @@
// We don't care about build tags, not even "+build ignore".
// We're just looking for a plausible directory.
res := haveGoFilesCache.Do(dir, func() any {
- // modindex.Get will return ErrNotIndexed for any directories which
+ // modindex.GetPackage will return ErrNotIndexed for any directories which
// are reached through a symlink, so that they will be handled by
// fsys.IsDirWithGoFiles below.
- if mi, err := modindex.Get(mdir); err == nil {
- isDirWithGoFiles, err := mi.IsDirWithGoFiles(mi.RelPath(dir))
+ if ip, err := modindex.GetPackage(mdir, dir); err == nil {
+ isDirWithGoFiles, err := ip.IsDirWithGoFiles()
return goFilesEntry{isDirWithGoFiles, err}
} else if !errors.Is(err, modindex.ErrNotIndexed) {
return goFilesEntry{err: err}
diff --git a/src/cmd/go/internal/modload/load.go b/src/cmd/go/internal/modload/load.go
index b2c3ba2..ba85dc2 100644
--- a/src/cmd/go/internal/modload/load.go
+++ b/src/cmd/go/internal/modload/load.go
@@ -2102,8 +2102,8 @@
// may see these legacy imports. We drop them so that the module
// search does not look for modules to try to satisfy them.
func scanDir(modroot string, dir string, tags map[string]bool) (imports_, testImports []string, err error) {
- if mi, mierr := modindex.Get(modroot); mierr == nil {
- imports_, testImports, err = mi.ScanDir(mi.RelPath(dir), tags)
+ if ip, mierr := modindex.GetPackage(modroot, dir); mierr == nil {
+ imports_, testImports, err = ip.ScanDir(tags)
goto Happy
} else if !errors.Is(mierr, modindex.ErrNotIndexed) {
return nil, nil, mierr
diff --git a/src/cmd/go/internal/modload/search.go b/src/cmd/go/internal/modload/search.go
index d9d7711..856390a 100644
--- a/src/cmd/go/internal/modload/search.go
+++ b/src/cmd/go/internal/modload/search.go
@@ -195,7 +195,7 @@
}
modPrefix = mod.Path
}
- if mi, err := modindex.Get(root); err == nil {
+ if mi, err := modindex.GetModule(root); err == nil {
walkFromIndex(mi, modPrefix, isMatch, treeCanMatch, tags, have, addPkg)
continue
} else if !errors.Is(err, modindex.ErrNotIndexed) {
@@ -213,9 +213,9 @@
}
// walkFromIndex matches packages in a module using the module index. modroot
-// is the module's root directory on disk, index is the ModuleIndex for the
+// is the module's root directory on disk, index is the modindex.Module for the
// module, and importPathRoot is the module's path prefix.
-func walkFromIndex(index *modindex.ModuleIndex, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
+func walkFromIndex(index *modindex.Module, importPathRoot string, isMatch, treeCanMatch func(string) bool, tags, have map[string]bool, addPkg func(string)) {
loopPackages:
for _, reldir := range index.Packages() {
// Avoid .foo, _foo, and testdata subdirectory trees.
@@ -252,7 +252,7 @@
if !have[name] {
have[name] = true
if isMatch(name) {
- if _, _, err := index.ScanDir(reldir, tags); err != imports.ErrNoGo {
+ if _, _, err := index.Package(reldir).ScanDir(tags); err != imports.ErrNoGo {
addPkg(name)
}
}