blob: 638331e2833f818406206615feb0e3282609c2ea [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fetch provides a way to fetch modules from a proxy.
package fetch
import (
"archive/zip"
"bytes"
"context"
"errors"
"fmt"
"go/ast"
"go/build"
"go/parser"
"go/token"
"io"
"io/ioutil"
"math"
"net/http"
"os"
"path"
"runtime"
"strings"
"go.opencensus.io/trace"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/config"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/godoc"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/source"
"golang.org/x/pkgsite/internal/stdlib"
)
// BadPackageError represents an error loading a package
// because its contents do not make up a valid package.
//
// This can happen, for example, if the .go files fail
// to parse or declare different package names.
type BadPackageError struct {
Err error // Not nil.
}
func (bpe *BadPackageError) Error() string { return bpe.Err.Error() }
// Go environments used to construct build contexts in loadPackage.
var goEnvs = []struct{ GOOS, GOARCH string }{
{"linux", "amd64"},
{"windows", "amd64"},
{"darwin", "amd64"},
{"js", "wasm"},
}
// loadPackage loads a Go package by calling loadPackageWithBuildContext, trying
// several build contexts in turn. It returns a goPackage with documentation
// information for each build context that results in a valid package, in the
// same order that the build contexts are listed. If none of them result in a
// package, then loadPackage returns nil, nil.
//
// If a package is fine except that its documentation is too large, loadPackage
// returns a goPackage whose err field is a non-nil error with godoc.ErrTooLarge in its chain.
func loadPackage(ctx context.Context, zipGoFiles []*zip.File, innerPath string, sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (_ *goPackage, err error) {
defer derrors.Wrap(&err, "loadPackage(ctx, zipGoFiles, %q, sourceInfo, modInfo)", innerPath)
ctx, span := trace.StartSpan(ctx, "fetch.loadPackage")
defer span.End()
var pkgs []*goPackage
// Make a map with all the zip file contents.
files := make(map[string][]byte)
for _, f := range zipGoFiles {
_, name := path.Split(f.Name)
b, err := readZipFile(f, MaxFileSize)
if err != nil {
return nil, err
}
files[name] = b
}
for _, env := range goEnvs {
pkg, err := loadPackageWithBuildContext(ctx, env.GOOS, env.GOARCH, files, innerPath, sourceInfo, modInfo)
if err != nil && !errors.Is(err, godoc.ErrTooLarge) && !errors.Is(err, derrors.NotFound) {
return nil, err
}
if pkg != nil {
pkg.err = err
pkgs = append(pkgs, pkg)
}
}
return mergePackages(pkgs)
}
// mergePackages combines multiple packages from different build contexts into
// one.
func mergePackages(pkgs []*goPackage) (*goPackage, error) {
if len(pkgs) == 0 {
return nil, nil
}
// If any build context has an error, return that one.
for _, pkg := range pkgs {
if pkg.err != nil {
return pkg, nil
}
}
// Use everything from the first package, just merge the docs.
result := pkgs[0]
for _, pkg := range pkgs[1:] {
if pkg.name != result.name {
// All the build environments should use the same package name. Although
// it's technically legal for different build tags to result in different
// package names, it's not something we support.
return nil, &BadPackageError{
Err: fmt.Errorf("more than one package name (%q and %q)", result.name, pkg.name),
}
}
result.docs = append(result.docs, pkg.docs[0])
}
return result, nil
}
// httpPost allows package fetch tests to stub out playground URL fetches.
var httpPost = http.Post
// loadPackageWithBuildContext loads a Go package made of .go files in zipGoFiles
// using a build context constructed from the given GOOS and GOARCH values.
// modulePath is stdlib.ModulePath for the Go standard library and the module
// path for all other modules. innerPath is the path of the Go package directory
// relative to the module root.
//
// zipGoFiles must contain only .go files that have been verified
// to be of reasonable size.
//
// The returned goPackage.licenses field is not populated.
//
// It returns a nil goPackage if the directory doesn't contain a Go package
// or all .go files have been excluded by constraints.
// A *BadPackageError error is returned if the directory
// contains .go files but do not make up a valid package.
func loadPackageWithBuildContext(ctx context.Context, goos, goarch string, files map[string][]byte, innerPath string, sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (_ *goPackage, err error) {
modulePath := modInfo.ModulePath
defer derrors.Wrap(&err, "loadPackageWithBuildContext(%q, %q, files, %q, %q, %+v)",
goos, goarch, innerPath, modulePath, sourceInfo)
packageName, goFiles, fset, err := loadFilesWithBuildContext(innerPath, goos, goarch, files)
if err != nil {
return nil, err
}
docPkg := godoc.NewPackage(fset, goos, goarch, modInfo.ModulePackages)
for _, pf := range goFiles {
var removeNodes bool
removeNodes = true
// Don't strip the seemingly unexported functions from the builtin package;
// they are actually Go builtins like make, new, etc.
if modulePath == stdlib.ModulePath && innerPath == "builtin" {
removeNodes = false
}
docPkg.AddFile(pf, removeNodes)
}
// Encode first, because Render messes with the AST.
src, err := docPkg.Encode(ctx)
if err != nil {
return nil, err
}
synopsis, imports, _, err := docPkg.Render(ctx, innerPath, sourceInfo, modInfo, goos, goarch)
if err != nil && !errors.Is(err, godoc.ErrTooLarge) {
return nil, err
}
importPath := path.Join(modulePath, innerPath)
if modulePath == stdlib.ModulePath {
importPath = innerPath
}
v1path := internal.V1Path(importPath, modulePath)
return &goPackage{
path: importPath,
name: packageName,
v1path: v1path,
imports: imports,
docs: []*internal.Documentation{{
GOOS: goos,
GOARCH: goarch,
Synopsis: synopsis,
Source: src,
}},
}, err
}
// loadFilesWithBuildContext loads all the Go files at innerPath that match goos
// and goarch in the zip. It returns the package name as it occurs in the
// source, a map of the ASTs of all the Go files, and the token.FileSet used for
// parsing.
func loadFilesWithBuildContext(innerPath, goos, goarch string, allFiles map[string][]byte) (pkgName string, fileMap map[string]*ast.File, _ *token.FileSet, _ error) {
// Apply build constraints to get a map from matching file names to their contents.
files, err := matchingFiles(goos, goarch, allFiles)
if err != nil {
return "", nil, nil, err
}
// Parse .go files and add them to the goFiles slice.
var (
fset = token.NewFileSet()
goFiles = make(map[string]*ast.File)
numNonTestFiles int
packageName string
packageNameFile string // Name of file where packageName came from.
)
for name, b := range files {
pf, err := parser.ParseFile(fset, name, b, parser.ParseComments)
if err != nil {
if pf == nil {
return "", nil, nil, fmt.Errorf("internal error: the source couldn't be read: %v", err)
}
return "", nil, nil, &BadPackageError{Err: err}
}
// Remember all files, including test files for their examples.
goFiles[name] = pf
if strings.HasSuffix(name, "_test.go") {
continue
}
// Keep track of the number of non-test files to check that the package name is the same.
// and also because a directory with only test files doesn't count as a
// Go package.
numNonTestFiles++
if numNonTestFiles == 1 {
packageName = pf.Name.Name
packageNameFile = name
} else if pf.Name.Name != packageName {
return "", nil, nil, &BadPackageError{Err: &build.MultiplePackageError{
Dir: innerPath,
Packages: []string{packageName, pf.Name.Name},
Files: []string{packageNameFile, name},
}}
}
}
if numNonTestFiles == 0 {
// This directory doesn't contain a package, or at least not one
// that matches this build context.
return "", nil, nil, derrors.NotFound
}
return packageName, goFiles, fset, nil
}
// matchingFiles returns a map from file names to their contents, read from zipGoFiles.
// It includes only those files that match the build context determined by goos and goarch.
func matchingFiles(goos, goarch string, allFiles map[string][]byte) (matchedFiles map[string][]byte, err error) {
defer derrors.Wrap(&err, "matchingFiles(%q, %q, zipGoFiles)", goos, goarch)
// bctx is used to make decisions about which of the .go files are included
// by build constraints.
bctx := &build.Context{
GOOS: goos,
GOARCH: goarch,
CgoEnabled: true,
Compiler: build.Default.Compiler,
ReleaseTags: build.Default.ReleaseTags,
JoinPath: path.Join,
OpenFile: func(name string) (io.ReadCloser, error) {
return ioutil.NopCloser(bytes.NewReader(allFiles[name])), nil
},
// If left nil, the default implementations of these read from disk,
// which we do not want. None of these functions should be used
// inside this function; it would be an internal error if they are.
// Set them to non-nil values to catch if that happens.
SplitPathList: func(string) []string { panic("internal error: unexpected call to SplitPathList") },
IsAbsPath: func(string) bool { panic("internal error: unexpected call to IsAbsPath") },
IsDir: func(string) bool { panic("internal error: unexpected call to IsDir") },
HasSubdir: func(string, string) (string, bool) { panic("internal error: unexpected call to HasSubdir") },
ReadDir: func(string) ([]os.FileInfo, error) { panic("internal error: unexpected call to ReadDir") },
}
// Copy the input map so we don't modify it.
matchedFiles = map[string][]byte{}
for n, c := range allFiles {
matchedFiles[n] = c
}
for name := range allFiles {
match, err := bctx.MatchFile(".", name) // This will access the file we just added to files map above.
if err != nil {
return nil, &BadPackageError{Err: fmt.Errorf(`bctx.MatchFile(".", %q): %w`, name, err)}
}
if !match {
delete(matchedFiles, name)
}
}
return matchedFiles, nil
}
// readZipFile decompresses zip file f and returns its uncompressed contents.
// The caller can check f.UncompressedSize64 before calling readZipFile to
// get the expected uncompressed size of f.
//
// limit is the maximum number of bytes to read.
func readZipFile(f *zip.File, limit int64) (_ []byte, err error) {
defer derrors.Add(&err, "readZipFile(%q)", f.Name)
r, err := f.Open()
if err != nil {
return nil, fmt.Errorf("f.Open(): %v", err)
}
b, err := ioutil.ReadAll(io.LimitReader(r, limit))
if err != nil {
r.Close()
return nil, fmt.Errorf("ioutil.ReadAll(r): %v", err)
}
if err := r.Close(); err != nil {
return nil, fmt.Errorf("closing: %v", err)
}
return b, nil
}
func allocMeg() int {
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
return int(ms.Alloc / (1024 * 1024))
}
// mib is the number of bytes in a mebibyte (Mi).
const mib = 1024 * 1024
// The largest module zip size we can comfortably process.
// We probably will OOM if we process a module whose zip is larger.
var maxModuleZipSize int64 = math.MaxInt64
func init() {
v := config.GetEnvInt("GO_DISCOVERY_MAX_MODULE_ZIP_MI", -1)
if v > 0 {
maxModuleZipSize = int64(v) * mib
}
}
var zipLoadShedder *loadShedder
func init() {
ctx := context.Background()
mebis := config.GetEnvInt("GO_DISCOVERY_MAX_IN_FLIGHT_ZIP_MI", -1)
if mebis > 0 {
log.Infof(ctx, "shedding load over %dMi", mebis)
zipLoadShedder = &loadShedder{maxSizeInFlight: uint64(mebis) * mib}
}
}
// ZipLoadShedStats returns a snapshot of the current LoadShedStats for zip files.
func ZipLoadShedStats() LoadShedStats {
if zipLoadShedder != nil {
return zipLoadShedder.stats()
}
return LoadShedStats{}
}