// blob: 9ed68e1f0256c89045ac782e687fe2fc92bab597 [file] [log] [blame]
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fetch provides a way to fetch modules from a proxy.
package fetch
import (
"archive/zip"
"bytes"
"context"
"errors"
"fmt"
"go/ast"
"go/build"
"go/parser"
"go/token"
"io"
"io/ioutil"
"math"
"net/http"
"os"
"path"
"sort"
"strings"
"go.opencensus.io/trace"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/config"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/godoc"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/source"
"golang.org/x/pkgsite/internal/stdlib"
)
// BadPackageError represents an error loading a package
// because its contents do not make up a valid package.
//
// This can happen, for example, if the .go files fail
// to parse or declare different package names.
type BadPackageError struct {
	Err error // Not nil.
}

// Error returns the message of the underlying error unchanged.
func (bpe *BadPackageError) Error() string { return bpe.Err.Error() }

// Unwrap returns the underlying error so that errors.Is and errors.As can
// look through a BadPackageError at its cause.
func (bpe *BadPackageError) Unwrap() error { return bpe.Err }
// loadPackage loads a Go package by calling loadPackageForBuildContext once
// per entry of internal.BuildContexts, in order. It returns a goPackage that
// carries one internal.Documentation for each build context that results in a
// valid package, in the same order that the build contexts are listed. If no
// build context results in a package, loadPackage returns nil, nil.
//
// If a package is fine except that its documentation is too large, loadPackage
// returns a goPackage whose err field is a non-nil error with
// godoc.ErrTooLarge in its chain.
func loadPackage(ctx context.Context, zipGoFiles []*zip.File, innerPath string,
	sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (_ *goPackage, err error) {
	defer derrors.Wrap(&err, "loadPackage(ctx, zipGoFiles, %q, sourceInfo, modInfo)", innerPath)
	ctx, span := trace.StartSpan(ctx, "fetch.loadPackage")
	defer span.End()

	// Read every zip entry into memory, keyed by the last element of its path.
	contents := make(map[string][]byte)
	for _, zf := range zipGoFiles {
		_, base := path.Split(zf.Name)
		data, err := readZipFile(zf, MaxFileSize)
		if err != nil {
			return nil, err
		}
		contents[base] = data
	}

	// For the standard library module the import path is the inner path itself;
	// for every other module it is the module path joined with the inner path.
	modulePath := modInfo.ModulePath
	importPath := innerPath
	if modulePath != stdlib.ModulePath {
		importPath = path.Join(modulePath, innerPath)
	}
	v1path := internal.V1Path(importPath, modulePath)

	var result *goPackage
	// The documentation is determined by the set of matching files, so remember
	// the documentation produced for each distinct file set and reuse it.
	seen := map[string]*internal.Documentation{}
	for _, bc := range internal.BuildContexts {
		matched, err := matchingFiles(bc.GOOS, bc.GOARCH, contents)
		if err != nil {
			return nil, err
		}
		key := mapKeyForFiles(matched)
		if prev := seen[key]; prev != nil {
			// Same file set as an earlier build context: loading it again would
			// produce the same outputs, so copy the doc and relabel it.
			dup := *prev
			dup.GOOS, dup.GOARCH = bc.GOOS, bc.GOARCH
			result.docs = append(result.docs, &dup)
			continue
		}

		name, imports, synopsis, src, api, err := loadPackageForBuildContext(ctx, matched, innerPath, sourceInfo, modInfo)
		if errors.Is(err, derrors.NotFound) {
			// No package for this build context.
			continue
		}
		if errors.Is(err, godoc.ErrTooLarge) {
			// The doc for this build context is too large. To keep things
			// simple, return a single package carrying this error for all
			// build contexts, and ignore the others.
			return &goPackage{
				err:     err,
				path:    importPath,
				v1path:  v1path,
				name:    name,
				imports: imports,
				docs: []*internal.Documentation{{
					GOOS:     internal.All,
					GOARCH:   internal.All,
					Synopsis: synopsis,
					Source:   src,
					API:      api,
				}},
			}, nil
		}
		if err != nil {
			// Serious error. Fail.
			return nil, err
		}

		if result == nil {
			result = &goPackage{
				path:    importPath,
				v1path:  v1path,
				name:    name,
				imports: imports, // Use the imports from the first successful build context.
			}
		}
		// All the build contexts should use the same package name. Although
		// it's technically legal for different build tags to result in
		// different package names, it's not something we support.
		if name != result.name {
			return nil, &BadPackageError{
				Err: fmt.Errorf("more than one package name (%q and %q)", result.name, name),
			}
		}
		doc := &internal.Documentation{
			GOOS:     bc.GOOS,
			GOARCH:   bc.GOARCH,
			Synopsis: synopsis,
			Source:   src,
			API:      api,
		}
		seen[key] = doc
		result.docs = append(result.docs, doc)
	}

	// If all the build contexts succeeded and had the same set of files, then
	// assume that the package doc is valid for all build contexts. Represent
	// this with a single Documentation whose GOOS and GOARCH are both "all".
	// (seen is empty whenever result is nil, so result is not dereferenced
	// in that case.)
	if len(seen) == 1 && len(result.docs) == len(internal.BuildContexts) {
		result.docs = result.docs[:1]
		result.docs[0].GOOS = internal.All
		result.docs[0].GOARCH = internal.All
	}
	return result, nil
}
// mapKeyForFiles builds a string usable as a map key that identifies the set
// of file names in files: the names in sorted order, separated by single
// spaces. File contents are ignored.
// It assumes the filenames do not contain spaces.
func mapKeyForFiles(files map[string][]byte) string {
	sorted := make([]string, len(files))
	i := 0
	for name := range files {
		sorted[i] = name
		i++
	}
	sort.Strings(sorted)
	return strings.Join(sorted, " ")
}
// httpPost allows package fetch tests to stub out playground URL fetches.
// It defaults to http.Post.
var httpPost = http.Post
// loadPackageForBuildContext loads a Go package made of .go files in
// files, which should match some build context.
// modInfo.ModulePath is stdlib.ModulePath for the Go standard library and the
// module path for all other modules. innerPath is the path of the Go package
// directory relative to the module root. The files argument must contain only
// .go files that have been verified to be of reasonable size and that match
// the build context.
//
// It returns the package name, the list of imports, the package synopsis, the
// serialized source (AST) for the package, and the package's API symbols.
//
// It returns an error with NotFound in its chain if the directory doesn't
// contain a Go package or all .go files have been excluded by constraints. A
// *BadPackageError error is returned if the directory contains .go files that
// do not make up a valid package.
//
// If it returns an error with ErrTooLarge in its chain, the other return values
// are still valid.
func loadPackageForBuildContext(ctx context.Context, files map[string][]byte, innerPath string, sourceInfo *source.Info, modInfo *godoc.ModuleInfo) (
	name string, imports []string, synopsis string, source []byte, api []*internal.Symbol, err error) {
	modulePath := modInfo.ModulePath
	// The error context names this function so that wrapped errors point at
	// the right place.
	defer derrors.Wrap(&err, "loadPackageForBuildContext(files, %q, %q, %+v)", innerPath, modulePath, sourceInfo)

	packageName, goFiles, fset, err := loadFilesWithBuildContext(innerPath, files)
	if err != nil {
		return "", nil, "", nil, nil, err
	}

	// Don't strip the seemingly unexported functions from the builtin package;
	// they are actually Go builtins like make, new, etc. The answer is the
	// same for every file, so decide once before the loop.
	removeNodes := !(modulePath == stdlib.ModulePath && innerPath == "builtin")

	docPkg := godoc.NewPackage(fset, modInfo.ModulePackages)
	for _, pf := range goFiles {
		docPkg.AddFile(pf, removeNodes)
	}

	// Encode first, because Render messes with the AST.
	src, err := docPkg.Encode(ctx)
	if err != nil {
		return "", nil, "", nil, nil, err
	}

	synopsis, imports, _, api, err = docPkg.Render(ctx, innerPath, sourceInfo, modInfo)
	if err != nil && !errors.Is(err, godoc.ErrTooLarge) {
		return "", nil, "", nil, nil, err
	}
	// err is nil or has ErrTooLarge in its chain; in the latter case the
	// results are returned alongside the error.
	return packageName, imports, synopsis, src, api, err
}
// loadFilesWithBuildContext parses all the given Go files at innerPath. It
// returns the package name as it occurs in the source, a map from file name to
// the AST of each Go file (test files included), and the token.FileSet used
// for parsing.
//
// Files are parsed in sorted file-name order so that the FileSet layout and
// any reported error do not depend on map iteration order.
//
// If a file fails to parse, or two non-test files declare different package
// names, it returns a *BadPackageError.
// If there are no non-test Go files, it returns a NotFound error.
func loadFilesWithBuildContext(innerPath string, files map[string][]byte) (pkgName string, fileMap map[string]*ast.File, _ *token.FileSet, _ error) {
	// Map iteration order is unspecified, so fix the order explicitly.
	names := make([]string, 0, len(files))
	for name := range files {
		names = append(names, name)
	}
	sort.Strings(names)

	// Parse .go files and add them to the goFiles map.
	var (
		fset            = token.NewFileSet()
		goFiles         = make(map[string]*ast.File, len(files))
		numNonTestFiles int
		packageName     string
		packageNameFile string // Name of file where packageName came from.
	)
	for _, name := range names {
		pf, err := parser.ParseFile(fset, name, files[name], parser.ParseComments)
		if err != nil {
			if pf == nil {
				return "", nil, nil, fmt.Errorf("internal error: the source couldn't be read: %v", err)
			}
			return "", nil, nil, &BadPackageError{Err: err}
		}
		// Remember all files, including test files for their examples.
		goFiles[name] = pf
		if strings.HasSuffix(name, "_test.go") {
			continue
		}
		// Keep track of the number of non-test files to check that the package
		// name is the same, and also because a directory with only test files
		// doesn't count as a Go package.
		numNonTestFiles++
		if numNonTestFiles == 1 {
			packageName = pf.Name.Name
			packageNameFile = name
		} else if pf.Name.Name != packageName {
			return "", nil, nil, &BadPackageError{Err: &build.MultiplePackageError{
				Dir:      innerPath,
				Packages: []string{packageName, pf.Name.Name},
				Files:    []string{packageNameFile, name},
			}}
		}
	}
	if numNonTestFiles == 0 {
		// This directory doesn't contain a package, or at least not one
		// that matches this build context.
		return "", nil, nil, derrors.NotFound
	}
	return packageName, goFiles, fset, nil
}
// matchingFiles returns a new map from file names to their contents holding
// only those entries of allFiles that match the build context determined by
// goos and goarch. allFiles itself is not modified.
//
// If build.Context.MatchFile reports an error for a file, matchingFiles
// returns a *BadPackageError wrapping it.
func matchingFiles(goos, goarch string, allFiles map[string][]byte) (matchedFiles map[string][]byte, err error) {
	defer derrors.Wrap(&err, "matchingFiles(%q, %q, zipGoFiles)", goos, goarch)

	// openInMemory serves file contents from allFiles instead of from disk.
	// A name that is not in allFiles yields an empty reader, not an error.
	openInMemory := func(name string) (io.ReadCloser, error) {
		return ioutil.NopCloser(bytes.NewReader(allFiles[name])), nil
	}

	// bctx is used to make decisions about which of the .go files are included
	// by build constraints.
	bctx := &build.Context{
		GOOS:        goos,
		GOARCH:      goarch,
		CgoEnabled:  true,
		Compiler:    build.Default.Compiler,
		ReleaseTags: build.Default.ReleaseTags,

		JoinPath: path.Join,
		OpenFile: openInMemory,

		// If left nil, the default implementations of these read from disk,
		// which we do not want. None of these functions should be used
		// inside this function; it would be an internal error if they are.
		// Set them to non-nil values to catch if that happens.
		SplitPathList: func(string) []string { panic("internal error: unexpected call to SplitPathList") },
		IsAbsPath:     func(string) bool { panic("internal error: unexpected call to IsAbsPath") },
		IsDir:         func(string) bool { panic("internal error: unexpected call to IsDir") },
		HasSubdir:     func(string, string) (string, bool) { panic("internal error: unexpected call to HasSubdir") },
		ReadDir:       func(string) ([]os.FileInfo, error) { panic("internal error: unexpected call to ReadDir") },
	}

	// Collect only the files that match, leaving the input map untouched.
	matchedFiles = make(map[string][]byte, len(allFiles))
	for name, contents := range allFiles {
		// MatchFile obtains the file's contents through bctx.OpenFile above.
		ok, err := bctx.MatchFile(".", name)
		if err != nil {
			return nil, &BadPackageError{Err: fmt.Errorf(`bctx.MatchFile(".", %q): %w`, name, err)}
		}
		if ok {
			matchedFiles[name] = contents
		}
	}
	return matchedFiles, nil
}
// readZipFile opens zip entry f, reads its uncompressed contents and returns
// them. The caller can check f.UncompressedSize64 before calling readZipFile
// to get the expected uncompressed size of f.
//
// limit is the maximum number of bytes to read; an entry longer than limit is
// returned cut off at limit bytes, without an error.
//
// A read error takes precedence over an error from closing the entry.
func readZipFile(f *zip.File, limit int64) (_ []byte, err error) {
	defer derrors.Add(&err, "readZipFile(%q)", f.Name)

	rc, err := f.Open()
	if err != nil {
		return nil, fmt.Errorf("f.Open(): %v", err)
	}
	data, readErr := ioutil.ReadAll(io.LimitReader(rc, limit))
	// Close exactly once regardless of how the read went.
	closeErr := rc.Close()
	switch {
	case readErr != nil:
		return nil, fmt.Errorf("ioutil.ReadAll(r): %v", readErr)
	case closeErr != nil:
		return nil, fmt.Errorf("closing: %v", closeErr)
	}
	return data, nil
}
// mib is the number of bytes in a mebibyte (Mi): 2^20.
const mib = 1 << 20
// maxModuleZipSize is the largest module zip size, in bytes, that we can
// comfortably process. We probably will OOM if we process a module whose zip
// is larger. It starts out as math.MaxInt64; see init for the override.
var maxModuleZipSize int64 = math.MaxInt64

// init overrides maxModuleZipSize when config.GetEnvInt yields a positive
// value for GO_DISCOVERY_MAX_MODULE_ZIP_MI; that value is a count of
// mebibytes.
func init() {
	if limitMi := config.GetEnvInt("GO_DISCOVERY_MAX_MODULE_ZIP_MI", -1); limitMi > 0 {
		maxModuleZipSize = int64(limitMi) * mib
	}
}
// zipLoadShedder is the loadShedder for zip files. It stays nil unless init
// sets it up.
var zipLoadShedder *loadShedder

// init creates zipLoadShedder when config.GetEnvInt yields a positive value
// for GO_DISCOVERY_MAX_IN_FLIGHT_ZIP_MI; that value is a count of mebibytes
// and becomes the shedder's maxSizeInFlight in bytes.
func init() {
	mebis := config.GetEnvInt("GO_DISCOVERY_MAX_IN_FLIGHT_ZIP_MI", -1)
	if mebis <= 0 {
		return
	}
	log.Infof(context.Background(), "shedding load over %dMi", mebis)
	zipLoadShedder = &loadShedder{maxSizeInFlight: uint64(mebis) * mib}
}
// ZipLoadShedStats returns a snapshot of the current LoadShedStats for zip
// files. When no zip load shedder has been set up, it returns the zero
// LoadShedStats.
func ZipLoadShedStats() LoadShedStats {
	if zipLoadShedder == nil {
		return LoadShedStats{}
	}
	return zipLoadShedder.stats()
}