blob: 8f3e0ad075457f010f120c780bc9f8078e3649af [file] [log] [blame]
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package fetchdatasource provides an internal.DataSource implementation
// that fetches modules (rather than reading them from a database).
// Search and other tabs are not supported.
package fetchdatasource
import (
"context"
"errors"
"fmt"
"sort"
"strconv"
"strings"
"time"
lru "github.com/hashicorp/golang-lru"
"golang.org/x/mod/semver"
"golang.org/x/pkgsite/internal"
"golang.org/x/pkgsite/internal/derrors"
"golang.org/x/pkgsite/internal/fetch"
"golang.org/x/pkgsite/internal/log"
"golang.org/x/pkgsite/internal/proxy"
"golang.org/x/pkgsite/internal/stdlib"
"golang.org/x/pkgsite/internal/version"
)
// FetchDataSource implements the internal.DataSource interface, by trying a list of
// fetch.ModuleGetters to fetch modules and caching the results.
type FetchDataSource struct {
opts Options
cache *lru.Cache
}
// Options are parameters for creating a new FetchDataSource.
type Options struct {
// List of getters to try, in order.
Getters []fetch.ModuleGetter
// If set, this will be used for latest-version information. To fetch modules from the proxy,
// include a ProxyModuleGetter in Getters.
ProxyClientForLatest *proxy.Client
BypassLicenseCheck bool
}
// New creates a new FetchDataSource from the options.
func (o Options) New() *FetchDataSource {
cache, err := lru.New(maxCachedModules)
if err != nil {
// Can only happen if size is bad, and we control it.
panic(err)
}
opts := o
// Copy getters slice so caller doesn't modify us.
opts.Getters = make([]fetch.ModuleGetter, len(opts.Getters))
copy(opts.Getters, o.Getters)
return &FetchDataSource{
opts: opts,
cache: cache,
}
}
// cacheEntry holds a fetched module or an error, if the fetch failed.
type cacheEntry struct {
g fetch.ModuleGetter
module *internal.Module
err error
}
const maxCachedModules = 100
// cacheGet returns information from the cache if it is present, and (nil, nil) otherwise.
func (ds *FetchDataSource) cacheGet(path, version string) (fetch.ModuleGetter, *internal.Module, error) {
// Look for an exact match first, then use LocalVersion, as for a
// directory-based or GOPATH-mode module.
for _, v := range []string{version, fetch.LocalVersion} {
if e, ok := ds.cache.Get(internal.Modver{Path: path, Version: v}); ok {
e := e.(cacheEntry)
return e.g, e.module, e.err
}
}
return nil, nil, nil
}
// cachePut puts information into the cache.
func (ds *FetchDataSource) cachePut(g fetch.ModuleGetter, path, version string, m *internal.Module, err error) {
ds.cache.Add(internal.Modver{Path: path, Version: version}, cacheEntry{g, m, err})
}
// getModule gets the module at the given path and version. It first checks the
// cache, and if it isn't there it then tries to fetch it.
func (ds *FetchDataSource) getModule(ctx context.Context, modulePath, vers string) (_ *internal.Module, err error) {
defer derrors.Wrap(&err, "FetchDataSource.getModule(%q, %q)", modulePath, vers)
g, mod, err := ds.cacheGet(modulePath, vers)
if err != nil {
return nil, err
}
if mod != nil {
// For getters supporting invalidation, check whether cached contents have
// changed.
v, ok := g.(fetch.VolatileModuleGetter)
if !ok {
return mod, nil
}
hasChanged, err := v.HasChanged(ctx, mod.ModuleInfo)
if err != nil {
return nil, err
}
if !hasChanged {
return mod, nil
}
}
// There can be a benign race here, where two goroutines both fetch the same
// module. At worst some work will be duplicated, but if that turns out to
// be a problem we could use golang.org/x/sync/singleflight.
m, g, err := ds.fetch(ctx, modulePath, vers)
if m != nil && ds.opts.ProxyClientForLatest != nil {
// Use the go.mod file at the raw latest version to fill in deprecation
// and retraction information. Ignore any problems getting the
// information, because we may be trying to do this for a local module
// that the proxy doesn't know about.
if lmv, err := fetch.LatestModuleVersions(ctx, modulePath, ds.opts.ProxyClientForLatest, nil); err == nil {
lmv.PopulateModuleInfo(&m.ModuleInfo)
}
}
// Populate unit subdirectories. When we use a database, this only happens when we read
// a unit from the DB.
if m != nil {
for _, u := range m.Units {
ds.populateUnitSubdirectories(u, m)
}
}
// Cache both successes and failures, but not cancellations.
if !errors.Is(err, context.Canceled) {
ds.cachePut(g, modulePath, vers, m, err)
// Cache the resolved version of "latest" too. A useful optimization
// because the frontend redirects "latest", resulting in another fetch.
if m != nil && vers == version.Latest {
ds.cachePut(g, modulePath, m.Version, m, err)
}
}
return m, err
}
// fetch fetches a module using the configured ModuleGetters.
// It tries each getter in turn until it finds one that has the module.
func (ds *FetchDataSource) fetch(ctx context.Context, modulePath, version string) (_ *internal.Module, g fetch.ModuleGetter, err error) {
log.Infof(ctx, "FetchDataSource: fetching %s@%s", modulePath, version)
start := time.Now()
defer func() {
log.Infof(ctx, "FetchDataSource: fetched %s@%s using %T in %s with error %v", modulePath, version, g, time.Since(start), err)
}()
for _, g := range ds.opts.Getters {
fr := fetch.FetchModule(ctx, modulePath, version, g)
if fr.Error == nil {
m := fr.Module
if ds.opts.BypassLicenseCheck {
m.IsRedistributable = true
for _, unit := range m.Units {
unit.IsRedistributable = true
}
} else {
m.RemoveNonRedistributableData()
}
// There is special handling in FetchModule for the standard library,
// that bypasses the getter g. Don't record g as having fetch std.
//
// TODO(rfindley): it would be cleaner if the standard library could be
// its own module getter. This could also allow the go/packages getter to
// serve existing on-disk content for std. See also golang/go#58923.
if modulePath == stdlib.ModulePath {
g = nil
}
return m, g, nil
}
if !errors.Is(fr.Error, derrors.NotFound) {
return nil, g, fr.Error
}
}
return nil, nil, fmt.Errorf("%s@%s: %w", modulePath, version, derrors.NotFound)
}
func (ds *FetchDataSource) populateUnitSubdirectories(u *internal.Unit, m *internal.Module) {
p := u.Path + "/"
for _, u2 := range m.Units {
if strings.HasPrefix(u2.Path, p) {
var syn string
if len(u2.Documentation) > 0 {
syn = u2.Documentation[0].Synopsis
}
u.Subdirectories = append(u.Subdirectories, &internal.PackageMeta{
Path: u2.Path,
Name: u2.Name,
Synopsis: syn,
IsRedistributable: u2.IsRedistributable,
Licenses: u2.Licenses,
})
}
}
}
// findModule finds the module with longest module path containing the given
// package path. It returns an error if no module is found.
func (ds *FetchDataSource) findModule(ctx context.Context, pkgPath, modulePath, version string) (_ *internal.Module, err error) {
defer derrors.Wrap(&err, "FetchDataSource.findModule(%q, %q, %q)", pkgPath, modulePath, version)
if modulePath != internal.UnknownModulePath {
return ds.getModule(ctx, modulePath, version)
}
pkgPath = strings.TrimLeft(pkgPath, "/")
for _, modulePath := range internal.CandidateModulePaths(pkgPath) {
m, err := ds.getModule(ctx, modulePath, version)
if err == nil {
return m, nil
}
if !errors.Is(err, derrors.NotFound) {
return nil, err
}
}
return nil, fmt.Errorf("could not find module for import path %s: %w", pkgPath, derrors.NotFound)
}
// GetUnitMeta returns information about a path.
func (ds *FetchDataSource) GetUnitMeta(ctx context.Context, path, requestedModulePath, requestedVersion string) (_ *internal.UnitMeta, err error) {
defer derrors.Wrap(&err, "FetchDataSource.GetUnitMeta(%q, %q, %q)", path, requestedModulePath, requestedVersion)
module, err := ds.findModule(ctx, path, requestedModulePath, requestedVersion)
if err != nil {
return nil, err
}
um := &internal.UnitMeta{
Path: path,
ModuleInfo: module.ModuleInfo,
}
u := findUnit(module, path)
if u == nil {
return nil, derrors.NotFound
}
um.Name = u.Name
um.IsRedistributable = u.IsRedistributable
return um, nil
}
// GetUnit returns information about a unit. Both the module path and package
// path must be known.
func (ds *FetchDataSource) GetUnit(ctx context.Context, um *internal.UnitMeta, fields internal.FieldSet, bc internal.BuildContext) (_ *internal.Unit, err error) {
defer derrors.Wrap(&err, "FetchDataSource.GetUnit(%q, %q)", um.Path, um.ModulePath)
m, err := ds.getModule(ctx, um.ModulePath, um.Version)
if err != nil {
return nil, err
}
u := findUnit(m, um.Path)
if u == nil {
return nil, fmt.Errorf("import path %s not found in module %s: %w", um.Path, um.ModulePath, derrors.NotFound)
}
// Return only the Documentation matching the given BuildContext, if any.
// Since we cache the module and its units, we have to copy this unit before we modify it.
// It can be a shallow copy, since we're only modifying the Unit.Documentation field.
u2 := *u
if d := matchingDoc(u.Documentation, bc); d != nil {
u2.Documentation = []*internal.Documentation{d}
} else {
u2.Documentation = nil
}
return &u2, nil
}
// findUnit returns the unit with the given path in m, or nil if none.
func findUnit(m *internal.Module, path string) *internal.Unit {
for _, u := range m.Units {
if u.Path == path {
return u
}
}
return nil
}
// matchingDoc returns the Documentation that matches the given build context
// and comes earliest in build-context order. It returns nil if there is none.
func matchingDoc(docs []*internal.Documentation, bc internal.BuildContext) *internal.Documentation {
var (
dMin *internal.Documentation
bcMin = internal.BuildContext{GOOS: "unk", GOARCH: "unk"} // sorts last
)
for _, d := range docs {
dbc := d.BuildContext()
if bc.Match(dbc) && internal.CompareBuildContexts(dbc, bcMin) < 0 {
dMin = d
bcMin = dbc
}
}
return dMin
}
// GetLatestInfo returns latest information for unitPath and modulePath.
func (ds *FetchDataSource) GetLatestInfo(ctx context.Context, unitPath, modulePath string, latestUnitMeta *internal.UnitMeta) (latest internal.LatestInfo, err error) {
defer derrors.Wrap(&err, "FetchDataSource.GetLatestInfo(ctx, %q, %q)", unitPath, modulePath)
if ds.opts.ProxyClientForLatest == nil {
return internal.LatestInfo{}, nil
}
if latestUnitMeta == nil {
latestUnitMeta, err = ds.GetUnitMeta(ctx, unitPath, modulePath, version.Latest)
if err != nil {
return latest, err
}
}
latest.MinorVersion = latestUnitMeta.Version
latest.MinorModulePath = latestUnitMeta.ModulePath
latest.MajorModulePath, latest.MajorUnitPath, err = ds.getLatestMajorVersion(ctx, unitPath, modulePath)
if err != nil {
return latest, err
}
// Do not try to discover whether the unit is in the latest minor version; assume it is.
latest.UnitExistsAtMinor = true
return latest, nil
}
// getLatestMajorVersion returns the latest module path and the full package path
// of the latest version found in the proxy by iterating through vN versions.
// This function does not attempt to find whether the full path exists
// in the new major version.
func (ds *FetchDataSource) getLatestMajorVersion(ctx context.Context, fullPath, modulePath string) (_ string, _ string, err error) {
// We are checking if the full path is valid so that we can forward the error if not.
seriesPath := internal.SeriesPathForModule(modulePath)
info, err := ds.opts.ProxyClientForLatest.Info(ctx, seriesPath, version.Latest)
if err != nil {
return "", "", err
}
// Converting version numbers to integers may cause an overflow, as version
// numbers need not fit into machine integers.
// While using Atoi is wrong, for it to fail, the version number must reach a
// value higher than at least 2^31, which is unlikely.
startVersion, err := strconv.Atoi(strings.TrimPrefix(semver.Major(info.Version), "v"))
if err != nil {
return "", "", err
}
startVersion++
// We start checking versions from "/v2" or higher, since v1 and v0 versions
// don't have a major version at the end of the modulepath.
if startVersion < 2 {
startVersion = 2
}
for v := startVersion; ; v++ {
query := fmt.Sprintf("%s/v%d", seriesPath, v)
_, err := ds.opts.ProxyClientForLatest.Info(ctx, query, version.Latest)
if errors.Is(err, derrors.NotFound) {
if v == 2 {
return modulePath, fullPath, nil
}
latestModulePath := fmt.Sprintf("%s/v%d", seriesPath, v-1)
return latestModulePath, latestModulePath, nil
}
if err != nil {
return "", "", err
}
}
}
// GetNestedModules is not implemented.
func (ds *FetchDataSource) GetNestedModules(ctx context.Context, modulePath string) ([]*internal.ModuleInfo, error) {
return nil, nil
}
// GetModuleReadme is not implemented.
func (*FetchDataSource) GetModuleReadme(ctx context.Context, modulePath, resolvedVersion string) (*internal.Readme, error) {
return nil, nil
}
// SupportsSearch reports whether any of the configured Getters are searchable.
func (ds *FetchDataSource) SearchSupport() internal.SearchSupport {
for _, g := range ds.opts.Getters {
if _, ok := g.(fetch.SearchableModuleGetter); ok {
// Getters only support basic search.
return internal.BasicSearch
}
}
return internal.NoSearch
}
// Search delegates search to any configured getters that support the
// SearchableModuleGetter interface, merging their results.
func (ds *FetchDataSource) Search(ctx context.Context, q string, opts internal.SearchOptions) (_ []*internal.SearchResult, err error) {
var results []*internal.SearchResult
// Since results are potentially merged from multiple sources, we can't know
// a priori how many results will be used from any particular getter.
//
// Offset+MaxResults is an upper bound.
limit := opts.Offset + opts.MaxResults
for _, g := range ds.opts.Getters {
if s, ok := g.(fetch.SearchableModuleGetter); ok {
rs, err := s.Search(ctx, q, limit)
if err != nil {
return nil, err
}
results = append(results, rs...)
}
}
sort.Slice(results, func(i, j int) bool {
return results[i].Score > results[j].Score
})
if opts.Offset > 0 {
if len(results) < opts.Offset {
return nil, nil
}
results = results[opts.Offset:]
}
if opts.MaxResults > 0 && len(results) > opts.MaxResults {
results = results[:opts.MaxResults]
}
return results, nil
}