| // Copyright 2021 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package fetchdatasource provides an internal.DataSource implementation |
| // that fetches modules (rather than reading them from a database). |
// Search is supported only through getters that implement
// fetch.SearchableModuleGetter; other tabs are not supported.
| package fetchdatasource |
| |
| import ( |
| "context" |
| "errors" |
| "fmt" |
| "sort" |
| "strconv" |
| "strings" |
| "time" |
| |
| "golang.org/x/mod/semver" |
| "golang.org/x/pkgsite/internal" |
| "golang.org/x/pkgsite/internal/derrors" |
| "golang.org/x/pkgsite/internal/fetch" |
| "golang.org/x/pkgsite/internal/log" |
| "golang.org/x/pkgsite/internal/lru" |
| "golang.org/x/pkgsite/internal/proxy" |
| "golang.org/x/pkgsite/internal/version" |
| ) |
| |
| // FetchDataSource implements the internal.DataSource interface, by trying a list of |
| // fetch.ModuleGetters to fetch modules and caching the results. |
| type FetchDataSource struct { |
| opts Options |
| cache *lru.Cache[internal.Modver, cacheEntry] |
| } |
| |
| // Options are parameters for creating a new FetchDataSource. |
| type Options struct { |
| // List of getters to try, in order. |
| Getters []fetch.ModuleGetter |
| // If set, this will be used for latest-version information. To fetch modules from the proxy, |
| // include a ProxyModuleGetter in Getters. |
| ProxyClientForLatest *proxy.Client |
| BypassLicenseCheck bool |
| } |
| |
| // New creates a new FetchDataSource from the options. |
| func (o Options) New() *FetchDataSource { |
| cache := lru.New[internal.Modver, cacheEntry](maxCachedModules) |
| |
| opts := o |
| // Copy getters slice so caller doesn't modify us. |
| opts.Getters = make([]fetch.ModuleGetter, len(opts.Getters)) |
| copy(opts.Getters, o.Getters) |
| return &FetchDataSource{ |
| opts: opts, |
| cache: cache, |
| } |
| } |
| |
| // cacheEntry holds a fetched module or an error, if the fetch failed. |
| type cacheEntry struct { |
| g fetch.ModuleGetter |
| module *fetch.LazyModule |
| err error |
| } |
| |
// maxCachedModules is the size passed to lru.New when creating the module
// cache of a FetchDataSource.
const maxCachedModules = 100
| |
| // cacheGet returns information from the cache if it is present, and (nil, nil) otherwise. |
| func (ds *FetchDataSource) cacheGet(path, version string) (fetch.ModuleGetter, *fetch.LazyModule, error) { |
| // Look for an exact match first, then use LocalVersion, as for a |
| // directory-based or GOPATH-mode module. |
| for _, v := range []string{version, fetch.LocalVersion} { |
| if e, ok := ds.cache.Get(internal.Modver{Path: path, Version: v}); ok { |
| return e.g, e.module, e.err |
| } |
| } |
| return nil, nil, nil |
| } |
| |
| // cachePut puts information into the cache. |
| func (ds *FetchDataSource) cachePut(g fetch.ModuleGetter, path, version string, m *fetch.LazyModule, err error) { |
| ds.cache.Put(internal.Modver{Path: path, Version: version}, cacheEntry{g, m, err}) |
| } |
| |
| // getModule gets the module at the given path and version. It first checks the |
| // cache, and if it isn't there it then tries to fetch it. |
| func (ds *FetchDataSource) getModule(ctx context.Context, modulePath, vers string) (_ *fetch.LazyModule, err error) { |
| defer derrors.Wrap(&err, "FetchDataSource.getModule(%q, %q)", modulePath, vers) |
| |
| g, mod, err := ds.cacheGet(modulePath, vers) |
| if err != nil { |
| return nil, err |
| } |
| if mod != nil { |
| // For getters supporting invalidation, check whether cached contents have |
| // changed. |
| v, ok := g.(fetch.VolatileModuleGetter) |
| if !ok { |
| return mod, nil |
| } |
| hasChanged, err := v.HasChanged(ctx, mod.ModuleInfo) |
| if err != nil { |
| return nil, err |
| } |
| if !hasChanged { |
| return mod, nil |
| } |
| } |
| |
| // There can be a benign race here, where two goroutines both fetch the same |
| // module. At worst some work will be duplicated, but if that turns out to |
| // be a problem we could use golang.org/x/sync/singleflight. |
| m, g, err := ds.fetch(ctx, modulePath, vers) |
| if m != nil && ds.opts.ProxyClientForLatest != nil { |
| // Use the go.mod file at the raw latest version to fill in deprecation |
| // and retraction information. Ignore any problems getting the |
| // information, because we may be trying to do this for a local module |
| // that the proxy doesn't know about. |
| if lmv, err := fetch.LatestModuleVersions(ctx, modulePath, ds.opts.ProxyClientForLatest, nil); err == nil { |
| lmv.PopulateModuleInfo(&m.ModuleInfo) |
| } |
| } |
| |
| // Cache both successes and failures, but not cancellations. |
| if !errors.Is(err, context.Canceled) { |
| ds.cachePut(g, modulePath, vers, m, err) |
| // Cache the resolved version of "latest" too. A useful optimization |
| // because the frontend redirects "latest", resulting in another fetch. |
| if m != nil && vers == version.Latest { |
| ds.cachePut(g, modulePath, m.Version, m, err) |
| } |
| } |
| return m, err |
| } |
| |
| // fetch fetches a module using the configured ModuleGetters. |
| // It tries each getter in turn until it finds one that has the module. |
| func (ds *FetchDataSource) fetch(ctx context.Context, modulePath, version string) (_ *fetch.LazyModule, g fetch.ModuleGetter, err error) { |
| log.Infof(ctx, "FetchDataSource: fetching %s@%s", modulePath, version) |
| start := time.Now() |
| defer func() { |
| log.Infof(ctx, "FetchDataSource: fetched %s@%s using %T in %s with error %v", modulePath, version, g, time.Since(start), err) |
| }() |
| for _, g := range ds.opts.Getters { |
| m := fetch.FetchLazyModule(ctx, modulePath, version, g) |
| if m.Error == nil { |
| if ds.opts.BypassLicenseCheck { |
| m.IsRedistributable = true |
| } |
| return m, g, nil |
| } |
| if !errors.Is(m.Error, derrors.NotFound) { |
| return nil, g, m.Error |
| } |
| } |
| return nil, nil, fmt.Errorf("%s@%s: %w", modulePath, version, derrors.NotFound) |
| } |
| |
| func (ds *FetchDataSource) populateUnitSubdirectories(u *internal.Unit, m *fetch.LazyModule) { |
| p := u.Path + "/" |
| for _, u2 := range m.UnitMetas { |
| if strings.HasPrefix(u2.Path, p) || u.Path == "std" { |
| u.Subdirectories = append(u.Subdirectories, &internal.PackageMeta{ |
| Path: u2.Path, |
| Name: u2.Name, |
| // Syn, IsRedistributable, and Licences are not populated from FetchDataSource. |
| }) |
| } |
| } |
| } |
| |
| // findModule finds the module with longest module path containing the given |
| // package path. It returns an error if no module is found. |
| func (ds *FetchDataSource) findModule(ctx context.Context, pkgPath, modulePath, version string) (_ *fetch.LazyModule, err error) { |
| defer derrors.Wrap(&err, "FetchDataSource.findModule(%q, %q, %q)", pkgPath, modulePath, version) |
| |
| if modulePath != internal.UnknownModulePath { |
| return ds.getModule(ctx, modulePath, version) |
| } |
| pkgPath = strings.TrimLeft(pkgPath, "/") |
| for _, modulePath := range internal.CandidateModulePaths(pkgPath) { |
| m, err := ds.getModule(ctx, modulePath, version) |
| if err == nil { |
| return m, nil |
| } |
| if !errors.Is(err, derrors.NotFound) { |
| return nil, err |
| } |
| } |
| return nil, fmt.Errorf("could not find module for import path %s: %w", pkgPath, derrors.NotFound) |
| } |
| |
| // GetUnitMeta returns information about a path. |
| func (ds *FetchDataSource) GetUnitMeta(ctx context.Context, path, requestedModulePath, requestedVersion string) (_ *internal.UnitMeta, err error) { |
| defer derrors.Wrap(&err, "FetchDataSource.GetUnitMeta(%q, %q, %q)", path, requestedModulePath, requestedVersion) |
| |
| module, err := ds.findModule(ctx, path, requestedModulePath, requestedVersion) |
| if err != nil { |
| return nil, err |
| } |
| return findUnitMeta(module, path) |
| } |
| |
| // GetUnit returns information about a unit. Both the module path and package |
| // path must be known. |
| func (ds *FetchDataSource) GetUnit(ctx context.Context, um *internal.UnitMeta, fields internal.FieldSet, bc internal.BuildContext) (_ *internal.Unit, err error) { |
| defer derrors.Wrap(&err, "FetchDataSource.GetUnit(%q, %q)", um.Path, um.ModulePath) |
| |
| m, err := ds.getModule(ctx, um.ModulePath, um.Version) |
| if err != nil { |
| return nil, err |
| } |
| u, err := ds.findUnit(ctx, m, um.Path) |
| if u == nil { |
| return nil, fmt.Errorf("import path %s not found in module %s: %w", um.Path, um.ModulePath, derrors.NotFound) |
| } |
| // Return only the Documentation matching the given BuildContext, if any. |
| // Since we cache the module and its units, we have to copy this unit before we modify it. |
| // It can be a shallow copy, since we're only modifying the Unit.Documentation field. |
| u2 := *u |
| if d := matchingDoc(u.Documentation, bc); d != nil { |
| u2.Documentation = []*internal.Documentation{d} |
| } else { |
| u2.Documentation = nil |
| } |
| return &u2, nil |
| } |
| |
| // findUnit returns the unit with the given path in m, or nil if none. |
| func (ds *FetchDataSource) findUnit(ctx context.Context, m *fetch.LazyModule, path string) (*internal.Unit, error) { |
| unit, err := m.Unit(ctx, path) |
| ds.populateUnitSubdirectories(unit, m) |
| if err != nil { |
| return nil, err |
| } |
| if ds.opts.BypassLicenseCheck { |
| unit.IsRedistributable = true |
| } else { |
| unit.RemoveNonRedistributableData() |
| } |
| return unit, nil |
| } |
| |
| func findUnitMeta(m *fetch.LazyModule, path string) (*internal.UnitMeta, error) { |
| for _, um := range m.UnitMetas { |
| if um.Path == path { |
| return um, nil |
| } |
| } |
| return nil, derrors.NotFound |
| } |
| |
| // matchingDoc returns the Documentation that matches the given build context |
| // and comes earliest in build-context order. It returns nil if there is none. |
| func matchingDoc(docs []*internal.Documentation, bc internal.BuildContext) *internal.Documentation { |
| var ( |
| dMin *internal.Documentation |
| bcMin = internal.BuildContext{GOOS: "unk", GOARCH: "unk"} // sorts last |
| ) |
| for _, d := range docs { |
| dbc := d.BuildContext() |
| if bc.Match(dbc) && internal.CompareBuildContexts(dbc, bcMin) < 0 { |
| dMin = d |
| bcMin = dbc |
| } |
| } |
| return dMin |
| } |
| |
| // GetLatestInfo returns latest information for unitPath and modulePath. |
| func (ds *FetchDataSource) GetLatestInfo(ctx context.Context, unitPath, modulePath string, latestUnitMeta *internal.UnitMeta) (latest internal.LatestInfo, err error) { |
| defer derrors.Wrap(&err, "FetchDataSource.GetLatestInfo(ctx, %q, %q)", unitPath, modulePath) |
| |
| if ds.opts.ProxyClientForLatest == nil { |
| return internal.LatestInfo{}, nil |
| } |
| |
| if latestUnitMeta == nil { |
| latestUnitMeta, err = ds.GetUnitMeta(ctx, unitPath, modulePath, version.Latest) |
| if err != nil { |
| return latest, err |
| } |
| } |
| latest.MinorVersion = latestUnitMeta.Version |
| latest.MinorModulePath = latestUnitMeta.ModulePath |
| |
| latest.MajorModulePath, latest.MajorUnitPath, err = ds.getLatestMajorVersion(ctx, unitPath, modulePath) |
| if err != nil { |
| return latest, err |
| } |
| // Do not try to discover whether the unit is in the latest minor version; assume it is. |
| latest.UnitExistsAtMinor = true |
| return latest, nil |
| } |
| |
| // getLatestMajorVersion returns the latest module path and the full package path |
| // of the latest version found in the proxy by iterating through vN versions. |
| // This function does not attempt to find whether the full path exists |
| // in the new major version. |
| func (ds *FetchDataSource) getLatestMajorVersion(ctx context.Context, fullPath, modulePath string) (_ string, _ string, err error) { |
| // We are checking if the full path is valid so that we can forward the error if not. |
| seriesPath := internal.SeriesPathForModule(modulePath) |
| info, err := ds.opts.ProxyClientForLatest.Info(ctx, seriesPath, version.Latest) |
| if err != nil { |
| return "", "", err |
| } |
| |
| // Converting version numbers to integers may cause an overflow, as version |
| // numbers need not fit into machine integers. |
| // While using Atoi is wrong, for it to fail, the version number must reach a |
| // value higher than at least 2^31, which is unlikely. |
| startVersion, err := strconv.Atoi(strings.TrimPrefix(semver.Major(info.Version), "v")) |
| if err != nil { |
| return "", "", err |
| } |
| startVersion++ |
| |
| // We start checking versions from "/v2" or higher, since v1 and v0 versions |
| // don't have a major version at the end of the modulepath. |
| if startVersion < 2 { |
| startVersion = 2 |
| } |
| |
| for v := startVersion; ; v++ { |
| query := fmt.Sprintf("%s/v%d", seriesPath, v) |
| |
| _, err := ds.opts.ProxyClientForLatest.Info(ctx, query, version.Latest) |
| if errors.Is(err, derrors.NotFound) { |
| if v == 2 { |
| return modulePath, fullPath, nil |
| } |
| latestModulePath := fmt.Sprintf("%s/v%d", seriesPath, v-1) |
| return latestModulePath, latestModulePath, nil |
| } |
| if err != nil { |
| return "", "", err |
| } |
| } |
| } |
| |
| // GetNestedModules is not implemented. |
| func (ds *FetchDataSource) GetNestedModules(ctx context.Context, modulePath string) ([]*internal.ModuleInfo, error) { |
| return nil, nil |
| } |
| |
| // GetModuleReadme is not implemented. |
| func (*FetchDataSource) GetModuleReadme(ctx context.Context, modulePath, resolvedVersion string) (*internal.Readme, error) { |
| return nil, nil |
| } |
| |
| // SupportsSearch reports whether any of the configured Getters are searchable. |
| func (ds *FetchDataSource) SearchSupport() internal.SearchSupport { |
| for _, g := range ds.opts.Getters { |
| if _, ok := g.(fetch.SearchableModuleGetter); ok { |
| // Getters only support basic search. |
| return internal.BasicSearch |
| } |
| } |
| return internal.NoSearch |
| } |
| |
| // Search delegates search to any configured getters that support the |
| // SearchableModuleGetter interface, merging their results. |
| func (ds *FetchDataSource) Search(ctx context.Context, q string, opts internal.SearchOptions) (_ []*internal.SearchResult, err error) { |
| var results []*internal.SearchResult |
| // Since results are potentially merged from multiple sources, we can't know |
| // a priori how many results will be used from any particular getter. |
| // |
| // Offset+MaxResults is an upper bound. |
| limit := opts.Offset + opts.MaxResults |
| for _, g := range ds.opts.Getters { |
| if s, ok := g.(fetch.SearchableModuleGetter); ok { |
| rs, err := s.Search(ctx, q, limit) |
| if err != nil { |
| return nil, err |
| } |
| results = append(results, rs...) |
| } |
| } |
| sort.Slice(results, func(i, j int) bool { |
| return results[i].Score > results[j].Score |
| }) |
| if opts.Offset > 0 { |
| if len(results) < opts.Offset { |
| return nil, nil |
| } |
| results = results[opts.Offset:] |
| } |
| if opts.MaxResults > 0 && len(results) > opts.MaxResults { |
| results = results[:opts.MaxResults] |
| } |
| |
| return results, nil |
| } |