internal/datasource: factor out cache

Begin factoring out common pieces of the two datasource
implementations by implementing a cache of modules.

We expect that ultimately, we can combine the two into a single
implementation, but we'll get there incrementally.

For golang/go#47780

Change-Id: Ifb17137e20fbf7602d052cd7e1c87fbe06dc4fe9
Reviewed-on: https://go-review.googlesource.com/c/pkgsite/+/344589
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: kokoro <noreply+kokoro@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/datasource/datasource.go b/internal/datasource/datasource.go
new file mode 100644
index 0000000..a8c966e
--- /dev/null
+++ b/internal/datasource/datasource.go
@@ -0,0 +1,60 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package datasource provides internal.DataSource implementations backed solely
+// by a proxy instance, and backed by the local filesystem.
+// Search and other tabs are not supported by these implementations.
+package datasource
+
+import (
+	"sync"
+
+	"golang.org/x/pkgsite/internal"
+	"golang.org/x/pkgsite/internal/fetch"
+	"golang.org/x/pkgsite/internal/source"
+)
+
+// dataSource implements the internal.DataSource interface, by trying a list of
+// fetch.ModuleGetters to fetch modules and caching the results.
+type dataSource struct {
+	sourceClient *source.Client
+
+	mu    sync.Mutex
+	cache map[internal.Modver]cacheEntry
+}
+
+// cacheEntry holds a fetched module or an error, if the fetch failed.
+type cacheEntry struct {
+	module *internal.Module
+	err    error
+}
+
+func newDataSource(sc *source.Client) *dataSource {
+	return &dataSource{
+		sourceClient: sc,
+		cache:        map[internal.Modver]cacheEntry{},
+	}
+}
+
+// cacheGet returns information from the cache if it is present, and (nil, nil) otherwise.
+func (ds *dataSource) cacheGet(path, version string) (*internal.Module, error) {
+	ds.mu.Lock()
+	defer ds.mu.Unlock()
+	// Look for an exact match first.
+	if e, ok := ds.cache[internal.Modver{Path: path, Version: version}]; ok {
+		return e.module, e.err
+	}
+	// Look for the module path with LocalVersion, as for a directory-based or GOPATH-mode module.
+	if e, ok := ds.cache[internal.Modver{Path: path, Version: fetch.LocalVersion}]; ok {
+		return e.module, e.err
+	}
+	return nil, nil
+}
+
+// cachePut puts information into the cache.
+func (ds *dataSource) cachePut(path, version string, m *internal.Module, err error) {
+	ds.mu.Lock()
+	defer ds.mu.Unlock()
+	ds.cache[internal.Modver{Path: path, Version: version}] = cacheEntry{m, err}
+}
diff --git a/internal/datasource/datasource_test.go b/internal/datasource/datasource_test.go
new file mode 100644
index 0000000..feadae6
--- /dev/null
+++ b/internal/datasource/datasource_test.go
@@ -0,0 +1,36 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package datasource
+
+import (
+	"testing"
+
+	"golang.org/x/pkgsite/internal"
+	"golang.org/x/pkgsite/internal/derrors"
+	"golang.org/x/pkgsite/internal/fetch"
+)
+
+func TestCache(t *testing.T) {
+	ds := newDataSource(nil)
+	m1 := &internal.Module{}
+	ds.cachePut("m1", fetch.LocalVersion, m1, nil)
+	ds.cachePut("m2", "v1.0.0", nil, derrors.NotFound)
+
+	for _, test := range []struct {
+		path, version string
+		wantm         *internal.Module
+		wante         error
+	}{
+		{"m1", fetch.LocalVersion, m1, nil},
+		{"m1", "v1.2.3", m1, nil}, // find m1 under LocalVersion
+		{"m2", "v1.0.0", nil, derrors.NotFound},
+		{"m3", "v1.0.0", nil, nil},
+	} {
+		gotm, gote := ds.cacheGet(test.path, test.version)
+		if gotm != test.wantm || gote != test.wante {
+			t.Errorf("%s@%s: got (%v, %v), want (%v, %v)", test.path, test.version, gotm, gote, test.wantm, test.wante)
+		}
+	}
+}
diff --git a/internal/datasource/local.go b/internal/datasource/local.go
index da0eada..5c1eda8 100644
--- a/internal/datasource/local.go
+++ b/internal/datasource/local.go
@@ -26,17 +26,17 @@
 type LocalDataSource struct {
 	sourceClient *source.Client
 
-	mu            sync.Mutex
-	getters       []fetch.ModuleGetter
-	loadedModules map[string]*internal.Module
+	mu      sync.Mutex
+	getters []fetch.ModuleGetter
+	ds      *dataSource
 }
 
 // New creates and returns a new local datasource that bypasses license
 // checks by default.
 func NewLocal(sc *source.Client) *LocalDataSource {
 	return &LocalDataSource{
-		sourceClient:  sc,
-		loadedModules: make(map[string]*internal.Module),
+		sourceClient: sc,
+		ds:           newDataSource(sc),
 	}
 }
 
@@ -52,29 +52,13 @@
 // getModule gets the module at the given path and version. It first checks the
 // cache, and if it isn't there it then tries to fetch it.
 func (ds *LocalDataSource) getModule(ctx context.Context, path, version string) (*internal.Module, error) {
-	if m := ds.getFromCache(path, version); m != nil {
-		return m, nil
+	m, err := ds.ds.cacheGet(path, version)
+	if m != nil || err != nil {
+		return m, err
 	}
-	m, err := ds.fetch(ctx, path, version)
-	if err != nil {
-		return nil, err
-	}
-	ds.mu.Lock()
-	defer ds.mu.Unlock()
-	ds.loadedModules[m.ModulePath+"@"+m.Version] = m
-	return m, nil
-}
-
-// getFromCache returns a module from the cache if it is present, and nil otherwise.
-func (ds *LocalDataSource) getFromCache(path, version string) *internal.Module {
-	ds.mu.Lock()
-	defer ds.mu.Unlock()
-	// Look for an exact match first.
-	if m := ds.loadedModules[path+"@"+version]; m != nil {
-		return m
-	}
-	// Look for the module path with LocalVersion, as for a directory-based or GOPATH-mode module.
-	return ds.loadedModules[path+"@"+fetch.LocalVersion]
+	m, err = ds.fetch(ctx, path, version)
+	ds.ds.cachePut(path, version, m, err)
+	return m, err
 }
 
 // fetch fetches a module using the configured ModuleGetters.
diff --git a/internal/datasource/proxy.go b/internal/datasource/proxy.go
index f876233..451ab26 100644
--- a/internal/datasource/proxy.go
+++ b/internal/datasource/proxy.go
@@ -2,9 +2,6 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package datasource provides internal.DataSource implementations backed solely
-// by a proxy instance, and backed by the local filesystem.
-// Search and other tabs are not supported by these implementations.
 package datasource
 
 import (
@@ -39,9 +36,9 @@
 
 func newProxyDataSource(proxyClient *proxy.Client, sourceClient *source.Client) *ProxyDataSource {
 	return &ProxyDataSource{
-		proxyClient:          proxyClient,
-		sourceClient:         sourceClient,
-		versionCache:         make(map[versionKey]*versionEntry),
+		ds:          newDataSource(sourceClient),
+		proxyClient: proxyClient,
+
 		modulePathToVersions: make(map[string][]string),
 		packagePathToModules: make(map[string][]string),
 		bypassLicenseCheck:   false,
@@ -60,13 +57,12 @@
 // ProxyDataSource implements the frontend.DataSource interface, by querying a
 // module proxy directly and caching the results in memory.
 type ProxyDataSource struct {
-	proxyClient  *proxy.Client
-	sourceClient *source.Client
+	proxyClient *proxy.Client
 
+	mu sync.Mutex
+	ds *dataSource
 	// Use an extremely coarse lock for now - mu guards all maps below. The
 	// assumption is that this will only be used for local development.
-	mu           sync.RWMutex
-	versionCache map[versionKey]*versionEntry
 	// map of modulePath -> versions, with versions sorted in semver order
 	modulePathToVersions map[string][]string
 	// map of package path -> modules paths containing it, with module paths
@@ -75,28 +71,19 @@
 	bypassLicenseCheck   bool
 }
 
-type versionKey struct {
-	modulePath, version string
-}
-
-// versionEntry holds the result of a call to worker.FetchModule.
-type versionEntry struct {
-	module *internal.Module
-	err    error
-}
-
 // getModule retrieves a version from the cache, or failing that queries and
 // processes the version from the proxy.
 func (ds *ProxyDataSource) getModule(ctx context.Context, modulePath, version string, _ internal.BuildContext) (_ *internal.Module, err error) {
 	defer derrors.Wrap(&err, "getModule(%q, %q)", modulePath, version)
 
-	key := versionKey{modulePath, version}
 	ds.mu.Lock()
 	defer ds.mu.Unlock()
-	if e, ok := ds.versionCache[key]; ok {
-		return e.module, e.err
+
+	mod, err := ds.ds.cacheGet(modulePath, version)
+	if mod != nil || err != nil {
+		return mod, err
 	}
-	res := fetch.FetchModule(ctx, modulePath, version, fetch.NewProxyModuleGetter(ds.proxyClient), ds.sourceClient)
+	res := fetch.FetchModule(ctx, modulePath, version, fetch.NewProxyModuleGetter(ds.proxyClient), ds.ds.sourceClient)
 	defer res.Defer()
 	m := res.Module
 	if m != nil {
@@ -121,12 +108,11 @@
 
 	if res.Error != nil {
 		if !errors.Is(ctx.Err(), context.Canceled) {
-			ds.versionCache[key] = &versionEntry{module: m, err: res.Error}
+			ds.ds.cachePut(modulePath, version, m, res.Error)
 		}
 		return nil, res.Error
 	}
-
-	ds.versionCache[key] = &versionEntry{module: m, err: err}
+	ds.ds.cachePut(modulePath, version, m, err)
 
 	// Since we hold the lock and missed the cache, we can assume that we have
 	// never seen this module version. Therefore the following insert-and-sort