internal/worker: scan modules for vulnerabilities

This is the first CL for a new task for the vuln worker: to scan
a selected set of modules for vulnerabilities.

Establish a new server endpoint, /scan-modules, to do that.
Currently visiting that endpoint scans the list of modules unconditionally.
A future CL will skip the scan if the vuln DB hasn't changed.

Hardcode a list of modules in the golang.org/x namespace.
Fetch each one from the proxy, and run vulncheck on it.

At present we just log any vulnerabilities we find. Later we'll
file issues to a GitHub repo.

Lastly, change the base image for the service to one that
has the Go toolchain, since go/packages requires it.

Change-Id: I1de571d24d683b080542c5c40b55767967dbe8a5
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/393174
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/cmd/worker/Dockerfile b/cmd/worker/Dockerfile
index 07a9308..8632d90 100644
--- a/cmd/worker/Dockerfile
+++ b/cmd/worker/Dockerfile
@@ -28,7 +28,7 @@
 RUN go build -mod=readonly ./cmd/worker
 
 ################################################################
-FROM debian:stable-slim
+FROM golang:1.17.3
 
 LABEL maintainer="Go VulnDB Team <go-vulndb-team@google.com>"
 
diff --git a/go.mod b/go.mod
index 07d2807..8e3d688 100644
--- a/go.mod
+++ b/go.mod
@@ -27,6 +27,7 @@
 	golang.org/x/exp/event v0.0.0-20220218215828-6cf2b201936e
 	golang.org/x/exp/vulncheck v0.0.0-20220128181451-c853b6ddb95e
 	golang.org/x/mod v0.6.0-dev.0.20211013180041-c96bc1413d57
+	golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd
 	golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8
 	golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
 	golang.org/x/time v0.0.0-20191024005414-555d28b269f0
@@ -68,7 +69,6 @@
 	go.opentelemetry.io/otel/sdk/metric v0.26.0 // indirect
 	go.opentelemetry.io/otel/trace v1.4.0 // indirect
 	golang.org/x/crypto v0.0.0-20210921155107-089bfa567519 // indirect
-	golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect
 	golang.org/x/sys v0.0.0-20220207234003-57398862261d // indirect
 	golang.org/x/text v0.3.7 // indirect
 	google.golang.org/appengine v1.6.7 // indirect
diff --git a/internal/worker/module_proxy.go b/internal/worker/module_proxy.go
new file mode 100644
index 0000000..5c94bfd
--- /dev/null
+++ b/internal/worker/module_proxy.go
@@ -0,0 +1,101 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"archive/zip"
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"sort"
+	"strings"
+	"time"
+
+	"golang.org/x/mod/module"
+	"golang.org/x/mod/semver"
+	"golang.org/x/net/context/ctxhttp"
+)
+
+// Convenience functions for accessing the Go module proxy.
+
+// proxyURL is the base URL that every proxy request is sent to.
+const proxyURL = "https://proxy.golang.org"
+
+// latestVersion returns the version of modulePath provided by the proxy's "@latest"
+// endpoint.
+func latestVersion(ctx context.Context, modulePath string) (string, error) {
+	body, err := proxyRequest(ctx, modulePath, "/@latest")
+	if err != nil {
+		return "", err
+	}
+
+	var info struct {
+		Version string
+		Time    time.Time
+	}
+	if err := json.Unmarshal(body, &info); err != nil {
+		return "", err
+	}
+	return info.Version, nil
+}
+
+// latestTaggedVersion returns the latest (largest in the semver sense) tagged
+// version of modulePath, as determined by the module proxy's "list" endpoint.
+// It returns ("", nil) if there are no tagged versions.
+func latestTaggedVersion(ctx context.Context, modulePath string) (string, error) {
+	body, err := proxyRequest(ctx, modulePath, "/@v/list")
+	if err != nil {
+		return "", err
+	}
+	// strings.Fields yields no elements for an empty list (strings.Split would
+	// yield one empty string), so the emptiness check below is meaningful.
+	vs := strings.Fields(string(body))
+	if len(vs) == 0 {
+		return "", nil
+	}
+	sort.Slice(vs, func(i, j int) bool { return semver.Compare(vs[i], vs[j]) > 0 })
+	return vs[0], nil
+}
+
+// moduleZip downloads the zip file for modulePath at version from the proxy and
+// returns a reader over its contents, which are held entirely in memory.
+func moduleZip(ctx context.Context, modulePath, version string) (*zip.Reader, error) {
+	ev, err := module.EscapeVersion(version)
+	if err != nil {
+		return nil, err
+	}
+	body, err := proxyRequest(ctx, modulePath, fmt.Sprintf("/@v/%s.zip", ev))
+	if err != nil {
+		return nil, err
+	}
+	return zip.NewReader(bytes.NewReader(body), int64(len(body)))
+}
+
+// proxyRequest performs a GET on the proxy URL formed from the escaped
+// modulePath followed by suffix, and returns the response body. It returns an
+// error if the module path cannot be escaped or the status is not 200 OK.
+func proxyRequest(ctx context.Context, modulePath, suffix string) ([]byte, error) {
+	ep, err := module.EscapePath(modulePath)
+	if err != nil {
+		return nil, fmt.Errorf("module path %v: %w", modulePath, err)
+	}
+	url := fmt.Sprintf("%s/%s%s", proxyURL, ep, suffix)
+	req, err := http.NewRequest("GET", url, nil)
+	if err != nil {
+		return nil, err
+	}
+	res, err := ctxhttp.Do(ctx, http.DefaultClient, req)
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+	if res.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("%s returned status %d", url, res.StatusCode)
+	}
+	return io.ReadAll(res.Body)
+}
diff --git a/internal/worker/module_proxy_test.go b/internal/worker/module_proxy_test.go
new file mode 100644
index 0000000..c820500
--- /dev/null
+++ b/internal/worker/module_proxy_test.go
@@ -0,0 +1,61 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"context"
+	"testing"
+
+	"golang.org/x/mod/semver"
+)
+
+// TestLatestVersion checks that latestVersion returns a valid semantic version
+// for golang.org/x/build. It makes a request to the module proxy.
+func TestLatestVersion(t *testing.T) {
+	got, err := latestVersion(context.Background(), "golang.org/x/build")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !semver.IsValid(got) {
+		t.Errorf("got invalid version %q", got)
+	}
+}
+
+// TestLatestTaggedVersion checks both outcomes of latestTaggedVersion: an empty
+// result for golang.org/x/build and a valid semantic version for golang.org/x/tools.
+func TestLatestTaggedVersion(t *testing.T) {
+	// This expectation assumes golang.org/x/build has no tagged versions.
+	got, err := latestTaggedVersion(context.Background(), "golang.org/x/build")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got != "" {
+		t.Errorf(`got %q, wanted ""`, got)
+	}
+
+	got, err = latestTaggedVersion(context.Background(), "golang.org/x/tools")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if !semver.IsValid(got) {
+		t.Errorf("got invalid version %q", got)
+	}
+
+}
+
+// TestModuleZip checks that the zip for the latest version of golang.org/x/time
+// can be fetched and opened without error.
+func TestModuleZip(t *testing.T) {
+	ctx := context.Background()
+	const m = "golang.org/x/time"
+	v, err := latestVersion(ctx, m)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = moduleZip(ctx, m, v)
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/internal/worker/scan_modules.go b/internal/worker/scan_modules.go
new file mode 100644
index 0000000..b5384d4
--- /dev/null
+++ b/internal/worker/scan_modules.go
@@ -0,0 +1,191 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"archive/zip"
+	"context"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"golang.org/x/exp/vulncheck"
+	"golang.org/x/tools/go/packages"
+	vulnc "golang.org/x/vuln/client"
+	"golang.org/x/vulndb/internal/derrors"
+	"golang.org/x/vulndb/internal/worker/log"
+)
+
+// Selected repos under golang.org/x.
+var modulesToScan = []string{
+	"golang.org/x/build", "golang.org/x/crypto", "golang.org/x/exp/event", "golang.org/x/exp/vulncheck",
+	"golang.org/x/image", "golang.org/x/mod", "golang.org/x/net", "golang.org/x/oauth2",
+	//"golang.org/x/pkgsite", requires 1.18-aware tools
+	"golang.org/x/playground", "golang.org/x/review", "golang.org/x/sync",
+	"golang.org/x/sys", "golang.org/x/term", "golang.org/x/text", "golang.org/x/time",
+	"golang.org/x/tools",
+	// "golang.org/x/tools/gopls", requires 1.18-aware tools
+	"golang.org/x/vuln", "golang.org/x/vulndb", "golang.org/x/website",
+}
+
+// scanModules scans a list of Go modules for vulnerabilities.
+// It assumes the root of each repo is a module, and there are no nested modules.
+func scanModules(ctx context.Context) error {
+	dbClient, err := vulnc.NewClient([]string{vulnDBURL}, vulnc.Options{})
+	if err != nil {
+		return err
+	}
+	for _, modulePath := range modulesToScan {
+		// Scan the latest version, and the latest tagged version (if they differ).
+		latest, err := latestVersion(ctx, modulePath)
+		if err != nil {
+			return err
+		}
+		if err := processModule(ctx, modulePath, latest, dbClient); err != nil {
+			return err
+		}
+		latestTagged, err := latestTaggedVersion(ctx, modulePath)
+		if err != nil {
+			return err
+		}
+		if latestTagged != "" && latestTagged != latest {
+			if err := processModule(ctx, modulePath, latestTagged, dbClient); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// processModule scans modulePath at version and logs the number of
+// vulnerabilities found, plus a warning for each one.
+func processModule(ctx context.Context, modulePath, version string, dbClient vulnc.Client) error {
+	res, err := scanModule(ctx, modulePath, version, dbClient)
+	if err != nil {
+		return err
+	}
+	log.Infof(ctx, "%s@%s has %d vulns", modulePath, version, len(res.Vulns))
+	for _, v := range res.Vulns {
+		log.Warningf(ctx, "module %s@%s is vulnerable to %s: package %s, symbol %s",
+			modulePath, version, v.OSV.ID, v.PkgPath, v.Symbol)
+	}
+	return nil
+}
+
+// scanModule downloads the zip for modulePath at version from the module proxy,
+// unpacks it into a temporary directory, loads its packages (including tests),
+// and analyzes them for vulnerabilities using dbClient. The temporary directory
+// is removed before returning.
+func scanModule(ctx context.Context, modulePath, version string, dbClient vulnc.Client) (_ *vulncheck.Result, err error) {
+	defer derrors.Wrap(&err, "scanModule(%q, %q)", modulePath, version)
+
+	start := time.Now()
+	log.Infof(ctx, "scanning %s@%s", modulePath, version)
+	defer func() { log.Infof(ctx, "scanned %s@%s in %.1fs", modulePath, version, time.Since(start).Seconds()) }()
+
+	dir, err := os.MkdirTemp("", "scanModule")
+	if err != nil {
+		return nil, err
+	}
+
+	defer func() {
+		err1 := os.RemoveAll(dir)
+		if err == nil {
+			err = err1
+		}
+	}()
+
+	zipr, err := moduleZip(ctx, modulePath, version)
+	if err != nil {
+		return nil, err
+	}
+	if err := writeZip(zipr, dir, modulePath+"@"+version+"/"); err != nil {
+		return nil, err
+	}
+	log.Debugf(ctx, "fetched zip from proxy and unzipped")
+
+	cfg := &packages.Config{
+		Mode:  packages.NeedName | packages.NeedFiles | packages.NeedCompiledGoFiles | packages.NeedImports | packages.NeedTypes | packages.NeedTypesSizes | packages.NeedSyntax | packages.NeedTypesInfo | packages.NeedDeps | packages.NeedModule,
+		Tests: true,
+		Dir:   dir, // module root: writeZip stripped the "module@version/" prefix
+	}
+	pkgs, err := loadPackages(cfg, []string{"./..."})
+	if err != nil {
+		return nil, err
+	}
+	log.Debugf(ctx, "loaded packages")
+	vcfg := &vulncheck.Config{Client: dbClient}
+	return vulncheck.Source(ctx, vulncheck.Convert(pkgs), vcfg)
+}
+
+// loadPackages loads the packages matching patterns using cfg. It returns an
+// error if loading fails or if packages.PrintErrors reports any errors in the
+// loaded packages.
+func loadPackages(cfg *packages.Config, patterns []string) ([]*packages.Package, error) {
+	pkgs, err := packages.Load(cfg, patterns...)
+	if err != nil {
+		return nil, err
+	}
+	if packages.PrintErrors(pkgs) > 0 {
+		return nil, fmt.Errorf("packages contain errors")
+	}
+	return pkgs, nil
+}
+
+// writeZip extracts the files in r into the directory destination, removing
+// stripPrefix from the front of each file name. It returns an error if an
+// entry's path does not lie inside destination.
+func writeZip(r *zip.Reader, destination, stripPrefix string) error {
+	root := filepath.Clean(destination) + string(os.PathSeparator)
+	for _, f := range r.File {
+		name := strings.TrimPrefix(f.Name, stripPrefix)
+		fpath := filepath.Join(destination, name)
+		if !strings.HasPrefix(fpath, root) {
+			return fmt.Errorf("%s is an illegal filepath", fpath)
+		}
+		if f.FileInfo().IsDir() {
+			if err := os.MkdirAll(fpath, os.ModePerm); err != nil {
+				return err
+			}
+			continue
+		}
+		if err := os.MkdirAll(filepath.Dir(fpath), os.ModePerm); err != nil {
+			return err
+		}
+		if err := writeZipFile(f, fpath); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// writeZipFile copies the contents of the zip entry f into the file fpath,
+// creating or truncating it with f's mode. The entry and the file are closed
+// on every return path, and a Close error is reported if nothing failed earlier.
+func writeZipFile(f *zip.File, fpath string) (err error) {
+	rc, err := f.Open()
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := rc.Close(); err == nil {
+			err = cerr
+		}
+	}()
+	outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := outFile.Close(); err == nil {
+			err = cerr
+		}
+	}()
+	_, err = io.Copy(outFile, rc)
+	return err
+}
diff --git a/internal/worker/scan_modules_test.go b/internal/worker/scan_modules_test.go
new file mode 100644
index 0000000..9cca86c
--- /dev/null
+++ b/internal/worker/scan_modules_test.go
@@ -0,0 +1,51 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"context"
+	"flag"
+	"os"
+	"testing"
+
+	"golang.org/x/exp/event"
+	vulnc "golang.org/x/vuln/client"
+	"golang.org/x/vulndb/internal/worker/log"
+)
+
+// TestScanModules is slow, so put it behind a flag.
+var runScanModulesTest = flag.Bool("scan", false, "run the ScanModules test")
+
+// TestScanModules runs scanModules over the full module list, with a log
+// handler writing to os.Stderr. It is skipped unless the -scan flag is set.
+func TestScanModules(t *testing.T) {
+	if !*runScanModulesTest {
+		t.Skip("-scan flag missing")
+	}
+	// Verify only that scanModules works (doesn't return an error).
+	ctx := event.WithExporter(context.Background(),
+		event.NewExporter(log.NewLineHandler(os.Stderr), nil))
+	if err := scanModules(ctx); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestScanModule scans golang.org/x/mod@v0.5.1 and expects no vulnerabilities
+// to be reported.
+func TestScanModule(t *testing.T) {
+	ctx := event.WithExporter(context.Background(),
+		event.NewExporter(log.NewLineHandler(os.Stderr), nil))
+	dbClient, err := vulnc.NewClient([]string{vulnDBURL}, vulnc.Options{})
+	if err != nil {
+		t.Fatal(err)
+	}
+	got, err := scanModule(ctx, "golang.org/x/mod", "v0.5.1", dbClient)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got, want := len(got.Vulns), 0; got != want {
+		t.Errorf("got %d vulns, want %d", got, want)
+	}
+}
diff --git a/internal/worker/server.go b/internal/worker/server.go
index de5d8c8..628ac7e 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -113,6 +113,8 @@
 	s.handle(ctx, "/issues", s.handleIssues)
 	// update-and-issues: do update followed by issues.
 	s.handle(ctx, "/update-and-issues", s.handleUpdateAndIssues)
+	// scan-modules: scan various modules for vulnerabilities
+	s.handle(ctx, "/scan-modules", s.handleScanModules)
 	return s, nil
 }
 
@@ -372,6 +374,12 @@
 	return s.handleIssues(w, r)
 }
 
+// handleScanModules handles the /scan-modules endpoint by running scanModules
+// with the request's context. It writes nothing to w itself.
+func (s *Server) handleScanModules(w http.ResponseWriter, r *http.Request) error {
+	return scanModules(r.Context())
+}
+
 func initOpenTelemetry(projectID string) (tp *sdktrace.TracerProvider, mp metric.MeterProvider, err error) {
 	defer derrors.Wrap(&err, "initOpenTelemetry(%q)", projectID)