internal/worker/govulncheck_scan.go - pkgsite-metrics - Git at Google

 // Copyright 2022 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package worker

 import (
 	"context"
 	"errors"
 	"fmt"
 	"net/http"
 	"os"
 	"path/filepath"
 	"strings"

 	"cloud.google.com/go/storage"
 	"golang.org/x/exp/event"
 	"golang.org/x/pkgsite-metrics/internal/bigquery"
 	"golang.org/x/pkgsite-metrics/internal/derrors"
 	"golang.org/x/pkgsite-metrics/internal/fstore"
 	"golang.org/x/pkgsite-metrics/internal/govulncheck"
 	"golang.org/x/pkgsite-metrics/internal/govulncheckapi"
 	"golang.org/x/pkgsite-metrics/internal/log"
 	"golang.org/x/pkgsite-metrics/internal/proxy"
 	"golang.org/x/pkgsite-metrics/internal/sandbox"
 	"golang.org/x/pkgsite-metrics/internal/version"
 )

 // Ecosystem metrics modes that a scan request can select. handleScan treats
 // a request without a mode as ModeGovulncheck.
 const (
 	// ModeGovulncheck is an ecosystem metrics mode that runs the govulncheck
 	// binary in default (source) mode.
 	ModeGovulncheck = "GOVULNCHECK"

 	// ModeCompare is an ecosystem metrics mode that finds compilable binaries
 	// and runs govulncheck in both source and binary mode and reports results.
 	ModeCompare = "COMPARE"
 )

 // modes is a set of supported govulncheck ecosystem metrics modes.
 // It is a map used as a set: every supported mode maps to true, so looking
 // up an unsupported mode yields false.
 var modes = map[string]bool{
 	ModeGovulncheck: true,
 	ModeCompare:     true,
 }

 // Scan modes label individual result rows (they are stored in a row's
 // ScanMode field), as opposed to the ecosystem metrics modes above, which
 // select what a request does.
 const (
 	// scanModeSourceSymbol is used to designate results at govulncheck source
 	// '-scan symbol' level of precision.
 	//
 	// Note that this is not an ecosystem metrics mode. Its value is "GOVULNCHECK"
 	// for historical reasons.
 	scanModeSourceSymbol = "GOVULNCHECK"

 	// scanModeSourcePackage is used to designate results at govulncheck source
 	// '-scan package' level of precision.
 	//
 	// Note that this is not an ecosystem metrics mode.
 	scanModeSourcePackage string = "IMPORTS"

 	// scanModeSourceModule is used to designate results at govulncheck source
 	// '-scan module' level of precision.
 	//
 	// Note that this is not an ecosystem metrics mode.
 	scanModeSourceModule string = "REQUIRES"

 	// scanModeCompareBinary is used to designate results for govulncheck
 	// binary (symbol) precision level in compare mode.
 	scanModeCompareBinary string = "COMPARE - BINARY"

 	// scanModeCompareSource is used to designate results for govulncheck
 	// source (symbol) precision level in compare mode.
 	scanModeCompareSource string = "COMPARE - SOURCE"

 	// sandboxGoCache is the location of the Go cache inside the sandbox. The
 	// user is root and their $HOME directory is /root. The Go cache resides
 	// in its default location, $HOME/.cache/go-build.
 	//
 	// NOTE(review): the value has no leading slash, so it is a relative path;
 	// presumably it is joined onto the sandbox root where it is used — confirm.
 	sandboxGoCache = "root/.cache/go-build"
 )

 // Request metrics for handleScan. handleScan records gReqCounter once on
 // entry, and gSuccCounter and gSkipCounter once on exit, each tagged with a
 // boolean label describing the outcome.
 var (
 	// gReqCounter counts requests to govulncheck handleScan
 	gReqCounter = event.NewCounter("govulncheck-requests", &event.MetricOptions{Namespace: metricNamespace})
 	// gSuccCounter counts successfully processed requests to govulncheck handleScan
 	gSuccCounter = event.NewCounter("govulncheck-requests-ok", &event.MetricOptions{Namespace: metricNamespace})
 	// gSkipCounter counts skipped requests to govulncheck handleScan
 	gSkipCounter = event.NewCounter("govulncheck-requests-skip", &event.MetricOptions{Namespace: metricNamespace})
 )

 // handleScan serves /govulncheck/scan/MODULE_VERSION_SUFFIX?params by running
 // a govulncheck scan on the single module named in the request.
 //
 // The accepted path forms and query params are those of
 // internal/govulncheck.ParseRequest. A request that fails to parse is
 // reported as derrors.InvalidArgument, and a request without a mode is
 // treated as ModeGovulncheck. The scan is skipped (returning nil) when
 // canSkip says so. When the scan yields a WorkState it is saved with
 // govulncheck.SetWorkState; a failure to save it is logged, not returned.
 func (h *GovulncheckServer) handleScan(w http.ResponseWriter, r *http.Request) (err error) {
 	defer derrors.Wrap(&err, "handleScan")

 	ctx := r.Context()

 	// Basic metrics: one request on entry, outcome labels on exit.
 	gReqCounter.Record(ctx, 1)
 	skipped := false
 	defer func() {
 		gSuccCounter.Record(ctx, 1, event.Bool("success", err == nil))
 		gSkipCounter.Record(ctx, 1, event.Bool("skipped", skipped))
 	}()

 	sreq, err := govulncheck.ParseRequest(r, "/govulncheck/scan")
 	if err != nil {
 		return fmt.Errorf("%w: %v", derrors.InvalidArgument, err)
 	}
 	if sreq.Mode == "" {
 		sreq.Mode = ModeGovulncheck
 	}

 	sc, err := newScanner(ctx, h)
 	if err != nil {
 		return err
 	}
 	// An explicit "insecure" query param overrides the configured default;
 	// the param can only switch insecure mode on, never off.
 	if sreq.Insecure {
 		sc.insecure = true
 	}

 	if skipped, err = sc.canSkip(ctx, sreq, h.fsNamespace); err != nil {
 		return err
 	}
 	if skipped {
 		log.Infof(ctx, "skipping (work version unchanged or unrecoverable error): %s@%s", sreq.Module, sreq.Version)
 		return nil
 	}

 	ws, err := sc.ScanModule(ctx, w, sreq)
 	if err != nil {
 		return err
 	}
 	if ws == nil {
 		// Nothing to remember for this request.
 		return nil
 	}

 	// The BigQuery upload and the Firestore WorkState write cannot be made
 	// atomic. That is fine: if the WorkState is not written, the scan is
 	// simply redone next time. For the same reason a write error is only
 	// logged; failing here would just cause the task to be re-run.
 	if werr := govulncheck.SetWorkState(ctx, h.fsNamespace, sreq.Module, sreq.Version, ws); werr != nil {
 		log.Errorf(ctx, werr, "SetWorkState")
 	}
 	return nil
 }

 // canSkip reports whether scanning the module version in sreq can be skipped,
 // based on the WorkState stored for it in fsn.
 //
 // A module version with no stored WorkState is never skipped. Otherwise it is
 // skipped when the stored work version equals the scanner's current one, or
 // when the stored error category is one unrecoverableError treats as
 // permanent. An error from reading the WorkState is returned unchanged.
 func (s *scanner) canSkip(ctx context.Context, sreq *govulncheck.Request, fsn *fstore.Namespace) (bool, error) {
 	prev, err := govulncheck.GetWorkState(ctx, fsn, sreq.Module, sreq.Version)
 	switch {
 	case err != nil:
 		return false, err
 	case prev == nil:
 		// Not scanned before.
 		return false, nil
 	}
 	log.Infof(ctx, "read work version for %s@%s", sreq.Module, sreq.Version)

 	// An unchanged work version means re-analysis would give the same
 	// result. With a changed work version, skipping is still right when the
 	// previous error is unrecoverable: the module version itself has not
 	// changed, so the same error would come back.
 	return s.workVersion.Equal(prev.WorkVersion) || unrecoverableError(prev.ErrorCategory), nil
 }

 // unrecoverableError reports whether errorCategory denotes an error that
 // govulncheck cannot recover from for the project, such as a build issue.
 func unrecoverableError(errorCategory string) bool {
 	// Build issues are modeled as a general package load error, which is
 	// currently the only category treated as unrecoverable.
 	return errorCategory == derrors.CategorizeError(derrors.LoadPackagesError)
 }

 // A scanner holds state for scanning modules.
 // handleScan builds a fresh one for every request via newScanner.
 type scanner struct {
 	// proxyClient looks up module version info and is passed to prepareModule.
 	proxyClient *proxy.Client
 	// bqClient is handed to writeResults when result rows are written.
 	bqClient    *bigquery.Client
 	// workVersion is compared with the stored WorkState in canSkip and is
 	// copied into every result row.
 	workVersion *govulncheck.WorkVersion
 	// gcsBucket is the bucket named by the BinaryBucket config value, or nil
 	// when that value is empty.
 	gcsBucket   *storage.BucketHandle
 	// insecure makes runScanModule run govulncheck directly instead of inside
 	// the sandbox. It starts as the Insecure config value and can be switched
 	// on per request.
 	insecure    bool
 	// sbox runs commands inside the sandbox.
 	sbox        *sandbox.Sandbox
 	// binaryDir is the directory holding the govulncheck_sandbox and
 	// govulncheck_compare runner binaries.
 	binaryDir   string

 	// govulncheckPath is the path of the govulncheck binary inside binaryDir.
 	govulncheckPath string
 	// vulnDBDir comes from the VulnDBDir config value and is passed as the
 	// last argument to the govulncheck runners.
 	vulnDBDir       string
 }

 // newScanner builds a scanner from the server's clients and configuration.
 //
 // It fetches the current work version from h and, when a binary bucket is
 // configured, opens a Cloud Storage client to obtain a handle to it. The
 // sandbox is rooted at /bundle and uses the runsc binary in /usr/local/bin.
 // Any error from fetching the work version or creating the storage client is
 // returned as is.
 func newScanner(ctx context.Context, h *GovulncheckServer) (*scanner, error) {
 	wv, err := h.getWorkVersion(ctx)
 	if err != nil {
 		return nil, err
 	}

 	// The bucket handle stays nil when no binary bucket is configured.
 	var binBucket *storage.BucketHandle
 	if name := h.cfg.BinaryBucket; name != "" {
 		// NOTE(review): this client is created on every call and never
 		// closed in the visible code — confirm this is not a leak.
 		client, err := storage.NewClient(ctx)
 		if err != nil {
 			return nil, err
 		}
 		binBucket = client.Bucket(name)
 	}

 	sb := sandbox.New("/bundle")
 	sb.Runsc = "/usr/local/bin/runsc"

 	sc := &scanner{
 		proxyClient:     h.proxyClient,
 		bqClient:        h.bqClient,
 		workVersion:     wv,
 		gcsBucket:       binBucket,
 		insecure:        h.cfg.Insecure,
 		sbox:            sb,
 		binaryDir:       h.cfg.BinaryDir,
 		govulncheckPath: filepath.Join(h.cfg.BinaryDir, "govulncheck"),
 		vulnDBDir:       h.cfg.VulnDBDir,
 	}
 	return sc, nil
 }

 // scanError wraps another error. Its message is that of the wrapped error,
 // and Unwrap exposes the wrapped error to errors.Is and errors.As.
 type scanError struct {
 	err error
 }

 // Error returns the message of the wrapped error.
 func (e scanError) Error() string {
 	msg := e.err.Error()
 	return msg
 }

 // Unwrap returns the wrapped error.
 func (e scanError) Unwrap() error {
 	wrapped := e.err
 	return wrapped
 }

 // CompareModule runs govulncheck in source and in binary mode on every binary
 // defined in the module described by baseRow, and writes one row per mode for
 // each binary.
 //
 // Failures that are not specific to the comparison are not reported as rows:
 // a module that cannot be prepared, a failing govulncheck run, or a binary
 // that cannot be built or analyzed is only logged. An error from the scan as
 // a whole is logged as well, so CompareModule always returns nil.
 func (s *scanner) CompareModule(ctx context.Context, w http.ResponseWriter, sreq *govulncheck.Request, baseRow *govulncheck.Result) (err error) {
 	defer derrors.Wrap(&err, "CompareModule")

 	modPath, modVersion := baseRow.ModulePath, baseRow.Version
 	scanErr := doScan(ctx, modPath, modVersion, s.insecure, func() (err error) {
 		dir := moduleDir(modPath, modVersion)
 		defer derrors.Cleanup(&err, func() error { return os.RemoveAll(dir) })

 		const init = true
 		if perr := prepareModule(ctx, modPath, modVersion, dir, s.proxyClient, s.insecure, init); perr != nil {
 			log.Errorf(ctx, perr, "error trying to prepare module %s", modPath)
 			return nil
 		}

 		// The sandbox sees the module directory relative to its own root.
 		sandboxDir := strings.TrimPrefix(dir, sandboxRoot)
 		// A validation failure is only logged; the comparison is attempted anyway.
 		log.Debugf(ctx, "sandbox Validate returned %v", s.sbox.Validate())

 		cmp, cerr := s.runGovulncheckCompareSandbox(ctx, sandboxDir)
 		if cerr != nil {
 			return cerr
 		}
 		log.Infof(ctx, "scanner.runGovulncheckCompare found %d compilable binaries in %s:", len(cmp.FindingsForMod), sreq.Path())

 		var rows []bigquery.Row
 		for pkg, res := range cmp.FindingsForMod {
 			if res.Error != "" {
 				// Just log error if binary failed to build or the analysis failed.
 				// TODO: should we save those rows? This would complicate clients, namely the dashboards.
 				log.Errorf(ctx, errors.New(res.Error), "building/analyzing binary failed: %s %s", pkg, sreq.Path())
 				continue
 			}

 			binRow := createComparisonRow(pkg, &res.BinaryResults, baseRow, true)
 			srcRow := createComparisonRow(pkg, &res.SourceResults, baseRow, false)
 			log.Infof(ctx, "found %d vulns in binary mode and %d vulns in source mode for package %s (module: %s)", len(binRow.Vulns), len(srcRow.Vulns), pkg, sreq.Path())
 			rows = append(rows, binRow, srcRow)
 		}

 		if len(rows) == 0 {
 			return nil
 		}
 		return writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows)
 	})

 	if scanErr != nil {
 		log.Errorf(ctx, scanErr, "CompareModule failed for: %s", modPath)
 	}
 	return nil
 }

 // createComparisonRow returns a copy of baseRow filled in with the compare
 // mode results in response for the binary package pkg.
 //
 // The row's Suffix is set to pkg. When binary is true the row is labeled
 // scanModeCompareBinary and also records the binary build time; otherwise it
 // is labeled scanModeCompareSource. baseRow itself is not modified.
 func createComparisonRow(pkg string, response *govulncheck.AnalysisResponse, baseRow *govulncheck.Result, binary bool) *govulncheck.Result {
 	row := *baseRow
 	row.Suffix = pkg

 	row.ScanMode = scanModeCompareSource
 	if binary {
 		row.ScanMode = scanModeCompareBinary
 		row.BinaryBuildSeconds = bigquery.NullFloat(response.Stats.BuildTime.Seconds())
 	}

 	row.ScanSeconds = response.Stats.ScanSeconds
 	row.ScanMemory = int64(response.Stats.ScanMemory)
 	// Vulns are wanted at the symbol level for both binary and source rows.
 	row.Vulns = vulnsForScanMode(response, scanModeSourceSymbol)
 	return &row
 }

 // ScanModule scans the module in the request and returns the WorkState for
 // the result.
 //
 // The standard library ("std") is ignored. The module version is first
 // resolved through the proxy; if that fails, error rows tagged with
 // derrors.ProxyError are written and no WorkState is returned. Otherwise the
 // request is dispatched on its mode: ModeCompare goes to CompareModule (which
 // yields no WorkState) and ModeGovulncheck to CheckModule. Any other mode
 // does nothing.
 func (s *scanner) ScanModule(ctx context.Context, w http.ResponseWriter, sreq *govulncheck.Request) (*govulncheck.WorkState, error) {
 	if sreq.Module == "std" {
 		return nil, nil // ignore the standard library
 	}

 	baseRow := &govulncheck.Result{
 		ModulePath:  sreq.Module,
 		Suffix:      sreq.Suffix,
 		WorkVersion: *s.workVersion,
 		ImportedBy:  sreq.ImportedBy,
 	}
 	baseRow.VulnDBLastModified = s.workVersion.VulnDBLastModified

 	log.Debugf(ctx, "fetching proxy info: %s@%s", sreq.Path(), sreq.Version)
 	info, err := s.proxyClient.Info(ctx, sreq.Module, sreq.Version)
 	if err != nil {
 		log.Infof(ctx, "proxy error: %s@%s %v", sreq.Path(), sreq.Version, err)
 		// Record the proxy failure once per scan mode of the request.
 		proxyErrRow := func(sm string) *govulncheck.Result {
 			row := *baseRow
 			row.ScanMode = sm
 			row.AddError(fmt.Errorf("%v: %w", err, derrors.ProxyError))
 			return &row
 		}
 		rows := createRows(sreq.Mode, proxyErrRow)
 		return nil, writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows)
 	}

 	// From here on rows carry the version as resolved by the proxy.
 	baseRow.Version = info.Version
 	baseRow.SortVersion = version.ForSorting(info.Version)
 	baseRow.CommitTime = info.Time

 	switch sreq.Mode {
 	case ModeCompare:
 		// TODO: WorkState for CompareModule requests?
 		return nil, s.CompareModule(ctx, w, sreq, baseRow)
 	case ModeGovulncheck:
 		return s.CheckModule(ctx, w, sreq, baseRow)
 	default:
 		return nil, nil
 	}
 }

 // CheckModule govulnchecks a module specified by sreq. Currently, only source
 // analysis is conducted. For binary analysis, see CompareModule.
 //
 // A scan error is first classified by wrapping it in the matching derrors
 // sentinel. One row is then produced per scan mode of the request: on error
 // every row carries the classified error, otherwise each row carries the
 // vulnerabilities found at its precision level. The rows are written with
 // writeResults; if that fails its error is returned with a nil WorkState.
 // On success the WorkState of baseRow is returned, even when the scan
 // itself failed, because the failure is recorded in the rows.
 func (s *scanner) CheckModule(ctx context.Context, w http.ResponseWriter, sreq *govulncheck.Request, baseRow *govulncheck.Result) (*govulncheck.WorkState, error) {
 	log.Infof(ctx, "running scanner.runScanModule: %s@%s", sreq.Path(), sreq.Version)
 	response, err := s.runScanModule(ctx, sreq.Module, baseRow.Version, sreq.Mode)
 	// Classify the scan error first. The order of the cases matters: the
 	// first matching predicate decides the category.
 	if err != nil {
 		switch {
 		case isModVendor(err):
 			err = fmt.Errorf("%v: %w", err, derrors.LoadVendorError)
 		case isGovulncheckLoadError(err) || isBuildIssue(err):
 			err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesError)
 		case isNoRequiredModule(err):
 			// Should be subsumed by LoadPackagesError, kept for sanity
 			// and to catch unexpected changes in govulncheck output.
 			err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesNoRequiredModuleError)
 		case isMissingGoSumEntry(err):
 			// Should be subsumed by LoadPackagesError, kept for sanity
 			// and to catch unexpected changes in govulncheck output.
 			err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesMissingGoSumEntryError)
 		case isReplacingWithLocalPath(err):
 			// Should be subsumed by LoadPackagesError, kept for sanity
 			// and to catch unexpected changes in govulncheck output.
 			err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesImportedLocalError)
 		case isMissingGoMod(err) || isNoModulesSpecified(err):
 			// Should be subsumed by LoadPackagesError, kept for sanity
 			// and to catch unexpected changes in govulncheck output.
 			err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesNoGoModError)
 		case isTooManyFiles(err):
 			err = fmt.Errorf("%v: %w", err, derrors.ScanModuleTooManyOpenFiles)
 		case isProxyCacheMiss(err):
 			err = fmt.Errorf("%v: %w", err, derrors.ProxyError)
 		case isSandboxRelatedIssue(err):
 			err = fmt.Errorf("%v: %w", err, derrors.ScanModuleSandboxError)
 		default:
 			err = fmt.Errorf("%v: %w", err, derrors.ScanModuleGovulncheckError)
 		}
 	}

 	rows := createRows(sreq.Mode, func(sm string) *govulncheck.Result {
 		row := *baseRow
 		row.ScanMode = sm

 		if err != nil {
 			row.AddError(err)
 			log.Infof(ctx, "scanner.runScanModule returned err=%v for %s in scan mode=%s", err, sreq.Path(), sm)
 			return &row
 		}

 		// We use govulncheck command execution time as the approx. time for symbol level analysis.
 		// We currently don't have a way of approximating time for module and package level
 		// scans. We could run govulncheck with -scan package and -scan module, but that would
 		// put more pressure on the pipeline and use more resources.
 		//
 		// sm is a scan mode, so it is compared against the scan mode constant
 		// rather than the ecosystem metrics mode that shares its value.
 		if sm == scanModeSourceSymbol {
 			row.ScanSeconds = response.Stats.ScanSeconds
 			row.ScanMemory = int64(response.Stats.ScanMemory)
 		}
 		row.Vulns = vulnsForScanMode(response, sm)
 		log.Infof(ctx, "scanner.runScanModule returned %d findings for %s with row.Vulns=%d in scan mode=%s", len(response.Findings), sreq.Path(), len(row.Vulns), sm)
 		return &row
 	})

 	if err := writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows); err != nil {
 		return nil, err
 	}
 	// all of the rows share the same work state
 	return baseRow.WorkState(), nil
 }

 // vulnsForScanMode produces Vulns from findings at the specified
 // govulncheck scan mode.
 //
 // The precision level of a finding is read off the first frame of its trace:
 // a frame with a function is symbol level, a frame with a package but no
 // function is package level, and a frame with neither is module level. Only
 // findings at the level named by scanMode are converted; any other scanMode
 // value selects nothing. Duplicate Vulns are dropped, keeping the first
 // occurrence, so the result follows the order of response.Findings. A nil
 // finding or one with an empty trace is skipped instead of causing a panic.
 func vulnsForScanMode(response *govulncheck.AnalysisResponse, scanMode string) []*govulncheck.Vuln {
 	var modeFindings []*govulncheckapi.Finding
 	for _, f := range response.Findings {
 		if f == nil || len(f.Trace) == 0 {
 			// Without a first frame the precision level cannot be determined.
 			continue
 		}
 		fr := f.Trace[0]
 		switch scanMode {
 		case scanModeSourceSymbol:
 			if fr.Function != "" {
 				modeFindings = append(modeFindings, f)
 			}
 		case scanModeSourcePackage:
 			if fr.Package != "" && fr.Function == "" {
 				modeFindings = append(modeFindings, f)
 			}
 		case scanModeSourceModule:
 			if fr.Package == "" && fr.Function == "" { // fr.Module is always set
 				modeFindings = append(modeFindings, f)
 			}
 		}
 	}

 	var vulns []*govulncheck.Vuln
 	seen := make(map[govulncheck.Vuln]bool) // avoid duplicates
 	for _, f := range modeFindings {
 		v := govulncheck.ConvertGovulncheckFinding(f, response.OSVs[f.OSV])
 		if seen[*v] {
 			continue
 		}
 		seen[*v] = true
 		vulns = append(vulns, v)
 	}
 	return vulns
 }

 // createRows calls f once for every scan mode that belongs to the ecosystem
 // metrics mode and returns the resulting rows in scan mode order.
 //
 // ModeCompare yields the binary and source compare scan modes;
 // ModeGovulncheck yields the symbol, package and module scan modes. Any
 // other mode yields no rows (a nil slice) and f is never called.
 func createRows(mode string, f func(string) *govulncheck.Result) []bigquery.Row {
 	var scanModes []string
 	switch mode {
 	case ModeCompare:
 		scanModes = []string{scanModeCompareBinary, scanModeCompareSource}
 	case ModeGovulncheck:
 		scanModes = []string{scanModeSourceSymbol, scanModeSourcePackage, scanModeSourceModule}
 	}

 	var rows []bigquery.Row
 	for i := range scanModes {
 		rows = append(rows, f(scanModes[i]))
 	}
 	return rows
 }

 // runScanModule prepares the given module version on disk and analyzes its
 // source code for vulnerabilities. The analysis of binaries is done in
 // CompareModule.
 //
 // The work runs under doScan. The module directory is removed again when the
 // scan finishes. Depending on s.insecure, govulncheck runs either directly
 // or inside the sandbox. The response may be non-nil even when an error is
 // returned, since both come straight from the govulncheck runner.
 func (s *scanner) runScanModule(ctx context.Context, modulePath, version, mode string) (response *govulncheck.AnalysisResponse, err error) {
 	scan := func() (err error) {
 		// Download the module first.
 		dir := moduleDir(modulePath, version)
 		defer derrors.Cleanup(&err, func() error { return os.RemoveAll(dir) })

 		const init = true
 		if perr := prepareModule(ctx, modulePath, version, dir, s.proxyClient, s.insecure, init); perr != nil {
 			return perr
 		}

 		// response is the enclosing function's result, so it is still set
 		// after doScan returns.
 		if s.insecure {
 			response, err = s.runGovulncheckScanInsecure(dir, mode)
 		} else {
 			response, err = s.runGovulncheckScanSandbox(ctx, dir, mode)
 		}
 		if response != nil {
 			log.Debugf(ctx, "govulncheck stats: %dkb | %vs", response.Stats.ScanMemory, response.Stats.ScanSeconds)
 		}
 		return err
 	}

 	err = doScan(ctx, modulePath, version, s.insecure, scan)
 	return response, err
 }

 // runGovulncheckScanSandbox runs govulncheck inside the sandbox on the module
 // stored at inputPath.
 //
 // inputPath is made relative to the sandbox by stripping the sandboxRoot
 // prefix. The result of validating the sandbox is only logged; the scan is
 // attempted regardless.
 func (s *scanner) runGovulncheckScanSandbox(ctx context.Context, inputPath, mode string) (_ *govulncheck.AnalysisResponse, err error) {
 	log.Debugf(ctx, "sandbox Validate returned %v", s.sbox.Validate())

 	sandboxDir := strings.TrimPrefix(inputPath, sandboxRoot)
 	return s.runGovulncheckSandbox(ctx, mode, sandboxDir)
 }

 // runGovulncheckSandbox runs the govulncheck_sandbox runner inside the
 // sandbox on arg and decodes what it prints into an AnalysisResponse.
 //
 // The Go version available in the sandbox is logged first, purely for
 // diagnostics. mode is only logged; the runner is always invoked with
 // govulncheck.FlagSource. If the runner fails, the returned error's text
 // includes its stderr.
 func (s *scanner) runGovulncheckSandbox(ctx context.Context, mode, arg string) (*govulncheck.AnalysisResponse, error) {
 	if goOut, verr := s.sbox.Command("/usr/local/go/bin/go", "version").Output(); verr != nil {
 		log.Debugf(ctx, "running go version error: %v", verr)
 	} else {
 		log.Debugf(ctx, "Sandbox running %s", goOut)
 	}

 	log.Infof(ctx, "running govulncheck in sandbox: mode %s, arg %q", mode, arg)
 	// currently, only source analysis is done in govulncheck_sandbox (binary is done elsewhere)
 	runner := filepath.Join(s.binaryDir, "govulncheck_sandbox")
 	out, err := s.sbox.Command(runner, s.govulncheckPath, govulncheck.FlagSource, arg, s.vulnDBDir).Output()
 	log.Infof(ctx, "govulncheck in sandbox finished with err=%v", err)
 	if err != nil {
 		return nil, errors.New(derrors.IncludeStderr(err))
 	}
 	return govulncheck.UnmarshalAnalysisResponse(out)
 }

 // runGovulncheckCompareSandbox runs the govulncheck_compare runner inside the
 // sandbox on arg and decodes what it prints into a CompareResponse. If the
 // runner fails, the returned error's text includes its stderr.
 func (s *scanner) runGovulncheckCompareSandbox(ctx context.Context, arg string) (*govulncheck.CompareResponse, error) {
 	runner := filepath.Join(s.binaryDir, "govulncheck_compare")
 	compare := s.sbox.Command(runner, s.govulncheckPath, arg, s.vulnDBDir)

 	log.Infof(ctx, "running govulncheck_compare: arg %q", arg)
 	out, err := compare.Output()
 	log.Infof(ctx, "govulncheck_compare in sandbox finished with err=%v", err)
 	if err != nil {
 		return nil, errors.New(derrors.IncludeStderr(err))
 	}
 	return govulncheck.UnmarshalCompareResponse(out)
 }

 // runGovulncheckScanInsecure runs govulncheck directly, without the sandbox,
 // on all packages ("./...") of the module stored at inputPath. mode is
 // unused: the command is always invoked with govulncheck.FlagSource.
 func (s *scanner) runGovulncheckScanInsecure(inputPath, mode string) (_ *govulncheck.AnalysisResponse, err error) {
 	// currently, only source analysis is done individually (binary is done in compare mode)
 	const allPackages = "./..."
 	return govulncheck.RunGovulncheckCmd(s.govulncheckPath, govulncheck.FlagSource, allPackages, inputPath, s.vulnDBDir)
 }

 // isGovulncheckLoadError reports whether err's message shows that govulncheck
 // failed while loading packages or while finding and building binaries.
 // err must be non-nil.
 func isGovulncheckLoadError(err error) bool {
 	msg := err.Error()
 	for _, marker := range []string{
 		"govulncheck: loading packages:",
 		"FindAndBuildBinaries",
 	} {
 		if strings.Contains(msg, marker) {
 			return true
 		}
 	}
 	return false
 }
	// Copyright 2022 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package worker

	import (
	"context"
	"errors"
	"fmt"
	"net/http"
	"os"
	"path/filepath"
	"strings"

	"cloud.google.com/go/storage"
	"golang.org/x/exp/event"
	"golang.org/x/pkgsite-metrics/internal/bigquery"
	"golang.org/x/pkgsite-metrics/internal/derrors"
	"golang.org/x/pkgsite-metrics/internal/fstore"
	"golang.org/x/pkgsite-metrics/internal/govulncheck"
	"golang.org/x/pkgsite-metrics/internal/govulncheckapi"
	"golang.org/x/pkgsite-metrics/internal/log"
	"golang.org/x/pkgsite-metrics/internal/proxy"
	"golang.org/x/pkgsite-metrics/internal/sandbox"
	"golang.org/x/pkgsite-metrics/internal/version"
	)

	// Ecosystem metrics modes that a scan request can select. handleScan treats
	// a request without a mode as ModeGovulncheck.
	const (
	// ModeGovulncheck is an ecosystem metrics mode that runs the govulncheck
	// binary in default (source) mode.
	ModeGovulncheck = "GOVULNCHECK"

	// ModeCompare is an ecosystem metrics mode that finds compilable binaries
	// and runs govulncheck in both source and binary mode and reports results.
	ModeCompare = "COMPARE"
	)

	// modes is a set of supported govulncheck ecosystem metrics modes.
	// It is a map used as a set: every supported mode maps to true, so looking
	// up an unsupported mode yields false.
	var modes = map[string]bool{
	ModeGovulncheck: true,
	ModeCompare: true,
	}

	// Scan modes label individual result rows (they are stored in a row's
	// ScanMode field), as opposed to the ecosystem metrics modes, which select
	// what a request does.
	const (
	// scanModeSourceSymbol is used to designate results at govulncheck source
	// '-scan symbol' level of precision.
	//
	// Note that this is not an ecosystem metrics mode. Its value is "GOVULNCHECK"
	// for historical reasons.
	scanModeSourceSymbol = "GOVULNCHECK"

	// scanModeSourcePackage is used to designate results at govulncheck source
	// '-scan package' level of precision.
	//
	// Note that this is not an ecosystem metrics mode.
	scanModeSourcePackage string = "IMPORTS"

	// scanModeSourceModule is used to designate results at govulncheck source
	// '-scan module' level of precision.
	//
	// Note that this is not an ecosystem metrics mode.
	scanModeSourceModule string = "REQUIRES"

	// scanModeCompareBinary is used to designate results for govulncheck
	// binary (symbol) precision level in compare mode.
	scanModeCompareBinary string = "COMPARE - BINARY"

	// scanModeCompareSource is used to designate results for govulncheck
	// source (symbol) precision level in compare mode.
	scanModeCompareSource string = "COMPARE - SOURCE"

	// sandboxGoCache is the location of the Go cache inside the sandbox. The
	// user is root and their $HOME directory is /root. The Go cache resides
	// in its default location, $HOME/.cache/go-build.
	//
	// NOTE(review): the value has no leading slash, so it is a relative path;
	// presumably it is joined onto the sandbox root where it is used — confirm.
	sandboxGoCache = "root/.cache/go-build"
	)

	// Request metrics for handleScan. handleScan records gReqCounter once on
	// entry, and gSuccCounter and gSkipCounter once on exit, each tagged with a
	// boolean label describing the outcome.
	var (
	// gReqCounter counts requests to govulncheck handleScan
	gReqCounter = event.NewCounter("govulncheck-requests", &event.MetricOptions{Namespace: metricNamespace})
	// gSuccCounter counts successfully processed requests to govulncheck handleScan
	gSuccCounter = event.NewCounter("govulncheck-requests-ok", &event.MetricOptions{Namespace: metricNamespace})
	// gSkipCounter counts skipped requests to govulncheck handleScan
	gSkipCounter = event.NewCounter("govulncheck-requests-skip", &event.MetricOptions{Namespace: metricNamespace})
	)

	// handleScan runs a govulncheck scan for a single input module. It is triggered
	// by path /govulncheck/scan/MODULE_VERSION_SUFFIX?params.
	//
	// See internal/govulncheck.ParseRequest for allowed path forms and query params.
	//
	// A request that fails to parse is reported as derrors.InvalidArgument, and
	// a request without a mode is treated as ModeGovulncheck. The scan is
	// skipped (returning nil) when canSkip says so. When the scan yields a
	// WorkState it is saved with govulncheck.SetWorkState; a failure to save it
	// is logged, not returned.
	//
	// The receiver and the request are pointers, as net/http handlers receive
	// *http.Request.
	func (h *GovulncheckServer) handleScan(w http.ResponseWriter, r *http.Request) (err error) {
		defer derrors.Wrap(&err, "handleScan")

		// Collect basic metrics.
		gReqCounter.Record(r.Context(), 1)
		skip := false // request skipped
		defer func() {
			gSuccCounter.Record(r.Context(), 1, event.Bool("success", err == nil))
			gSkipCounter.Record(r.Context(), 1, event.Bool("skipped", skip))
		}()

		ctx := r.Context()
		sreq, err := govulncheck.ParseRequest(r, "/govulncheck/scan")
		if err != nil {
			return fmt.Errorf("%w: %v", derrors.InvalidArgument, err)
		}
		if sreq.Mode == "" {
			sreq.Mode = ModeGovulncheck
		}
		scanner, err := newScanner(ctx, h)
		if err != nil {
			return err
		}
		// An explicit "insecure" query param overrides the default.
		if sreq.Insecure {
			scanner.insecure = sreq.Insecure
		}
		skip, err = scanner.canSkip(ctx, sreq, h.fsNamespace)
		if err != nil {
			return err
		}
		if skip {
			log.Infof(ctx, "skipping (work version unchanged or unrecoverable error): %s@%s", sreq.Module, sreq.Version)
			return nil
		}
		workState, err := scanner.ScanModule(ctx, w, sreq)
		if err != nil {
			return err
		}
		if workState == nil {
			return nil
		}
		// We can't upload the row to bigquery and write the WorkState to Firestore atomically.
		// But that's OK: if we fail before writing the WorkState, then we'll just re-do the scan
		// the next time.
		if err := govulncheck.SetWorkState(ctx, h.fsNamespace, sreq.Module, sreq.Version, workState); err != nil {
			// Don't fail if there's an error, because we'd just re-run the task.
			log.Errorf(ctx, err, "SetWorkState")
		}
		return nil
	}

	// canSkip reports whether scanning the module version in sreq can be
	// skipped, based on the WorkState stored for it in fsn.
	//
	// A module version with no stored WorkState is never skipped. Otherwise it
	// is skipped when the stored work version equals the scanner's current one,
	// or when the stored error category is one unrecoverableError treats as
	// permanent. An error from reading the WorkState is returned unchanged.
	//
	// The receiver and sreq are pointers so the scanner and the request are
	// shared with the caller rather than copied.
	func (s *scanner) canSkip(ctx context.Context, sreq *govulncheck.Request, fsn *fstore.Namespace) (bool, error) {
		ws, err := govulncheck.GetWorkState(ctx, fsn, sreq.Module, sreq.Version)
		if err != nil {
			return false, err
		}
		if ws == nil {
			// Not scanned before.
			return false, nil
		}
		log.Infof(ctx, "read work version for %s@%s", sreq.Module, sreq.Version)
		if s.workVersion.Equal(ws.WorkVersion) {
			// If the work version has not changed, skip analyzing the module
			return true, nil
		}
		// Otherwise, skip if the error is not recoverable. The version of the
		// module has not changed, so we'll get the same error anyhow.
		return unrecoverableError(ws.ErrorCategory), nil
	}

	// unrecoverableError reports whether errorCategory denotes an error that
	// govulncheck cannot recover from for the project, such as a build issue.
	func unrecoverableError(errorCategory string) bool {
		// Build issues are modeled as a general package load error, which is
		// currently the only category treated as unrecoverable.
		return errorCategory == derrors.CategorizeError(derrors.LoadPackagesError)
	}

	// A scanner holds state for scanning modules.
	// handleScan builds a fresh one for every request via newScanner.
	type scanner struct {
	// proxyClient looks up module version info and is passed to prepareModule.
	proxyClient *proxy.Client
	// bqClient is handed to writeResults when result rows are written.
	bqClient *bigquery.Client
	// workVersion is compared with the stored WorkState in canSkip and is
	// copied into every result row.
	workVersion *govulncheck.WorkVersion
	// gcsBucket is the bucket named by the BinaryBucket config value, or nil
	// when that value is empty.
	gcsBucket *storage.BucketHandle
	// insecure starts as the Insecure config value and can be switched on per
	// request by handleScan.
	insecure bool
	// sbox runs commands inside the sandbox.
	sbox *sandbox.Sandbox
	// binaryDir is the BinaryDir config value; govulncheckPath is derived from it.
	binaryDir string

	// govulncheckPath is the path of the govulncheck binary inside binaryDir.
	govulncheckPath string
	// vulnDBDir is the VulnDBDir config value.
	vulnDBDir string
	}

	// newScanner builds a scanner from the server's clients and configuration.
	//
	// It fetches the current work version from h and, when a binary bucket is
	// configured, opens a Cloud Storage client to obtain a handle to it. The
	// sandbox is rooted at /bundle and uses the runsc binary in /usr/local/bin.
	// Any error from fetching the work version or creating the storage client
	// is returned as is, with a nil scanner.
	//
	// The result is a *scanner: the error paths return nil and the success
	// path returns the address of a scanner literal, neither of which is valid
	// for a non-pointer result type.
	func newScanner(ctx context.Context, h *GovulncheckServer) (*scanner, error) {
		workVersion, err := h.getWorkVersion(ctx)
		if err != nil {
			return nil, err
		}
		var bucket *storage.BucketHandle
		if h.cfg.BinaryBucket != "" {
			// NOTE(review): this client is created on every call and never
			// closed in the visible code — confirm this is not a leak.
			c, err := storage.NewClient(ctx)
			if err != nil {
				return nil, err
			}
			bucket = c.Bucket(h.cfg.BinaryBucket)
		}
		sbox := sandbox.New("/bundle")
		sbox.Runsc = "/usr/local/bin/runsc"
		return &scanner{
			proxyClient:     h.proxyClient,
			bqClient:        h.bqClient,
			workVersion:     workVersion,
			gcsBucket:       bucket,
			insecure:        h.cfg.Insecure,
			sbox:            sbox,
			binaryDir:       h.cfg.BinaryDir,
			govulncheckPath: filepath.Join(h.cfg.BinaryDir, "govulncheck"),
			vulnDBDir:       h.cfg.VulnDBDir,
		}, nil
	}

	// scanError wraps another error. Its message is that of the wrapped error,
	// and Unwrap exposes the wrapped error to errors.Is and errors.As.
	type scanError struct {
		err error
	}

	// Error returns the message of the wrapped error.
	func (e scanError) Error() string {
		msg := e.err.Error()
		return msg
	}

	// Unwrap returns the wrapped error.
	func (e scanError) Unwrap() error {
		wrapped := e.err
		return wrapped
	}

	// CompareModule gets results of govulncheck source and binary mode on each binary defined in a module.
	//
	// It discards all results where there is a failure that is not specific to the comparison. Examples are
	// situations where the module is malformed, govulncheck fails, or it is not possible to build a found
	// binary within the module.
	//
	// Such failures, and an error from the scan as a whole, are only logged, so
	// CompareModule always returns nil. The receiver, sreq and baseRow are
	// pointers; baseRow is copied into each produced row and is not modified.
	func (s *scanner) CompareModule(ctx context.Context, w http.ResponseWriter, sreq *govulncheck.Request, baseRow *govulncheck.Result) (err error) {
		defer derrors.Wrap(&err, "CompareModule")
		err = doScan(ctx, baseRow.ModulePath, baseRow.Version, s.insecure, func() (err error) {
			inputPath := moduleDir(baseRow.ModulePath, baseRow.Version)
			defer derrors.Cleanup(&err, func() error { return os.RemoveAll(inputPath) })
			const init = true
			if err := prepareModule(ctx, baseRow.ModulePath, baseRow.Version, inputPath, s.proxyClient, s.insecure, init); err != nil {
				log.Errorf(ctx, err, "error trying to prepare module %s", baseRow.ModulePath)
				return nil
			}

			// The sandbox sees the module directory relative to its own root.
			smdir := strings.TrimPrefix(inputPath, sandboxRoot)
			// A validation failure is only logged; the comparison is attempted anyway.
			err = s.sbox.Validate()
			log.Debugf(ctx, "sandbox Validate returned %v", err)

			response, err := s.runGovulncheckCompareSandbox(ctx, smdir)
			if err != nil {
				return err
			}
			log.Infof(ctx, "scanner.runGovulncheckCompare found %d compilable binaries in %s:", len(response.FindingsForMod), sreq.Path())

			var rows []bigquery.Row
			for pkg, results := range response.FindingsForMod {
				if results.Error != "" {
					// Just log error if binary failed to build or the analysis failed.
					// TODO: should we save those rows? This would complicate clients, namely the dashboards.
					log.Errorf(ctx, errors.New(results.Error), "building/analyzing binary failed: %s %s", pkg, sreq.Path())
					continue
				}

				binRow := createComparisonRow(pkg, &results.BinaryResults, baseRow, true)
				srcRow := createComparisonRow(pkg, &results.SourceResults, baseRow, false)
				log.Infof(ctx, "found %d vulns in binary mode and %d vulns in source mode for package %s (module: %s)", len(binRow.Vulns), len(srcRow.Vulns), pkg, sreq.Path())
				rows = append(rows, binRow, srcRow)
			}

			if len(rows) > 0 {
				return writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows)
			}
			return nil
		})

		if err != nil {
			log.Errorf(ctx, err, "CompareModule failed for: %s", baseRow.ModulePath)
		}
		return nil
	}

	// createComparisonRow returns a copy of baseRow filled in with the compare
	// mode results in response for the binary package pkg.
	//
	// The row's Suffix is set to pkg. When binary is true the row is labeled
	// scanModeCompareBinary and also records the binary build time; otherwise
	// it is labeled scanModeCompareSource. baseRow must be a pointer because
	// the body dereferences it to make the copy; it is not modified.
	func createComparisonRow(pkg string, response *govulncheck.AnalysisResponse, baseRow *govulncheck.Result, binary bool) *govulncheck.Result {
		row := *baseRow
		row.Suffix = pkg
		if binary {
			row.ScanMode = scanModeCompareBinary
			row.BinaryBuildSeconds = bigquery.NullFloat(response.Stats.BuildTime.Seconds())
		} else {
			row.ScanMode = scanModeCompareSource
		}

		row.Vulns = vulnsForScanMode(response, scanModeSourceSymbol) // we want vulns at the symbol level, binary or source
		row.ScanMemory = int64(response.Stats.ScanMemory)
		row.ScanSeconds = response.Stats.ScanSeconds
		return &row
	}

	// ScanModule scans the module in the request. It returns the WorkState for the result.
	//
	// The standard library ("std") is ignored. The module version is first
	// resolved through the proxy; if that fails, error rows tagged with
	// derrors.ProxyError are written and no WorkState is returned. Otherwise
	// the request is dispatched on its mode: ModeCompare goes to CompareModule
	// (which yields no WorkState) and ModeGovulncheck to CheckModule. Any other
	// mode does nothing.
	//
	// The receiver and sreq are pointers so the scanner and the request are
	// shared with the caller rather than copied.
	func (s *scanner) ScanModule(ctx context.Context, w http.ResponseWriter, sreq *govulncheck.Request) (*govulncheck.WorkState, error) {
		if sreq.Module == "std" {
			return nil, nil // ignore the standard library
		}

		baseRow := &govulncheck.Result{
			ModulePath:  sreq.Module,
			Suffix:      sreq.Suffix,
			WorkVersion: *s.workVersion,
			ImportedBy:  sreq.ImportedBy,
		}
		baseRow.VulnDBLastModified = s.workVersion.VulnDBLastModified

		log.Debugf(ctx, "fetching proxy info: %s@%s", sreq.Path(), sreq.Version)
		info, err := s.proxyClient.Info(ctx, sreq.Module, sreq.Version)
		if err != nil {
			log.Infof(ctx, "proxy error: %s@%s %v", sreq.Path(), sreq.Version, err)
			// Record the proxy failure once per scan mode of the request.
			rows := createRows(sreq.Mode, func(sm string) *govulncheck.Result {
				row := *baseRow
				row.ScanMode = sm
				row.AddError(fmt.Errorf("%v: %w", err, derrors.ProxyError))
				return &row
			})
			return nil, writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows)
		}
		// From here on rows carry the version as resolved by the proxy.
		baseRow.Version = info.Version
		baseRow.SortVersion = version.ForSorting(info.Version)
		baseRow.CommitTime = info.Time

		if sreq.Mode == ModeCompare {
			// TODO: WorkState for CompareModule requests?
			return nil, s.CompareModule(ctx, w, sreq, baseRow)
		} else if sreq.Mode == ModeGovulncheck {
			return s.CheckModule(ctx, w, sreq, baseRow)
		}
		return nil, nil
	}

	// CheckModule govulnchecks a module specified by sreq. Currently, only source
	// analysis is conducted. For binary analysis, see CompareModule.
	func (s scanner) CheckModule(ctx context.Context, w http.ResponseWriter, sreq govulncheck.Request, baseRow govulncheck.Result) (govulncheck.WorkState, error) {
	log.Infof(ctx, "running scanner.runScanModule: %s@%s", sreq.Path(), sreq.Version)
	response, err := s.runScanModule(ctx, sreq.Module, baseRow.Version, sreq.Mode)
	// classify scan error first
	if err != nil {
	switch {
	case isModVendor(err):
	err = fmt.Errorf("%v: %w", err, derrors.LoadVendorError)
	case isGovulncheckLoadError(err) \|\| isBuildIssue(err):
	err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesError)
	case isNoRequiredModule(err):
	// Should be subsumed by LoadPackagesError, kept for sanity
	// and to catch unexpected changes in govulncheck output.
	err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesNoRequiredModuleError)
	case isMissingGoSumEntry(err):
	// Should be subsumed by LoadPackagesError, kept for sanity.
	// and to catch unexpected changes in govulncheck output.
	err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesMissingGoSumEntryError)
	case isReplacingWithLocalPath(err):
	// Should be subsumed by LoadPackagesError, kept for sanity.
	// and to catch unexpected changes in govulncheck output.
	err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesImportedLocalError)
	case isMissingGoMod(err) \|\| isNoModulesSpecified(err):
	// Should be subsumed by LoadPackagesError, kept for sanity
	// and to catch unexpected changes in govulncheck output.
	err = fmt.Errorf("%v: %w", err, derrors.LoadPackagesNoGoModError)
	case isTooManyFiles(err):
	err = fmt.Errorf("%v: %w", err, derrors.ScanModuleTooManyOpenFiles)
	case isProxyCacheMiss(err):
	err = fmt.Errorf("%v: %w", err, derrors.ProxyError)
	case isSandboxRelatedIssue(err):
	err = fmt.Errorf("%v: %w", err, derrors.ScanModuleSandboxError)
	default:
	err = fmt.Errorf("%v: %w", err, derrors.ScanModuleGovulncheckError)
	}
	}

	rows := createRows(sreq.Mode, func(sm string) *govulncheck.Result {
	row := *baseRow
	row.ScanMode = sm

	if err != nil {
	row.AddError(err)
	log.Infof(ctx, "scanner.runScanModule returned err=%v for %s in scan mode=%s", err, sreq.Path(), sm)
	} else {
	// We use govulncheck command execution time as the approx. time for symbol level analysis.
	// We currently don't have a way of approximating time for measuring time for module and
	// package level scans. We could run govulncheck with -scan package and -scan module, but
	// that would put more pressure on the pipeline and use more resources.
	if sm == ModeGovulncheck {
	row.ScanSeconds = response.Stats.ScanSeconds
	row.ScanMemory = int64(response.Stats.ScanMemory)
	}
	row.Vulns = vulnsForScanMode(response, sm)
	log.Infof(ctx, "scanner.runScanModule returned %d findings for %s with row.Vulns=%d in scan mode=%s", len(response.Findings), sreq.Path(), len(row.Vulns), sm)
	}
	return &row
	})

	if err := writeResults(ctx, sreq.Serve, w, s.bqClient, govulncheck.TableName, rows); err != nil {
	return nil, err
	}
	// all of the rows share the same work state
	return baseRow.WorkState(), nil
	}

	// vulnsForScanMode produces Vulns from findings at the specified
	// govulncheck scan mode.
	//
	// A finding is attributed to a scan mode by looking at the first frame of
	// its trace:
	//   - scanModeSourceSymbol:  the frame names a function;
	//   - scanModeSourcePackage: the frame names a package but no function;
	//   - scanModeSourceModule:  the frame names neither package nor function.
	//
	// Any other scanMode selects nothing. Findings that are nil or have an empty
	// trace cannot be attributed and are skipped. Findings that convert to an
	// identical Vuln are reported once, in order of first appearance.
	func vulnsForScanMode(response *govulncheck.AnalysisResponse, scanMode string) []*govulncheck.Vuln {
		var vulns []*govulncheck.Vuln
		seen := make(map[govulncheck.Vuln]bool) // avoid duplicates
		for _, f := range response.Findings {
			if f == nil || len(f.Trace) == 0 {
				// Without a first frame there is nothing to classify.
				continue
			}
			fr := f.Trace[0]

			var selected bool
			switch scanMode {
			case scanModeSourceSymbol:
				selected = fr.Function != ""
			case scanModeSourcePackage:
				selected = fr.Package != "" && fr.Function == ""
			case scanModeSourceModule:
				selected = fr.Package == "" && fr.Function == "" // fr.Module is always set
			}
			if !selected {
				continue
			}

			v := govulncheck.ConvertGovulncheckFinding(f, response.OSVs[f.OSV])
			if seen[*v] {
				continue
			}
			seen[*v] = true
			vulns = append(vulns, v)
		}
		return vulns
	}

	// createRows calls f once for every scan mode that belongs to the given
	// ecosystem metrics mode and collects the returned rows in that order.
	// ModeCompare yields a binary row and a source row; ModeGovulncheck yields
	// symbol, package and module rows. For any other mode f is never called
	// and the result is nil.
	func createRows(mode string, f func(string) *govulncheck.Result) []bigquery.Row {
		var scanModes []string
		switch mode {
		case ModeCompare:
			scanModes = []string{scanModeCompareBinary, scanModeCompareSource}
		case ModeGovulncheck:
			scanModes = []string{scanModeSourceSymbol, scanModeSourcePackage, scanModeSourceModule}
		}

		var rows []bigquery.Row
		for i := range scanModes {
			rows = append(rows, f(scanModes[i]))
		}
		return rows
	}

	// code for vulnerabilities. The analysis of binaries is done in CompareModule.
	func (s scanner) runScanModule(ctx context.Context, modulePath, version, mode string) (response govulncheck.AnalysisResponse, err error) {
	err = doScan(ctx, modulePath, version, s.insecure, func() (err error) {
	// Download the module first.
	inputPath := moduleDir(modulePath, version)
	defer derrors.Cleanup(&err, func() error { return os.RemoveAll(inputPath) })
	const init = true
	if err := prepareModule(ctx, modulePath, version, inputPath, s.proxyClient, s.insecure, init); err != nil {
	return err
	}

	if s.insecure {
	response, err = s.runGovulncheckScanInsecure(inputPath, mode)
	} else {
	response, err = s.runGovulncheckScanSandbox(ctx, inputPath, mode)
	}
	if response != nil {
	log.Debugf(ctx, "govulncheck stats: %dkb \| %vs", response.Stats.ScanMemory, response.Stats.ScanSeconds)
	}
	return err
	})
	return response, err
	}

	// runGovulncheckScanSandbox runs the govulncheck scan for the module at
	// inputPath inside the sandbox. The path handed to the sandbox is inputPath
	// with the sandboxRoot prefix trimmed. The sandbox is validated first, but
	// the outcome is only logged at debug level; the scan is attempted either way.
	func (s scanner) runGovulncheckScanSandbox(ctx context.Context, inputPath, mode string) (*govulncheck.AnalysisResponse, error) {
		smdir := strings.TrimPrefix(inputPath, sandboxRoot)
		// Diagnostic only: a validation failure is deliberately not returned.
		validateErr := s.sbox.Validate()
		log.Debugf(ctx, "sandbox Validate returned %v", validateErr)

		return s.runGovulncheckSandbox(ctx, mode, smdir)
	}

	// runGovulncheckSandbox runs the govulncheck_sandbox binary from s.binaryDir
	// inside the sandbox and decodes its standard output into an
	// AnalysisResponse. The binary is given s.govulncheckPath,
	// govulncheck.FlagSource, arg and s.vulnDBDir. mode is used for logging only.
	//
	// Before the scan, the output of "go version" inside the sandbox is logged
	// at debug level; a failure of that command is logged and otherwise ignored.
	// If the scan command fails, a nil response is returned together with a new
	// error whose message is derrors.IncludeStderr(err).
	func (s scanner) runGovulncheckSandbox(ctx context.Context, mode, arg string) (*govulncheck.AnalysisResponse, error) {
		goOut, err := s.sbox.Command("/usr/local/go/bin/go", "version").Output()
		if err != nil {
			log.Debugf(ctx, "running go version error: %v", err)
		} else {
			log.Debugf(ctx, "Sandbox running %s", goOut)
		}
		log.Infof(ctx, "running govulncheck in sandbox: mode %s, arg %q", mode, arg)
		// currently, only source analysis is done in govulncheck_sandbox (binary is done elsewhere)
		cmd := s.sbox.Command(filepath.Join(s.binaryDir, "govulncheck_sandbox"), s.govulncheckPath, govulncheck.FlagSource, arg, s.vulnDBDir)
		stdout, err := cmd.Output()
		log.Infof(ctx, "govulncheck in sandbox finished with err=%v", err)
		if err != nil {
			return nil, errors.New(derrors.IncludeStderr(err))
		}
		return govulncheck.UnmarshalAnalysisResponse(stdout)
	}

	// runGovulncheckCompareSandbox runs the govulncheck_compare binary from
	// s.binaryDir inside the sandbox, passing it s.govulncheckPath, arg and
	// s.vulnDBDir, and decodes its standard output into a CompareResponse.
	// If the command fails, the returned error is a new error whose message is
	// derrors.IncludeStderr(err).
	func (s scanner) runGovulncheckCompareSandbox(ctx context.Context, arg string) (govulncheck.CompareResponse, error) {
		compareBin := filepath.Join(s.binaryDir, "govulncheck_compare")
		cmd := s.sbox.Command(compareBin, s.govulncheckPath, arg, s.vulnDBDir)
		log.Infof(ctx, "running govulncheck_compare: arg %q", arg)

		out, runErr := cmd.Output()
		log.Infof(ctx, "govulncheck_compare in sandbox finished with err=%v", runErr)
		if runErr == nil {
			return govulncheck.UnmarshalCompareResponse(out)
		}
		return nil, errors.New(derrors.IncludeStderr(runErr))
	}

	// runGovulncheckScanInsecure runs govulncheck directly, without the sandbox,
	// by calling govulncheck.RunGovulncheckCmd with s.govulncheckPath,
	// govulncheck.FlagSource, the pattern "./...", inputPath and s.vulnDBDir.
	// mode is currently unused.
	func (s scanner) runGovulncheckScanInsecure(inputPath, mode string) (*govulncheck.AnalysisResponse, error) {
		// currently, only source analysis is done individually (binary is done in compare mode)
		return govulncheck.RunGovulncheckCmd(s.govulncheckPath, govulncheck.FlagSource, "./...", inputPath, s.vulnDBDir)
	}

	// isGovulncheckLoadError reports whether err's message contains one of the
	// substrings "govulncheck: loading packages:" or "FindAndBuildBinaries".
	// err must be non-nil.
	func isGovulncheckLoadError(err error) bool {
		msg := err.Error()
		return strings.Contains(msg, "govulncheck: loading packages:") ||
			strings.Contains(msg, "FindAndBuildBinaries")
	}