internal/govulncheck/govulncheck.go - pkgsite-metrics - Git at Google

 // Copyright 2022 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package govulncheck provides functionality for manipulating
 // inputs and outputs of govulncheck endpoints.
 package govulncheck

 import (
 	"bytes"
 	"context"
 	"encoding/json"
 	"errors"
 	"fmt"
 	"net/http"
 	"os/exec"
 	"path/filepath"
 	"runtime"
 	"strings"
 	"time"

 	bq "cloud.google.com/go/bigquery"

 	"golang.org/x/pkgsite-metrics/internal/bigquery"
 	"golang.org/x/pkgsite-metrics/internal/derrors"
 	"golang.org/x/pkgsite-metrics/internal/govulncheckapi"
 	"golang.org/x/pkgsite-metrics/internal/scan"
 )

 // Scan mode names and the corresponding govulncheck mode flag values.
 const (
 	// ModeBinary runs the govulncheck binary in binary mode.
 	// Unlike the other constants in this group it is declared with an
 	// explicit string type.
 	ModeBinary string = "BINARY"

 	// ModeGovulncheck runs the govulncheck binary in default (source) mode.
 	ModeGovulncheck = "GOVULNCHECK"

 	// FlagBinary is the flag passed to govulncheck to run in binary mode.
 	// Presumably this is the value supplied as modeFlag to
 	// RunGovulncheckCmd, which forwards it after "-mode" — confirm at callers.
 	FlagBinary = "binary"

 	// FlagSource is the flag passed to govulncheck to run in source mode.
 	FlagSource = "source"
 )

 // EnqueueQueryParams holds the query parameters accepted by the
 // govulncheck/enqueue endpoint.
 type EnqueueQueryParams struct {
 	Suffix string // appended to task queue IDs to generate unique tasks
 	Mode   string // type of analysis to run
 	Min    int    // minimum imported-by count for a module to be included
 	File   string // path to file containing modules; if missing, use DB
 }

 // Request contains information passed to a scan endpoint.
 // It combines the module and version taken from the URL path with the
 // parameters taken from the query string; both parts are embedded, so
 // their fields and methods are promoted onto Request.
 type Request struct {
 	scan.ModuleURLPath
 	QueryParams
 }

 // QueryParams has query parameters for a govulncheck scan request.
 type QueryParams struct {
 	// ImportedBy is the imported-by count. ParseRequest rejects requests
 	// in which it is missing or negative.
 	ImportedBy int
 	Mode       string // govulncheck mode
 	Insecure   bool   // if true, run outside sandbox
 	Serve      bool   // serve results back to client instead of writing them to BigQuery
 }

 // The below methods implement queue.Task.

 // Name returns the task name, formed as <module>@<version>.
 func (r *Request) Name() string {
 	module, version := r.Module, r.Version
 	return module + "@" + version
 }

 // Path returns the URL path of the request, as produced by the embedded
 // scan.ModuleURLPath.
 func (r *Request) Path() string {
 	mp := &r.ModuleURLPath
 	return mp.Path()
 }

 // Params returns the request's query parameters as formatted by
 // scan.FormatParams.
 func (r *Request) Params() string {
 	qp := r.QueryParams
 	return scan.FormatParams(qp)
 }

 // ParseRequest builds a Request from the HTTP request r. The endpoint
 // prefix is stripped from the URL path and the remainder is parsed as a
 // module and version; the query string supplies the QueryParams.
 //
 // The module and version should have one of the following three forms:
 //   - <module>/@v/<version>
 //   - <module>@<version>
 //   - <module>/@latest
 //
 // (These are the same forms that the module proxy accepts.)
 //
 // An error is returned if the path or the query parameters cannot be
 // parsed, or if the "importedby" parameter is missing or negative.
 func ParseRequest(r *http.Request, prefix string) (*Request, error) {
 	modulePart := strings.TrimPrefix(r.URL.Path, prefix)
 	mp, err := scan.ParseModuleURLPath(modulePart)
 	if err != nil {
 		return nil, err
 	}

 	// ImportedBy starts out negative so that a request which never sets
 	// it can be told apart from one that sets it to zero.
 	params := QueryParams{ImportedBy: -1}
 	err = scan.ParseParams(r, &params)
 	switch {
 	case err != nil:
 		return nil, err
 	case params.ImportedBy < 0:
 		return nil, errors.New(`missing or negative "importedby" query param`)
 	}

 	req := &Request{ModuleURLPath: mp, QueryParams: params}
 	return req, nil
 }

 // ConvertGovulncheckFinding takes a finding from govulncheck and converts it to
 // a bigquery vuln.
 //
 // The first entry of f.Trace is treated as the vulnerable frame: its
 // package, module and version are copied into the result, and the vuln is
 // marked as called when that frame names a function.
 //
 // A finding with an empty trace yields a Vuln that carries only the OSV ID,
 // rather than panicking on the missing frame.
 func ConvertGovulncheckFinding(f *govulncheckapi.Finding) *Vuln {
 	if len(f.Trace) == 0 {
 		return &Vuln{ID: f.OSV}
 	}

 	vulnerableFrame := f.Trace[0]
 	return &Vuln{
 		ID:          f.OSV,
 		PackagePath: vulnerableFrame.Package,
 		ModulePath:  vulnerableFrame.Module,
 		Version:     vulnerableFrame.Version,
 		// A frame without a function name means the vulnerability was
 		// not found to be called.
 		Called: vulnerableFrame.Function != "",
 	}
 }

 // TableName is the name of the BigQuery table that holds govulncheck
 // results. The schema inferred from Result is registered under this name
 // in init.
 const TableName = "govulncheck"

 // Note: before modifying Result or Vuln, make sure the change
 // is a valid schema modification.
 // The only supported changes are:
 //   - adding a nullable or repeated column
 //   - dropping a column
 //   - changing a column from required to nullable.
 // See https://cloud.google.com/bigquery/docs/managing-table-schemas for details.

 // Result is a row in the BigQuery govulncheck table.
 // The bigquery struct tags give the column name of each field.
 type Result struct {
 	// CreatedAt is set through SetUploadTime.
 	CreatedAt   time.Time `bigquery:"created_at"`
 	ModulePath  string    `bigquery:"module_path"`
 	Version     string    `bigquery:"version"`
 	Suffix      string    `bigquery:"suffix"`
 	SortVersion string    `bigquery:"sort_version"`
 	ImportedBy  int       `bigquery:"imported_by"`
 	// Error and ErrorCategory are filled in by AddError.
 	Error         string    `bigquery:"error"`
 	ErrorCategory string    `bigquery:"error_category"`
 	CommitTime    time.Time `bigquery:"commit_time"`
 	ScanSeconds   float64   `bigquery:"scan_seconds"`
 	// BinaryBuildSeconds is populated only in COMPARE - BINARY mode.
 	// Note that its column is named build_seconds, not binary_build_seconds.
 	BinaryBuildSeconds bq.NullFloat64 `bigquery:"build_seconds"`
 	// ScanMemory is signed here, whereas ScanStats.ScanMemory is unsigned.
 	ScanMemory  int64  `bigquery:"scan_memory"`
 	ScanMode    string `bigquery:"scan_mode"`
 	WorkVersion        // InferSchema flattens embedded fields
 	Vulns       []*Vuln `bigquery:"vulns"`
 }

 // WorkVersion contains information that can be used to avoid duplicate work.
 // Given two WorkVersion values v1 and v2 for the same module path and version,
 // if v1.Equal(v2) then it is not necessary to scan the module.
 //
 // The bigquery struct tags give the column name of each field.
 type WorkVersion struct {
 	// GoVersion used at path. Allows precise interpretation
 	// of detected stdlib vulnerabilities.
 	GoVersion string `bigquery:"go_version"`
 	// The version of the currently running code. This tracks changes in the
 	// logic of module scanning and processing.
 	WorkerVersion string `bigquery:"worker_version"`
 	// The version of the bigquery schema.
 	SchemaVersion string `bigquery:"schema_version"`
 	// When the vuln DB was last modified.
 	VulnDBLastModified time.Time `bigquery:"vulndb_last_modified"`
 }

 // Equal reports whether v1 and v2 describe the same work: identical Go,
 // worker and schema versions, and the same vuln DB modification instant.
 // It returns false if either v1 or v2 is nil.
 func (v1 *WorkVersion) Equal(v2 *WorkVersion) bool {
 	switch {
 	case v1 == nil, v2 == nil:
 		return false
 	case v1.GoVersion != v2.GoVersion:
 		return false
 	case v1.WorkerVersion != v2.WorkerVersion:
 		return false
 	case v1.SchemaVersion != v2.SchemaVersion:
 		return false
 	}
 	// Times are compared with time.Time.Equal rather than ==.
 	return v1.VulnDBLastModified.Equal(v2.VulnDBLastModified)
 }

 // SetUploadTime records t as the row's creation time.
 func (vr *Result) SetUploadTime(t time.Time) {
 	vr.CreatedAt = t
 }

 // AddError stores err's message and its category, as determined by
 // derrors.CategorizeError, in the result. A nil err leaves the result
 // untouched.
 func (vr *Result) AddError(err error) {
 	if err != nil {
 		vr.Error, vr.ErrorCategory = err.Error(), derrors.CategorizeError(err)
 	}
 }

 // Vuln is a record in Result.
 // The bigquery struct tags give the column name of each field.
 type Vuln struct {
 	ID          string `bigquery:"id"`
 	PackagePath string `bigquery:"package_path"`
 	ModulePath  string `bigquery:"module_path"`
 	Version     string `bigquery:"version"`
 	// Called is currently used to differentiate between
 	// called and imported vulnerabilities. We need it
 	// because we do not conduct a separate imports analysis,
 	// yet we use the full results of govulncheck source analysis.
 	// It is not part of the bigquery schema.
 	Called bool `bigquery:"-"`
 }

 // SchemaVersion changes whenever the govulncheck schema changes.
 // It is computed in init from the schema inferred from Result.
 var SchemaVersion string

 // init infers the BigQuery schema from Result, derives SchemaVersion from
 // it, and registers the schema under TableName. It panics if the schema
 // cannot be inferred.
 func init() {
 	schema, inferErr := bigquery.InferSchema(Result{})
 	if inferErr != nil {
 		panic(inferErr)
 	}

 	SchemaVersion = bigquery.SchemaVersion(schema)
 	bigquery.AddTable(TableName, schema)
 }

 // WorkState pairs the WorkVersion stored with a govulncheck result row
 // with that row's error category. It is what ReadWorkState returns.
 type WorkState struct {
 	WorkVersion   *WorkVersion
 	ErrorCategory string
 }

 // ReadWorkState looks up the most recently created row for
 // module_path@version in the govulncheck table and returns its work version
 // together with its error category. The returned WorkState is nil when the
 // query yields no row. Errors are wrapped with the function name.
 func ReadWorkState(ctx context.Context, c *bigquery.Client, module_path, version string) (ws *WorkState, err error) {
 	defer derrors.Wrap(&err, "ReadWorkState")

 	const qf = `
                 SELECT module_path, version, go_version, worker_version, schema_version, vulndb_last_modified, error_category
                 FROM %s WHERE module_path="%s" AND version="%s" ORDER BY created_at DESC LIMIT 1
         `
 	// NOTE(review): module_path and version are interpolated into the
 	// query text as-is; confirm callers only pass validated values.
 	table := "`" + c.FullTableName(TableName) + "`"
 	rows, err := c.Query(ctx, fmt.Sprintf(qf, table, module_path, version))
 	if err != nil {
 		return nil, err
 	}

 	// The query is limited to one row, so this runs at most once.
 	record := func(r *Result) bool {
 		ws = &WorkState{WorkVersion: &r.WorkVersion, ErrorCategory: r.ErrorCategory}
 		return true
 	}
 	if err = bigquery.ForEachRow(rows, record); err != nil {
 		return nil, err
 	}
 	return ws, nil
 }

 // ScanStats contains monitoring information for a govulncheck run.
 // RunGovulncheckCmd fills in ScanSeconds and ScanMemory after a successful
 // run; it does not set BuildTime.
 type ScanStats struct {
 	// ScanSeconds is the amount of time a scan took to run, in seconds.
 	ScanSeconds float64
 	// ScanMemory is the peak (heap) memory used by govulncheck, in kb.
 	// It is whatever getMemoryUsage reports, which is 0 unless that
 	// variable has been overridden.
 	ScanMemory uint64
 	// BuildTime is the amount of time it takes to build a given binary
 	// *BEFORE* scanning it with govulncheck.
 	// This is only used in COMPARE - BINARY mode
 	BuildTime time.Duration
 }

 // SandboxResponse contains the raw govulncheck result
 // and statistics about memory usage and run time. Used
 // for capturing result of govulncheck run in a sandbox.
 // UnmarshalSandboxResponse decodes it from JSON.
 type SandboxResponse struct {
 	// Findings are the findings reported by govulncheck.
 	Findings []*govulncheckapi.Finding
 	// Stats holds the timing and memory measurements of the run.
 	Stats ScanStats
 }

 // UnmarshalSandboxResponse decodes the JSON in output into a
 // SandboxResponse.
 //
 // The output is decoded twice: first to look for a top-level "Error"
 // string, which, if non-empty, is returned as the error; then into the
 // response itself. Malformed JSON is reported as an error as well.
 func UnmarshalSandboxResponse(output []byte) (*SandboxResponse, error) {
 	var failure struct{ Error string }
 	if err := json.Unmarshal(output, &failure); err != nil {
 		return nil, err
 	}
 	if msg := failure.Error; msg != "" {
 		return nil, errors.New(msg)
 	}

 	resp := new(SandboxResponse)
 	if err := json.Unmarshal(output, resp); err != nil {
 		return nil, err
 	}
 	return resp, nil
 }

 // CompareResponse holds, per key, the binary-mode and source-mode results
 // of a comparison run. UnmarshalCompareResponse decodes it from JSON.
 type CompareResponse struct {
 	// Map from package import path to pair of binary & source mode findings
 	// NOTE(review): the field name says "Mod" while the key is described
 	// as a package import path — confirm which one producers use.
 	FindingsForMod map[string]*ComparePair
 }

 // ComparePair holds the binary-mode and source-mode results for one entry
 // of CompareResponse.FindingsForMod, plus an error for that entry.
 type ComparePair struct {
 	BinaryResults SandboxResponse
 	SourceResults SandboxResponse
 	// NOTE(review): encoding/json cannot decode a non-null JSON value into
 	// an interface type that has methods, such as error, so a pair with a
 	// non-nil Error will probably not survive UnmarshalCompareResponse —
 	// confirm how producers populate this field.
 	Error error
 }

 // UnmarshalCompareResponse decodes the JSON in output into a
 // CompareResponse.
 //
 // The output is decoded twice: first to look for a top-level "Error"
 // string, which, if non-empty, is returned as the error; then into the
 // response itself. Malformed JSON is reported as an error as well.
 func UnmarshalCompareResponse(output []byte) (*CompareResponse, error) {
 	var failure struct{ Error string }
 	if err := json.Unmarshal(output, &failure); err != nil {
 		return nil, err
 	}
 	if msg := failure.Error; msg != "" {
 		return nil, errors.New(msg)
 	}

 	resp := new(CompareResponse)
 	if err := json.Unmarshal(output, resp); err != nil {
 		return nil, err
 	}
 	return resp, nil
 }

 // RunGovulncheckCmd runs the govulncheck binary at govulncheckPath on
 // pattern and returns the findings parsed from its JSON output.
 //
 // modeFlag is passed as the value of -mode. vulndbDir is a local directory
 // holding the vulnerability database; it is passed to -db as a file URI.
 // If moduleDir is non-empty it is passed with -C.
 //
 // On success, the elapsed time and the value reported by getMemoryUsage are
 // stored in stats, unless stats is nil.
 //
 // If the command fails, the returned error carries whatever govulncheck
 // wrote to standard error. When nothing was written there (for example
 // because the binary could not be started), the error from running the
 // command is returned instead, so that the failure is never reported with
 // an empty message.
 func RunGovulncheckCmd(govulncheckPath, modeFlag, pattern, moduleDir, vulndbDir string, stats *ScanStats) ([]*govulncheckapi.Finding, error) {
 	uri := "file://" + vulndbDir
 	if runtime.GOOS == "windows" {
 		// Windows paths need an extra slash and forward slashes to
 		// form a file URI.
 		uri = "file:///" + filepath.ToSlash(vulndbDir)
 	}
 	args := []string{"-mode", modeFlag, "-json", "-db", uri}
 	if moduleDir != "" {
 		args = append(args, "-C", moduleDir)
 	}
 	args = append(args, pattern)

 	var stdOut, stdErr bytes.Buffer
 	govulncheckCmd := exec.Command(govulncheckPath, args...)
 	govulncheckCmd.Stdout = &stdOut
 	govulncheckCmd.Stderr = &stdErr

 	start := time.Now()
 	if err := govulncheckCmd.Run(); err != nil {
 		if msg := stdErr.String(); strings.TrimSpace(msg) != "" {
 			return nil, errors.New(msg)
 		}
 		return nil, fmt.Errorf("running %s: %w", govulncheckPath, err)
 	}
 	if stats != nil {
 		stats.ScanSeconds = time.Since(start).Seconds()
 		stats.ScanMemory = getMemoryUsage(govulncheckCmd)
 	}

 	handler := NewMetricsHandler()
 	if err := govulncheckapi.HandleJSON(&stdOut, handler); err != nil {
 		return nil, err
 	}
 	return handler.Findings(), nil
 }

 // getMemoryUsage reports the memory used by the finished command c.
 // This default reports nothing (zero); it is overridden with a
 // Unix-specific function on Linux.
 var getMemoryUsage = func(_ *exec.Cmd) uint64 {
 	const notMeasured uint64 = 0
 	return notMeasured
 }
	// Copyright 2022 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package govulncheck provides functionality for manipulating
	// inputs and outputs of govulncheck endpoints.
	package govulncheck

	import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"time"

	bq "cloud.google.com/go/bigquery"

	"golang.org/x/pkgsite-metrics/internal/bigquery"
	"golang.org/x/pkgsite-metrics/internal/derrors"
	"golang.org/x/pkgsite-metrics/internal/govulncheckapi"
	"golang.org/x/pkgsite-metrics/internal/scan"
	)

	const (
	// ModeBinary runs the govulncheck binary in binary mode.
	ModeBinary string = "BINARY"

	// ModeGovulncheck runs the govulncheck binary in default (source) mode.
	ModeGovulncheck = "GOVULNCHECK"

	// FlagBinary is the flag passed to govulncheck to run in binary mode.
	FlagBinary = "binary"

	// FlagSource is the flag passed to govulncheck to run in source mode.
	FlagSource = "source"
	)

	// EnqueueQueryParams for govulncheck/enqueue.
	type EnqueueQueryParams struct {
	Suffix string // appended to task queue IDs to generate unique tasks
	Mode string // type of analysis to run
	Min int // minimum import-by count for a module to be included
	File string // path to file containing modules; if missing, use DB
	}

	// Request contains information passed to a scan endpoint.
	type Request struct {
	scan.ModuleURLPath
	QueryParams
	}

	// QueryParams has query parameters for a govulncheck scan request.
	type QueryParams struct {
	ImportedBy int // imported-by count
	Mode string // govulncheck mode
	Insecure bool // if true, run outside sandbox
	Serve bool // serve results back to client instead of writing them to BigQuery
	}

	// The below methods implement queue.Task.

	func (r *Request) Name() string { return r.Module + "@" + r.Version }

	func (r *Request) Path() string { return r.ModuleURLPath.Path() }

	func (r *Request) Params() string {
	return scan.FormatParams(r.QueryParams)
	}

	// ParseRequest builds a Request from the HTTP request r. The endpoint
	// prefix is stripped from the URL path and the remainder is parsed as a
	// module and version; the query string supplies the QueryParams.
	//
	// The module and version should have one of the following three forms:
	//   - <module>/@v/<version>
	//   - <module>@<version>
	//   - <module>/@latest
	//
	// (These are the same forms that the module proxy accepts.)
	//
	// An error is returned if the path or the query parameters cannot be
	// parsed, or if the "importedby" parameter is missing or negative.
	func ParseRequest(r *http.Request, prefix string) (*Request, error) {
		mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix))
		if err != nil {
			return nil, err
		}

		// ImportedBy starts out negative so that a request which never
		// sets it can be told apart from one that sets it to zero.
		rp := QueryParams{ImportedBy: -1}
		if err := scan.ParseParams(r, &rp); err != nil {
			return nil, err
		}
		if rp.ImportedBy < 0 {
			return nil, errors.New(`missing or negative "importedby" query param`)
		}
		return &Request{
			ModuleURLPath: mp,
			QueryParams:   rp,
		}, nil
	}

	// ConvertGovulncheckFinding takes a finding from govulncheck and converts it to
	// a bigquery vuln.
	//
	// The first entry of f.Trace is treated as the vulnerable frame: its
	// package, module and version are copied into the result, and the vuln is
	// marked as called when that frame names a function.
	//
	// A finding with an empty trace yields a Vuln that carries only the OSV ID,
	// rather than panicking on the missing frame.
	func ConvertGovulncheckFinding(f *govulncheckapi.Finding) *Vuln {
		if len(f.Trace) == 0 {
			return &Vuln{ID: f.OSV}
		}

		vulnerableFrame := f.Trace[0]
		return &Vuln{
			ID:          f.OSV,
			PackagePath: vulnerableFrame.Package,
			ModulePath:  vulnerableFrame.Module,
			Version:     vulnerableFrame.Version,
			// A frame without a function name means the vulnerability
			// was not found to be called.
			Called: vulnerableFrame.Function != "",
		}
	}

	const TableName = "govulncheck"

	// Note: before modifying Result or Vuln, make sure the change
	// is a valid schema modification.
	// The only supported changes are:
	// - adding a nullable or repeated column
	// - dropping a column
	// - changing a column from required to nullable.
	// See https://cloud.google.com/bigquery/docs/managing-table-schemas for details.

	// Result is a row in the BigQuery govulncheck table.
	type Result struct {
	CreatedAt time.Time `bigquery:"created_at"`
	ModulePath string `bigquery:"module_path"`
	Version string `bigquery:"version"`
	Suffix string `bigquery:"suffix"`
	SortVersion string `bigquery:"sort_version"`
	ImportedBy int `bigquery:"imported_by"`
	Error string `bigquery:"error"`
	ErrorCategory string `bigquery:"error_category"`
	CommitTime time.Time `bigquery:"commit_time"`
	ScanSeconds float64 `bigquery:"scan_seconds"`
	// BinaryBuildSeconds is populated only in COMPARE - BINARY mode
	BinaryBuildSeconds bq.NullFloat64 `bigquery:"build_seconds"`
	ScanMemory int64 `bigquery:"scan_memory"`
	ScanMode string `bigquery:"scan_mode"`
	WorkVersion // InferSchema flattens embedded fields
	Vulns []*Vuln `bigquery:"vulns"`
	}

	// WorkVersion contains information that can be used to avoid duplicate work.
	// Given two WorkVersion values v1 and v2 for the same module path and version,
	// if v1.Equal(v2) then it is not necessary to scan the module.
	type WorkVersion struct {
	// GoVersion used at path. Allows precise interpretation
	// of detected stdlib vulnerabilities.
	GoVersion string `bigquery:"go_version"`
	// The version of the currently running code. This tracks changes in the
	// logic of module scanning and processing.
	WorkerVersion string `bigquery:"worker_version"`
	// The version of the bigquery schema.
	SchemaVersion string ` bigquery:"schema_version"`
	// When the vuln DB was last modified.
	VulnDBLastModified time.Time `bigquery:"vulndb_last_modified"`
	}

	func (v1 WorkVersion) Equal(v2 WorkVersion) bool {
	if v1 == nil \|\| v2 == nil {
	return false
	}
	return v1.GoVersion == v2.GoVersion &&
	v1.WorkerVersion == v2.WorkerVersion &&
	v1.SchemaVersion == v2.SchemaVersion &&
	v1.VulnDBLastModified.Equal(v2.VulnDBLastModified)
	}

	func (vr *Result) SetUploadTime(t time.Time) { vr.CreatedAt = t }

	func (vr *Result) AddError(err error) {
	if err == nil {
	return
	}
	vr.Error = err.Error()
	vr.ErrorCategory = derrors.CategorizeError(err)
	}

	// Vuln is a record in Result.
	type Vuln struct {
	ID string `bigquery:"id"`
	PackagePath string `bigquery:"package_path"`
	ModulePath string `bigquery:"module_path"`
	Version string `bigquery:"version"`
	// Called is currently used to differentiate between
	// called and imported vulnerabilities. We need it
	// because we don't conduct an imports analysis yet
	// use the full results of govulncheck source analysis.
	// It is not part of the bigquery schema.
	Called bool `bigquery:"-"`
	}

	// SchemaVersion changes whenever the govulncheck schema changes.
	var SchemaVersion string

	func init() {
	s, err := bigquery.InferSchema(Result{})
	if err != nil {
	panic(err)
	}
	SchemaVersion = bigquery.SchemaVersion(s)
	bigquery.AddTable(TableName, s)
	}

	type WorkState struct {
	WorkVersion *WorkVersion
	ErrorCategory string
	}

	// ReadWorkState reads the most recent work version for module_path@version
	// in the govulncheck table together with its accompanying error category.
	// The returned WorkState is nil when the query yields no row. Errors are
	// wrapped with the function name.
	func ReadWorkState(ctx context.Context, c *bigquery.Client, module_path, version string) (ws *WorkState, err error) {
		defer derrors.Wrap(&err, "ReadWorkState")

		const qf = `
		SELECT module_path, version, go_version, worker_version, schema_version, vulndb_last_modified, error_category
		FROM %s WHERE module_path="%s" AND version="%s" ORDER BY created_at DESC LIMIT 1
		`
		// NOTE(review): module_path and version are interpolated into the
		// query text as-is; confirm callers only pass validated values.
		query := fmt.Sprintf(qf, "`"+c.FullTableName(TableName)+"`", module_path, version)
		iter, err := c.Query(ctx, query)
		if err != nil {
			return nil, err
		}

		err = bigquery.ForEachRow(iter, func(r *Result) bool {
			// This should be reachable at most once.
			ws = &WorkState{
				WorkVersion:   &r.WorkVersion,
				ErrorCategory: r.ErrorCategory,
			}
			return true
		})
		if err != nil {
			return nil, err
		}
		return ws, nil
	}

	// ScanStats contains monitoring information for a govulncheck run.
	type ScanStats struct {
	// ScanSeconds is the amount of time a scan took to run, in seconds.
	ScanSeconds float64
	// ScanMemory is the peak (heap) memory used by govulncheck, in kb.
	ScanMemory uint64
	// BuildTime is the amount of time it takes to build a given binary
	// BEFORE scanning it with govulncheck.
	// This is only used in COMPARE - BINARY mode
	BuildTime time.Duration
	}

	// SandboxResponse contains the raw govulncheck result
	// and statistics about memory usage and run time. Used
	// for capturing result of govulncheck run in a sandbox.
	type SandboxResponse struct {
	Findings []*govulncheckapi.Finding
	Stats ScanStats
	}

	func UnmarshalSandboxResponse(output []byte) (*SandboxResponse, error) {
	var e struct{ Error string }
	if err := json.Unmarshal(output, &e); err != nil {
	return nil, err
	}
	if e.Error != "" {
	return nil, errors.New(e.Error)
	}
	var res SandboxResponse
	if err := json.Unmarshal(output, &res); err != nil {
	return nil, err
	}
	return &res, nil
	}

	type CompareResponse struct {
	// Map from package import path to pair of binary & source mode findings
	FindingsForMod map[string]*ComparePair
	}

	type ComparePair struct {
	BinaryResults SandboxResponse
	SourceResults SandboxResponse
	Error error
	}

	func UnmarshalCompareResponse(output []byte) (*CompareResponse, error) {
	var e struct{ Error string }
	if err := json.Unmarshal(output, &e); err != nil {
	return nil, err
	}
	if e.Error != "" {
	return nil, errors.New(e.Error)
	}
	var res CompareResponse
	if err := json.Unmarshal(output, &res); err != nil {
	return nil, err
	}
	return &res, nil
	}

	// RunGovulncheckCmd runs the govulncheck binary at govulncheckPath on
	// pattern and returns the findings parsed from its JSON output.
	//
	// modeFlag is passed as the value of -mode. vulndbDir is a local directory
	// holding the vulnerability database; it is passed to -db as a file URI.
	// If moduleDir is non-empty it is passed with -C.
	//
	// On success, the elapsed time and the value reported by getMemoryUsage are
	// stored in stats, unless stats is nil.
	//
	// If the command fails, the returned error carries whatever govulncheck
	// wrote to standard error. When nothing was written there (for example
	// because the binary could not be started), the error from running the
	// command is returned instead, so that the failure is never reported with
	// an empty message.
	func RunGovulncheckCmd(govulncheckPath, modeFlag, pattern, moduleDir, vulndbDir string, stats *ScanStats) ([]*govulncheckapi.Finding, error) {
		uri := "file://" + vulndbDir
		if runtime.GOOS == "windows" {
			// Windows paths need an extra slash and forward slashes to
			// form a file URI.
			uri = "file:///" + filepath.ToSlash(vulndbDir)
		}
		args := []string{"-mode", modeFlag, "-json", "-db", uri}
		if moduleDir != "" {
			args = append(args, "-C", moduleDir)
		}
		args = append(args, pattern)

		var stdOut, stdErr bytes.Buffer
		govulncheckCmd := exec.Command(govulncheckPath, args...)
		govulncheckCmd.Stdout = &stdOut
		govulncheckCmd.Stderr = &stdErr

		start := time.Now()
		if err := govulncheckCmd.Run(); err != nil {
			if msg := stdErr.String(); strings.TrimSpace(msg) != "" {
				return nil, errors.New(msg)
			}
			return nil, fmt.Errorf("running %s: %w", govulncheckPath, err)
		}
		if stats != nil {
			stats.ScanSeconds = time.Since(start).Seconds()
			stats.ScanMemory = getMemoryUsage(govulncheckCmd)
		}

		handler := NewMetricsHandler()
		if err := govulncheckapi.HandleJSON(&stdOut, handler); err != nil {
			return nil, err
		}
		return handler.Findings(), nil
	}

	// getMemoryUsage is overridden with a Unix-specific function on Linux.
	var getMemoryUsage = func(c *exec.Cmd) uint64 {
	return 0
	}