internal/worker: add analysis/scan handler

Add a server endpoint for running analysis binaries.

This first CL does not write to BigQuery. It just
runs an analysis binary and sends the result back
to the client. And it only runs in insecure mode.

Change-Id: Id70beaeb79cb518c0bde63e95b9a648a6892e838
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/470455
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/internal/scan/parse.go b/internal/scan/parse.go
index 17bca28..6347296 100644
--- a/internal/scan/parse.go
+++ b/internal/scan/parse.go
@@ -9,6 +9,7 @@
 	"bufio"
 	"fmt"
 	"net/http"
+	"net/url"
 	"os"
 	"reflect"
 	"strconv"
@@ -207,8 +208,9 @@
 	var params []string
 	for i := 0; i < t.NumField(); i++ {
 		f := t.Field(i)
+		val := url.QueryEscape(fmt.Sprint(v.Field(i)))
 		params = append(params,
-			fmt.Sprintf("%s=%v", strings.ToLower(f.Name), v.Field(i)))
+			fmt.Sprintf("%s=%s", strings.ToLower(f.Name), val))
 	}
 	return strings.Join(params, "&")
 }
diff --git a/internal/scan/parse_test.go b/internal/scan/parse_test.go
index 253f61c..e141a99 100644
--- a/internal/scan/parse_test.go
+++ b/internal/scan/parse_test.go
@@ -186,8 +186,8 @@
 }
 
 func TestFormatParams(t *testing.T) {
-	got := FormatParams(params{Str: "foo", Int: 17, Bool: true})
-	want := "str=foo&int=17&bool=true"
+	got := FormatParams(params{Str: "foo bar", Int: 17, Bool: true})
+	want := "str=foo+bar&int=17&bool=true"
 	if got != want {
 		t.Errorf("got %q, want %q", got, want)
 	}
diff --git a/internal/worker/analysis.go b/internal/worker/analysis.go
new file mode 100644
index 0000000..1cc42d1
--- /dev/null
+++ b/internal/worker/analysis.go
@@ -0,0 +1,212 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"net/http"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"cloud.google.com/go/storage"
+	"golang.org/x/pkgsite-metrics/internal/derrors"
+	"golang.org/x/pkgsite-metrics/internal/log"
+	"golang.org/x/pkgsite-metrics/internal/modules"
+	"golang.org/x/pkgsite-metrics/internal/queue"
+	"golang.org/x/pkgsite-metrics/internal/scan"
+)
+
+type analysisServer struct {
+	*Server
+}
+
+type analysisRequest struct {
+	scan.ModuleURLPath
+	analysisParams
+}
+
+// analysisRequest implements queue.Task so it can be put on a TaskQueue.
+var _ queue.Task = (*analysisRequest)(nil)
+
+type analysisParams struct {
+	Binary     string // name of analysis binary to run
+	Args       string // command-line arguments to binary; split on whitespace
+	ImportedBy int    // imported-by count of module in path
+	Insecure   bool   // if true, run outside sandbox
+	Serve      bool   // serve results back to client instead of writing them to BigQuery
+}
+
+func (r *analysisRequest) Name() string { return r.Binary + "_" + r.Module + "@" + r.Version }
+
+func (r *analysisRequest) Path() string { return r.ModuleURLPath.Path() }
+
+func (r *analysisRequest) Params() string {
+	return scan.FormatParams(r.analysisParams)
+}
+
+func parseAnalysisRequest(r *http.Request, prefix string) (*analysisRequest, error) {
+	mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix))
+	if err != nil {
+		return nil, err
+	}
+
+	ap := analysisParams{}
+	if err := scan.ParseParams(r, &ap); err != nil {
+		return nil, err
+	}
+	return &analysisRequest{
+		ModuleURLPath:  mp,
+		analysisParams: ap,
+	}, nil
+}
+
+const analysisBinariesBucketDir = "analysis-binaries"
+
+// handleScan runs an analysis binary on the module in the request URL and
+// writes the result to w as JSON. It fails if no binary bucket is configured.
+func (s *analysisServer) handleScan(w http.ResponseWriter, r *http.Request) (err error) {
+	defer derrors.Wrap(&err, "analysisServer.handleScan")
+	if s.cfg.BinaryBucket == "" {
+		return errors.New("binary bucket not configured; set GO_ECOSYSTEM_BINARY_BUCKET")
+	}
+	ctx := r.Context()
+	req, err := parseAnalysisRequest(r, "/analysis/scan")
+	if err != nil {
+		return fmt.Errorf("%w: %v", derrors.InvalidArgument, err)
+	}
+	jsonTree, err := s.scan(ctx, req)
+	if err != nil {
+		return err
+	}
+	out, err := json.Marshal(jsonTree)
+	if err != nil {
+		return err
+	}
+	_, err = w.Write(out)
+	return err
+}
+
+// scan runs req.Binary, copied from the binary bucket, on a freshly downloaded copy of the module.
+func (s *analysisServer) scan(ctx context.Context, req *analysisRequest) (_ JSONTree, err error) {
+	if req.Binary == "" {
+		return nil, fmt.Errorf("%w: analysis: missing binary", derrors.InvalidArgument)
+	}
+	if !req.Insecure {
+		return nil, fmt.Errorf("%w: analysis: sandbox mode unimplemented", derrors.InvalidArgument)
+	}
+	if !req.Serve {
+		return nil, fmt.Errorf("%w: analysis: writing to BigQuery unimplemented", derrors.InvalidArgument)
+	}
+	if req.Suffix != "" {
+		return nil, fmt.Errorf("%w: analysis: only implemented for whole modules (no suffix)", derrors.InvalidArgument)
+	}
+
+	// Copy the binary from the bucket.
+	c, err := storage.NewClient(ctx)
+	if err != nil {
+		return nil, err
+	}
+	defer c.Close() // a client is created for every request, so release it when done
+	bucket := c.Bucket(s.cfg.BinaryBucket)
+	const destDir = "/bundle/rootfs/binaries"
+	binaryPath := filepath.Join(filepath.FromSlash(destDir), req.Binary)
+	if err := copyFromGCS(ctx, bucket, path.Join(analysisBinariesBucketDir, req.Binary), binaryPath); err != nil {
+		return nil, err
+	}
+
+	// Download the module.
+	tempDir, err := os.MkdirTemp("", "analysis")
+	if err != nil {
+		return nil, err
+	}
+	defer func() {
+		err1 := os.RemoveAll(tempDir)
+		if err == nil { // report a cleanup failure only if nothing else went wrong
+			err = err1
+		}
+	}()
+	log.Debugf(ctx, "fetching module zip: %s@%s", req.Module, req.Version)
+	const stripModulePrefix = true
+	if err := modules.Download(ctx, req.Module, req.Version, tempDir, s.proxyClient, stripModulePrefix); err != nil {
+		return nil, err
+	}
+	return runAnalysisBinary(binaryPath, req.Args, tempDir)
+}
+
+// runAnalysisBinary runs the binary on the module in moduleDir and decodes its JSON output.
+func runAnalysisBinary(binaryPath, reqArgs, moduleDir string) (JSONTree, error) {
+	args := []string{"-json"}
+	args = append(args, strings.Fields(reqArgs)...)
+	args = append(args, "./...") // package pattern, resolved relative to cmd.Dir
+	cmd := exec.Command(binaryPath, args...)
+	cmd.Dir = moduleDir
+	out, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("running analysis binary %s: %s", binaryPath, log.IncludeStderr(err))
+	}
+	var tree JSONTree
+	if err := json.Unmarshal(out, &tree); err != nil {
+		return nil, err
+	}
+	return tree, nil
+}
+
+type diagnosticsOrError struct {
+	Diagnostics []JSONDiagnostic
+	Error       *jsonError
+}
+
+func (de *diagnosticsOrError) UnmarshalJSON(data []byte) error {
+	if err := json.Unmarshal(data, &de.Diagnostics); err == nil { // first try a list of JSONDiagnostic
+		return nil
+	}
+	return json.Unmarshal(data, &de.Error) // otherwise data must decode as a jsonError
+}
+
+////////////////////////////////////////////////////////////////
+
+// These structs were copied, with minor changes, from
+// golang.org/x/tools/go/analysis/internal/analysisflags.
+
+// A JSONTree is a mapping from package ID to analysis name to result.
+// Each result is either a jsonError or a list of JSONDiagnostic.
+type JSONTree map[string]map[string]diagnosticsOrError
+
+// A JSONDiagnostic can be used to encode and decode analysis.Diagnostics to and
+// from JSON.
+type JSONDiagnostic struct {
+	Category       string             `json:"category,omitempty"`
+	Posn           string             `json:"posn"`
+	Message        string             `json:"message"`
+	SuggestedFixes []JSONSuggestedFix `json:"suggested_fixes,omitempty"`
+}
+
+// A JSONSuggestedFix describes an edit that should be applied as a whole or not
+// at all. It might contain multiple TextEdits/text_edits if the SuggestedFix
+// consists of multiple non-contiguous edits.
+type JSONSuggestedFix struct {
+	Message string         `json:"message"`
+	Edits   []JSONTextEdit `json:"edits"`
+}
+
+// A JSONTextEdit describes the replacement of a portion of a file.
+// Start and End are zero-based half-open indices into the original byte
+// sequence of the file, and New is the new text.
+type JSONTextEdit struct {
+	Filename string `json:"filename"`
+	Start    int    `json:"start"`
+	End      int    `json:"end"`
+	New      string `json:"new"`
+}
+
+type jsonError struct {
+	Err string `json:"error"`
+}
diff --git a/internal/worker/analysis_test.go b/internal/worker/analysis_test.go
new file mode 100644
index 0000000..044c94a
--- /dev/null
+++ b/internal/worker/analysis_test.go
@@ -0,0 +1,56 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package worker
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"golang.org/x/pkgsite-metrics/internal/buildtest"
+)
+
+// TestRunAnalysisBinary runs the findcall checker from testdata/analyzer on testdata/module.
+func TestRunAnalysisBinary(t *testing.T) {
+	binaryPath, cleanup := buildtest.GoBuild(t, "testdata/analyzer", "")
+	defer cleanup()
+
+	got, err := runAnalysisBinary(binaryPath, "-name Fact", "testdata/module")
+	if err != nil {
+		t.Fatal(err)
+	}
+	want := JSONTree{
+		"test_module": map[string]diagnosticsOrError{
+			"findcall": {
+				Diagnostics: []JSONDiagnostic{
+					{
+						Posn:    "a.go:7:17",
+						Message: "call of Fact(...)",
+						SuggestedFixes: []JSONSuggestedFix{
+							{
+								Message: "Add '_TEST_'",
+								Edits: []JSONTextEdit{{
+									Filename: "a.go",
+									Start:    77,
+									End:      77,
+									New:      "_TEST_",
+								}},
+							},
+						},
+					},
+				},
+			},
+		},
+	}
+	// To make the test portable, compare the basenames of file paths.
+	// This will be called for all strings, but in this case only file paths contain slashes.
+	comparePaths := func(s1, s2 string) bool {
+		return filepath.Base(s1) == filepath.Base(s2)
+	}
+
+	if diff := cmp.Diff(want, got, cmp.Comparer(comparePaths)); diff != "" {
+		t.Errorf("mismatch (-want, +got):\n%s", diff)
+	}
+}
diff --git a/internal/worker/server.go b/internal/worker/server.go
index 1bb361f..55ae91f 100644
--- a/internal/worker/server.go
+++ b/internal/worker/server.go
@@ -138,6 +138,9 @@
 	if err := s.registerVulncheckHandlers(ctx); err != nil {
 		return nil, err
 	}
+	if err := s.registerAnalysisHandlers(ctx); err != nil {
+		return nil, err
+	}
 
 	s.handle("/test-vulncheck-sandbox/", s.handleTestVulncheckSandbox)
 	s.handle("/test-db", s.handleTestDB)
@@ -184,6 +187,12 @@
 	return nil
 }
 
+func (s *Server) registerAnalysisHandlers(ctx context.Context) error {
+	h := &analysisServer{s}
+	s.handle("/analysis/scan/", h.handleScan)
+	return nil
+}
+
 type serverError struct {
 	status int   // HTTP status code
 	err    error // wrapped error
diff --git a/internal/worker/testdata/analyzer/analyzer.go b/internal/worker/testdata/analyzer/analyzer.go
new file mode 100644
index 0000000..08fa487
--- /dev/null
+++ b/internal/worker/testdata/analyzer/analyzer.go
@@ -0,0 +1,12 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+	"golang.org/x/tools/go/analysis/passes/findcall"
+	"golang.org/x/tools/go/analysis/singlechecker"
+)
+
+func main() { singlechecker.Main(findcall.Analyzer) }
diff --git a/internal/worker/testdata/module/a.go b/internal/worker/testdata/module/a.go
new file mode 100644
index 0000000..729e910
--- /dev/null
+++ b/internal/worker/testdata/module/a.go
@@ -0,0 +1,8 @@
+package p
+
+func Fact(n int) int {
+	if n == 0 {
+		return 1
+	}
+	return n * Fact(n-1)
+}
diff --git a/internal/worker/testdata/module/go.mod b/internal/worker/testdata/module/go.mod
new file mode 100644
index 0000000..84e2e85
--- /dev/null
+++ b/internal/worker/testdata/module/go.mod
@@ -0,0 +1,2 @@
+module test_module
+
diff --git a/internal/worker/vulncheck_scan.go b/internal/worker/vulncheck_scan.go
index 76762b0..db94733 100644
--- a/internal/worker/vulncheck_scan.go
+++ b/internal/worker/vulncheck_scan.go
@@ -739,21 +739,15 @@
 
 // vulncheckRequestParams has query parameters for a vulncheck scan request.
 type vulncheckRequestParams struct {
-	ImportedBy int
-	Mode       string
-	Insecure   bool
+	ImportedBy int    // imported-by count
+	Mode       string // vulncheck mode (VTA, etc)
+	Insecure   bool   // if true, run outside sandbox
 }
 
 // These methods implement queue.Task.
 func (r *vulncheckRequest) Name() string { return r.Module + "@" + r.Version }
 
-func (r *vulncheckRequest) Path() string {
-	p := r.Module + "@" + r.Version
-	if r.Suffix != "" {
-		p += "/" + r.Suffix
-	}
-	return p
-}
+func (r *vulncheckRequest) Path() string { return r.ModuleURLPath.Path() }
 
 func (r *vulncheckRequest) Params() string {
 	return scan.FormatParams(r.vulncheckRequestParams)