internal/analysis: analysis data structures Move all the data structures and related functions for analysis, including HTTP request data, analysis binary results, and BigQuery structs, into a single package. I think this makes the structure of the whole analysis part of the system easier to understand. It also moves internal/bigquery towards being a general helper for BigQuery, with no application-specific code. I think that's a good thing. I plan to do this same refactoring for vulncheck once the code there settles down a bit more. Change-Id: I88eac58572ea397ab5e2ee114b795b82b656655b Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/472657 Run-TryBot: Jonathan Amsterdam <jba@google.com> TryBot-Result: Gopher Robot <gobot@golang.org> Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>

commit: d4b0c410af5dfd03d74aac8bb038bc0ef743e810 [log] [tgz]
author: Jonathan Amsterdam <jba@google.com> Wed Mar 01 16:22:30 2023 -0500
committer: Jonathan Amsterdam <jba@google.com> Fri Mar 03 14:07:21 2023 +0000
tree: 04eb17e0d23f7728af535f13507e9e7eabe895fb
parent: d62bba630cc68cb63c3f279ad02d0139da72948f [diff]
diff --git a/internal/analysis/analysis.go b/internal/analysis/analysis.go
new file mode 100644
index 0000000..80824f4
--- /dev/null
+++ b/internal/analysis/analysis.go

@@ -0,0 +1,240 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package analysis
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"sort"
+	"strings"
+	"time"
+
+	"golang.org/x/exp/maps"
+	"golang.org/x/pkgsite-metrics/internal/bigquery"
+	"golang.org/x/pkgsite-metrics/internal/derrors"
+	"golang.org/x/pkgsite-metrics/internal/queue"
+	"golang.org/x/pkgsite-metrics/internal/scan"
+)
+
+type Request struct {
+	scan.ModuleURLPath
+	QueryParams
+}
+
+type QueryParams struct {
+	Binary     string // name of analysis binary to run
+	Args       string // command-line arguments to binary; split on whitespace
+	ImportedBy int    // imported-by count of module in path
+	Insecure   bool   // if true, run outside sandbox
+	Serve      bool   // serve results back to client instead of writing them to BigQuery
+}
+
+// Request implements queue.Task so it can be put on a TaskQueue.
+var _ queue.Task = (*Request)(nil)
+
+func (r *Request) Name() string { return r.Binary + "_" + r.Module + "@" + r.Version }
+
+func (r *Request) Path() string { return r.ModuleURLPath.Path() }
+
+func (r *Request) Params() string {
+	return scan.FormatParams(r.QueryParams)
+}
+
+func ParseRequest(r *http.Request, prefix string) (*Request, error) {
+	mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix))
+	if err != nil {
+		return nil, err
+	}
+
+	ap := QueryParams{}
+	if err := scan.ParseParams(r, &ap); err != nil {
+		return nil, err
+	}
+	return &Request{
+		ModuleURLPath: mp,
+		QueryParams:   ap,
+	}, nil
+}
+
+// These structs were copied, with minor changes, from
+// golang.org/x/tools/go/analysis/internal/analysisflags.
+
+// A JSONTree is a mapping from package ID to analysis name to result.
+// Each result is either a jsonError or a list of JSONDiagnostic.
+type JSONTree map[string]map[string]DiagnosticsOrError
+
+// A JSONDiagnostic can be used to encode and decode analysis.Diagnostics to and
+// from JSON.
+type JSONDiagnostic struct {
+	Category       string             `json:"category,omitempty"`
+	Posn           string             `json:"posn"`
+	Message        string             `json:"message"`
+	SuggestedFixes []JSONSuggestedFix `json:"suggested_fixes,omitempty"`
+}
+
+// A JSONSuggestedFix describes an edit that should be applied as a whole or not
+// at all. It might contain multiple TextEdits/text_edits if the SuggestedFix
+// consists of multiple non-contiguous edits.
+type JSONSuggestedFix struct {
+	Message string         `json:"message"`
+	Edits   []JSONTextEdit `json:"edits"`
+}
+
+// A JSONTextEdit describes the replacement of a portion of a file.
+// Start and End are zero-based half-open indices into the original byte
+// sequence of the file, and New is the new text.
+type JSONTextEdit struct {
+	Filename string `json:"filename"`
+	Start    int    `json:"start"`
+	End      int    `json:"end"`
+	New      string `json:"new"`
+}
+
+type jsonError struct {
+	Err string `json:"error"`
+}
+
+type DiagnosticsOrError struct {
+	Diagnostics []JSONDiagnostic
+	Error       *jsonError
+}
+
+func (de *DiagnosticsOrError) UnmarshalJSON(data []byte) error {
+	if err := json.Unmarshal(data, &de.Diagnostics); err == nil {
+		return nil
+	}
+	return json.Unmarshal(data, &de.Error)
+}
+
+// Definitions for BigQuery.
+
+const TableName = "analysis"
+
+// Note: before modifying Result, make sure the change
+// is a valid schema modification.
+// The only supported changes are:
+//   - adding a nullable or repeated column
+//   - dropping a column
+//   - changing a column from required to nullable.
+// See https://cloud.google.com/bigquery/docs/managing-table-schemas for details.
+
+// Result is a row in the BigQuery analysis table. It corresponds to a
+// result from the output for an analysis.
+type Result struct {
+	CreatedAt   time.Time `bigquery:"created_at"`
+	ModulePath  string    `bigquery:"module_path"`
+	Version     string    `bigquery:"version"`
+	SortVersion string    `bigquery:"sort_version"`
+	CommitTime  time.Time `bigquery:"commit_time"`
+	// The name of the analysis binary that was executed.
+	// A single binary may run multiple analyzers.
+	BinaryName    string `bigquery:"binary_name"`
+	Error         string `bigquery:"error"`
+	ErrorCategory string `bigquery:"error_category"`
+	WorkVersion          // InferSchema flattens embedded fields
+
+	Diagnostics []*Diagnostic `bigquery:"diagnostic"`
+}
+
+func (r *Result) AddError(err error) {
+	if err == nil {
+		return
+	}
+	r.Error = err.Error()
+	r.ErrorCategory = derrors.CategorizeError(err)
+}
+
+// WorkVersion contains information that can be used to avoid duplicate work.
+// Given two WorkVersion values v1 and v2 for the same module path and version,
+// if v1 == v2 then it is not necessary to scan the module.
+type WorkVersion struct {
+	// A hash of the binary executed.
+	BinaryVersion string `bigquery:"binary_version"`
+	BinaryArgs    string `bigquery:"binary_args"` // args passed to binary
+	// The version of the currently running code. This tracks changes in the
+	// logic of module scanning and processing.
+	WorkerVersion string `bigquery:"worker_version"`
+	// The version of the bigquery schema.
+	SchemaVersion string ` bigquery:"schema_version"`
+}
+
+// A Diagnostic is a single analyzer finding.
+type Diagnostic struct {
+	// The package ID as reported by the analysis binary.
+	PackageID    string `bigquery:"package_id"`
+	AnalyzerName string `bigquery:"analyzer_name"`
+	Error        string `bigquery:"error"`
+	// These fields are from JSONDiagnostic.
+	Category string `bigquery:"category"`
+	Position string `bigquery:"position"`
+	Message  string `bigquery:"message"`
+}
+
+// SchemaVersion changes whenever the analysis schema changes.
+var SchemaVersion string
+
+func init() {
+	s, err := bigquery.InferSchema(Result{})
+	if err != nil {
+		panic(err)
+	}
+	SchemaVersion = bigquery.SchemaVersion(s)
+	bigquery.AddTable(TableName, s)
+}
+
+// ReadWorkVersions reads the most recent WorkVersions in the analysis table.
+func ReadWorkVersions(ctx context.Context, c *bigquery.Client) (_ map[[2]string]*WorkVersion, err error) {
+	defer derrors.Wrap(&err, "ReadWorkVersions")
+	m := map[[2]string]*WorkVersion{}
+	query := bigquery.PartitionQuery(c.FullTableName(TableName), "module_path, sort_version", "created_at DESC")
+	iter, err := c.Query(ctx, query)
+	if err != nil {
+		return nil, err
+	}
+	err = bigquery.ForEachRow(iter, func(r *Result) bool {
+		m[[2]string{r.ModulePath, r.Version}] = &r.WorkVersion
+		return true
+	})
+	if err != nil {
+		return nil, err
+	}
+	return m, nil
+}
+
+// JSONTreeToDiagnostics converts a jsonTree to a list of diagnostics for BigQuery.
+// It ignores the suggested fixes of the diagnostics.
+func JSONTreeToDiagnostics(jsonTree JSONTree) []*Diagnostic {
+	var diags []*Diagnostic
+	// Sort for determinism.
+	pkgIDs := maps.Keys(jsonTree)
+	sort.Strings(pkgIDs)
+	for _, pkgID := range pkgIDs {
+		amap := jsonTree[pkgID]
+		aNames := maps.Keys(amap)
+		sort.Strings(aNames)
+		for _, aName := range aNames {
+			diagsOrErr := amap[aName]
+			if diagsOrErr.Error != nil {
+				diags = append(diags, &Diagnostic{
+					PackageID:    pkgID,
+					AnalyzerName: aName,
+					Error:        diagsOrErr.Error.Err,
+				})
+			} else {
+				for _, jd := range diagsOrErr.Diagnostics {
+					diags = append(diags, &Diagnostic{
+						PackageID:    pkgID,
+						AnalyzerName: aName,
+						Category:     jd.Category,
+						Position:     jd.Posn,
+						Message:      jd.Message,
+					})
+				}
+			}
+		}
+	}
+	return diags
+}

diff --git a/internal/analysis/analysis_test.go b/internal/analysis/analysis_test.go
new file mode 100644
index 0000000..dd4dcdc
--- /dev/null
+++ b/internal/analysis/analysis_test.go

@@ -0,0 +1,43 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package analysis
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+)
+
+func TestJSONTreeToDiagnostics(t *testing.T) {
+	in := JSONTree{
+		"pkg1": {
+			"a": {
+				Diagnostics: []JSONDiagnostic{
+					{Category: "c1", Posn: "pos1", Message: "m1"},
+					{Category: "c2", Posn: "pos2", Message: "m2"},
+				},
+			},
+			"b": {
+				Diagnostics: []JSONDiagnostic{{Category: "c3", Posn: "pos3", Message: "m3"}},
+			},
+		},
+		"pkg2": {
+			"c": {
+				Error: &jsonError{Err: "fail"},
+			},
+		},
+	}
+	got := JSONTreeToDiagnostics(in)
+	want := []*Diagnostic{
+		{PackageID: "pkg1", AnalyzerName: "a", Category: "c1", Position: "pos1", Message: "m1"},
+		{PackageID: "pkg1", AnalyzerName: "a", Category: "c2", Position: "pos2", Message: "m2"},
+		{PackageID: "pkg1", AnalyzerName: "b", Category: "c3", Position: "pos3", Message: "m3"},
+		{PackageID: "pkg2", AnalyzerName: "c", Error: "fail"},
+	}
+	if diff := cmp.Diff(want, got); diff != "" {
+		t.Errorf("mismatch (-want, +got)\n%s", diff)
+	}
+
+}

diff --git a/internal/bigquery/analysis.go b/internal/bigquery/analysis.go
deleted file mode 100644
index 6b796aa..0000000
--- a/internal/bigquery/analysis.go
+++ /dev/null

@@ -1,106 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bigquery
-
-import (
-	"context"
-	"time"
-
-	bq "cloud.google.com/go/bigquery"
-	"golang.org/x/pkgsite-metrics/internal/derrors"
-)
-
-const AnalysisTableName = "analysis"
-
-// Note: before modifying AnalysisResult, make sure the change
-// is a valid schema modification.
-// The only supported changes are:
-//   - adding a nullable or repeated column
-//   - dropping a column
-//   - changing a column from required to nullable.
-// See https://cloud.google.com/bigquery/docs/managing-table-schemas for details.
-
-// AnalysisResult is a row in the BigQuery analysis table. It corresponds to a
-// result from the output for an analysis.
-type AnalysisResult struct {
-	CreatedAt   time.Time `bigquery:"created_at"`
-	ModulePath  string    `bigquery:"module_path"`
-	Version     string    `bigquery:"version"`
-	SortVersion string    `bigquery:"sort_version"`
-	CommitTime  time.Time `bigquery:"commit_time"`
-	// The name of the analysis binary that was executed.
-	// A single binary may run multiple analyzers.
-	BinaryName          string `bigquery:"binary_name"`
-	Error               string `bigquery:"error"`
-	ErrorCategory       string `bigquery:"error_category"`
-	AnalysisWorkVersion        // InferSchema flattens embedded fields
-
-	Diagnostics []*Diagnostic `bigquery:"diagnostic"`
-}
-
-func (r *AnalysisResult) AddError(err error) {
-	if err == nil {
-		return
-	}
-	r.Error = err.Error()
-	r.ErrorCategory = derrors.CategorizeError(err)
-}
-
-// AnalysisWorkVersion contains information that can be used to avoid duplicate work.
-// Given two AnalysisWorkVersion values v1 and v2 for the same module path and version,
-// if v1 == v2 then it is not necessary to scan the module.
-type AnalysisWorkVersion struct {
-	// A hash of the  binary executed.
-	BinaryVersion string `bigquery:"binary_version"`
-	BinaryArgs    string `bigquery:"binary_args"` // args passed to binary
-	// The version of the currently running code. This tracks changes in the
-	// logic of module scanning and processing.
-	WorkerVersion string `bigquery:"worker_version"`
-	// The version of the bigquery schema.
-	SchemaVersion string ` bigquery:"schema_version"`
-}
-
-// A Diagnostic is a single analyzer finding.
-type Diagnostic struct {
-	// The package ID as reported by the analysis binary.
-	PackageID    string `bigquery:"package_id"`
-	AnalyzerName string `bigquery:"analyzer_name"`
-	Error        string `bigquery:"error"`
-	// These fields are from internal/worker.JSONDiagnostic.
-	Category string `bigquery:"category"`
-	Position string `bigquery:"position"`
-	Message  string `bigquery:"message"`
-}
-
-// AnalysisSchemaVersion changes whenever the analysis schema changes.
-var AnalysisSchemaVersion string
-
-func init() {
-	s, err := bq.InferSchema(AnalysisResult{})
-	if err != nil {
-		panic(err)
-	}
-	AnalysisSchemaVersion = schemaVersion(s)
-	addTable(AnalysisTableName, s)
-}
-
-// ReadAnalysisWorkVersions reads the most recent WorkVersions in the analysis table.
-func ReadAnalysisWorkVersions(ctx context.Context, c *Client) (_ map[[2]string]*AnalysisWorkVersion, err error) {
-	defer derrors.Wrap(&err, "ReadAnalysisWorkVersions")
-	m := map[[2]string]*AnalysisWorkVersion{}
-	query := partitionQuery(c.FullTableName(AnalysisTableName), "module_path, sort_version", "created_at DESC")
-	iter, err := c.Query(ctx, query)
-	if err != nil {
-		return nil, err
-	}
-	err = ForEachRow(iter, func(r *AnalysisResult) bool {
-		m[[2]string{r.ModulePath, r.Version}] = &r.AnalysisWorkVersion
-		return true
-	})
-	if err != nil {
-		return nil, err
-	}
-	return m, nil
-}

diff --git a/internal/bigquery/bigquery.go b/internal/bigquery/bigquery.go
index b87fdea..f09e0fe 100644
--- a/internal/bigquery/bigquery.go
+++ b/internal/bigquery/bigquery.go

@@ -239,9 +239,9 @@
 	return bq.NullTime{Time: civil.TimeOf(t), Valid: true}
 }
 
-// schemaVersion computes a relatively short string from a schema, such that
+// SchemaVersion computes a relatively short string from a schema, such that
 // different schemas result in different strings with high probability.
-func schemaVersion(schema bq.Schema) string {
+func SchemaVersion(schema bq.Schema) string {
 	hash := sha256.Sum256([]byte(schemaString(schema)))
 	return hex.EncodeToString(hash[:])
 }
@@ -275,7 +275,7 @@
 	tables  = map[string]bq.Schema{}
 )
 
-func addTable(tableID string, s bq.Schema) {
+func AddTable(tableID string, s bq.Schema) {
 	tableMu.Lock()
 	defer tableMu.Unlock()
 	tables[tableID] = s
@@ -298,7 +298,7 @@
 	return tableIDs
 }
 
-// partitionQuery returns a query that returns one row for each distinct value
+// PartitionQuery returns a query that returns one row for each distinct value
 // of partitionColumn in tableName.
 // The selected row will be the first one according to the orderings, which
 // should be comma-separated ORDER BY clauses.
@@ -313,7 +313,7 @@
 //
 // (BigQuery SQL has no DISTINCT ON feature and doesn't allow columns of type RECORD
 // in queries with DISTINCT, so we have to take this approach.)
-func partitionQuery(tableName, partitionColumn, orderings string) string {
+func PartitionQuery(tableName, partitionColumn, orderings string) string {
 	// This query first organizes the table rows into windows that have the same partitionColumn.
 	// The rows in each window are sorted by the given orderings.
 	// They are then assigned numbers, where 1 is the first row in the window.
@@ -334,3 +334,7 @@
 
 	return fmt.Sprintf(qf, partitionColumn, orderings, "`"+tableName+"`")
 }
+
+// InferSchema is a copy of bq.InferSchema, so users don't have to import
+// cloud.google.com/go/bigquery just to get it.
+var InferSchema = bq.InferSchema

diff --git a/internal/bigquery/bigquery_test.go b/internal/bigquery/bigquery_test.go
index 0715f43..908ac14 100644
--- a/internal/bigquery/bigquery_test.go
+++ b/internal/bigquery/bigquery_test.go

@@ -106,7 +106,7 @@
 
 	t.Run("latest", func(t *testing.T) {
 		latestTableID := VulncheckTableName + "-latest"
-		addTable(latestTableID, tableSchema(VulncheckTableName))
+		AddTable(latestTableID, tableSchema(VulncheckTableName))
 		must(client.CreateTable(ctx, latestTableID))
 		defer func() { must(client.Table(latestTableID).Delete(ctx)) }()
 
@@ -167,7 +167,7 @@
 
 		// Test InsertVulncheckResults
 		reportTableID := latestTableID + "-report"
-		addTable(reportTableID, tableSchema(VulncheckTableName+"-report"))
+		AddTable(reportTableID, tableSchema(VulncheckTableName+"-report"))
 		reportTable := client.dataset.Table(reportTableID)
 		// Table is created by InsertVulncheckResults.
 		defer func() { must(reportTable.Delete(ctx)) }()

diff --git a/internal/bigquery/vulncheck.go b/internal/bigquery/vulncheck.go
index a583d75..51b58ac 100644
--- a/internal/bigquery/vulncheck.go
+++ b/internal/bigquery/vulncheck.go

@@ -108,15 +108,15 @@
 	if err != nil {
 		panic(err)
 	}
-	VulncheckSchemaVersion = schemaVersion(s)
-	addTable(VulncheckTableName, s)
+	VulncheckSchemaVersion = SchemaVersion(s)
+	AddTable(VulncheckTableName, s)
 }
 
 // ReadVulncheckWorkVersions reads the most recent WorkVersions in the vulncheck table.
 func ReadVulncheckWorkVersions(ctx context.Context, c *Client) (_ map[[2]string]*VulncheckWorkVersion, err error) {
 	defer derrors.Wrap(&err, "ReadVulncheckWorkVersions")
 	m := map[[2]string]*VulncheckWorkVersion{}
-	query := partitionQuery(c.FullTableName(VulncheckTableName), "module_path, sort_version", "created_at DESC")
+	query := PartitionQuery(c.FullTableName(VulncheckTableName), "module_path, sort_version", "created_at DESC")
 	iter, err := c.Query(ctx, query)
 	if err != nil {
 		return nil, err
@@ -151,7 +151,7 @@
 
 func fetchVulncheckResults(ctx context.Context, c *Client, tableName string) (rows []*VulnResult, err error) {
 	name := c.FullTableName(tableName)
-	query := partitionQuery(name, "module_path, scan_mode", orderByClauses)
+	query := PartitionQuery(name, "module_path, scan_mode", orderByClauses)
 	log.Infof(ctx, "running latest query on %s", name)
 	iter, err := c.Query(ctx, query)
 	if err != nil {
@@ -189,7 +189,7 @@
 	if err != nil {
 		panic(err)
 	}
-	addTable(VulncheckTableName+"-report", s)
+	AddTable(VulncheckTableName+"-report", s)
 }
 
 func InsertVulncheckResults(ctx context.Context, c *Client, results []*VulnResult, date civil.Date, allowDuplicates bool) (err error) {

diff --git a/internal/bigquery/vulndb_requests.go b/internal/bigquery/vulndb_requests.go
index b9bc955..6b1d583 100644
--- a/internal/bigquery/vulndb_requests.go
+++ b/internal/bigquery/vulndb_requests.go

@@ -33,7 +33,7 @@
 	if err != nil {
 		panic(err)
 	}
-	addTable(VulnDBRequestTableName, s)
+	AddTable(VulnDBRequestTableName, s)
 }
 
 // SetUploadTime is used by Client.Upload.
@@ -65,7 +65,7 @@
 func readVulnDBRequestCounts(ctx context.Context, c *Client) (_ []*VulnDBRequestCount, err error) {
 	// Select the most recently inserted row for each date.
 	q := fmt.Sprintf("(%s) ORDER BY date DESC",
-		partitionQuery(c.FullTableName(VulnDBRequestTableName), "date", "inserted_at DESC"))
+		PartitionQuery(c.FullTableName(VulnDBRequestTableName), "date", "inserted_at DESC"))
 	iter, err := c.Query(ctx, q)
 	if err != nil {
 		return nil, err

diff --git a/internal/worker/analysis.go b/internal/worker/analysis.go
index f4fd42a..a78f1b3 100644
--- a/internal/worker/analysis.go
+++ b/internal/worker/analysis.go

@@ -17,18 +17,14 @@
 	"os/exec"
 	"path"
 	"path/filepath"
-	"sort"
 	"strings"
 
 	"cloud.google.com/go/storage"
-	"golang.org/x/exp/maps"
-	"golang.org/x/pkgsite-metrics/internal/bigquery"
+	"golang.org/x/pkgsite-metrics/internal/analysis"
 	"golang.org/x/pkgsite-metrics/internal/derrors"
 	"golang.org/x/pkgsite-metrics/internal/log"
 	"golang.org/x/pkgsite-metrics/internal/modules"
-	"golang.org/x/pkgsite-metrics/internal/queue"
 	"golang.org/x/pkgsite-metrics/internal/sandbox"
-	"golang.org/x/pkgsite-metrics/internal/scan"
 	"golang.org/x/pkgsite-metrics/internal/version"
 )
 
@@ -36,53 +32,13 @@
 	*Server
 }
 
-type analysisRequest struct {
-	scan.ModuleURLPath
-	analysisParams
-}
-
-// analysisRequest implements queue.Task so it can be put on a TaskQueue.
-var _ queue.Task = (*analysisRequest)(nil)
-
-type analysisParams struct {
-	Binary     string // name of analysis binary to run
-	Args       string // command-line arguments to binary; split on whitespace
-	ImportedBy int    // imported-by count of module in path
-	Insecure   bool   // if true, run outside sandbox
-	Serve      bool   // serve results back to client instead of writing them to BigQuery
-}
-
-func (r *analysisRequest) Name() string { return r.Binary + "_" + r.Module + "@" + r.Version }
-
-func (r *analysisRequest) Path() string { return r.ModuleURLPath.Path() }
-
-func (r *analysisRequest) Params() string {
-	return scan.FormatParams(r.analysisParams)
-}
-
-func parseAnalysisRequest(r *http.Request, prefix string) (*analysisRequest, error) {
-	mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix))
-	if err != nil {
-		return nil, err
-	}
-
-	ap := analysisParams{}
-	if err := scan.ParseParams(r, &ap); err != nil {
-		return nil, err
-	}
-	return &analysisRequest{
-		ModuleURLPath:  mp,
-		analysisParams: ap,
-	}, nil
-}
-
 const analysisBinariesBucketDir = "analysis-binaries"
 
 func (s *analysisServer) handleScan(w http.ResponseWriter, r *http.Request) (err error) {
 	defer derrors.Wrap(&err, "analysisServer.handleScan")
 
 	ctx := r.Context()
-	req, err := parseAnalysisRequest(r, "/analysis/scan")
+	req, err := analysis.ParseRequest(r, "/analysis/scan")
 	if err != nil {
 		return fmt.Errorf("%w: %v", derrors.InvalidArgument, err)
 	}
@@ -104,7 +60,7 @@
 
 const sandboxRoot = "/bundle/rootfs"
 
-func (s *analysisServer) scan(ctx context.Context, req *analysisRequest) (_ JSONTree, binaryHash []byte, err error) {
+func (s *analysisServer) scan(ctx context.Context, req *analysis.Request) (_ analysis.JSONTree, binaryHash []byte, err error) {
 	if req.Binary == "" {
 		return nil, nil, fmt.Errorf("%w: analysis: missing binary", derrors.InvalidArgument)
 	}
@@ -200,7 +156,7 @@
 }
 
 // Run the binary on the module.
-func runAnalysisBinary(sbox *sandbox.Sandbox, binaryPath, reqArgs, moduleDir string) (JSONTree, error) {
+func runAnalysisBinary(sbox *sandbox.Sandbox, binaryPath, reqArgs, moduleDir string) (analysis.JSONTree, error) {
 	args := []string{"-json"}
 	args = append(args, strings.Fields(reqArgs)...)
 	args = append(args, "./...")
@@ -208,7 +164,7 @@
 	if err != nil {
 		return nil, fmt.Errorf("running analysis binary %s: %s", binaryPath, derrors.IncludeStderr(err))
 	}
-	var tree JSONTree
+	var tree analysis.JSONTree
 	if err := json.Unmarshal(out, &tree); err != nil {
 		return nil, err
 	}
@@ -226,68 +182,16 @@
 	return cmd.Output()
 }
 
-type diagnosticsOrError struct {
-	Diagnostics []JSONDiagnostic
-	Error       *jsonError
-}
-
-func (de *diagnosticsOrError) UnmarshalJSON(data []byte) error {
-	if err := json.Unmarshal(data, &de.Diagnostics); err == nil {
-		return nil
-	}
-	return json.Unmarshal(data, &de.Error)
-}
-
-////////////////////////////////////////////////////////////////
-
-// These structs were copied, with minor changes, from
-// golang.org/x/tools/go/analysis/internal/analysisflags.
-
-// A JSONTree is a mapping from package ID to analysis name to result.
-// Each result is either a jsonError or a list of JSONDiagnostic.
-type JSONTree map[string]map[string]diagnosticsOrError
-
-// A JSONDiagnostic can be used to encode and decode analysis.Diagnostics to and
-// from JSON.
-type JSONDiagnostic struct {
-	Category       string             `json:"category,omitempty"`
-	Posn           string             `json:"posn"`
-	Message        string             `json:"message"`
-	SuggestedFixes []JSONSuggestedFix `json:"suggested_fixes,omitempty"`
-}
-
-// A JSONSuggestedFix describes an edit that should be applied as a whole or not
-// at all. It might contain multiple TextEdits/text_edits if the SuggestedFix
-// consists of multiple non-contiguous edits.
-type JSONSuggestedFix struct {
-	Message string         `json:"message"`
-	Edits   []JSONTextEdit `json:"edits"`
-}
-
-// A TextEdit describes the replacement of a portion of a file.
-// Start and End are zero-based half-open indices into the original byte
-// sequence of the file, and New is the new text.
-type JSONTextEdit struct {
-	Filename string `json:"filename"`
-	Start    int    `json:"start"`
-	End      int    `json:"end"`
-	New      string `json:"new"`
-}
-
-type jsonError struct {
-	Err string `json:"error"`
-}
-
-func (s *analysisServer) writeToBigQuery(ctx context.Context, req *analysisRequest, jsonTree JSONTree, binaryHash []byte) (err error) {
+func (s *analysisServer) writeToBigQuery(ctx context.Context, req *analysis.Request, jsonTree analysis.JSONTree, binaryHash []byte) (err error) {
 	defer derrors.Wrap(&err, "analysisServer.writeToBigQuery(%q, %q)", req.Module, req.Version)
-	row := &bigquery.AnalysisResult{
+	row := &analysis.Result{
 		ModulePath: req.Module,
 		BinaryName: req.Binary,
-		AnalysisWorkVersion: bigquery.AnalysisWorkVersion{
+		WorkVersion: analysis.WorkVersion{
 			BinaryVersion: hex.EncodeToString(binaryHash),
 			BinaryArgs:    req.Args,
 			WorkerVersion: s.cfg.VersionID,
-			SchemaVersion: bigquery.AnalysisSchemaVersion,
+			SchemaVersion: analysis.SchemaVersion,
 		},
 	}
 	info, err := s.proxyClient.Info(ctx, req.Module, req.Version)
@@ -300,12 +204,12 @@
 	row.SortVersion = version.ForSorting(row.Version)
 	row.CommitTime = info.Time
 
-	row.Diagnostics = jsonTreeToDiagnostics(jsonTree)
+	row.Diagnostics = analysis.JSONTreeToDiagnostics(jsonTree)
 	if s.bqClient == nil {
 		log.Infof(ctx, "bigquery disabled, not uploading")
 	} else {
 		log.Infof(ctx, "uploading to bigquery: %s", req.Path())
-		if err := s.bqClient.Upload(ctx, bigquery.AnalysisTableName, row); err != nil {
+		if err := s.bqClient.Upload(ctx, analysis.TableName, row); err != nil {
 			// This is often caused by:
 			// "Upload: googleapi: got HTTP response code 413 with body"
 			// which happens for some modules.
@@ -315,38 +219,3 @@
 	}
 	return nil
 }
-
-// jsonTreeToDiagnostics converts a jsonTree to a list of diagnostics for BigQuery.
-// It ignores the suggested fixes of the diagnostics.
-func jsonTreeToDiagnostics(jsonTree JSONTree) []*bigquery.Diagnostic {
-	var diags []*bigquery.Diagnostic
-	// Sort for determinism.
-	pkgIDs := maps.Keys(jsonTree)
-	sort.Strings(pkgIDs)
-	for _, pkgID := range pkgIDs {
-		amap := jsonTree[pkgID]
-		aNames := maps.Keys(amap)
-		sort.Strings(aNames)
-		for _, aName := range aNames {
-			diagsOrErr := amap[aName]
-			if diagsOrErr.Error != nil {
-				diags = append(diags, &bigquery.Diagnostic{
-					PackageID:    pkgID,
-					AnalyzerName: aName,
-					Error:        diagsOrErr.Error.Err,
-				})
-			} else {
-				for _, jd := range diagsOrErr.Diagnostics {
-					diags = append(diags, &bigquery.Diagnostic{
-						PackageID:    pkgID,
-						AnalyzerName: aName,
-						Category:     jd.Category,
-						Position:     jd.Posn,
-						Message:      jd.Message,
-					})
-				}
-			}
-		}
-	}
-	return diags
-}

diff --git a/internal/worker/analysis_test.go b/internal/worker/analysis_test.go
index a9ab991..c05f389 100644
--- a/internal/worker/analysis_test.go
+++ b/internal/worker/analysis_test.go

@@ -9,7 +9,7 @@
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
-	"golang.org/x/pkgsite-metrics/internal/bigquery"
+	"golang.org/x/pkgsite-metrics/internal/analysis"
 	"golang.org/x/pkgsite-metrics/internal/buildtest"
 )
 
@@ -22,17 +22,17 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	want := JSONTree{
-		"test_module": map[string]diagnosticsOrError{
-			"findcall": diagnosticsOrError{
-				Diagnostics: []JSONDiagnostic{
+	want := analysis.JSONTree{
+		"test_module": map[string]analysis.DiagnosticsOrError{
+			"findcall": analysis.DiagnosticsOrError{
+				Diagnostics: []analysis.JSONDiagnostic{
 					{
 						Posn:    "a.go:7:17",
 						Message: "call of Fact(...)",
-						SuggestedFixes: []JSONSuggestedFix{
+						SuggestedFixes: []analysis.JSONSuggestedFix{
 							{
 								Message: "Add '_TEST_'",
-								Edits: []JSONTextEdit{{
+								Edits: []analysis.JSONTextEdit{{
 									Filename: "a.go",
 									Start:    77,
 									End:      77,
@@ -55,35 +55,3 @@
 		t.Errorf("mismatch (-want, +got):\n%s", diff)
 	}
 }
-
-func TestJSONTreeToDiagnostics(t *testing.T) {
-	in := JSONTree{
-		"pkg1": {
-			"a": {
-				Diagnostics: []JSONDiagnostic{
-					{Category: "c1", Posn: "pos1", Message: "m1"},
-					{Category: "c2", Posn: "pos2", Message: "m2"},
-				},
-			},
-			"b": {
-				Diagnostics: []JSONDiagnostic{{Category: "c3", Posn: "pos3", Message: "m3"}},
-			},
-		},
-		"pkg2": {
-			"c": {
-				Error: &jsonError{Err: "fail"},
-			},
-		},
-	}
-	got := jsonTreeToDiagnostics(in)
-	want := []*bigquery.Diagnostic{
-		{PackageID: "pkg1", AnalyzerName: "a", Category: "c1", Position: "pos1", Message: "m1"},
-		{PackageID: "pkg1", AnalyzerName: "a", Category: "c2", Position: "pos2", Message: "m2"},
-		{PackageID: "pkg1", AnalyzerName: "b", Category: "c3", Position: "pos3", Message: "m3"},
-		{PackageID: "pkg2", AnalyzerName: "c", Error: "fail"},
-	}
-	if diff := cmp.Diff(want, got); diff != "" {
-		t.Errorf("mismatch (-want, +got)\n%s", diff)
-	}
-
-}
commit	d4b0c410af5dfd03d74aac8bb038bc0ef743e810	[log] [tgz]
author	Jonathan Amsterdam <jba@google.com>	Wed Mar 01 16:22:30 2023 -0500
committer	Jonathan Amsterdam <jba@google.com>	Fri Mar 03 14:07:21 2023 +0000
tree	04eb17e0d23f7728af535f13507e9e7eabe895fb
parent	d62bba630cc68cb63c3f279ad02d0139da72948f [diff]