internal/analysis: analysis data structures
Move all the data structures and related functions for analysis,
including HTTP request data, analysis binary results, and BigQuery
structs, into a single package.
I think this makes the structure of the whole analysis part of
the system easier to understand.
It also moves internal/bigquery towards being a general helper
for BigQuery, with no application-specific code. I think that's
a good thing.
I plan to do this same refactoring for vulncheck once the code
there settles down a bit more.
Change-Id: I88eac58572ea397ab5e2ee114b795b82b656655b
Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/472657
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/internal/analysis/analysis.go b/internal/analysis/analysis.go
new file mode 100644
index 0000000..80824f4
--- /dev/null
+++ b/internal/analysis/analysis.go
@@ -0,0 +1,240 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package analysis
+
+import (
+ "context"
+ "encoding/json"
+ "net/http"
+ "sort"
+ "strings"
+ "time"
+
+ "golang.org/x/exp/maps"
+ "golang.org/x/pkgsite-metrics/internal/bigquery"
+ "golang.org/x/pkgsite-metrics/internal/derrors"
+ "golang.org/x/pkgsite-metrics/internal/queue"
+ "golang.org/x/pkgsite-metrics/internal/scan"
+)
+
+type Request struct {
+ scan.ModuleURLPath
+ QueryParams
+}
+
+type QueryParams struct {
+ Binary string // name of analysis binary to run
+ Args string // command-line arguments to binary; split on whitespace
+ ImportedBy int // imported-by count of module in path
+ Insecure bool // if true, run outside sandbox
+ Serve bool // serve results back to client instead of writing them to BigQuery
+}
+
+// Request implements queue.Task so it can be put on a TaskQueue.
+var _ queue.Task = (*Request)(nil)
+
+func (r *Request) Name() string { return r.Binary + "_" + r.Module + "@" + r.Version }
+
+func (r *Request) Path() string { return r.ModuleURLPath.Path() }
+
+func (r *Request) Params() string {
+ return scan.FormatParams(r.QueryParams)
+}
+
+func ParseRequest(r *http.Request, prefix string) (*Request, error) {
+ mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix))
+ if err != nil {
+ return nil, err
+ }
+
+ ap := QueryParams{}
+ if err := scan.ParseParams(r, &ap); err != nil {
+ return nil, err
+ }
+ return &Request{
+ ModuleURLPath: mp,
+ QueryParams: ap,
+ }, nil
+}
+
+// These structs were copied, with minor changes, from
+// golang.org/x/tools/go/analysis/internal/analysisflags.
+
+// A JSONTree is a mapping from package ID to analysis name to result.
+// Each result is either a jsonError or a list of JSONDiagnostic.
+type JSONTree map[string]map[string]DiagnosticsOrError
+
+// A JSONDiagnostic can be used to encode and decode analysis.Diagnostics to and
+// from JSON.
+type JSONDiagnostic struct {
+ Category string `json:"category,omitempty"`
+ Posn string `json:"posn"`
+ Message string `json:"message"`
+ SuggestedFixes []JSONSuggestedFix `json:"suggested_fixes,omitempty"`
+}
+
+// A JSONSuggestedFix describes an edit that should be applied as a whole or not
+// at all. It might contain multiple TextEdits/text_edits if the SuggestedFix
+// consists of multiple non-contiguous edits.
+type JSONSuggestedFix struct {
+ Message string `json:"message"`
+ Edits []JSONTextEdit `json:"edits"`
+}
+
+// A TextEdit describes the replacement of a portion of a file.
+// Start and End are zero-based half-open indices into the original byte
+// sequence of the file, and New is the new text.
+type JSONTextEdit struct {
+ Filename string `json:"filename"`
+ Start int `json:"start"`
+ End int `json:"end"`
+ New string `json:"new"`
+}
+
+type jsonError struct {
+ Err string `json:"error"`
+}
+
+type DiagnosticsOrError struct {
+ Diagnostics []JSONDiagnostic
+ Error *jsonError
+}
+
+func (de *DiagnosticsOrError) UnmarshalJSON(data []byte) error {
+ if err := json.Unmarshal(data, &de.Diagnostics); err == nil {
+ return nil
+ }
+ return json.Unmarshal(data, &de.Error)
+}
+
+// Definitions for BigQuery.
+
+const TableName = "analysis"
+
+// Note: before modifying Result, make sure the change
+// is a valid schema modification.
+// The only supported changes are:
+// - adding a nullable or repeated column
+// - dropping a column
+// - changing a column from required to nullable.
+// See https://cloud.google.com/bigquery/docs/managing-table-schemas for details.
+
+// Result is a row in the BigQuery analysis table. It corresponds to a
+// single result from the output of an analysis.
+type Result struct {
+ CreatedAt time.Time `bigquery:"created_at"`
+ ModulePath string `bigquery:"module_path"`
+ Version string `bigquery:"version"`
+ SortVersion string `bigquery:"sort_version"`
+ CommitTime time.Time `bigquery:"commit_time"`
+ // The name of the analysis binary that was executed.
+ // A single binary may run multiple analyzers.
+ BinaryName string `bigquery:"binary_name"`
+ Error string `bigquery:"error"`
+ ErrorCategory string `bigquery:"error_category"`
+ WorkVersion // InferSchema flattens embedded fields
+
+ Diagnostics []*Diagnostic `bigquery:"diagnostic"`
+}
+
+func (r *Result) AddError(err error) {
+ if err == nil {
+ return
+ }
+ r.Error = err.Error()
+ r.ErrorCategory = derrors.CategorizeError(err)
+}
+
+// WorkVersion contains information that can be used to avoid duplicate work.
+// Given two WorkVersion values v1 and v2 for the same module path and version,
+// if v1 == v2 then it is not necessary to scan the module.
+type WorkVersion struct {
+ // A hash of the binary executed.
+ BinaryVersion string `bigquery:"binary_version"`
+ BinaryArgs string `bigquery:"binary_args"` // args passed to binary
+ // The version of the currently running code. This tracks changes in the
+ // logic of module scanning and processing.
+ WorkerVersion string `bigquery:"worker_version"`
+ // The version of the bigquery schema.
+ SchemaVersion string `bigquery:"schema_version"`
+}
+
+// A Diagnostic is a single analyzer finding.
+type Diagnostic struct {
+ // The package ID as reported by the analysis binary.
+ PackageID string `bigquery:"package_id"`
+ AnalyzerName string `bigquery:"analyzer_name"`
+ Error string `bigquery:"error"`
+ // These fields are from JSONDiagnostic, defined in this package.
+ Category string `bigquery:"category"`
+ Position string `bigquery:"position"`
+ Message string `bigquery:"message"`
+}
+
+// SchemaVersion changes whenever the analysis schema changes.
+var SchemaVersion string
+
+func init() {
+ s, err := bigquery.InferSchema(Result{})
+ if err != nil {
+ panic(err)
+ }
+ SchemaVersion = bigquery.SchemaVersion(s)
+ bigquery.AddTable(TableName, s)
+}
+
+// ReadWorkVersions reads the most recent WorkVersions in the analysis table.
+func ReadWorkVersions(ctx context.Context, c *bigquery.Client) (_ map[[2]string]*WorkVersion, err error) {
+ defer derrors.Wrap(&err, "ReadWorkVersions")
+ m := map[[2]string]*WorkVersion{}
+ query := bigquery.PartitionQuery(c.FullTableName(TableName), "module_path, sort_version", "created_at DESC")
+ iter, err := c.Query(ctx, query)
+ if err != nil {
+ return nil, err
+ }
+ err = bigquery.ForEachRow(iter, func(r *Result) bool {
+ m[[2]string{r.ModulePath, r.Version}] = &r.WorkVersion
+ return true
+ })
+ if err != nil {
+ return nil, err
+ }
+ return m, nil
+}
+
+// JSONTreeToDiagnostics converts a JSONTree to a list of diagnostics for BigQuery.
+// It ignores the suggested fixes of the diagnostics.
+func JSONTreeToDiagnostics(jsonTree JSONTree) []*Diagnostic {
+ var diags []*Diagnostic
+ // Sort for determinism.
+ pkgIDs := maps.Keys(jsonTree)
+ sort.Strings(pkgIDs)
+ for _, pkgID := range pkgIDs {
+ amap := jsonTree[pkgID]
+ aNames := maps.Keys(amap)
+ sort.Strings(aNames)
+ for _, aName := range aNames {
+ diagsOrErr := amap[aName]
+ if diagsOrErr.Error != nil {
+ diags = append(diags, &Diagnostic{
+ PackageID: pkgID,
+ AnalyzerName: aName,
+ Error: diagsOrErr.Error.Err,
+ })
+ } else {
+ for _, jd := range diagsOrErr.Diagnostics {
+ diags = append(diags, &Diagnostic{
+ PackageID: pkgID,
+ AnalyzerName: aName,
+ Category: jd.Category,
+ Position: jd.Posn,
+ Message: jd.Message,
+ })
+ }
+ }
+ }
+ }
+ return diags
+}
diff --git a/internal/analysis/analysis_test.go b/internal/analysis/analysis_test.go
new file mode 100644
index 0000000..dd4dcdc
--- /dev/null
+++ b/internal/analysis/analysis_test.go
@@ -0,0 +1,43 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package analysis
+
+import (
+ "testing"
+
+ "github.com/google/go-cmp/cmp"
+)
+
+func TestJSONTreeToDiagnostics(t *testing.T) {
+ in := JSONTree{
+ "pkg1": {
+ "a": {
+ Diagnostics: []JSONDiagnostic{
+ {Category: "c1", Posn: "pos1", Message: "m1"},
+ {Category: "c2", Posn: "pos2", Message: "m2"},
+ },
+ },
+ "b": {
+ Diagnostics: []JSONDiagnostic{{Category: "c3", Posn: "pos3", Message: "m3"}},
+ },
+ },
+ "pkg2": {
+ "c": {
+ Error: &jsonError{Err: "fail"},
+ },
+ },
+ }
+ got := JSONTreeToDiagnostics(in)
+ want := []*Diagnostic{
+ {PackageID: "pkg1", AnalyzerName: "a", Category: "c1", Position: "pos1", Message: "m1"},
+ {PackageID: "pkg1", AnalyzerName: "a", Category: "c2", Position: "pos2", Message: "m2"},
+ {PackageID: "pkg1", AnalyzerName: "b", Category: "c3", Position: "pos3", Message: "m3"},
+ {PackageID: "pkg2", AnalyzerName: "c", Error: "fail"},
+ }
+ if diff := cmp.Diff(want, got); diff != "" {
+ t.Errorf("mismatch (-want, +got)\n%s", diff)
+ }
+
+}
diff --git a/internal/bigquery/analysis.go b/internal/bigquery/analysis.go
deleted file mode 100644
index 6b796aa..0000000
--- a/internal/bigquery/analysis.go
+++ /dev/null
@@ -1,106 +0,0 @@
-// Copyright 2023 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package bigquery
-
-import (
- "context"
- "time"
-
- bq "cloud.google.com/go/bigquery"
- "golang.org/x/pkgsite-metrics/internal/derrors"
-)
-
-const AnalysisTableName = "analysis"
-
-// Note: before modifying AnalysisResult, make sure the change
-// is a valid schema modification.
-// The only supported changes are:
-// - adding a nullable or repeated column
-// - dropping a column
-// - changing a column from required to nullable.
-// See https://cloud.google.com/bigquery/docs/managing-table-schemas for details.
-
-// AnalysisResult is a row in the BigQuery analysis table. It corresponds to a
-// result from the output for an analysis.
-type AnalysisResult struct {
- CreatedAt time.Time `bigquery:"created_at"`
- ModulePath string `bigquery:"module_path"`
- Version string `bigquery:"version"`
- SortVersion string `bigquery:"sort_version"`
- CommitTime time.Time `bigquery:"commit_time"`
- // The name of the analysis binary that was executed.
- // A single binary may run multiple analyzers.
- BinaryName string `bigquery:"binary_name"`
- Error string `bigquery:"error"`
- ErrorCategory string `bigquery:"error_category"`
- AnalysisWorkVersion // InferSchema flattens embedded fields
-
- Diagnostics []*Diagnostic `bigquery:"diagnostic"`
-}
-
-func (r *AnalysisResult) AddError(err error) {
- if err == nil {
- return
- }
- r.Error = err.Error()
- r.ErrorCategory = derrors.CategorizeError(err)
-}
-
-// AnalysisWorkVersion contains information that can be used to avoid duplicate work.
-// Given two AnalysisWorkVersion values v1 and v2 for the same module path and version,
-// if v1 == v2 then it is not necessary to scan the module.
-type AnalysisWorkVersion struct {
- // A hash of the binary executed.
- BinaryVersion string `bigquery:"binary_version"`
- BinaryArgs string `bigquery:"binary_args"` // args passed to binary
- // The version of the currently running code. This tracks changes in the
- // logic of module scanning and processing.
- WorkerVersion string `bigquery:"worker_version"`
- // The version of the bigquery schema.
- SchemaVersion string ` bigquery:"schema_version"`
-}
-
-// A Diagnostic is a single analyzer finding.
-type Diagnostic struct {
- // The package ID as reported by the analysis binary.
- PackageID string `bigquery:"package_id"`
- AnalyzerName string `bigquery:"analyzer_name"`
- Error string `bigquery:"error"`
- // These fields are from internal/worker.JSONDiagnostic.
- Category string `bigquery:"category"`
- Position string `bigquery:"position"`
- Message string `bigquery:"message"`
-}
-
-// AnalysisSchemaVersion changes whenever the analysis schema changes.
-var AnalysisSchemaVersion string
-
-func init() {
- s, err := bq.InferSchema(AnalysisResult{})
- if err != nil {
- panic(err)
- }
- AnalysisSchemaVersion = schemaVersion(s)
- addTable(AnalysisTableName, s)
-}
-
-// ReadAnalysisWorkVersions reads the most recent WorkVersions in the analysis table.
-func ReadAnalysisWorkVersions(ctx context.Context, c *Client) (_ map[[2]string]*AnalysisWorkVersion, err error) {
- defer derrors.Wrap(&err, "ReadAnalysisWorkVersions")
- m := map[[2]string]*AnalysisWorkVersion{}
- query := partitionQuery(c.FullTableName(AnalysisTableName), "module_path, sort_version", "created_at DESC")
- iter, err := c.Query(ctx, query)
- if err != nil {
- return nil, err
- }
- err = ForEachRow(iter, func(r *AnalysisResult) bool {
- m[[2]string{r.ModulePath, r.Version}] = &r.AnalysisWorkVersion
- return true
- })
- if err != nil {
- return nil, err
- }
- return m, nil
-}
diff --git a/internal/bigquery/bigquery.go b/internal/bigquery/bigquery.go
index b87fdea..f09e0fe 100644
--- a/internal/bigquery/bigquery.go
+++ b/internal/bigquery/bigquery.go
@@ -239,9 +239,9 @@
return bq.NullTime{Time: civil.TimeOf(t), Valid: true}
}
-// schemaVersion computes a relatively short string from a schema, such that
+// SchemaVersion computes a relatively short string from a schema, such that
// different schemas result in different strings with high probability.
-func schemaVersion(schema bq.Schema) string {
+func SchemaVersion(schema bq.Schema) string {
hash := sha256.Sum256([]byte(schemaString(schema)))
return hex.EncodeToString(hash[:])
}
@@ -275,7 +275,7 @@
tables = map[string]bq.Schema{}
)
-func addTable(tableID string, s bq.Schema) {
+func AddTable(tableID string, s bq.Schema) {
tableMu.Lock()
defer tableMu.Unlock()
tables[tableID] = s
@@ -298,7 +298,7 @@
return tableIDs
}
-// partitionQuery returns a query that returns one row for each distinct value
+// PartitionQuery returns a query that returns one row for each distinct value
// of partitionColumn in tableName.
// The selected row will be the first one according to the orderings, which
// should be comma-separated ORDER BY clauses.
@@ -313,7 +313,7 @@
//
// (BigQuery SQL has no DISTINCT ON feature and doesn't allow columns of type RECORD
// in queries with DISTINCT, so we have to take this approach.)
-func partitionQuery(tableName, partitionColumn, orderings string) string {
+func PartitionQuery(tableName, partitionColumn, orderings string) string {
// This query first organizes the table rows into windows that have the same partitionColumn.
// The rows in each window are sorted by the given orderings.
// They are then assigned numbers, where 1 is the first row in the window.
@@ -334,3 +334,7 @@
return fmt.Sprintf(qf, partitionColumn, orderings, "`"+tableName+"`")
}
+
+// Copy InferSchema so users don't have to import cloud.google.com/go/bigquery
+// just to get it.
+var InferSchema = bq.InferSchema
diff --git a/internal/bigquery/bigquery_test.go b/internal/bigquery/bigquery_test.go
index 0715f43..908ac14 100644
--- a/internal/bigquery/bigquery_test.go
+++ b/internal/bigquery/bigquery_test.go
@@ -106,7 +106,7 @@
t.Run("latest", func(t *testing.T) {
latestTableID := VulncheckTableName + "-latest"
- addTable(latestTableID, tableSchema(VulncheckTableName))
+ AddTable(latestTableID, tableSchema(VulncheckTableName))
must(client.CreateTable(ctx, latestTableID))
defer func() { must(client.Table(latestTableID).Delete(ctx)) }()
@@ -167,7 +167,7 @@
// Test InsertVulncheckResults
reportTableID := latestTableID + "-report"
- addTable(reportTableID, tableSchema(VulncheckTableName+"-report"))
+ AddTable(reportTableID, tableSchema(VulncheckTableName+"-report"))
reportTable := client.dataset.Table(reportTableID)
// Table is created by InsertVulncheckResults.
defer func() { must(reportTable.Delete(ctx)) }()
diff --git a/internal/bigquery/vulncheck.go b/internal/bigquery/vulncheck.go
index a583d75..51b58ac 100644
--- a/internal/bigquery/vulncheck.go
+++ b/internal/bigquery/vulncheck.go
@@ -108,15 +108,15 @@
if err != nil {
panic(err)
}
- VulncheckSchemaVersion = schemaVersion(s)
- addTable(VulncheckTableName, s)
+ VulncheckSchemaVersion = SchemaVersion(s)
+ AddTable(VulncheckTableName, s)
}
// ReadVulncheckWorkVersions reads the most recent WorkVersions in the vulncheck table.
func ReadVulncheckWorkVersions(ctx context.Context, c *Client) (_ map[[2]string]*VulncheckWorkVersion, err error) {
defer derrors.Wrap(&err, "ReadVulncheckWorkVersions")
m := map[[2]string]*VulncheckWorkVersion{}
- query := partitionQuery(c.FullTableName(VulncheckTableName), "module_path, sort_version", "created_at DESC")
+ query := PartitionQuery(c.FullTableName(VulncheckTableName), "module_path, sort_version", "created_at DESC")
iter, err := c.Query(ctx, query)
if err != nil {
return nil, err
@@ -151,7 +151,7 @@
func fetchVulncheckResults(ctx context.Context, c *Client, tableName string) (rows []*VulnResult, err error) {
name := c.FullTableName(tableName)
- query := partitionQuery(name, "module_path, scan_mode", orderByClauses)
+ query := PartitionQuery(name, "module_path, scan_mode", orderByClauses)
log.Infof(ctx, "running latest query on %s", name)
iter, err := c.Query(ctx, query)
if err != nil {
@@ -189,7 +189,7 @@
if err != nil {
panic(err)
}
- addTable(VulncheckTableName+"-report", s)
+ AddTable(VulncheckTableName+"-report", s)
}
func InsertVulncheckResults(ctx context.Context, c *Client, results []*VulnResult, date civil.Date, allowDuplicates bool) (err error) {
diff --git a/internal/bigquery/vulndb_requests.go b/internal/bigquery/vulndb_requests.go
index b9bc955..6b1d583 100644
--- a/internal/bigquery/vulndb_requests.go
+++ b/internal/bigquery/vulndb_requests.go
@@ -33,7 +33,7 @@
if err != nil {
panic(err)
}
- addTable(VulnDBRequestTableName, s)
+ AddTable(VulnDBRequestTableName, s)
}
// SetUploadTime is used by Client.Upload.
@@ -65,7 +65,7 @@
func readVulnDBRequestCounts(ctx context.Context, c *Client) (_ []*VulnDBRequestCount, err error) {
// Select the most recently inserted row for each date.
q := fmt.Sprintf("(%s) ORDER BY date DESC",
- partitionQuery(c.FullTableName(VulnDBRequestTableName), "date", "inserted_at DESC"))
+ PartitionQuery(c.FullTableName(VulnDBRequestTableName), "date", "inserted_at DESC"))
iter, err := c.Query(ctx, q)
if err != nil {
return nil, err
diff --git a/internal/worker/analysis.go b/internal/worker/analysis.go
index f4fd42a..a78f1b3 100644
--- a/internal/worker/analysis.go
+++ b/internal/worker/analysis.go
@@ -17,18 +17,14 @@
"os/exec"
"path"
"path/filepath"
- "sort"
"strings"
"cloud.google.com/go/storage"
- "golang.org/x/exp/maps"
- "golang.org/x/pkgsite-metrics/internal/bigquery"
+ "golang.org/x/pkgsite-metrics/internal/analysis"
"golang.org/x/pkgsite-metrics/internal/derrors"
"golang.org/x/pkgsite-metrics/internal/log"
"golang.org/x/pkgsite-metrics/internal/modules"
- "golang.org/x/pkgsite-metrics/internal/queue"
"golang.org/x/pkgsite-metrics/internal/sandbox"
- "golang.org/x/pkgsite-metrics/internal/scan"
"golang.org/x/pkgsite-metrics/internal/version"
)
@@ -36,53 +32,13 @@
*Server
}
-type analysisRequest struct {
- scan.ModuleURLPath
- analysisParams
-}
-
-// analysisRequest implements queue.Task so it can be put on a TaskQueue.
-var _ queue.Task = (*analysisRequest)(nil)
-
-type analysisParams struct {
- Binary string // name of analysis binary to run
- Args string // command-line arguments to binary; split on whitespace
- ImportedBy int // imported-by count of module in path
- Insecure bool // if true, run outside sandbox
- Serve bool // serve results back to client instead of writing them to BigQuery
-}
-
-func (r *analysisRequest) Name() string { return r.Binary + "_" + r.Module + "@" + r.Version }
-
-func (r *analysisRequest) Path() string { return r.ModuleURLPath.Path() }
-
-func (r *analysisRequest) Params() string {
- return scan.FormatParams(r.analysisParams)
-}
-
-func parseAnalysisRequest(r *http.Request, prefix string) (*analysisRequest, error) {
- mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix))
- if err != nil {
- return nil, err
- }
-
- ap := analysisParams{}
- if err := scan.ParseParams(r, &ap); err != nil {
- return nil, err
- }
- return &analysisRequest{
- ModuleURLPath: mp,
- analysisParams: ap,
- }, nil
-}
-
const analysisBinariesBucketDir = "analysis-binaries"
func (s *analysisServer) handleScan(w http.ResponseWriter, r *http.Request) (err error) {
defer derrors.Wrap(&err, "analysisServer.handleScan")
ctx := r.Context()
- req, err := parseAnalysisRequest(r, "/analysis/scan")
+ req, err := analysis.ParseRequest(r, "/analysis/scan")
if err != nil {
return fmt.Errorf("%w: %v", derrors.InvalidArgument, err)
}
@@ -104,7 +60,7 @@
const sandboxRoot = "/bundle/rootfs"
-func (s *analysisServer) scan(ctx context.Context, req *analysisRequest) (_ JSONTree, binaryHash []byte, err error) {
+func (s *analysisServer) scan(ctx context.Context, req *analysis.Request) (_ analysis.JSONTree, binaryHash []byte, err error) {
if req.Binary == "" {
return nil, nil, fmt.Errorf("%w: analysis: missing binary", derrors.InvalidArgument)
}
@@ -200,7 +156,7 @@
}
// Run the binary on the module.
-func runAnalysisBinary(sbox *sandbox.Sandbox, binaryPath, reqArgs, moduleDir string) (JSONTree, error) {
+func runAnalysisBinary(sbox *sandbox.Sandbox, binaryPath, reqArgs, moduleDir string) (analysis.JSONTree, error) {
args := []string{"-json"}
args = append(args, strings.Fields(reqArgs)...)
args = append(args, "./...")
@@ -208,7 +164,7 @@
if err != nil {
return nil, fmt.Errorf("running analysis binary %s: %s", binaryPath, derrors.IncludeStderr(err))
}
- var tree JSONTree
+ var tree analysis.JSONTree
if err := json.Unmarshal(out, &tree); err != nil {
return nil, err
}
@@ -226,68 +182,16 @@
return cmd.Output()
}
-type diagnosticsOrError struct {
- Diagnostics []JSONDiagnostic
- Error *jsonError
-}
-
-func (de *diagnosticsOrError) UnmarshalJSON(data []byte) error {
- if err := json.Unmarshal(data, &de.Diagnostics); err == nil {
- return nil
- }
- return json.Unmarshal(data, &de.Error)
-}
-
-////////////////////////////////////////////////////////////////
-
-// These structs were copied, with minor changes, from
-// golang.org/x/tools/go/analysis/internal/analysisflags.
-
-// A JSONTree is a mapping from package ID to analysis name to result.
-// Each result is either a jsonError or a list of JSONDiagnostic.
-type JSONTree map[string]map[string]diagnosticsOrError
-
-// A JSONDiagnostic can be used to encode and decode analysis.Diagnostics to and
-// from JSON.
-type JSONDiagnostic struct {
- Category string `json:"category,omitempty"`
- Posn string `json:"posn"`
- Message string `json:"message"`
- SuggestedFixes []JSONSuggestedFix `json:"suggested_fixes,omitempty"`
-}
-
-// A JSONSuggestedFix describes an edit that should be applied as a whole or not
-// at all. It might contain multiple TextEdits/text_edits if the SuggestedFix
-// consists of multiple non-contiguous edits.
-type JSONSuggestedFix struct {
- Message string `json:"message"`
- Edits []JSONTextEdit `json:"edits"`
-}
-
-// A TextEdit describes the replacement of a portion of a file.
-// Start and End are zero-based half-open indices into the original byte
-// sequence of the file, and New is the new text.
-type JSONTextEdit struct {
- Filename string `json:"filename"`
- Start int `json:"start"`
- End int `json:"end"`
- New string `json:"new"`
-}
-
-type jsonError struct {
- Err string `json:"error"`
-}
-
-func (s *analysisServer) writeToBigQuery(ctx context.Context, req *analysisRequest, jsonTree JSONTree, binaryHash []byte) (err error) {
+func (s *analysisServer) writeToBigQuery(ctx context.Context, req *analysis.Request, jsonTree analysis.JSONTree, binaryHash []byte) (err error) {
defer derrors.Wrap(&err, "analysisServer.writeToBigQuery(%q, %q)", req.Module, req.Version)
- row := &bigquery.AnalysisResult{
+ row := &analysis.Result{
ModulePath: req.Module,
BinaryName: req.Binary,
- AnalysisWorkVersion: bigquery.AnalysisWorkVersion{
+ WorkVersion: analysis.WorkVersion{
BinaryVersion: hex.EncodeToString(binaryHash),
BinaryArgs: req.Args,
WorkerVersion: s.cfg.VersionID,
- SchemaVersion: bigquery.AnalysisSchemaVersion,
+ SchemaVersion: analysis.SchemaVersion,
},
}
info, err := s.proxyClient.Info(ctx, req.Module, req.Version)
@@ -300,12 +204,12 @@
row.SortVersion = version.ForSorting(row.Version)
row.CommitTime = info.Time
- row.Diagnostics = jsonTreeToDiagnostics(jsonTree)
+ row.Diagnostics = analysis.JSONTreeToDiagnostics(jsonTree)
if s.bqClient == nil {
log.Infof(ctx, "bigquery disabled, not uploading")
} else {
log.Infof(ctx, "uploading to bigquery: %s", req.Path())
- if err := s.bqClient.Upload(ctx, bigquery.AnalysisTableName, row); err != nil {
+ if err := s.bqClient.Upload(ctx, analysis.TableName, row); err != nil {
// This is often caused by:
// "Upload: googleapi: got HTTP response code 413 with body"
// which happens for some modules.
@@ -315,38 +219,3 @@
}
return nil
}
-
-// jsonTreeToDiagnostics converts a jsonTree to a list of diagnostics for BigQuery.
-// It ignores the suggested fixes of the diagnostics.
-func jsonTreeToDiagnostics(jsonTree JSONTree) []*bigquery.Diagnostic {
- var diags []*bigquery.Diagnostic
- // Sort for determinism.
- pkgIDs := maps.Keys(jsonTree)
- sort.Strings(pkgIDs)
- for _, pkgID := range pkgIDs {
- amap := jsonTree[pkgID]
- aNames := maps.Keys(amap)
- sort.Strings(aNames)
- for _, aName := range aNames {
- diagsOrErr := amap[aName]
- if diagsOrErr.Error != nil {
- diags = append(diags, &bigquery.Diagnostic{
- PackageID: pkgID,
- AnalyzerName: aName,
- Error: diagsOrErr.Error.Err,
- })
- } else {
- for _, jd := range diagsOrErr.Diagnostics {
- diags = append(diags, &bigquery.Diagnostic{
- PackageID: pkgID,
- AnalyzerName: aName,
- Category: jd.Category,
- Position: jd.Posn,
- Message: jd.Message,
- })
- }
- }
- }
- }
- return diags
-}
diff --git a/internal/worker/analysis_test.go b/internal/worker/analysis_test.go
index a9ab991..c05f389 100644
--- a/internal/worker/analysis_test.go
+++ b/internal/worker/analysis_test.go
@@ -9,7 +9,7 @@
"testing"
"github.com/google/go-cmp/cmp"
- "golang.org/x/pkgsite-metrics/internal/bigquery"
+ "golang.org/x/pkgsite-metrics/internal/analysis"
"golang.org/x/pkgsite-metrics/internal/buildtest"
)
@@ -22,17 +22,17 @@
if err != nil {
t.Fatal(err)
}
- want := JSONTree{
- "test_module": map[string]diagnosticsOrError{
- "findcall": diagnosticsOrError{
- Diagnostics: []JSONDiagnostic{
+ want := analysis.JSONTree{
+ "test_module": map[string]analysis.DiagnosticsOrError{
+ "findcall": analysis.DiagnosticsOrError{
+ Diagnostics: []analysis.JSONDiagnostic{
{
Posn: "a.go:7:17",
Message: "call of Fact(...)",
- SuggestedFixes: []JSONSuggestedFix{
+ SuggestedFixes: []analysis.JSONSuggestedFix{
{
Message: "Add '_TEST_'",
- Edits: []JSONTextEdit{{
+ Edits: []analysis.JSONTextEdit{{
Filename: "a.go",
Start: 77,
End: 77,
@@ -55,35 +55,3 @@
t.Errorf("mismatch (-want, +got):\n%s", diff)
}
}
-
-func TestJSONTreeToDiagnostics(t *testing.T) {
- in := JSONTree{
- "pkg1": {
- "a": {
- Diagnostics: []JSONDiagnostic{
- {Category: "c1", Posn: "pos1", Message: "m1"},
- {Category: "c2", Posn: "pos2", Message: "m2"},
- },
- },
- "b": {
- Diagnostics: []JSONDiagnostic{{Category: "c3", Posn: "pos3", Message: "m3"}},
- },
- },
- "pkg2": {
- "c": {
- Error: &jsonError{Err: "fail"},
- },
- },
- }
- got := jsonTreeToDiagnostics(in)
- want := []*bigquery.Diagnostic{
- {PackageID: "pkg1", AnalyzerName: "a", Category: "c1", Position: "pos1", Message: "m1"},
- {PackageID: "pkg1", AnalyzerName: "a", Category: "c2", Position: "pos2", Message: "m2"},
- {PackageID: "pkg1", AnalyzerName: "b", Category: "c3", Position: "pos3", Message: "m3"},
- {PackageID: "pkg2", AnalyzerName: "c", Error: "fail"},
- }
- if diff := cmp.Diff(want, got); diff != "" {
- t.Errorf("mismatch (-want, +got)\n%s", diff)
- }
-
-}