| // Copyright 2023 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package analysis provides functionality for manipulating |
| // inputs and outputs of analysis endpoints. |
| package analysis |
| |
| import ( |
| "context" |
| "encoding/json" |
| "fmt" |
| "net/http" |
| "sort" |
| "strings" |
| "time" |
| |
| bq "cloud.google.com/go/bigquery" |
| "golang.org/x/exp/maps" |
| "golang.org/x/pkgsite-metrics/internal/bigquery" |
| "golang.org/x/pkgsite-metrics/internal/derrors" |
| "golang.org/x/pkgsite-metrics/internal/queue" |
| "golang.org/x/pkgsite-metrics/internal/scan" |
| ) |
| |
| type ScanRequest struct { |
| scan.ModuleURLPath |
| ScanParams |
| } |
| |
| type ScanParams struct { |
| Binary string // name of analysis binary to run |
| BinaryVersion string // hex-encoded binary hash |
| Args string // command-line arguments to binary; split on whitespace |
| ImportedBy int // imported-by count of module in path |
| Insecure bool // if true, run outside sandbox |
| Serve bool // serve results back to client instead of writing them to BigQuery |
| JobID string // ID of job, if non-empty |
| } |
| |
| type EnqueueParams struct { |
| Binary string // name of analysis binary to run |
| Args string // command-line arguments to binary; split on whitespace |
| Insecure bool // if true, run outside sandbox |
| Min int // minimum imported-by count for a module to be included |
| File string // path to file containing modules; if missing, use DB |
| Suffix string // appended to task queue IDs to generate unique tasks |
| User string // user initiating enqueue |
| } |
| |
| // ScanRequest implements queue.Task so it can be put on a TaskQueue. |
| var _ queue.Task = (*ScanRequest)(nil) |
| |
| // Name returns the task name, built from the binary name and the module path and version. |
| func (r *ScanRequest) Name() string { return r.Binary + "_" + r.Module + "@" + r.Version } |
| |
| func (r *ScanRequest) Path() string { return r.ModuleURLPath.Path() } |
| |
| func (r *ScanRequest) Params() string { |
| return scan.FormatParams(r.ScanParams) |
| } |
| |
| // ParseScanRequest parses an http.Request into a ScanRequest. The module and |
| // version are taken from the URL path after prefix is trimmed; the remaining |
| // fields are populated from the request by scan.ParseParams. |
| func ParseScanRequest(r *http.Request, prefix string) (*ScanRequest, error) { |
| mp, err := scan.ParseModuleURLPath(strings.TrimPrefix(r.URL.Path, prefix)) |
| if err != nil { |
| return nil, err |
| } |
| |
| ap := ScanParams{} |
| if err := scan.ParseParams(r, &ap); err != nil { |
| return nil, err |
| } |
| return &ScanRequest{ |
| ModuleURLPath: mp, |
| ScanParams: ap, |
| }, nil |
| } |
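| |
| // For illustration only (hypothetical module and parameter values), a request |
| // whose trimmed path is golang.org/x/mod@v0.12.0 and whose query string is |
| // binary=nilness&importedby=100&insecure=true would parse into a ScanRequest |
| // for golang.org/x/mod at v0.12.0 running the "nilness" binary outside the |
| // sandbox, assuming scan.ParseParams matches lower-cased field names to query |
| // parameters as its use here suggests. |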
| |
| // These structs were copied, with minor changes, from |
| // golang.org/x/tools/go/analysis/internal/analysisflags. |
| |
| // A JSONTree is a mapping from package ID to analysis name to result. |
| // Each result is either a jsonError or a list of JSONDiagnostic. |
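| // |
| // For illustration only (hypothetical package ID and analyzer names), the |
| // JSON encoding of a JSONTree might look like: |
| // |
| //	{ |
| //	  "example.com/m/p": { |
| //	    "nilness": [{"posn": "p.go:3:2", "message": "nil dereference"}], |
| //	    "printf": {"error": "analyzer failed"} |
| //	  } |
| //	} |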
| type JSONTree map[string]map[string]DiagnosticsOrError |
| |
| // A JSONDiagnostic can be used to encode and decode |
| // analysis.Diagnostics to and from JSON. |
| type JSONDiagnostic struct { |
| Category string `json:"category,omitempty"` |
| Posn string `json:"posn"` |
| Message string `json:"message"` |
| SuggestedFixes []JSONSuggestedFix `json:"suggested_fixes,omitempty"` |
| } |
| |
| // A JSONSuggestedFix describes an edit that should be applied as a whole or not |
| // at all. It might contain multiple TextEdits/text_edits if the SuggestedFix |
| // consists of multiple non-contiguous edits. |
| type JSONSuggestedFix struct { |
| Message string `json:"message"` |
| Edits []JSONTextEdit `json:"edits"` |
| } |
| |
| // A JSONTextEdit describes the replacement of a portion of a file. |
| // Start and End are zero-based half-open indices into the original byte |
| // sequence of the file, and New is the new text. |
| type JSONTextEdit struct { |
| Filename string `json:"filename"` |
| Start int `json:"start"` |
| End int `json:"end"` |
| New string `json:"new"` |
| } |
| |
| type jsonError struct { |
| Err string `json:"error"` |
| } |
| |
| // A DiagnosticsOrError is the result of running a single analyzer on a |
| // package: either a list of diagnostics or an error. |
| type DiagnosticsOrError struct { |
| Diagnostics []JSONDiagnostic |
| Error *jsonError |
| } |
| |
| // UnmarshalJSON decodes the data as a list of JSONDiagnostics if possible, |
| // and otherwise as a jsonError. |
| func (de *DiagnosticsOrError) UnmarshalJSON(data []byte) error { |
| if err := json.Unmarshal(data, &de.Diagnostics); err == nil { |
| return nil |
| } |
| return json.Unmarshal(data, &de.Error) |
| } |
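| |
| // For illustration only, both of the following JSON values unmarshal into a |
| // DiagnosticsOrError; the first fills Diagnostics, the second fills Error: |
| // |
| //	[{"posn": "a.go:1:1", "message": "unreachable code"}] |
| //	{"error": "failed to load package"} |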
| |
| // Definitions for BigQuery. |
| |
| const TableName = "analysis" |
| |
| // Note: before modifying AnalysisResult, make sure the change |
| // is a valid schema modification. |
| // The only supported changes are: |
| // - adding a nullable or repeated column |
| // - dropping a column |
| // - changing a column from required to nullable. |
| // See https://cloud.google.com/bigquery/docs/managing-table-schemas for details. |
| |
| // Result is a row in the BigQuery analysis table. It records the outcome of |
| // running one analysis binary on one module version. |
| type Result struct { |
| CreatedAt time.Time `bigquery:"created_at"` |
| ModulePath string `bigquery:"module_path"` |
| Version string `bigquery:"version"` |
| SortVersion string `bigquery:"sort_version"` |
| CommitTime time.Time `bigquery:"commit_time"` |
| // The name of the analysis binary that was executed. |
| // A single binary may run multiple analyzers. |
| BinaryName string `bigquery:"binary_name"` |
| Error string `bigquery:"error"` |
| ErrorCategory string `bigquery:"error_category"` |
| WorkVersion // InferSchema flattens embedded fields |
| |
| Diagnostics []*Diagnostic `bigquery:"diagnostic"` |
| } |
| |
| func (r *Result) AddError(err error) { |
| if err == nil { |
| return |
| } |
| r.Error = err.Error() |
| r.ErrorCategory = derrors.CategorizeError(err) |
| } |
| |
| func (r *Result) SetUploadTime(t time.Time) { r.CreatedAt = t } |
| |
| // WorkVersion contains information that can be used to avoid duplicate work. |
| // Given two WorkVersion values v1 and v2 for the same module path and version, |
| // if v1 == v2 then it is not necessary to scan the module. |
| type WorkVersion struct { |
| // A hash of the binary executed. |
| BinaryVersion string `bigquery:"binary_version"` |
| BinaryArgs string `bigquery:"binary_args"` // args passed to binary |
| // The version of the currently running code. This tracks changes in the |
| // logic of module scanning and processing. |
| WorkerVersion string `bigquery:"worker_version"` |
| // The version of the bigquery schema. |
| SchemaVersion string `bigquery:"schema_version"` |
| } |
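| |
| // For illustration only (hypothetical variables), a worker can use WorkVersion |
| // equality to avoid duplicate work, as described above: |
| // |
| //	if stored != nil && *stored == current { |
| //		// Same binary, args, worker version and schema: skip the scan. |
| //		return nil |
| //	} |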
| |
| // A Diagnostic is a single analyzer finding. |
| type Diagnostic struct { |
| // The package ID as reported by the analysis binary. |
| PackageID string `bigquery:"package_id"` |
| AnalyzerName string `bigquery:"analyzer_name"` |
| Error string `bigquery:"error"` |
| // These fields are copied from JSONDiagnostic, defined above. |
| Category string `bigquery:"category"` |
| Position string `bigquery:"position"` |
| Message string `bigquery:"message"` |
| Source bq.NullString `bigquery:"source"` |
| } |
| |
| // SchemaVersion changes whenever the analysis schema changes. |
| var SchemaVersion string |
| |
| func init() { |
| s, err := bigquery.InferSchema(Result{}) |
| if err != nil { |
| panic(err) |
| } |
| SchemaVersion = bigquery.SchemaVersion(s) |
| bigquery.AddTable(TableName, s) |
| } |
| |
| // A WorkVersionKey identifies the module version and analysis binary that a |
| // WorkVersion describes. Only compare two WorkVersions that have the same key. |
| type WorkVersionKey struct { |
| Module string |
| Version string |
| Binary string |
| } |
| |
| // ReadWorkVersion reads the most recent WorkVersion in the analysis table |
| // for the given module path, version and analysis binary. |
| func ReadWorkVersion(ctx context.Context, c *bigquery.Client, modulePath, version, binary string) (wv *WorkVersion, err error) { |
| defer derrors.Wrap(&err, "ReadWorkVersion") |
| |
| const qf = ` |
| SELECT module_path, version, binary_name, binary_version, binary_args, worker_version, schema_version |
| FROM %s WHERE module_path="%s" AND version="%s" AND binary_name="%s" ORDER BY created_at DESC LIMIT 1 |
| ` |
| query := fmt.Sprintf(qf, "`"+c.FullTableName(TableName)+"`", modulePath, version, binary) |
| iter, err := c.Query(ctx, query) |
| if err != nil { |
| return nil, err |
| } |
| err = bigquery.ForEachRow(iter, func(r *Result) bool { |
| // Should be reached at most once. |
| wv = &r.WorkVersion |
| return true |
| }) |
| if err != nil { |
| return nil, err |
| } |
| return wv, nil |
| } |
| |
| // JSONTreeToDiagnostics converts a JSONTree to a list of diagnostics for BigQuery. |
| // It ignores the suggested fixes of the diagnostics. |
| func JSONTreeToDiagnostics(jsonTree JSONTree) []*Diagnostic { |
| var diags []*Diagnostic |
| // Sort for determinism. |
| pkgIDs := maps.Keys(jsonTree) |
| sort.Strings(pkgIDs) |
| for _, pkgID := range pkgIDs { |
| amap := jsonTree[pkgID] |
| aNames := maps.Keys(amap) |
| sort.Strings(aNames) |
| for _, aName := range aNames { |
| diagsOrErr := amap[aName] |
| if diagsOrErr.Error != nil { |
| diags = append(diags, &Diagnostic{ |
| PackageID: pkgID, |
| AnalyzerName: aName, |
| Error: diagsOrErr.Error.Err, |
| }) |
| } else { |
| for _, jd := range diagsOrErr.Diagnostics { |
| diags = append(diags, &Diagnostic{ |
| PackageID: pkgID, |
| AnalyzerName: aName, |
| Category: jd.Category, |
| Position: jd.Posn, |
| Message: jd.Message, |
| }) |
| } |
| } |
| } |
| } |
| return diags |
| } |
| |
| // ReadResults returns the most recent analysis result for each module path and |
| // version produced by the given binary name, binary version and arguments. |
| func ReadResults(ctx context.Context, c *bigquery.Client, binaryName, binaryVersion, binaryArgs string) (_ []*Result, err error) { |
| defer derrors.Wrap(&err, "ReadResults") |
| q := bigquery.PartitionQuery{ |
| From: c.FullTableName(TableName), |
| PartitionOn: "module_path, version", |
| Where: fmt.Sprintf("binary_name='%s' AND binary_version='%s' AND binary_args='%s'", |
| binaryName, binaryVersion, binaryArgs), |
| OrderBy: "created_at DESC", |
| } |
| iter, err := c.Query(ctx, q.String()) |
| if err != nil { |
| return nil, err |
| } |
| var res []*Result |
| err = bigquery.ForEachRow(iter, func(r *Result) bool { |
| res = append(res, r) |
| return true |
| }) |
| if err != nil { |
| return nil, err |
| } |
| return res, nil |
| } |
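| |
| // For illustration only (hypothetical binary name, hash and logging), a caller |
| // outside this package might fetch the latest results for one binary build: |
| // |
| //	results, err := analysis.ReadResults(ctx, client, "nilness", "abc123", "") |
| //	if err != nil { |
| //		return err |
| //	} |
| //	for _, r := range results { |
| //		log.Printf("%s@%s: %d diagnostics", r.ModulePath, r.Version, len(r.Diagnostics)) |
| //	} |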