internal/sarif,cmd/govulncheck: publicize sarif

Also, add and improve documentation.

Fixes golang/go#61347

Change-Id: Ia631615b40b9d23be6efa615b573be6c465fa36b
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/580955
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Ian Cottrell <iancottrell@google.com>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/cmd/govulncheck/doc.go b/cmd/govulncheck/doc.go
index 18229fa..dbfb957 100644
--- a/cmd/govulncheck/doc.go
+++ b/cmd/govulncheck/doc.go
@@ -9,10 +9,10 @@
 
 By default, govulncheck makes requests to the Go vulnerability database at
 https://vuln.go.dev. Requests to the vulnerability database contain only module
-paths, not code or other properties of your program. See
-https://vuln.go.dev/privacy.html for more. Use the -db flag to specify a
-different database, which must implement the specification at
-https://go.dev/security/vuln/database.
+paths with vulnerabilities already known to the database, not code or other
+properties of your program. See https://vuln.go.dev/privacy.html for more.
+Use the -db flag to specify a different database, which must implement the
+specification at https://go.dev/security/vuln/database.
 
 Govulncheck looks for vulnerabilities in Go programs using a specific build
 configuration. For analyzing source code, that configuration is the Go version
@@ -59,12 +59,21 @@
 smaller than the binary, that can also be passed to govulncheck as an argument with
 '-mode binary'. The users should not rely on the contents or representation of the blob.
 
-Govulncheck exits successfully (exit code 0) if there are no vulnerabilities,
-and exits unsuccessfully if there are. It also exits successfully if the -json flag
-(or '-format json') is provided, regardless of the number of detected vulnerabilities.
+# Integrations
 
 Govulncheck supports streaming JSON. For more details, please see [golang.org/x/vuln/internal/govulncheck].
 
+Govulncheck also supports Static Analysis Results Interchange Format (SARIF) output
+format, following the specification at https://www.oasis-open.org/committees/tc_home.php?wg_abbrev=sarif.
+For more details, please see [golang.org/x/vuln/internal/sarif].
+
+# Exit codes
+
+Govulncheck exits successfully (exit code 0) if there are no vulnerabilities,
+and exits unsuccessfully if there are. It also exits successfully if the
+'format -json' ('-json') or '-format sarif' is provided, regardless of the number
+of detected vulnerabilities.
+
 # Limitations
 
 Govulncheck has these limitations:
diff --git a/cmd/govulncheck/testdata/common/testfiles/usage/usage.ct b/cmd/govulncheck/testdata/common/testfiles/usage/usage.ct
index 26bea5a..7d5a143 100644
--- a/cmd/govulncheck/testdata/common/testfiles/usage/usage.ct
+++ b/cmd/govulncheck/testdata/common/testfiles/usage/usage.ct
@@ -14,7 +14,7 @@
     	vulnerability database url (default "https://vuln.go.dev")
   -format value
     	specify format output
-    	The supported values are 'text' and 'json' (default 'text')
+    	The supported values are 'text', 'json', and 'sarif' (default 'text')
   -json
     	output JSON (Go compatible legacy flag, see format flag)
   -mode value
diff --git a/internal/sarif/sarif.go b/internal/sarif/sarif.go
index fb1c5b7..b434f1d 100644
--- a/internal/sarif/sarif.go
+++ b/internal/sarif/sarif.go
@@ -8,10 +8,41 @@
 // The implementation covers the subset of the specification available
 // at https://www.oasis-open.org/committees/tc_home.php?wg_abbrev=sarif.
 //
-// If govulncheck is used in source mode, the locations will include a
-// physical location implemented as a path relative to either the source
-// module (%SRCROOT%), Go root (%GOROOT%), or Go module cache (%GOMODCACHE%)
-// URI base id.
+// The sarif encoding models govulncheck findings as Results. Each
+// Result encodes findings for a unique OSV entry at the most precise
+// detected level only. CodeFlows summarize call stacks, similar to govulncheck
+// textual output, while Stacks contain call stack information verbatim.
+//
+// The result Levels are defined by the govulncheck.ScanLevel and the most
+// precise level at which the finding was detected. Result error is produced
+// when the finding level matches the user desired level of scan precision;
+// all other finding levels are then classified as progressively weaker.
+// For instance, if the user specified symbol scan level and govulncheck
+// detected a use of a vulnerable symbol, then the Result will have error
+// Level. If the symbol was not used but its package was imported, then the
+// Result Level is warning, and so on.
+//
+// Each Result is attached to the first line of the go.mod file. Other
+// ArtifactLocations are paths relative to their enclosing modules.
+// Similar to JSON output format, this makes govulncheck sarif locations
+// portable.
+//
+// The relative paths in PhysicalLocations also come with a URIBaseID offset.
+// Paths for the source module analyzed, the Go standard library, and third-party
+// dependencies are relative to %SRCROOT%, %GOROOT%, and %GOMODCACHE% offsets,
+// resp. We note that the URIBaseID offsets are not explicitly defined in
+// the sarif output. It is the clients responsibility to set them to resolve
+// paths at their local machines.
+//
+// All paths use "/" delimiter for portability.
+//
+// Properties field of a Tool.Driver is a govulncheck.Config used for the
+// invocation of govulncheck producing the Results. Properties field of
+// a Rule contains information on CVE and GHSA aliases for the corresponding
+// rule OSV. Clients can use this information to, say, supress and filter
+// vulnerabilities.
+//
+// Please see the definition of types below for more information.
 package sarif
 
 import "golang.org/x/vuln/internal/govulncheck"
@@ -34,7 +65,7 @@
 type Run struct {
 	Tool Tool `json:"tool,omitempty"`
 	// Results contain govulncheck findings. There should be exactly one
-	// Result per a detected OSV.
+	// Result per a detected use of an OSV.
 	Results []Result `json:"results,omitempty"`
 }
 
@@ -45,11 +76,11 @@
 
 // Driver provides details about the govulncheck binary being executed.
 type Driver struct {
-	// Name should be "govulncheck"
+	// Name is "govulncheck"
 	Name string `json:"name,omitempty"`
-	// Version should be the govulncheck version
+	// Version is the govulncheck version
 	Version string `json:"semanticVersion,omitempty"`
-	// InformationURI should point to the description of govulncheck tool
+	// InformationURI points to the description of govulncheck tool
 	InformationURI string `json:"informationUri,omitempty"`
 	// Properties are govulncheck run metadata, such as vuln db, Go version, etc.
 	Properties govulncheck.Config `json:"properties,omitempty"`
@@ -66,9 +97,9 @@
 	FullDescription  Description `json:"fullDescription,omitempty"`
 	Help             Description `json:"help,omitempty"`
 	HelpURI          string      `json:"helpUri,omitempty"`
-	// Properties should contain OSV.Aliases (CVEs and GHSAs) as tags.
+	// Properties contain OSV.Aliases (CVEs and GHSAs) as tags.
 	// Consumers of govulncheck SARIF can use these tags to filter
-	// results based on, say, CVEs.
+	// results.
 	Properties RuleTags `json:"properties,omitempty"`
 }
 
@@ -85,27 +116,28 @@
 
 // Result is a set of govulncheck findings for an OSV. For call stack
 // mode, it will contain call stacks for the OSV. There is exactly
-// one Result per detected OSV. Only findings at the lowest possible
-// level appear in the Result. For instance, if there are findings
-// with call stacks for an OSV, those findings will be in the Result,
-// but not the “imports” and “requires” findings for the same OSV.
+// one Result per detected OSV. Only findings at the most precise
+// detected level appear in the Result. For instance, if there are
+// symbol findings for an OSV, those findings will be in the Result,
+// but not the package and module level findings for the same OSV.
 type Result struct {
 	// RuleID is the Rule.ID/OSV producing the finding.
 	RuleID string `json:"ruleId,omitempty"`
-	// Level is one of "error", "warning", "note", and "none".
+	// Level is one of "error", "warning", and "note".
 	Level string `json:"level,omitempty"`
 	// Message explains the overall findings.
 	Message Description `json:"message,omitempty"`
-	// Locations to which the findings are associated.
+	// Locations to which the findings are associated. Always
+	// a single location pointing to the first line of the go.mod
+	// file. The path to the file is "go.mod".
 	Locations []Location `json:"locations,omitempty"`
-	// CodeFlows can encode call stacks produced by govulncheck.
+	// CodeFlows summarize call stacks produced by govulncheck.
 	CodeFlows []CodeFlow `json:"codeFlows,omitempty"`
-	// Stacks can encode call stacks produced by govulncheck.
+	// Stacks encode call stacks produced by govulncheck.
 	Stacks []Stack `json:"stacks,omitempty"`
-	// TODO: support Fixes when integration points to the same
 }
 
-// CodeFlow describes a detected offending flow of information in terms of
+// CodeFlow summarizes a detected offending flow of information in terms of
 // code locations. More precisely, it can contain several related information
 // flows, keeping them together. In govulncheck, those can be all call stacks
 // for, say, a particular symbol or package.
@@ -128,8 +160,6 @@
 	Module string `json:"module,omitempty"`
 	// Location also contains a Message field.
 	Location Location `json:"location,omitempty"`
-	// Can also contain Stack field that encodes a call stack
-	// leading to this thread flow location.
 }
 
 // Stack is a sequence of frames and can encode a govulncheck call stack.
@@ -167,12 +197,10 @@
 
 // ArtifactLocation is a path to an offending file.
 type ArtifactLocation struct {
-	// URI is a path to the artifact. If URIBaseID is empty, then
-	// URI is absolute and it needs to start with, say, "file://."
+	// URI is a path relative to URIBaseID.
 	URI string `json:"uri,omitempty"`
-	// URIBaseID is offset for URI. An example is %SRCROOT%, used by
-	// Github Code Scanning to point to the root of the target repo.
-	// Its value must be defined in URIBaseIDs of a Run.
+	// URIBaseID is offset for URI, one of %SRCROOT%, %GOROOT%,
+	// and %GOMODCACHE%.
 	URIBaseID string `json:"uriBaseId,omitempty"`
 }
 
diff --git a/internal/scan/flags.go b/internal/scan/flags.go
index 482620f..210e1b9 100644
--- a/internal/scan/flags.go
+++ b/internal/scan/flags.go
@@ -42,7 +42,7 @@
 	flags.Var(&modeFlag, "mode", "supports 'source', 'binary', and 'extract' (default 'source')")
 	flags.Var(&cfg.tags, "tags", "comma-separated `list` of build tags")
 	flags.Var(&cfg.show, "show", "enable display of additional information specified by the comma separated `list`\nThe supported values are 'traces','color', 'version', and 'verbose'")
-	flags.Var(&cfg.format, "format", "specify format output\nThe supported values are 'text' and 'json' (default 'text')")
+	flags.Var(&cfg.format, "format", "specify format output\nThe supported values are 'text', 'json', and 'sarif' (default 'text')")
 	flags.BoolVar(&version, "version", false, "print the version information")
 	flags.Var(&scanFlag, "scan", "set the scanning level desired, one of 'module', 'package', or 'symbol' (default 'symbol')")