| // Copyright 2023 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| package sarif |
| |
| import ( |
| "encoding/json" |
| "fmt" |
| "io" |
| "sort" |
| |
| "golang.org/x/vuln/internal" |
| "golang.org/x/vuln/internal/govulncheck" |
| "golang.org/x/vuln/internal/osv" |
| "golang.org/x/vuln/internal/traces" |
| ) |
| |
| // handler for sarif output. |
| type handler struct { |
| w io.Writer |
| cfg *govulncheck.Config |
| osvs map[string]*osv.Entry |
| // findings contains same-level findings for an |
| // OSV at the most precise level of granularity |
| // available. This means, for instance, that if |
| // an osv is indeed called, then all findings for |
| // the osv will have call stack info. |
| findings map[string][]*govulncheck.Finding |
| } |
| |
| func NewHandler(w io.Writer) *handler { |
| return &handler{ |
| w: w, |
| osvs: make(map[string]*osv.Entry), |
| findings: make(map[string][]*govulncheck.Finding), |
| } |
| } |
| func (h *handler) Config(c *govulncheck.Config) error { |
| h.cfg = c |
| return nil |
| } |
| |
| func (h *handler) Progress(p *govulncheck.Progress) error { |
| return nil // not needed by sarif |
| } |
| |
| func (h *handler) OSV(e *osv.Entry) error { |
| h.osvs[e.ID] = e |
| return nil |
| } |
| |
| // moreSpecific favors a call finding over a non-call |
| // finding and a package finding over a module finding. |
| func moreSpecific(f1, f2 *govulncheck.Finding) int { |
| if len(f1.Trace) > 1 && len(f2.Trace) > 1 { |
| // Both are call stack findings. |
| return 0 |
| } |
| if len(f1.Trace) > 1 { |
| return -1 |
| } |
| if len(f2.Trace) > 1 { |
| return 1 |
| } |
| |
| fr1, fr2 := f1.Trace[0], f2.Trace[0] |
| if fr1.Function != "" && fr2.Function == "" { |
| return -1 |
| } |
| if fr1.Function == "" && fr2.Function != "" { |
| return 1 |
| } |
| if fr1.Package != "" && fr2.Package == "" { |
| return -1 |
| } |
| if fr1.Package == "" && fr2.Package != "" { |
| return -1 |
| } |
| return 0 // findings always have module info |
| } |
| |
| func (h *handler) Finding(f *govulncheck.Finding) error { |
| fs := h.findings[f.OSV] |
| if len(fs) == 0 { |
| fs = []*govulncheck.Finding{f} |
| } else { |
| if ms := moreSpecific(f, fs[0]); ms == -1 { |
| // The new finding is more specific, so we need |
| // to erase existing findings and add the new one. |
| fs = []*govulncheck.Finding{f} |
| } else if ms == 0 { |
| // The new finding is equal to an existing one and |
| // because of the invariant on h.findings, it is |
| // also equal to all existing ones. |
| fs = append(fs, f) |
| } |
| // Otherwise, the new finding is at a less precise level. |
| } |
| h.findings[f.OSV] = fs |
| return nil |
| } |
| |
| // Flush is used to print out to w the sarif json output. |
| // This is needed as sarif is not streamed. |
| func (h *handler) Flush() error { |
| sLog := toSarif(h) |
| s, err := json.MarshalIndent(sLog, "", " ") |
| if err != nil { |
| return err |
| } |
| h.w.Write(s) |
| return nil |
| } |
| |
| func toSarif(h *handler) Log { |
| cfg := h.cfg |
| r := Run{ |
| Tool: Tool{ |
| Driver: Driver{ |
| Name: cfg.ScannerName, |
| Version: cfg.ScannerVersion, |
| InformationURI: "https://pkg.go.dev/golang.org/x/vuln/cmd/govulncheck", |
| Properties: *cfg, |
| Rules: rules(h), |
| }, |
| }, |
| Results: results(h), |
| } |
| |
| return Log{ |
| Version: "2.1.0", |
| Schema: "https://json.schemastore.org/sarif-2.1.0.json", |
| Runs: []Run{r}, |
| } |
| } |
| |
| func rules(h *handler) []Rule { |
| var rs []Rule |
| for id := range h.findings { |
| osv := h.osvs[id] |
| // s is either summary if it exists, or details |
| // otherwise. Govulncheck text does the same. |
| s := osv.Summary |
| if s == "" { |
| s = osv.Details |
| } |
| rs = append(rs, Rule{ |
| ID: osv.ID, |
| ShortDescription: Description{Text: fmt.Sprintf("[%s] %s", osv.ID, s)}, |
| FullDescription: Description{Text: s}, |
| HelpURI: fmt.Sprintf("https://pkg.go.dev/vuln/%s", osv.ID), |
| Help: Description{Text: osv.Details}, |
| Properties: RuleTags{Tags: osv.Aliases}, |
| }) |
| } |
| sort.SliceStable(rs, func(i, j int) bool { return rs[i].ID < rs[j].ID }) |
| return rs |
| } |
| |
| func results(h *handler) []Result { |
| var results []Result |
| for osv, fs := range h.findings { |
| var locs []Location |
| if h.cfg.ScanMode != govulncheck.ScanModeBinary { |
| // Attach result to the go.mod file for source analysis. |
| // But there is no such place for binaries. |
| locs = []Location{{PhysicalLocation: PhysicalLocation{ |
| ArtifactLocation: ArtifactLocation{ |
| URI: "go.mod", |
| URIBaseID: SrcRootID, |
| }, |
| Region: Region{StartLine: 1}, // for now, point to the first line |
| }, |
| Message: Description{Text: fmt.Sprintf("Findings for vulnerability %s", osv)}, // not having a message here results in an invalid sarif |
| }} |
| } |
| |
| res := Result{ |
| RuleID: osv, |
| Level: level(fs[0], h.cfg), |
| Message: Description{Text: resultMessage(fs, h.cfg)}, |
| Stacks: stacks(h, fs), |
| CodeFlows: codeFlows(h, fs), |
| Locations: locs, |
| } |
| results = append(results, res) |
| } |
| sort.SliceStable(results, func(i, j int) bool { return results[i].RuleID < results[j].RuleID }) // for deterministic output |
| return results |
| } |
| |
| func resultMessage(findings []*govulncheck.Finding, cfg *govulncheck.Config) string { |
| // We can infer the findings' level by just looking at the |
| // top trace frame of any finding. |
| frame := findings[0].Trace[0] |
| uniqueElems := make(map[string]bool) |
| if frame.Function == "" && frame.Package == "" { // module level findings |
| for _, f := range findings { |
| uniqueElems[f.Trace[0].Module] = true |
| } |
| } else { // symbol and package level findings |
| for _, f := range findings { |
| uniqueElems[f.Trace[0].Package] = true |
| } |
| } |
| var elems []string |
| for e := range uniqueElems { |
| elems = append(elems, e) |
| } |
| sort.Strings(elems) |
| |
| l := len(elems) |
| elemList := list(elems) |
| main, addition := "", "" |
| const runCallAnalysis = "Run the call-level analysis to understand whether your code actually calls the vulnerabilities." |
| switch { |
| case frame.Function != "": |
| main = fmt.Sprintf("calls vulnerable functions in %d package%s (%s).", l, choose("", "s", l == 1), elemList) |
| case frame.Package != "": |
| main = fmt.Sprintf("imports %d vulnerable package%s (%s)", l, choose("", "s", l == 1), elemList) |
| addition = choose(", but doesn’t appear to call any of the vulnerable symbols.", ". "+runCallAnalysis, cfg.ScanLevel.WantSymbols()) |
| default: |
| main = fmt.Sprintf("depends on %d vulnerable module%s (%s)", l, choose("", "s", l == 1), elemList) |
| informational := ", but doesn't appear to " + choose("call", "import", cfg.ScanLevel.WantSymbols()) + " any of the vulnerable symbols." |
| addition = choose(informational, ". "+runCallAnalysis, cfg.ScanLevel.WantPackages()) |
| } |
| |
| return fmt.Sprintf("Your code %s%s", main, addition) |
| } |
| |
| const ( |
| errorLevel = "error" |
| warningLevel = "warning" |
| informationalLevel = "note" |
| ) |
| |
| func level(f *govulncheck.Finding, cfg *govulncheck.Config) string { |
| fr := f.Trace[0] |
| switch { |
| case cfg.ScanLevel.WantSymbols(): |
| if fr.Function != "" { |
| return errorLevel |
| } |
| if fr.Package != "" { |
| return warningLevel |
| } |
| return informationalLevel |
| case cfg.ScanLevel.WantPackages(): |
| if fr.Package != "" { |
| return errorLevel |
| } |
| return warningLevel |
| default: |
| return errorLevel |
| } |
| } |
| |
| func stacks(h *handler, fs []*govulncheck.Finding) []Stack { |
| if fs[0].Trace[0].Function == "" { // not call level findings |
| return nil |
| } |
| |
| var stacks []Stack |
| for _, f := range fs { |
| stacks = append(stacks, stack(h, f)) |
| } |
| // Sort stacks for deterministic output. We sort by message |
| // which is effectively sorting by full symbol name. The |
| // performance should not be an issue here. |
| sort.SliceStable(stacks, func(i, j int) bool { return stacks[i].Message.Text < stacks[j].Message.Text }) |
| return stacks |
| } |
| |
| // stack transforms call stack in f to a sarif stack. |
| func stack(h *handler, f *govulncheck.Finding) Stack { |
| trace := f.Trace |
| top := trace[len(trace)-1] // belongs to top level module |
| |
| var frames []Frame |
| for i := len(trace) - 1; i >= 0; i-- { // vulnerable symbol is at the top frame |
| frame := trace[i] |
| pos := govulncheck.Position{Line: 1, Column: 1} |
| if frame.Position != nil { |
| pos = *frame.Position |
| } |
| |
| sf := Frame{ |
| Module: frame.Module, |
| Location: Location{Message: Description{Text: symbol(frame)}}, // show the (full) symbol name |
| } |
| if h.cfg.ScanMode != govulncheck.ScanModeBinary { |
| sf.Location.PhysicalLocation = PhysicalLocation{ |
| ArtifactLocation: ArtifactLocation{ |
| URI: pos.Filename, |
| URIBaseID: uriID(top.Module, frame.Module), |
| }, |
| Region: Region{ |
| StartLine: pos.Line, |
| StartColumn: pos.Column, |
| }, |
| } |
| } |
| frames = append(frames, sf) |
| } |
| |
| return Stack{ |
| Frames: frames, |
| Message: Description{Text: fmt.Sprintf("A call stack for vulnerable function %s", symbol(trace[0]))}, |
| } |
| } |
| |
| func codeFlows(h *handler, fs []*govulncheck.Finding) []CodeFlow { |
| if fs[0].Trace[0].Function == "" { // not call level findings |
| return nil |
| } |
| |
| // group call stacks per symbol. There should |
| // be one call stack currently per symbol, but |
| // this might change in the future. |
| m := make(map[govulncheck.Frame][]*govulncheck.Finding) |
| for _, f := range fs { |
| // fr.Position is currently the position |
| // of the definition of the vuln symbol |
| fr := *f.Trace[0] |
| m[fr] = append(m[fr], f) |
| } |
| |
| var codeFlows []CodeFlow |
| for fr, fs := range m { |
| tfs := threadFlows(h, fs) |
| codeFlows = append(codeFlows, CodeFlow{ |
| ThreadFlows: tfs, |
| // TODO: should we instead show the message from govulncheck text output? |
| Message: Description{Text: fmt.Sprintf("A summarized code flow for vulnerable function %s", symbol(&fr))}, |
| }) |
| } |
| // Sort flows for deterministic output. We sort by message |
| // which is effectively sorting by full symbol name. The |
| // performance should not be an issue here. |
| sort.SliceStable(codeFlows, func(i, j int) bool { return codeFlows[i].Message.Text < codeFlows[j].Message.Text }) |
| return codeFlows |
| } |
| |
| func threadFlows(h *handler, fs []*govulncheck.Finding) []ThreadFlow { |
| var tfs []ThreadFlow |
| for _, f := range fs { |
| trace := traces.Compact(f) |
| top := trace[len(trace)-1] // belongs to top level module |
| |
| var tf []ThreadFlowLocation |
| for i := len(trace) - 1; i >= 0; i-- { // vulnerable symbol is at the top frame |
| // TODO: should we, similar to govulncheck text output, only |
| // mention three elements of the compact trace? |
| frame := trace[i] |
| pos := govulncheck.Position{Line: 1, Column: 1} |
| if frame.Position != nil { |
| pos = *frame.Position |
| } |
| |
| tfl := ThreadFlowLocation{ |
| Module: frame.Module, |
| Location: Location{Message: Description{Text: symbol(frame)}}, // show the (full) symbol name |
| } |
| if h.cfg.ScanMode != govulncheck.ScanModeBinary { |
| tfl.Location.PhysicalLocation = PhysicalLocation{ |
| ArtifactLocation: ArtifactLocation{ |
| URI: pos.Filename, |
| URIBaseID: uriID(top.Module, frame.Module), |
| }, |
| Region: Region{ |
| StartLine: pos.Line, |
| StartColumn: pos.Column, |
| }, |
| } |
| } |
| tf = append(tf, tfl) |
| } |
| tfs = append(tfs, ThreadFlow{Locations: tf}) |
| } |
| return tfs |
| } |
| |
| func uriID(top, module string) string { |
| if top == module { |
| return SrcRootID |
| } |
| if module == internal.GoStdModulePath { |
| return GoRootID |
| } |
| return GoModCacheID |
| } |