all: refactor to unify report creation from different sources

Add a new interface, report.Source, which represents a source
(e.g., a GHSA or CVE) that can be converted into a Report.

This is part of an effort to make report creation more uniform
across various sources, and collect together common logic.

Change-Id: If78bab9de143bca72b88ca7f98bd01a51d17ccae
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/579855
Commit-Queue: Tatiana Bradley <tatianabradley@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
diff --git a/cmd/vulnreport/create.go b/cmd/vulnreport/create.go
index 4a42ca5..8dabeca 100644
--- a/cmd/vulnreport/create.go
+++ b/cmd/vulnreport/create.go
@@ -14,7 +14,6 @@
 	"time"
 
 	"golang.org/x/vulndb/cmd/vulnreport/log"
-	"golang.org/x/vulndb/internal/cveclient"
 	"golang.org/x/vulndb/internal/cveschema5"
 	"golang.org/x/vulndb/internal/genai"
 	"golang.org/x/vulndb/internal/genericosv"
@@ -178,11 +177,8 @@
 		log.Infof("#%d (%q) has no CVE or GHSA IDs", iss.Number, iss.Title)
 	}
 
-	r, err = reportFromAliases(ctx, parsed.id, parsed.modulePath, parsed.aliases,
+	r = reportFromAliases(ctx, parsed.id, parsed.modulePath, parsed.aliases,
 		pc, gc, ac)
-	if err != nil {
-		return nil, err
-	}
 
 	if parsed.excluded != "" {
 		r = &report.Report{
@@ -200,20 +196,46 @@
 	return r, nil
 }
 
+func fetch(ctx context.Context, alias string, gc *ghsa.Client) report.Source {
+	var f report.Fetcher
+	switch {
+	case ghsa.IsGHSA(alias):
+		if *graphQL {
+			f = report.LegacyGHSAFetcher(gc)
+		} else {
+			f = genericosv.NewFetcher()
+		}
+	case cveschema5.IsCVE(alias):
+		f = report.CVE5Fetcher()
+	default:
+		log.Warnf("alias %s is not supported, creating basic report", alias)
+		return report.Original()
+	}
+
+	src, err := f.Fetch(ctx, alias)
+	if err != nil {
+		log.Warnf("could not fetch %s (err = %q), creating basic report", alias, err)
+		return report.Original()
+	}
+
+	return src
+}
+
 func reportFromAliases(ctx context.Context, id, modulePath string, aliases []string,
-	pc *proxy.Client, gc *ghsa.Client, ac *genai.GeminiClient) (r *report.Report, err error) {
+	pc *proxy.Client, gc *ghsa.Client, ac *genai.GeminiClient) *report.Report {
+
+	var src report.Source
 	aliases = allAliases(ctx, aliases, gc)
 	if alias, ok := pickBestAlias(aliases, *preferCVE); ok {
 		log.Infof("creating report %s based on %s (picked from [%s])", id, alias, strings.Join(aliases, ", "))
-		r, err = reportFromAlias(ctx, id, modulePath, alias, pc, gc)
-		if err != nil {
-			return nil, err
-		}
+		src = fetch(ctx, alias, gc)
 	} else {
-		log.Infof("no alias found, creating basic report for %s", id)
-		r = basicReport(id, modulePath)
+		log.Info("no alias found, creating basic report")
+		src = report.Original()
 	}
 
+	r := report.New(src, id, modulePath, pc)
+
 	// Ensure all source aliases are added to the report.
 	r.AddAliases(aliases)
 
@@ -243,12 +265,12 @@
 		}
 	}
 
-	if r.Source != nil {
+	if r.SourceMeta != nil {
 		now := time.Now()
-		r.Source.Created = &now
+		r.SourceMeta.Created = &now
 	}
 
-	return r, nil
+	return r
 }
 
 func parseGithubIssue(iss *issues.Issue, pc *proxy.Client, allowClosed bool) (*parsedIssue, error) {
@@ -302,54 +324,6 @@
 	excluded   report.ExcludedReason
 }
 
-// reportFromBestAlias returns a new report created from the "best" alias in the list.
-// For now, it prefers the first GHSA in the list, followed by the first CVE in the list
-// (if no GHSA is present). If no GHSAs or CVEs are present, it returns a new empty Report.
-func reportFromAlias(ctx context.Context, id, modulePath, alias string, pc *proxy.Client, gc *ghsa.Client) (*report.Report, error) {
-	switch {
-	case ghsa.IsGHSA(alias) && *graphQL:
-		ghsa, err := gc.FetchGHSA(ctx, alias)
-		if err != nil {
-			return nil, err
-		}
-		r := report.GHSAToReport(ghsa, modulePath, pc)
-		r.ID = id
-		return r, nil
-	case ghsa.IsGHSA(alias):
-		ghsa, err := genericosv.Fetch(alias)
-		if err != nil {
-			return nil, err
-		}
-		return ghsa.ToReport(id, pc), nil
-	case cveschema5.IsCVE(alias):
-		cve, err := cveclient.Fetch(alias)
-		if err != nil {
-			// If a CVE is not found, it is most likely a CVE we reserved but haven't
-			// published yet.
-			log.Infof("no published record found for %s, creating basic report", alias)
-			return basicReport(id, modulePath), nil
-		}
-		return report.CVE5ToReport(cve, id, modulePath, pc), nil
-	}
-
-	log.Infof("alias %s is not a CVE or GHSA, creating basic report", alias)
-	return basicReport(id, modulePath), nil
-}
-
-func basicReport(id, modulePath string) *report.Report {
-	return &report.Report{
-		ID: id,
-		Modules: []*report.Module{
-			{
-				Module: modulePath,
-			},
-		},
-		Source: &report.Source{
-			ID: report.SourceGoTeam,
-		},
-	}
-}
-
 const todo = "TODO: "
 
 // addTODOs adds "TODO" comments to unfilled fields of r.
diff --git a/cmd/vulnreport/unexclude.go b/cmd/vulnreport/unexclude.go
index cfb9f0b..185fc33 100644
--- a/cmd/vulnreport/unexclude.go
+++ b/cmd/vulnreport/unexclude.go
@@ -81,10 +81,7 @@
 	if len(r.Modules) > 0 {
 		modulePath = r.Modules[0].Module
 	}
-	newR, err := reportFromAliases(ctx, id, modulePath, aliases, u.pc, u.gc, u.ac)
-	if err != nil {
-		return err
-	}
+	newR := reportFromAliases(ctx, id, modulePath, aliases, u.pc, u.gc, u.ac)
 
 	// Remove description because this is a "basic" report.
 	newR.Description = ""
diff --git a/internal/genericosv/fetch.go b/internal/genericosv/fetch.go
index e445490..9e0703b 100644
--- a/internal/genericosv/fetch.go
+++ b/internal/genericosv/fetch.go
@@ -7,12 +7,14 @@
 package genericosv
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
 
 	"github.com/google/osv-scanner/pkg/models"
+	"golang.org/x/vulndb/internal/report"
 )
 
 // Entry is a a generic OSV entry, not specialized for Go.
@@ -20,10 +22,13 @@
 
 var EcosystemGo = models.EcosystemGo
 
+func NewFetcher() report.Fetcher {
+	return &client{http.DefaultClient, "https://api.osv.dev/v1"}
+}
+
 // Fetch returns the OSV entry from the osv.dev API for the
 // given ID.
-func Fetch(id string) (*Entry, error) {
-	c := &client{http.DefaultClient, "https://api.osv.dev/v1"}
+func (c *client) Fetch(_ context.Context, id string) (report.Source, error) {
 	return c.fetch(id)
 }
 
diff --git a/internal/genericosv/report.go b/internal/genericosv/report.go
index ba2307c..4bea906 100644
--- a/internal/genericosv/report.go
+++ b/internal/genericosv/report.go
@@ -22,13 +22,15 @@
 	"golang.org/x/vulndb/internal/version"
 )
 
+var _ report.Source = &Entry{}
+
 // ToReport converts OSV into a Go Report with the given ID.
-func (osv *Entry) ToReport(goID string, pc *proxy.Client) *report.Report {
+func (osv *Entry) ToReport(goID string, _ string, pc *proxy.Client) *report.Report {
 	r := &report.Report{
 		ID:          goID,
 		Summary:     report.Summary(osv.Summary),
 		Description: report.Description(osv.Details),
-		Source: &report.Source{
+		SourceMeta: &report.SourceMeta{
 			ID: osv.ID,
 		},
 	}
@@ -60,6 +62,10 @@
 	return r
 }
 
+func (osv *Entry) SourceID() string {
+	return osv.ID
+}
+
 func affectedToModules(as []osvschema.Affected, pc *proxy.Client) []*report.Module {
 	var modules []*report.Module
 	for _, a := range as {
diff --git a/internal/genericosv/report_test.go b/internal/genericosv/report_test.go
index 13122f4..864c67f 100644
--- a/internal/genericosv/report_test.go
+++ b/internal/genericosv/report_test.go
@@ -54,7 +54,8 @@
 				t.Fatal(err)
 			}
 
-			got := osv.ToReport("GO-TEST-ID", pc)
+			modulePath := "" // ignored by ToReport
+			got := osv.ToReport("GO-TEST-ID", modulePath, pc)
 			// Keep record of what lints would apply to each generated report.
 			got.LintAsNotes(pc)
 
diff --git a/internal/report/cve.go b/internal/report/cve.go
index 6f2050e..b7c37cb 100644
--- a/internal/report/cve.go
+++ b/internal/report/cve.go
@@ -82,7 +82,7 @@
 		Description: description,
 		Credits:     credits,
 		References:  refs,
-		Source: &Source{
+		SourceMeta: &SourceMeta{
 			ID: c.Metadata.ID,
 		},
 	}
@@ -145,7 +145,7 @@
 		Description: description,
 		Credits:     credits,
 		References:  refs,
-		Source: &Source{
+		SourceMeta: &SourceMeta{
 			ID: c.Metadata.ID,
 		},
 	}
diff --git a/internal/report/ghsa.go b/internal/report/ghsa.go
index 39d2068..c0d4099 100644
--- a/internal/report/ghsa.go
+++ b/internal/report/ghsa.go
@@ -17,7 +17,7 @@
 	r := &Report{
 		Summary:     Summary(sa.Summary),
 		Description: Description(sa.Description),
-		Source: &Source{
+		SourceMeta: &SourceMeta{
 			ID: sa.ID,
 		},
 	}
diff --git a/internal/report/ghsa_test.go b/internal/report/ghsa_test.go
index 03ef292..bcbf197 100644
--- a/internal/report/ghsa_test.go
+++ b/internal/report/ghsa_test.go
@@ -58,7 +58,7 @@
 				GHSAs:       []string{"G1"},
 				CVEs:        []string{"C1"},
 				References:  []*Reference{{Type: "REPORT", URL: "https://github.com/permalink/to/issue/12345"}},
-				Source:      &Source{ID: "G1_blah"},
+				SourceMeta:  &SourceMeta{ID: "G1_blah"},
 			},
 		},
 		{
@@ -79,7 +79,7 @@
 				GHSAs:       []string{"G1"},
 				CVEs:        []string{"C1"},
 				References:  []*Reference{{Type: "REPORT", URL: "https://github.com/permalink/to/issue/12345"}},
-				Source:      &Source{ID: "G1_blah"},
+				SourceMeta:  &SourceMeta{ID: "G1_blah"},
 			},
 		},
 	} {
diff --git a/internal/report/new.go b/internal/report/new.go
new file mode 100644
index 0000000..e8fa425
--- /dev/null
+++ b/internal/report/new.go
@@ -0,0 +1,148 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package report
+
+import (
+	"context"
+
+	"golang.org/x/vulndb/internal/cveclient"
+	"golang.org/x/vulndb/internal/cveschema"
+	"golang.org/x/vulndb/internal/cveschema5"
+	"golang.org/x/vulndb/internal/ghsa"
+	"golang.org/x/vulndb/internal/proxy"
+)
+
+// Source represents a vulnerability format (e.g., GHSA, CVE)
+// that can be converted to our Report format.
+type Source interface {
+	// SourceID returns the ID of the source.
+	// For example, the GHSA or CVE id.
+	SourceID() string
+	ToReport(goID string, modulePath string, pc *proxy.Client) *Report
+}
+
+func New(src Source, goID string, modulePath string, pc *proxy.Client) *Report {
+	return src.ToReport(goID, modulePath, pc)
+}
+
+type Fetcher interface {
+	Fetch(ctx context.Context, id string) (Source, error)
+}
+
+type cve5 struct {
+	*cveschema5.CVERecord
+}
+
+var _ Source = &cve5{}
+
+func (c *cve5) ToReport(goID, modulePath string, pc *proxy.Client) *Report {
+	return CVE5ToReport(c.CVERecord, goID, modulePath, pc)
+}
+
+func (c *cve5) SourceID() string {
+	return c.Metadata.ID
+}
+
+type cve5Fetcher struct{}
+
+func CVE5Fetcher() Fetcher {
+	return &cve5Fetcher{}
+}
+
+func (_ *cve5Fetcher) Fetch(ctx context.Context, id string) (Source, error) {
+	cve, err := cveclient.Fetch(id)
+	if err != nil {
+		return nil, err
+	}
+	return &cve5{CVERecord: cve}, nil
+}
+
+// cve4 is a wrapper for a CVE in CVE JSON 4.0 (legacy) format.
+//
+// Note: Fetch is not implemented for CVE4, as it is a legacy format
+// which will be phased out soon.
+type cve4 cveschema.CVE
+
+var _ Source = &cve4{}
+
+func (c *cve4) ToReport(goID, modulePath string, pc *proxy.Client) *Report {
+	cve := cveschema.CVE(*c)
+	return CVEToReport(&cve, goID, modulePath, pc)
+}
+
+func (c *cve4) SourceID() string {
+	return c.ID
+}
+
+// legacyGHSA is a wrapper for a GHSA in the format retrievable
+// via the Github GraphQL API.
+//
+// We are planning to phase this out in favor of the Github OSV format,
+// but some of our processes still rely on this format.
+type legacyGHSA struct {
+	*ghsa.SecurityAdvisory
+}
+
+var _ Source = &legacyGHSA{}
+
+func (g *legacyGHSA) ToReport(goID, modulePath string, pc *proxy.Client) *Report {
+	r := GHSAToReport(g.SecurityAdvisory, modulePath, pc)
+	r.ID = goID
+	return r
+}
+
+func (g *legacyGHSA) SourceID() string {
+	return g.ID
+}
+
+type legacyGHSAFetcher struct {
+	*ghsa.Client
+}
+
+func LegacyGHSAFetcher(c *ghsa.Client) Fetcher {
+	return &legacyGHSAFetcher{
+		Client: c,
+	}
+}
+
+func (g *legacyGHSAFetcher) Fetch(ctx context.Context, id string) (Source, error) {
+	fetched, err := g.FetchGHSA(ctx, id)
+	if err != nil {
+		return nil, err
+	}
+	return &legacyGHSA{
+		SecurityAdvisory: fetched,
+	}, err
+}
+
+// original represents an original report created from scratch by the Go Security Team.
+//
+// This is used for standard library & toolchain reports, or in cases where the
+// source report cannot be retrieved automatically.
+type original struct{}
+
+var _ Source = &original{}
+
+func Original() Source {
+	return &original{}
+}
+
+func (original) ToReport(goID, modulePath string, _ *proxy.Client) *Report {
+	return &Report{
+		ID: goID,
+		Modules: []*Module{
+			{
+				Module: modulePath,
+			},
+		},
+		SourceMeta: &SourceMeta{
+			ID: sourceGoTeam,
+		},
+	}
+}
+
+func (original) SourceID() string {
+	return sourceGoTeam
+}
diff --git a/internal/report/report.go b/internal/report/report.go
index d553e4f..3e99fc4 100644
--- a/internal/report/report.go
+++ b/internal/report/report.go
@@ -241,12 +241,12 @@
 
 	// Metadata about how this report was generated.
 	// Not published to OSV.
-	Source *Source `yaml:",omitempty"`
+	SourceMeta *SourceMeta `yaml:"source,omitempty"`
 }
 
-const SourceGoTeam = "go-security-team"
+const sourceGoTeam = "go-security-team"
 
-type Source struct {
+type SourceMeta struct {
 	// The ID (GHSA or CVE) of the original source of this report.
 	// If created by a human, this is "go-security-team".
 	ID string `yaml:",omitempty"`