internal/worker: check against vuln DB

Don't mark a CVE as needing an issue if it is already
in the Go vuln DB.

Change-Id: I1857de4d9aa289f445c5e35f818ed74504720f4a
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/368857
Trust: Jonathan Amsterdam <jba@google.com>
Run-TryBot: Jonathan Amsterdam <jba@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Julie Qiu <julie@golang.org>
diff --git a/internal/worker/testdata/basic.txtar b/internal/worker/testdata/basic.txtar
index ea782fa..5832def 100644
--- a/internal/worker/testdata/basic.txtar
+++ b/internal/worker/testdata/basic.txtar
@@ -55,6 +55,11 @@
                 "refsource": "MISC",
                 "name": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html",
                 "url": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html"
+            },
+            {
+                "refsource": "*added for testing*",
+                "name": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html",
+                "url": "https://golang.org/x/mod"
             }
         ]
     },
@@ -96,6 +101,15 @@
         "ASSIGNER": "cve@mitre.org",
         "STATE": "REJECT"
     },
+    "references": {
+        "reference_data": [
+            {
+                "refsource": "*added for testing*",
+                "name": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html",
+                "url": "https://golang.org/x/sync"
+            }
+        ]
+    },
     "description": {
         "description_data": [
             {
@@ -105,4 +119,72 @@
         ]
     }
 }
+-- 2020/9xxx/CVE-2020-9283.json --
+{
+    "CVE_data_meta": {
+        "ASSIGNER": "cve@mitre.org",
+        "ID": "CVE-2020-9283",
+        "STATE": "PUBLIC"
+    },
+    "affects": {
+        "vendor": {
+            "vendor_data": [
+                {
+                    "product": {
+                        "product_data": [
+                            {
+                                "product_name": "n/a",
+                                "version": {
+                                    "version_data": [
+                                        {
+                                            "version_value": "n/a"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "vendor_name": "n/a"
+                }
+            ]
+        }
+    },
+    "data_format": "MITRE",
+    "data_type": "CVE",
+    "data_version": "4.0",
+    "description": {
+        "description_data": [
+            {
+                "lang": "eng",
+                "value": "golang.org/x/crypto before v0.0.0-20200220183623-bac4c82f6975 for Go allows a panic during signature verification in the golang.org/x/crypto/ssh package. A client can attack an SSH server that accepts public keys. Also, a server can attack any SSH client."
+            }
+        ]
+    },
+    "problemtype": {
+        "problemtype_data": [
+            {
+                "description": [
+                    {
+                        "lang": "eng",
+                        "value": "n/a"
+                    }
+                ]
+            }
+        ]
+    },
+    "references": {
+        "reference_data": [
+            {
+                "refsource": "CONFIRM",
+                "name": "https://groups.google.com/forum/#!topic/golang-announce/3L45YRc91SY",
+                "url": "https://groups.google.com/forum/#!topic/golang-announce/3L45YRc91SY"
+            },
+            {
+                "refsource": "*added for testing*",
+                "name": "https://groups.google.com/forum/#!topic/golang-announce/3L45YRc91SY",
+                "url": "https://golang.org/x/crypto"
+            }
+        ]
+    }
+}
 
diff --git a/internal/worker/triage_test.go b/internal/worker/triage_test.go
index 8e3e032..3f4d7b4 100644
--- a/internal/worker/triage_test.go
+++ b/internal/worker/triage_test.go
@@ -22,20 +22,7 @@
 
 	const validModule = "golang.org/x/mod"
 
-	var url string
-
-	if *usePkgsite {
-		url = "https://pkg.go.dev"
-	} else {
-		s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-			modulePath := strings.TrimPrefix(r.URL.Path, "/mod/")
-			if modulePath != validModule {
-				http.Error(w, "unknown", http.StatusNotFound)
-			}
-		}))
-		defer s.Close()
-		url = s.URL
-	}
+	url := pkgsiteURL(t)
 
 	for _, test := range []struct {
 		in   string
@@ -55,3 +42,20 @@
 		})
 	}
 }
+
+// pkgsiteURL returns a URL to either a fake server or the real pkg.go.dev,
+// depending on the usePkgsite flag.
+func pkgsiteURL(t *testing.T) string {
+	if *usePkgsite {
+		return "https://pkg.go.dev"
+	}
+	// Start a test server that recognizes anything from golang.org.
+	s := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		modulePath := strings.TrimPrefix(r.URL.Path, "/mod/")
+		if !strings.HasPrefix(modulePath, "golang.org/") {
+			http.Error(w, "unknown", http.StatusNotFound)
+		}
+	}))
+	t.Cleanup(s.Close)
+	return s.URL
+}
diff --git a/internal/worker/update.go b/internal/worker/update.go
index e2d87db..07e8640 100644
--- a/internal/worker/update.go
+++ b/internal/worker/update.go
@@ -33,7 +33,7 @@
 // of the DB and updates the DB to match.
 //
 // needsIssue determines whether a CVE needs an issue to be filed for it.
-func doUpdate(ctx context.Context, repo *git.Repository, commitHash plumbing.Hash, st store.Store, needsIssue triageFunc) (ur *store.CommitUpdateRecord, err error) {
+func doUpdate(ctx context.Context, repo *git.Repository, commitHash plumbing.Hash, st store.Store, knownVulnIDs []string, needsIssue triageFunc) (ur *store.CommitUpdateRecord, err error) {
 	// We want the action of reading the old DB record, updating it and
 	// writing it back to be atomic. It would be too expensive to do that one
 	// record at a time. Ideally we'd process the whole repo commit in one
@@ -76,6 +76,12 @@
 		return nil, err
 	}
 
+	// Put the known vuln CVE IDs into a map.
+	known := map[string]bool{}
+	for _, k := range knownVulnIDs {
+		known[k] = true
+	}
+
 	// Create a new CommitUpdateRecord to describe this run of doUpdate.
 	ur = &store.CommitUpdateRecord{
 		StartedAt:  time.Now(),
@@ -88,7 +94,7 @@
 	}
 
 	for _, dirFiles := range filesByDir {
-		numProc, numAdds, numMods, err := updateDirectory(ctx, dirFiles, st, repo, commitHash, needsIssue)
+		numProc, numAdds, numMods, err := updateDirectory(ctx, dirFiles, st, repo, commitHash, known, needsIssue)
 		// Change the CommitUpdateRecord in the Store to reflect the results of the directory update.
 		if err != nil {
 			ur.Error = err.Error()
@@ -107,7 +113,7 @@
 	return ur, st.SetCommitUpdateRecord(ctx, ur)
 }
 
-func updateDirectory(ctx context.Context, dirFiles []repoFile, st store.Store, repo *git.Repository, commitHash plumbing.Hash, needsIssue triageFunc) (numProc, numAdds, numMods int, err error) {
+func updateDirectory(ctx context.Context, dirFiles []repoFile, st store.Store, repo *git.Repository, commitHash plumbing.Hash, knownIDs map[string]bool, needsIssue triageFunc) (numProc, numAdds, numMods int, err error) {
 	dirPath := dirFiles[0].dirPath
 	dirHash := dirFiles[0].treeHash.String()
 
@@ -140,7 +146,7 @@
 		if j > len(dirFiles) {
 			j = len(dirFiles)
 		}
-		numBatchAdds, numBatchMods, err := updateBatch(ctx, dirFiles[i:j], st, repo, commitHash, needsIssue)
+		numBatchAdds, numBatchMods, err := updateBatch(ctx, dirFiles[i:j], st, repo, commitHash, knownIDs, needsIssue)
 		if err != nil {
 			return 0, 0, 0, err
 		}
@@ -158,7 +164,7 @@
 	return numProc, numAdds, numMods, nil
 }
 
-func updateBatch(ctx context.Context, batch []repoFile, st store.Store, repo *git.Repository, commitHash plumbing.Hash, needsIssue triageFunc) (numAdds, numMods int, err error) {
+func updateBatch(ctx context.Context, batch []repoFile, st store.Store, repo *git.Repository, commitHash plumbing.Hash, knownIDs map[string]bool, needsIssue triageFunc) (numAdds, numMods int, err error) {
 	startID := idFromFilename(batch[0].filename)
 	endID := idFromFilename(batch[len(batch)-1].filename)
 	defer derrors.Wrap(&err, "updateBatch(%s-%s)", startID, endID)
@@ -186,7 +192,7 @@
 				// No change; do nothing.
 				continue
 			}
-			added, err := handleCVE(repo, f, old, commitHash, needsIssue, tx)
+			added, err := handleCVE(repo, f, old, commitHash, knownIDs, needsIssue, tx)
 			if err != nil {
 				return err
 			}
@@ -212,7 +218,7 @@
 // handleCVE determines how to change the store for a single CVE.
 // The CVE will definitely be either added, if it's new, or modified, if it's
 // already in the DB.
-func handleCVE(repo *git.Repository, f repoFile, old *store.CVERecord, commitHash plumbing.Hash, needsIssue triageFunc, tx store.Transaction) (added bool, err error) {
+func handleCVE(repo *git.Repository, f repoFile, old *store.CVERecord, commitHash plumbing.Hash, knownIDs map[string]bool, needsIssue triageFunc, tx store.Transaction) (added bool, err error) {
 	defer derrors.Wrap(&err, "handleCVE(%s)", f.filename)
 
 	// Read CVE from repo.
@@ -226,7 +232,7 @@
 		return false, err
 	}
 	needs := false
-	if cve.State == cveschema.StatePublic {
+	if cve.State == cveschema.StatePublic && !knownIDs[cve.ID] {
 		needs, err = needsIssue(cve)
 		if err != nil {
 			return false, err
diff --git a/internal/worker/update_test.go b/internal/worker/update_test.go
index 5e7f517..b320e82 100644
--- a/internal/worker/update_test.go
+++ b/internal/worker/update_test.go
@@ -7,7 +7,6 @@
 import (
 	"context"
 	"encoding/json"
-	"strings"
 	"testing"
 	"time"
 
@@ -35,6 +34,7 @@
 	}
 
 	want := []repoFile{
+		{dirPath: "2020/9xxx", filename: "CVE-2020-9283.json", year: 2020, number: 9283},
 		{dirPath: "2021/0xxx", filename: "CVE-2021-0001.json", year: 2021, number: 1},
 		{dirPath: "2021/0xxx", filename: "CVE-2021-0010.json", year: 2021, number: 10},
 		{dirPath: "2021/1xxx", filename: "CVE-2021-1384.json", year: 2021, number: 1384},
@@ -56,8 +56,10 @@
 	if err != nil {
 		t.Fatal(err)
 	}
+
+	purl := pkgsiteURL(t)
 	needsIssue := func(cve *cveschema.CVE) (bool, error) {
-		return strings.HasSuffix(cve.ID, "0001"), nil
+		return TriageCVE(ctx, cve, purl)
 	}
 
 	ref, err := repo.Reference(plumbing.HEAD, true)
@@ -66,43 +68,41 @@
 	}
 
 	commitHash := ref.Hash().String()
-	const (
-		path1 = "2021/0xxx/CVE-2021-0001.json"
-		path2 = "2021/0xxx/CVE-2021-0010.json"
-		path3 = "2021/1xxx/CVE-2021-1384.json"
-	)
-	cve1, bh1 := readCVE(t, repo, path1)
-	cve2, bh2 := readCVE(t, repo, path2)
-	cve3, bh3 := readCVE(t, repo, path3)
+	knownVulns := []string{"CVE-2020-9283"}
 
-	// CVERecords after the above CVEs are added to an empty DB.
-	rs := []*store.CVERecord{
-		{
-			ID:          cve1.ID,
-			CVEState:    cve1.State,
-			Path:        path1,
-			BlobHash:    bh1,
-			CommitHash:  commitHash,
-			TriageState: store.TriageStateNeedsIssue, // a public CVE, needsIssue returns true
-		},
-		{
-			ID:          cve2.ID,
-			CVEState:    cve2.State,
-			Path:        path2,
-			BlobHash:    bh2,
-			CommitHash:  commitHash,
-			TriageState: store.TriageStateNoActionNeeded, // state is reserved
-		},
-		{
-			ID:          cve3.ID,
-			CVEState:    cve3.State,
-			Path:        path3,
-			BlobHash:    bh3,
-			CommitHash:  commitHash,
-			TriageState: store.TriageStateNoActionNeeded, // state is rejected
-		},
+	paths := []string{
+		"2021/0xxx/CVE-2021-0001.json",
+		"2021/0xxx/CVE-2021-0010.json",
+		"2021/1xxx/CVE-2021-1384.json",
+		"2020/9xxx/CVE-2020-9283.json",
 	}
 
+	var (
+		cves       []*cveschema.CVE
+		blobHashes []string
+	)
+	for _, p := range paths {
+		cve, bh := readCVE(t, repo, p)
+		cves = append(cves, cve)
+		blobHashes = append(blobHashes, bh)
+	}
+	// CVERecords after the above CVEs are added to an empty DB.
+	var rs []*store.CVERecord
+	for i := 0; i < len(cves); i++ {
+		r := &store.CVERecord{
+			ID:         cves[i].ID,
+			CVEState:   cves[i].State,
+			Path:       paths[i],
+			BlobHash:   blobHashes[i],
+			CommitHash: commitHash,
+		}
+		rs = append(rs, r)
+	}
+	rs[0].TriageState = store.TriageStateNeedsIssue     // a public CVE, has a golang.org path
+	rs[1].TriageState = store.TriageStateNoActionNeeded // state is reserved
+	rs[2].TriageState = store.TriageStateNoActionNeeded // state is rejected
+	rs[3].TriageState = store.TriageStateNoActionNeeded // CVE is in the Go vuln DB
+
 	// withTriageState returns a copy of r with the TriageState field changed to ts.
 	withTriageState := func(r *store.CVERecord, ts store.TriageState) *store.CVERecord {
 		c := *r
@@ -160,13 +160,14 @@
 					return &c
 				}(),
 				rs[2],
+				rs[3],
 			},
 		},
 	} {
 		t.Run(test.name, func(t *testing.T) {
 			mstore := store.NewMemStore()
 			createCVERecords(t, mstore, test.cur)
-			if _, err := doUpdate(ctx, repo, h, mstore, needsIssue); err != nil {
+			if _, err := doUpdate(ctx, repo, h, mstore, knownVulns, needsIssue); err != nil {
 				t.Fatal(err)
 			}
 			got := mstore.CVERecords()
@@ -243,7 +244,7 @@
 	c := headCommit(t, repo)
 	file, err := c.File(path)
 	if err != nil {
-		t.Fatal(err)
+		t.Fatalf("%s: %v", path, err)
 	}
 	var cve cveschema.CVE
 	r, err := file.Reader()
@@ -251,7 +252,7 @@
 		t.Fatal(err)
 	}
 	if err := json.NewDecoder(r).Decode(&cve); err != nil {
-		t.Fatal(err)
+		t.Fatalf("%s: %v", path, err)
 	}
 	return &cve, file.Hash.String()
 }
diff --git a/internal/worker/worker.go b/internal/worker/worker.go
index 6042787..ea72716 100644
--- a/internal/worker/worker.go
+++ b/internal/worker/worker.go
@@ -10,10 +10,13 @@
 import (
 	"context"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing"
+	"golang.org/x/sync/errgroup"
+	vulnc "golang.org/x/vuln/client"
 	"golang.org/x/vuln/internal/cveschema"
 	"golang.org/x/vuln/internal/derrors"
 	"golang.org/x/vuln/internal/gitrepo"
@@ -35,7 +38,11 @@
 			return err
 		}
 	}
-	_, err = doUpdate(ctx, repo, ch, st, func(cve *cveschema.CVE) (bool, error) {
+	knownVulnIDs, err := readVulnDB(ctx)
+	if err != nil {
+		return err
+	}
+	_, err = doUpdate(ctx, repo, ch, st, knownVulnIDs, func(cve *cveschema.CVE) (bool, error) {
 		return TriageCVE(ctx, cve, pkgsiteURL)
 	})
 	return err
@@ -87,3 +94,46 @@
 func (c *CheckUpdateError) Error() string {
 	return c.msg
 }
+
+const vulnDBURL = "https://storage.googleapis.com/go-vulndb"
+
+// readVulnDB returns a list of all CVE IDs in the Go vuln DB.
+func readVulnDB(ctx context.Context) ([]string, error) {
+	const concurrency = 4
+
+	client, err := vulnc.NewClient([]string{vulnDBURL}, vulnc.Options{})
+	if err != nil {
+		return nil, err
+	}
+
+	goIDs, err := client.ListIDs(ctx)
+	if err != nil {
+		return nil, err
+	}
+	var (
+		mu     sync.Mutex
+		cveIDs []string
+	)
+	sem := make(chan struct{}, concurrency)
+	g, ctx := errgroup.WithContext(ctx)
+	for _, id := range goIDs {
+		id := id
+		sem <- struct{}{}
+		g.Go(func() error {
+			defer func() { <-sem }()
+			e, err := client.GetByID(ctx, id)
+			if err != nil {
+				return err
+			}
+			// Assume all the aliases are CVE IDs.
+			mu.Lock()
+			cveIDs = append(cveIDs, e.Aliases...)
+			mu.Unlock()
+			return nil
+		})
+	}
+	if err := g.Wait(); err != nil {
+		return nil, err
+	}
+	return cveIDs, nil
+}