internal/cvelistrepo: use real CVE data for cvelistrepo tests

Creates the infrastructure to pull real, up-to-date data from the
CVEProject/cvelist repository for use in tests.

Updates the existing tests to rely on this data instead of the
(very) stale data previously used.

For golang/go#49289

Change-Id: I31d61d932875e628e8c439cc0ef1dee5d1ccf92c
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/545298
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/cvelistrepo/cvelistrepo.go b/internal/cvelistrepo/cvelistrepo.go
index fa979a3..0530fa8 100644
--- a/internal/cvelistrepo/cvelistrepo.go
+++ b/internal/cvelistrepo/cvelistrepo.go
@@ -152,3 +152,12 @@
 	}
 	return cve, nil
 }
+
+// ReadAll returns the full contents of the blob identified by f.BlobHash in repo.
+func (f *File) ReadAll(repo *git.Repository) ([]byte, error) {
+	r, err := blobReader(repo, f.BlobHash)
+	if err != nil {
+		return nil, err
+	}
+	return io.ReadAll(r)
+}
diff --git a/internal/cvelistrepo/cvelistrepo_test.go b/internal/cvelistrepo/cvelistrepo_test.go
index b479918..e9e7bb7 100644
--- a/internal/cvelistrepo/cvelistrepo_test.go
+++ b/internal/cvelistrepo/cvelistrepo_test.go
@@ -5,22 +5,116 @@
 package cvelistrepo
 
 import (
+	"context"
+	"flag"
+	"fmt"
+	"os"
+	"path"
 	"testing"
 	"time"
 
 	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing"
 	"github.com/go-git/go-git/v5/plumbing/object"
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
+	"golang.org/x/tools/txtar"
+	"golang.org/x/vulndb/internal/cveschema5"
 	"golang.org/x/vulndb/internal/gitrepo"
+	"golang.org/x/vulndb/internal/test"
 )
 
-func TestRepoCVEFiles(t *testing.T) {
-	repo, err := gitrepo.ReadTxtarRepo("testdata/basic.txtar", time.Now())
+var update = flag.Bool("update", false, "update the .txtar files with real CVE data (this takes a while)")
+
+var (
+	v4txtar = "testdata/v4.txtar"
+	cveIDs  = []string{
+		"CVE-2021-0001",
+		"CVE-2021-0010",
+		"CVE-2021-1384",
+		"CVE-2020-9283",
+		"CVE-2022-39213",
+	}
+)
+
+func TestMain(m *testing.M) {
+	flag.Parse()
+	if *update {
+		// Refresh v4txtar from URL at HEAD before any test reads it.
+		if err := updateTxtar(context.Background(), v4txtar, URL, plumbing.HEAD, cveIDs); err != nil {
+			fail(err)
+		}
+	}
+	os.Exit(m.Run())
+}
+
+func fail(err error) {
+	fmt.Fprintf(os.Stderr, "%v\n", err)
+	os.Exit(1)
+}
+
+// updateTxtar regenerates txtarFile from the repo that gitrepo.CloneAt returns
+// for url and ref: a placeholder README.md, then the record file of each of
+// cveIDs in order. It fails on duplicate record files or a requested ID with none.
+func updateTxtar(ctx context.Context, txtarFile, url string, ref plumbing.ReferenceName, cveIDs []string) error {
+	repo, err := gitrepo.CloneAt(ctx, url, ref)
+	if err != nil {
+		return fmt.Errorf("cloning %q: %w", url, err)
+	}
+	commit, err := headCommit(repo)
+	if err != nil {
+		return err
+	}
+	files, err := Files(repo, commit)
+	if err != nil {
+		return err
+	}
+	// Index the files by the ID that cveschema5.FindCVE extracts from the name.
+	idToFile := make(map[string]*File)
+	for _, f := range files {
+		f := f
+		id := cveschema5.FindCVE(f.Filename)
+		if id != "" {
+			if _, ok := idToFile[id]; ok {
+				return fmt.Errorf("found duplicate record files for %s", id)
+			}
+			idToFile[id] = &f
+		}
+	}
+	// Reserve room for the README entry plus one file per CVE.
+	arFiles := make([]txtar.File, 0, len(cveIDs)+1)
+	arFiles = append(arFiles, txtar.File{
+		Name: "README.md",
+		Data: []byte("ignore me please\n\n"),
+	})
+
+	for _, cveID := range cveIDs {
+		f, ok := idToFile[cveID]
+		if !ok {
+			return fmt.Errorf("could not update %s based on %q: no file for %s found", txtarFile, url, cveID)
+		}
+		b, err := f.ReadAll(repo)
+		if err != nil {
+			return fmt.Errorf("reading %s: %w", cveID, err)
+		}
+
+		arFiles = append(arFiles, txtar.File{
+			Name: path.Join(f.DirPath, f.Filename),
+			Data: b,
+		})
+	}
+	// Use UTC so the day-truncated stamp reads as midnight, not a zone offset.
+	return test.WriteTxtar(txtarFile, arFiles,
+		fmt.Sprintf("Repo in the shape of %q.\nUpdated with real data %s.\nAuto-generated; do not edit directly.",
+			url, time.Now().UTC().Truncate(24*time.Hour).Format(time.RFC3339)))
+}
+
+func TestFiles(t *testing.T) {
+	repo, err := gitrepo.ReadTxtarRepo(v4txtar, time.Now())
 	if err != nil {
 		t.Fatal(err)
 	}
-	commit := headCommit(t, repo)
+	commit, err := headCommit(repo)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -45,14 +139,14 @@
 }
 
 // headCommit returns the commit at the repo HEAD.
-func headCommit(t *testing.T, repo *git.Repository) *object.Commit {
+func headCommit(repo *git.Repository) (*object.Commit, error) {
 	h, err := gitrepo.HeadHash(repo)
 	if err != nil {
-		t.Fatal(err)
+		return nil, err
 	}
 	commit, err := repo.CommitObject(h)
 	if err != nil {
-		t.Fatal(err)
+		return nil, err
 	}
-	return commit
+	return commit, nil
 }
diff --git a/internal/cvelistrepo/testdata/NOTICE b/internal/cvelistrepo/testdata/NOTICE
new file mode 100644
index 0000000..4d9daed
--- /dev/null
+++ b/internal/cvelistrepo/testdata/NOTICE
@@ -0,0 +1,6 @@
+# NOTICE
+
+The `testdata` folder contains unmodified data from
+[CVE List](https://github.com/CVEProject/cvelist),
+and [CVE List V5](https://github.com/CVEProject/cvelistV5),
+licensed under [CVE license](https://www.cve.org/Legal/TermsOfUse).
diff --git a/internal/cvelistrepo/testdata/basic.txtar b/internal/cvelistrepo/testdata/v4.txtar
similarity index 65%
rename from internal/cvelistrepo/testdata/basic.txtar
rename to internal/cvelistrepo/testdata/v4.txtar
index 27e4439..a97c237 100644
--- a/internal/cvelistrepo/testdata/basic.txtar
+++ b/internal/cvelistrepo/testdata/v4.txtar
@@ -1,5 +1,10 @@
-Repo in the shape of github.com/CVEProject/cvelist, with
-some actual data.
+Copyright 2023 The Go Authors. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+
+Repo in the shape of "https://github.com/CVEProject/cvelist".
+Updated with real data 2023-11-20T19:00:00-05:00.
+Auto-generated; do not edit directly.
 
 -- README.md --
 ignore me please
@@ -55,11 +60,6 @@
                 "refsource": "MISC",
                 "name": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html",
                 "url": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html"
-            },
-            {
-                "refsource": "*added for testing*",
-                "name": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html",
-                "url": "https://golang.org/x/mod"
             }
         ]
     },
@@ -80,43 +80,107 @@
     "CVE_data_meta": {
         "ID": "CVE-2021-0010",
         "ASSIGNER": "cve@mitre.org",
-        "STATE": "RESERVED"
+        "STATE": "REJECT"
     },
     "description": {
         "description_data": [
             {
                 "lang": "eng",
-                "value": "** RESERVED ** This candidate has been reserved by an organization or individual that will use it when announcing a new security problem. When the candidate has been publicized, the details for this candidate will be provided."
+                "value": "** REJECT ** DO NOT USE THIS CANDIDATE NUMBER. ConsultIDs: none. Reason: This candidate was in a CNA pool that was not assigned to any issues during 2021. Notes: none."
             }
         ]
     }
 }
 -- 2021/1xxx/CVE-2021-1384.json --
 {
-    "data_type": "CVE",
-    "data_format": "MITRE",
-    "data_version": "4.0",
     "CVE_data_meta": {
+        "ASSIGNER": "psirt@cisco.com",
+        "DATE_PUBLIC": "2021-03-24T16:00:00",
         "ID": "CVE-2021-1384",
-        "ASSIGNER": "cve@mitre.org",
-        "STATE": "REJECT"
+        "STATE": "PUBLIC",
+        "TITLE": "Cisco IOx for IOS XE Software Command Injection Vulnerability"
     },
-    "references": {
-        "reference_data": [
-            {
-                "refsource": "*added for testing*",
-                "name": "https://www.intel.com/content/www/us/en/security-center/advisory/intel-sa-00477.html",
-                "url": "https://golang.org/x/sync"
-            }
-        ]
+    "affects": {
+        "vendor": {
+            "vendor_data": [
+                {
+                    "product": {
+                        "product_data": [
+                            {
+                                "product_name": "Cisco IOS XE Software ",
+                                "version": {
+                                    "version_data": [
+                                        {
+                                            "version_value": "n/a"
+                                        }
+                                    ]
+                                }
+                            }
+                        ]
+                    },
+                    "vendor_name": "Cisco"
+                }
+            ]
+        }
     },
+    "data_format": "MITRE",
+    "data_type": "CVE",
+    "data_version": "4.0",
     "description": {
         "description_data": [
             {
                 "lang": "eng",
-                "value": "** REJECT ** DO NOT USE THIS CANDIDATE NUMBER. ConsultIDs: none. Reason: This candidate was withdrawn by its CNA. Further investigation showed that it was not a security issue. Notes: none."
+                "value": "A vulnerability in Cisco IOx application hosting environment of Cisco IOS XE Software could allow an authenticated, remote attacker to inject commands into the underlying operating system as the root user. This vulnerability is due to incomplete validation of fields in the application packages loaded onto IOx. An attacker could exploit this vulnerability by creating a crafted application .tar file and loading it onto the device. A successful exploit could allow the attacker to perform command injection into the underlying operating system as the root user."
             }
         ]
+    },
+    "exploit": [
+        {
+            "lang": "eng",
+            "value": "The Cisco Product Security Incident Response Team (PSIRT) is not aware of any public announcements or malicious use of the vulnerability that is described in this advisory. "
+        }
+    ],
+    "impact": {
+        "cvss": {
+            "baseScore": "6.5",
+            "vectorString": "CVSS:3.1/AV:N/AC:L/PR:H/UI:N/S:U/C:H/I:H/A:N ",
+            "version": "3.0"
+        }
+    },
+    "problemtype": {
+        "problemtype_data": [
+            {
+                "description": [
+                    {
+                        "lang": "eng",
+                        "value": "CWE-77"
+                    }
+                ]
+            }
+        ]
+    },
+    "references": {
+        "reference_data": [
+            {
+                "name": "20210324 Cisco IOx for IOS XE Software Command Injection Vulnerability",
+                "refsource": "CISCO",
+                "url": "https://tools.cisco.com/security/center/content/CiscoSecurityAdvisory/cisco-sa-iox-cmdinj-RkSURGHG"
+            },
+            {
+                "refsource": "MISC",
+                "name": "https://github.com/orangecertcc/security-research/security/advisories/GHSA-h332-fj6p-2232",
+                "url": "https://github.com/orangecertcc/security-research/security/advisories/GHSA-h332-fj6p-2232"
+            }
+        ]
+    },
+    "source": {
+        "advisory": "cisco-sa-iox-cmdinj-RkSURGHG",
+        "defect": [
+            [
+                "CSCvw64798"
+            ]
+        ],
+        "discovery": "INTERNAL"
     }
 }
 -- 2020/9xxx/CVE-2020-9283.json --
@@ -180,9 +244,29 @@
                 "url": "https://groups.google.com/forum/#!topic/golang-announce/3L45YRc91SY"
             },
             {
-                "refsource": "*added for testing*",
-                "name": "https://groups.google.com/forum/#!topic/golang-announce/3L45YRc91SY",
-                "url": "https://golang.org/x/crypto"
+                "refsource": "MISC",
+                "name": "http://packetstormsecurity.com/files/156480/Go-SSH-0.0.2-Denial-Of-Service.html",
+                "url": "http://packetstormsecurity.com/files/156480/Go-SSH-0.0.2-Denial-Of-Service.html"
+            },
+            {
+                "refsource": "MLIST",
+                "name": "[debian-lts-announce] 20201007 [SECURITY] [DLA 2402-1] golang-go.crypto security update",
+                "url": "https://lists.debian.org/debian-lts-announce/2020/10/msg00014.html"
+            },
+            {
+                "refsource": "MLIST",
+                "name": "[debian-lts-announce] 20201116 [SECURITY] [DLA 2453-1] restic security update",
+                "url": "https://lists.debian.org/debian-lts-announce/2020/11/msg00027.html"
+            },
+            {
+                "refsource": "MLIST",
+                "name": "[debian-lts-announce] 20201118 [SECURITY] [DLA 2455-1] packer security update",
+                "url": "https://lists.debian.org/debian-lts-announce/2020/11/msg00031.html"
+            },
+            {
+                "refsource": "MLIST",
+                "name": "[debian-lts-announce] 20230616 [SECURITY] [DLA 3455-1] golang-go.crypto security update",
+                "url": "https://lists.debian.org/debian-lts-announce/2023/06/msg00017.html"
             }
         ]
     }
@@ -273,11 +357,6 @@
                 "name": "https://github.com/pandatix/go-cvss/blob/master/SECURITY.md",
                 "refsource": "MISC",
                 "url": "https://github.com/pandatix/go-cvss/blob/master/SECURITY.md"
-            },
-            {
-                "name": "https://bitbucket.org/foo/bar/baz",
-                "refsource": "*added for testing*",
-                "url": "https://bitbucket.org/foo/bar/baz"
             }
         ]
     },