internal/report: add basic logic to convert cve5 to report

Add a function to convert CVE JSON 5.0 records to our internal YAML report format.

As a starting point, this function does the same thing as the old function
that converts from CVE JSON 4 to our format. A temporary test checks
that this is the case.

For golang/go#49289

Change-Id: Ie5226537cefaeb9e68b98e5dce9c6b97b29f968a
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/547556
Reviewed-by: Damien Neil <dneil@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/report/cve.go b/internal/report/cve.go
index 1f90bb1..631fbf2 100644
--- a/internal/report/cve.go
+++ b/internal/report/cve.go
@@ -9,6 +9,7 @@
 	"strings"
 
 	"golang.org/x/vulndb/internal/cveschema"
+	"golang.org/x/vulndb/internal/cveschema5"
 	"golang.org/x/vulndb/internal/proxy"
 	"golang.org/x/vulndb/internal/stdlib"
 )
@@ -80,16 +81,101 @@
 		Credits:     credits,
 		References:  refs,
 	}
+	r.addCVE(c.Metadata.ID, modulePath)
+	return r
+}
+
+// addCVE records cveID on the report. If stdlib.IsStdModule,
+// stdlib.IsCmdModule or stdlib.IsXModule reports true for modulePath,
+// the ID is stored in CVEMetadata with a placeholder CWE; otherwise it
+// is appended to CVEs.
+func (r *Report) addCVE(cveID, modulePath string) {
 	// New standard library and x/ repo CVEs are likely maintained by
 	// the Go CNA.
 	if stdlib.IsStdModule(modulePath) || stdlib.IsCmdModule(modulePath) ||
 		stdlib.IsXModule(modulePath) {
 		r.CVEMetadata = &CVEMeta{
-			ID:  c.Metadata.ID,
+			ID:  cveID,
 			CWE: "TODO",
 		}
 	} else {
-		r.CVEs = []string{c.Metadata.ID}
+		r.CVEs = append(r.CVEs, cveID)
 	}
+}
+
+// CVE5ToReport creates a Report from the CVE JSON 5.0 record c, using id
+// as the report ID and modulePath as the affected module path, and then
+// calls Fix on the result with the proxy client pc.
+func CVE5ToReport(c *cveschema5.CVERecord, id, modulePath string, pc *proxy.Client) *Report {
+	r := cve5ToReport(c, id, modulePath)
+	r.Fix(pc)
+	return r
+}
+
+// cve5ToReport converts the CVE JSON 5.0 record c into a Report with the
+// given id and a single module entry derived from modulePath. Unlike
+// CVE5ToReport, it does not call Fix on the result.
+func cve5ToReport(c *cveschema5.CVERecord, id, modulePath string) *Report {
+	cna := c.Containers.CNAContainer
+
+	// Concatenate the English-language descriptions, one per line.
+	var description Description
+	for _, d := range cna.Descriptions {
+		if d.Lang == "en" {
+			description += Description(d.Value + "\n")
+		}
+	}
+
+	var credits []string
+	for _, credit := range cna.Credits {
+		credits = append(credits, credit.Value)
+	}
+
+	var refs []*Reference
+	for _, ref := range cna.References {
+		refs = append(refs, referenceFromUrl(ref.URL))
+	}
+
+	// For now, use the first product name as the package path.
+	// TODO(tatianabradley): Make this more sophisticated, to consider
+	// all the blocks in cna.Affected, versions, etc.
+	var pkgPath string
+	if affected := cna.Affected; len(affected) > 0 {
+		pkgPath = affected[0].Product
+	}
+	// When stdlib.Contains reports true, modulePath is used as the package
+	// path and the module becomes stdlib.ModulePath.
+	if stdlib.Contains(modulePath) {
+		pkgPath = modulePath
+		modulePath = stdlib.ModulePath
+	}
+	if modulePath == "" {
+		modulePath = "TODO"
+	}
+	if pkgPath == "" {
+		pkgPath = modulePath
+	}
+	modules := []*Module{
+		{
+			Module:   modulePath,
+			Versions: nil,
+			Packages: []*Package{
+				{
+					Package: pkgPath,
+				},
+			},
+		},
+	}
+
+	r := &Report{
+		ID:      id,
+		Modules: modules,
+		// TODO(tatianabradley): Add CVE title as summary.
+		Description: description,
+		Credits:     credits,
+		References:  refs,
+	}
+
+	r.addCVE(c.Metadata.ID, modulePath)
 	return r
 }
diff --git a/internal/report/cve_test.go b/internal/report/cve_test.go
index add607e..22ade4f 100644
--- a/internal/report/cve_test.go
+++ b/internal/report/cve_test.go
@@ -8,6 +8,7 @@
 	"context"
 	"flag"
 	"fmt"
+	"io/fs"
 	"os"
 	"path/filepath"
 	"testing"
@@ -74,6 +75,74 @@
 	}
 }
 
+// TestCVE5ToReport passes the v5 test archive (v5txtar) to run, using
+// cve5ToReport with placeholderID as the conversion function.
+func TestCVE5ToReport(t *testing.T) {
+	newV5 := func() cvelistrepo.CVE {
+		return new(cveschema5.CVERecord)
+	}
+	toReportV5 := func(cve cvelistrepo.CVE, modulePath string) *Report {
+		return cve5ToReport(cve.(*cveschema5.CVERecord), placeholderID, modulePath)
+	}
+	if err := run(t, v5txtar, newV5, toReportV5); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestV4V5Equivalence checks that the created report is the same for v4
+// and v5. This is a transitional test that will be removed once we are
+// OK with divergence between v4 and v5, or if we remove support for v4
+// entirely.
+func TestV4V5Equivalence(t *testing.T) {
+	if err := filepath.WalkDir(filepath.Join(testdata, "TestCVE5ToReport"), func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if d.IsDir() {
+			return nil
+		}
+
+		fname := filepath.Base(path)
+		t.Run(fname, func(t *testing.T) {
+			v5b, err := findCVEFile(path)
+			if err != nil {
+				t.Fatal(err)
+			}
+			// The v4 counterpart is looked up under TestCVEToReport
+			// using the same base file name.
+			v4file := filepath.Join(testdata, "TestCVEToReport", fname)
+			v4b, err := findCVEFile(v4file)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if diff := cmp.Diff(v4b, v5b); diff != "" {
+				t.Errorf("mismatch (-v4, +v5):\n%s", diff)
+			}
+		})
+		return nil
+	}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// findCVEFile parses the txtar archive at path tf and returns the first
+// file in it whose name satisfies cveschema5.IsCVE. It returns an error
+// if the archive cannot be parsed or contains no such file.
+func findCVEFile(tf string) (*txtar.File, error) {
+	ar, err := txtar.ParseFile(tf)
+	if err != nil {
+		return nil, err
+	}
+	for i := range ar.Files {
+		// Index into the slice rather than taking the address of the
+		// range variable, so the result points at the archive's own entry.
+		if af := &ar.Files[i]; cveschema5.IsCVE(af.Name) {
+			return af, nil
+		}
+	}
+	return nil, fmt.Errorf("%s: cve archive file not found", tf)
+}
+
 func run(t *testing.T, txtarFile string, newCVE func() cvelistrepo.CVE, toReport func(cvelistrepo.CVE, string) *Report) error {
 	if *updateGolden {
 		if err := os.RemoveAll(filepath.Join(testdata, t.Name())); err != nil {
diff --git a/internal/report/references.go b/internal/report/references.go
index c5e1b43..c607def 100644
--- a/internal/report/references.go
+++ b/internal/report/references.go
@@ -5,6 +5,7 @@
 package report
 
 import (
+	"net/url"
 	"strings"
 
 	"golang.org/x/vulndb/internal/osv"
@@ -26,18 +27,28 @@
 
-// referenceFromUrl creates a new Reference from a url
-// with Type inferred from the contents of the url.
-func referenceFromUrl(url string) *Reference {
+// referenceFromUrl creates a new Reference from the URL u.
+// The URL is path-unescaped first (u is used unchanged if that fails).
+// The Type is inferred from the unescaped form, defaulting to
+// osv.ReferenceTypeWeb, and the unescaped form is also the URL stored
+// in the returned Reference.
+// NOTE(review): storing the unescaped form drops percent-escapes from
+// the recorded URL; confirm this is intended for all references.
+func referenceFromUrl(u string) *Reference {
+	unescaped, err := url.PathUnescape(u)
+	if err != nil {
+		// Best effort: keep the original string if it cannot be unescaped.
+		unescaped = u
+	}
 	typ := osv.ReferenceTypeWeb
 	switch {
-	case isFix(url):
+	case isFix(unescaped):
 		typ = osv.ReferenceTypeFix
-	case isIssue(url):
+	case isIssue(unescaped):
 		typ = osv.ReferenceTypeReport
-	case isAdvisory(url):
+	case isAdvisory(unescaped):
 		typ = osv.ReferenceTypeAdvisory
 	}
 	return &Reference{
 		Type: typ,
-		URL:  url,
+		URL:  unescaped,
 	}
 }
diff --git a/internal/report/testdata/cve/TestCVE5ToReport/CVE-2020-9283.txtar b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2020-9283.txtar
new file mode 100644
index 0000000..c09c5f3
--- /dev/null
+++ b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2020-9283.txtar
@@ -0,0 +1,24 @@
+Copyright 2023 The Go Authors. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+
+Expected output of TestCVE5ToReport/CVE-2020-9283.
+
+-- CVE-2020-9283 --
+id: PLACEHOLDER-ID
+modules:
+    - module: golang.org/x/crypto
+      packages:
+        - package: n/a
+description: |
+    golang.org/x/crypto before v0.0.0-20200220183623-bac4c82f6975 for Go allows a panic during signature verification in the golang.org/x/crypto/ssh package. A client can attack an SSH server that accepts public keys. Also, a server can attack any SSH client.
+references:
+    - web: https://groups.google.com/forum/#!topic/golang-announce/3L45YRc91SY
+    - web: http://packetstormsecurity.com/files/156480/Go-SSH-0.0.2-Denial-Of-Service.html
+    - web: https://lists.debian.org/debian-lts-announce/2020/10/msg00014.html
+    - web: https://lists.debian.org/debian-lts-announce/2020/11/msg00027.html
+    - web: https://lists.debian.org/debian-lts-announce/2020/11/msg00031.html
+    - web: https://lists.debian.org/debian-lts-announce/2023/06/msg00017.html
+cve_metadata:
+    id: CVE-2020-9283
+    cwe: TODO
diff --git a/internal/report/testdata/cve/TestCVE5ToReport/CVE-2022-39213.txtar b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2022-39213.txtar
new file mode 100644
index 0000000..9a4cfe1
--- /dev/null
+++ b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2022-39213.txtar
@@ -0,0 +1,20 @@
+Copyright 2023 The Go Authors. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+
+Expected output of TestCVE5ToReport/CVE-2022-39213.
+
+-- CVE-2022-39213 --
+id: PLACEHOLDER-ID
+modules:
+    - module: github.com/pandatix/go-cvss
+      packages:
+        - package: go-cvss
+description: |
+    go-cvss is a Go module to manipulate Common Vulnerability Scoring System (CVSS). In affected versions when a full CVSS v2.0 vector string is parsed using `ParseVector`, an Out-of-Bounds Read is possible due to a lack of tests. The Go module will then panic. The problem is patched in tag `v0.4.0`, by the commit `d9d478ff0c13b8b09ace030db9262f3c2fe031f4`. Users are advised to upgrade. Users unable to upgrade may avoid this issue by parsing only CVSS v2.0 vector strings that do not have all attributes defined (e.g. `AV:N/AC:L/Au:N/C:P/I:P/A:C/E:U/RL:OF/RC:C/CDP:MH/TD:H/CR:M/IR:M/AR:M`). As stated in [SECURITY.md](https://github.com/pandatix/go-cvss/blob/master/SECURITY.md), the CPE v2.3 to refer to this Go module is `cpe:2.3:a:pandatix:go_cvss:*:*:*:*:*:*:*:*`. The entry has already been requested to the NVD CPE dictionary.
+cves:
+    - CVE-2022-39213
+references:
+    - advisory: https://github.com/pandatix/go-cvss/security/advisories/GHSA-xhmf-mmv2-4hhx
+    - fix: https://github.com/pandatix/go-cvss/commit/d9d478ff0c13b8b09ace030db9262f3c2fe031f4
+    - web: https://github.com/pandatix/go-cvss/blob/master/SECURITY.md
diff --git a/internal/report/testdata/cve/TestCVE5ToReport/CVE-2023-44378.txtar b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2023-44378.txtar
new file mode 100644
index 0000000..2b011ba
--- /dev/null
+++ b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2023-44378.txtar
@@ -0,0 +1,20 @@
+Copyright 2023 The Go Authors. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+
+Expected output of TestCVE5ToReport/CVE-2023-44378.
+
+-- CVE-2023-44378 --
+id: PLACEHOLDER-ID
+modules:
+    - module: github.com/Consensys/gnark
+      packages:
+        - package: gnark
+description: |
+    gnark is a zk-SNARK library that offers a high-level API to design circuits. Prior to version 0.9.0, for some in-circuit values, it is possible to construct two valid decomposition to bits. In addition to the canonical decomposition of `a`, for small values there exists a second decomposition for `a+r` (where `r` is the modulus the values are being reduced by). The second decomposition was possible due to overflowing the field where the values are defined. Upgrading to version 0.9.0 should fix the issue without needing to change the calls to value comparison methods.
+cves:
+    - CVE-2023-44378
+references:
+    - advisory: https://github.com/Consensys/gnark/security/advisories/GHSA-498w-5j49-vqjg
+    - report: https://github.com/zkopru-network/zkopru/issues/116
+    - fix: https://github.com/Consensys/gnark/commit/59a4087261a6c73f13e80d695c17b398c3d0934f
diff --git a/internal/report/testdata/cve/TestCVE5ToReport/CVE-2023-45141.txtar b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2023-45141.txtar
new file mode 100644
index 0000000..0514762
--- /dev/null
+++ b/internal/report/testdata/cve/TestCVE5ToReport/CVE-2023-45141.txtar
@@ -0,0 +1,18 @@
+Copyright 2023 The Go Authors. All rights reserved.
+Use of this source code is governed by a BSD-style
+license that can be found in the LICENSE file.
+
+Expected output of TestCVE5ToReport/CVE-2023-45141.
+
+-- CVE-2023-45141 --
+id: PLACEHOLDER-ID
+modules:
+    - module: github.com/gofiber/fiber
+      packages:
+        - package: fiber
+description: |
+    Fiber is an express inspired web framework written in Go. A Cross-Site Request Forgery (CSRF) vulnerability has been identified in the application, which allows an attacker to obtain tokens and forge malicious requests on behalf of a user. This can lead to unauthorized actions being taken on the user's behalf, potentially compromising the security and integrity of the application. The vulnerability is caused by improper validation and enforcement of CSRF tokens within the application. This vulnerability has been addressed in version 2.50.0 and users are advised to upgrade. Users should take additional security measures like captchas or Two-Factor Authentication (2FA) and set Session cookies with SameSite=Lax or SameSite=Secure, and the Secure and HttpOnly attributes.
+cves:
+    - CVE-2023-45141
+references:
+    - advisory: https://github.com/gofiber/fiber/security/advisories/GHSA-mv73-f69x-444p