internal/worker: refactor CVE parsing logic

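Logic for parsing data from a CVE is refactored and moved to the helper
functions createCVE, cveLinks, and cveCWE. cveToIssue is renamed to
triageCVE, modulePathFromCVE to cveModulePath, and isPendingCVE to
isReservedCVE.

Behavior changes: reserved and non-Go CVEs now yield a cve record with an
explicit state instead of nil, an unsupported data version returns
errCVEVersionUnsupported instead of being logged and skipped, and the
worker prints the Go vulnerabilities sorted, together with their module
paths, after the summary line.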

Change-Id: Idd0162c6240a8ae2b1a63d3faf4d32e29e00a40c
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/362239
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/internal/worker/cve.go b/internal/worker/cve.go
index 88faac3..1272070 100644
--- a/internal/worker/cve.go
+++ b/internal/worker/cve.go
@@ -5,8 +5,8 @@
 package worker
 
 import (
+	"errors"
 	"fmt"
-	"log"
 	"net/http"
 	"net/url"
 	"strings"
@@ -16,66 +16,58 @@
 	"golang.org/x/vuln/internal/report"
 )
 
-const goGitHubRepo = "github.com/golang/go"
+const (
+	stateReserved        = "Reserved"
+	statePublicNotGoVuln = "Public - Not Go Vuln"
+	statePublicGoVuln    = "Public - Go Vuln"
+)
 
-// cveToIssue creates a cveRecord from a c *cveschema.CVE.
-func cveToIssue(c *cveschema.CVE) (_ *cve, err error) {
+var errCVEVersionUnsupported = errors.New("unsupported CVE version")
+
+// triageCVE classifies c as reserved, public non-Go, or public Go and returns the matching cve record.
+func triageCVE(c *cveschema.CVE) (_ *cve, err error) {
-	defer derrors.Wrap(&err, "cveToIssue(%q)", c.CVEDataMeta.ID)
+	defer derrors.Wrap(&err, "triageCVE(%q)", c.CVEDataMeta.ID)
-	if isPendingCVE(c) {
-		return nil, nil
+	if isReservedCVE(c) {
+		return createCVE(c, stateReserved, "", false), nil
 	}
 	switch c.DataVersion {
 	case "4.0":
-		return cveToIssueV4(c)
+		mp, err := cveModulePath(c)
+		if err != nil {
+			return nil, err
+		}
+		if mp == "" {
+			return createCVE(c, statePublicNotGoVuln, "", false), nil
+		}
+		return createCVE(c, statePublicGoVuln, mp, true), nil
 	default:
 		// TODO(https://golang.org/issue/49289): Add support for v5.0.
-		log.Printf("Unxpected data_version for CVE %q: %q (skipping)", c.CVEDataMeta.ID, c.DataVersion)
-		return nil, nil
+		return nil, fmt.Errorf("CVE %q has DataVersion %q: %w", c.CVEDataMeta.ID, c.DataVersion, errCVEVersionUnsupported)
 	}
 }
 
-func cveToIssueV4(c *cveschema.CVE) (_ *cve, err error) {
-	mp, err := modulePathFromCVE(c)
-	if err != nil {
-		return nil, err
-	}
-	if mp == "" {
-		return nil, nil
-	}
-	var links report.Links
-	for _, r := range c.References.ReferenceData {
-		if links.Commit == "" && strings.Contains(r.URL, "/commit/") {
-			links.Commit = r.URL
-		} else if links.PR == "" && strings.Contains(r.URL, "/pull/") {
-			links.PR = r.URL
-		} else {
-			links.Context = append(links.Context, r.URL)
-		}
-	}
-	var cwe string
-	for _, pt := range c.Problemtype.ProblemtypeData {
-		for _, d := range pt.Description {
-			if strings.Contains(d.Value, "CWE") {
-				cwe = d.Value
-			}
-		}
-	}
+const goGitHubRepo = "github.com/golang/go"
+
+// createCVE creates a cve record state from the data provided.
+func createCVE(c *cveschema.CVE, state string, mp string, isGoVuln bool) *cve {
 	r := &cve{
 		CVE:         *c,
-		cwe:         cwe,
+		state:       state,
+		cwe:         cveCWE(c),
 		modulePath:  mp,
-		links:       links,
+		links:       cveLinks(c),
 		description: description(c),
+		isGoVuln:    isGoVuln,
 	}
 	if mp == goGitHubRepo {
 		r.modulePath = "Standard Library"
 	}
-	return r, nil
+	return r
 }
 
-// isPendingCVE reports if the CVE is still waiting on information and not
+// isReservedCVE reports if the CVE is still waiting on information and not
 // ready to be triaged.
-func isPendingCVE(c *cveschema.CVE) bool {
+func isReservedCVE(c *cveschema.CVE) bool {
 	return c.CVEDataMeta.STATE == cveschema.StateReserved
 }
 
@@ -88,10 +80,10 @@
 	"golang.org":    true,
 }
 
-// modulePathFromCVE returns a Go module path for a CVE, if we can determine
-// what it is.
-func modulePathFromCVE(c *cveschema.CVE) (_ string, err error) {
-	defer derrors.Wrap(&err, "modulePathFromCVE(c)")
+// cveModulePath returns a Go module path for a CVE, if we can determine what
+// it is.
+func cveModulePath(c *cveschema.CVE) (_ string, err error) {
+	defer derrors.Wrap(&err, "cveModulePath(%q)", c.CVEDataMeta.ID)
 	for _, r := range c.References.ReferenceData {
 		if r.URL == "" {
 			continue
@@ -122,6 +114,32 @@
 	return "", nil
 }
 
+func cveLinks(c *cveschema.CVE) report.Links {
+	var links report.Links
+	for _, r := range c.References.ReferenceData {
+		if links.Commit == "" && strings.Contains(r.URL, "/commit/") {
+			links.Commit = r.URL
+		} else if links.PR == "" && strings.Contains(r.URL, "/pull/") {
+			links.PR = r.URL
+		} else {
+			links.Context = append(links.Context, r.URL)
+		}
+	}
+	return links
+}
+
+func cveCWE(c *cveschema.CVE) string {
+	var cwe string
+	for _, pt := range c.Problemtype.ProblemtypeData {
+		for _, d := range pt.Description {
+			if strings.Contains(d.Value, "CWE") {
+				cwe = d.Value
+			}
+		}
+	}
+	return cwe
+}
+
 func description(c *cveschema.CVE) string {
 	var ds []string
 	for _, d := range c.Description.DescriptionData {
diff --git a/internal/worker/worker.go b/internal/worker/worker.go
index 8fff940..7aa150c 100644
--- a/internal/worker/worker.go
+++ b/internal/worker/worker.go
@@ -11,6 +11,7 @@
 	"fmt"
 	"log"
 	"path"
+	"sort"
 	"strings"
 
 	"github.com/go-git/go-git/v5"
@@ -42,12 +43,17 @@
 	if err := walkRepo(repo, root, "", t); err != nil {
 		return err
 	}
+	var newVulns []string
 	for cveID, r := range t {
 		if r.isGoVuln {
-			fmt.Println(cveID)
+			newVulns = append(newVulns, fmt.Sprintf("%s (%s)", cveID, r.modulePath))
 		}
 	}
+	sort.Strings(newVulns)
 	log.Printf("Found %d new issues from %d CVEs", t.totalVulns(), t.totalCVEs())
+	for _, v := range newVulns {
+		fmt.Println(v)
+	}
 	return nil
 }
 
@@ -81,7 +87,7 @@
 			if err != nil {
 				return err
 			}
-			issue, err := cveToIssue(c)
+			issue, err := triageCVE(c)
 			if err != nil {
 				return err
 			}