internal/cvelist: add walkRepo skeleton

A skeleton of walkRepo is added, which walks the files in the CVE list
repo and parses each CVE file that has not been triaged before.

In the future, logic will be added to check whether a CVE needs to be
added to the vulndb or can be ignored.

Change-Id: I62d627809c7c38120b87dccc38fb00688f8ae1da
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/356392
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Roland Shoemaker <roland@golang.org>
diff --git a/internal/cvelist/cvelist.go b/internal/cvelist/cvelist.go
index 4ba9954..91cb83a 100644
--- a/internal/cvelist/cvelist.go
+++ b/internal/cvelist/cvelist.go
@@ -7,22 +7,29 @@
 package cvelist
 
 import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"path"
+	"strings"
+
 	"github.com/go-git/go-git/v5"
 	"github.com/go-git/go-git/v5/plumbing"
+	"github.com/go-git/go-git/v5/plumbing/filemode"
 	"github.com/go-git/go-git/v5/plumbing/object"
 	"github.com/go-git/go-git/v5/storage/memory"
+	"golang.org/x/vulndb/internal/cveschema"
 )
 
 // Run clones the CVEProject/cvelist repository and compares the files to the
 // existing triaged-cve-list.
 func Run(triaged map[string]bool) error {
 	// 1. Clone the repo.
-	_, _, err := cloneRepo(cvelistRepoURL)
+	repo, root, err := cloneRepo(cvelistRepoURL)
 	if err != nil {
 		return err
 	}
-	// 2. TODO: walk the repo and figure out if something is a CVE.
-	return nil
+	return walkRepo(repo, root, "", triaged) // 2. Walk the tree from its root; dirpath starts empty.
 }
 
 const cvelistRepoURL = "https://github.com/CVEProject/cvelist"
@@ -55,3 +62,66 @@
 	}
 	return repo, root, nil
 }
+
+// walkRepo recursively walks the tree t (located at dirpath within the repo)
+// and parses each file named CVE-* whose ID, the name without ".json", is not
+// in triaged. At the root (dirpath == "") only directories with names starting
+// with "202" are entered. It returns the first error encountered.
+func walkRepo(r *git.Repository, t *object.Tree, dirpath string, triaged map[string]bool) (err error) {
+	for _, e := range t.Entries {
+		// The year filter applies only at the root; deeper levels visit every entry.
+		if dirpath == "" && !(e.Mode == filemode.Dir && strings.HasPrefix(e.Name, "202")) {
+			continue
+		}
+		switch e.Mode {
+		case filemode.Dir:
+			dp := path.Join(dirpath, e.Name)
+			t2, err := r.TreeObject(e.Hash)
+			if err != nil {
+				return err
+			}
+			if err := walkRepo(r, t2, dp, triaged); err != nil {
+				return err
+			}
+		default:
+			if !strings.HasPrefix(e.Name, "CVE-") {
+				continue
+			}
+			cveID := strings.TrimSuffix(e.Name, ".json")
+			if triaged[cveID] {
+				continue
+			}
+			blob, err := r.BlobObject(e.Hash)
+			if err != nil {
+				return fmt.Errorf("r.BlobObject: %v", err)
+			}
+			src, err := blob.Reader()
+			if err != nil {
+				// No reader was obtained, so there is nothing to close here.
+				return fmt.Errorf("blob.Reader: %v", err)
+			}
+			_, err = parseCVE(src)
+			if err != nil {
+				_ = src.Close() // Best effort: the parse error is the one reported.
+				filename := path.Join(dirpath, e.Name)
+				return fmt.Errorf("parseCVE(%q, src): %v", filename, err)
+			}
+			// TODO: implement triage CVE logic
+			if err := src.Close(); err != nil {
+				return fmt.Errorf("src.Close: %v", err)
+			}
+		}
+	}
+	return nil
+}
+
+// parseCVE decodes the first JSON value read from src as a CVE record in the
+// CVE JSON format:
+// https://github.com/CVEProject/automation-working-group/blob/master/cve_json_schema/DRAFT-JSON-file-format-v4.md
+func parseCVE(src io.Reader) (_ *cveschema.CVE, err error) {
+	cve := new(cveschema.CVE)
+	if decodeErr := json.NewDecoder(src).Decode(cve); decodeErr != nil {
+		return nil, fmt.Errorf("d.Decode: %v", decodeErr)
+	}
+	return cve, nil
+}