internal/database: set published date from git history

When a report has no 'published' field set, populate it from the
committer timestamp of the earliest commit in the report file's git
history, i.e. the commit that added the report to the vulndb repo.

Fixes golang/go#50434

Change-Id: I21ef234ffe78457ff42db3ffa4decb1199a129f3
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/376154
Trust: Damien Neil <dneil@google.com>
Run-TryBot: Damien Neil <dneil@google.com>
Reviewed-by: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/cmd/db/main.go b/cmd/db/main.go
deleted file mode 100644
index 7ed6e25..0000000
--- a/cmd/db/main.go
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Command db provides a tool for creating and checking the vulndb.
-package main
-
-import (
-	"flag"
-	"fmt"
-	"log"
-	"os"
-
-	"golang.org/x/vulndb/internal/database"
-)
-
-func main() {
-	flag.Usage = func() {
-		fmt.Fprintf(flag.CommandLine.Output(), "usage: db [cmd]\n")
-		fmt.Fprintf(flag.CommandLine.Output(), "  diff [dbname1] [dbname2]: compare two different versions of the vulndb\n")
-		fmt.Fprintf(flag.CommandLine.Output(), "  generate [reportsDir] [jsonDir]: create a new vulndb\n")
-		flag.PrintDefaults()
-	}
-	flag.Parse()
-	if flag.NArg() != 3 {
-		flag.Usage()
-		os.Exit(1)
-	}
-	cmd := os.Args[0]
-	switch cmd {
-	case "diff":
-		if err := database.Diff(os.Args[1], os.Args[2]); err != nil {
-			log.Fatal(err)
-		}
-	case "generate":
-		if err := database.Generate(os.Args[1], os.Args[2]); err != nil {
-			log.Fatal(err)
-		}
-	default:
-		log.Fatalf("unsupported command: %q", cmd)
-	}
-}
diff --git a/cmd/gendb/main.go b/cmd/gendb/main.go
index b83cf83..7e7c846 100644
--- a/cmd/gendb/main.go
+++ b/cmd/gendb/main.go
@@ -7,6 +7,7 @@
 package main
 
 import (
+	"context"
 	"flag"
 	"log"
 
@@ -14,13 +15,14 @@
 )
 
 var (
-	yamlDir = flag.String("reports", "reports", "Directory containing yaml reports")
+	repoDir = flag.String("repo", ".", "Directory containing vulndb repo")
 	jsonDir = flag.String("out", "out", "Directory to write JSON database to")
 )
 
 func main() {
 	flag.Parse()
-	if err := database.Generate(*yamlDir, *jsonDir); err != nil {
+	ctx := context.Background()
+	if err := database.Generate(ctx, *repoDir, *jsonDir); err != nil {
 		log.Fatal(err)
 	}
 }
diff --git a/internal/database/generate.go b/internal/database/generate.go
index a8338b3..0d0ce47 100644
--- a/internal/database/generate.go
+++ b/internal/database/generate.go
@@ -6,6 +6,7 @@
 package database
 
 import (
+	"context"
 	"encoding/json"
 	"fmt"
 	"io/ioutil"
@@ -13,10 +14,12 @@
 	"path/filepath"
 	"strings"
 
+	"github.com/go-git/go-git/v5/plumbing/object"
 	"golang.org/x/mod/semver"
 	"golang.org/x/vuln/client"
 	"golang.org/x/vuln/osv"
 	"golang.org/x/vulndb/internal/derrors"
+	"golang.org/x/vulndb/internal/gitrepo"
 	"golang.org/x/vulndb/internal/report"
 	"golang.org/x/vulndb/internal/stdlib"
 	"gopkg.in/yaml.v2"
@@ -28,22 +31,31 @@
 	// idDirectory is the name of the directory that contains entries
 	// listed by their IDs.
 	idDirectory = "ID"
+
+	// yamlDir is the name of the directory in the vulndb repo that
+	// contains reports.
+	yamlDir = "reports"
 )
 
-func Generate(yamlDir, jsonDir string) (err error) {
-	defer derrors.Wrap(&err, "Generate(%q)", yamlDir)
-	yamlFiles, err := ioutil.ReadDir(yamlDir)
+func Generate(ctx context.Context, repoDir, jsonDir string) (err error) {
+	defer derrors.Wrap(&err, "Generate(%q)", repoDir)
+	yamlFiles, err := ioutil.ReadDir(filepath.Join(repoDir, yamlDir))
 	if err != nil {
 		return fmt.Errorf("can't read %q: %s", yamlDir, err)
 	}
 
+	repo, err := gitrepo.Open(ctx, repoDir)
+	if err != nil {
+		return err
+	}
+
 	jsonVulns := map[string][]osv.Entry{}
 	var entries []osv.Entry
 	for _, f := range yamlFiles {
 		if !strings.HasSuffix(f.Name(), ".yaml") {
 			continue
 		}
-		content, err := ioutil.ReadFile(filepath.Join(yamlDir, f.Name()))
+		content, err := ioutil.ReadFile(filepath.Join(repoDir, yamlDir, f.Name()))
 		if err != nil {
 			return fmt.Errorf("can't read %q: %s", f.Name(), err)
 		}
@@ -51,6 +63,18 @@
 		if err := yaml.UnmarshalStrict(content, &r); err != nil {
 			return fmt.Errorf("unable to unmarshal %q: %s", f.Name(), err)
 		}
+		if r.Published.IsZero() {
+			yamlPath := filepath.Join(yamlDir, f.Name())
+			if err := gitrepo.FileHistory(repo, yamlPath, func(commit *object.Commit) error {
+				when := commit.Committer.When.UTC()
+				if r.Published.IsZero() || when.Before(r.Published) {
+					r.Published = when
+				}
+				return nil
+			}); err != nil {
+				return fmt.Errorf("can't find git history for %q: %v", yamlPath, err)
+			}
+		}
 		if lints := r.Lint(); len(lints) > 0 {
 			return fmt.Errorf("vuln.Lint: %v", lints)
 		}
diff --git a/internal/gitrepo/gitrepo.go b/internal/gitrepo/gitrepo.go
index f630206..62057c8 100644
--- a/internal/gitrepo/gitrepo.go
+++ b/internal/gitrepo/gitrepo.go
@@ -140,3 +140,21 @@
 		return "", "", fmt.Errorf("%q is not in the form {github.com/}owner/repo", s)
 	}
 }
+
+// FileHistory walks the commit history backwards from HEAD and calls f for
+// each commit that changed the file at filepath (a path within the repo).
+func FileHistory(repo *git.Repository, filepath string, f func(*object.Commit) error) error {
+	// Look up HEAD with resolution enabled so we end up with a commit hash.
+	head, err := repo.Reference(plumbing.HEAD, true)
+	if err != nil {
+		return err
+	}
+	tip, err := repo.CommitObject(head.Hash())
+	if err != nil {
+		return err
+	}
+	// Filter the walk over tip's ancestors down to commits touching filepath.
+	ancestors := object.NewCommitPreorderIter(tip, nil, nil)
+	history := object.NewCommitFileIterFromIter(filepath, ancestors, false)
+	return history.ForEach(f)
+}
diff --git a/internal/gitrepo/gitrepo_test.go b/internal/gitrepo/gitrepo_test.go
new file mode 100644
index 0000000..c3b9612
--- /dev/null
+++ b/internal/gitrepo/gitrepo_test.go
@@ -0,0 +1,91 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gitrepo_test
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+
+	"github.com/go-git/go-billy/v5"
+	"github.com/go-git/go-billy/v5/memfs"
+	"github.com/go-git/go-git/v5"
+	"github.com/go-git/go-git/v5/plumbing/object"
+	"github.com/go-git/go-git/v5/storage/memory"
+	"golang.org/x/vulndb/internal/gitrepo"
+)
+
+// TestFileHistory checks that FileHistory reports only commits that changed the file.
+func TestFileHistory(t *testing.T) {
+	test := newTest(t)
+	messages := []string{"one", "two", "three"}
+	for _, message := range messages {
+		test.Commit(message, map[string]string{"file": message})
+
+		// These commits touch other files, and should not be iterated over.
+		test.Commit("other commit", map[string]string{"some_other_file": message})
+	}
+	// Prepend each message: the walk starts at HEAD, and want is in commit order.
+	var got []string
+	err := gitrepo.FileHistory(test.Repo, "file", func(commit *object.Commit) error {
+		got = append([]string{strings.TrimSpace(commit.Message)}, got...)
+		return nil
+	})
+	if err != nil {
+		t.Fatalf("FileHistory: %v", err)
+	}
+	if !reflect.DeepEqual(got, messages) {
+		t.Errorf("got %v\nwant %v", got, messages)
+	}
+}
+
+type gitTest struct {
+	t    *testing.T       // test used to report fixture failures
+	FS   billy.Filesystem // filesystem passed to git.Init; Commit creates files in it
+	Repo *git.Repository  // repository returned by git.Init in newTest
+}
+
+// newTest builds a gitTest around a freshly initialized repository that uses
+// memory.NewStorage and a memfs filesystem; it fails the test if git.Init errors.
+func newTest(t *testing.T) *gitTest {
+	t.Helper()
+	test := &gitTest{t: t, FS: memfs.New()}
+	repo, err := git.Init(memory.NewStorage(), test.FS)
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Only attach the repository once initialization has succeeded.
+	test.Repo = repo
+	return test
+}
+
+// Commit writes each named file with its content into the test filesystem,
+// stages it, and records a commit with the given message; any error fails the test.
+func (test *gitTest) Commit(message string, files map[string]string) {
+	test.t.Helper()
+	worktree, err := test.Repo.Worktree()
+	if err != nil {
+		test.t.Fatal(err)
+	}
+	for path, data := range files {
+		file, err := test.FS.Create(path)
+		if err != nil {
+			test.t.Fatal(err)
+		}
+		if _, err = file.Write([]byte(data)); err != nil {
+			test.t.Fatal(err)
+		}
+		if err = file.Close(); err != nil {
+			test.t.Fatal(err)
+		}
+		if _, err = worktree.Add(path); err != nil {
+			test.t.Fatal(err)
+		}
+	}
+	author := &object.Signature{Name: "Author", Email: "author@example.com"}
+	if _, err := worktree.Commit(message, &git.CommitOptions{All: true, Author: author}); err != nil {
+		test.t.Fatal(err)
+	}
+}