client,cmd/gendb: structure database around modules

Structure the vulnerability database around modules rather than
packages. This results in larger JSON files, but provides a more
obvious grouping for vulnerabilities.

Change-Id: Ica3301f4f88e7aa5e704dc9e390ec336515d3a4e
Reviewed-on: https://go-review.googlesource.com/c/vulndb/+/339269
Trust: Roland Shoemaker <roland@golang.org>
Run-TryBot: Roland Shoemaker <roland@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Filippo Valsorda <filippo@golang.org>
Vulndb-Deploy: Roland Shoemaker <bracewell@google.com>
diff --git a/client/client.go b/client/client.go
index acf29d3..c7e811e 100644
--- a/client/client.go
+++ b/client/client.go
@@ -7,29 +7,29 @@
 //
 // The expected database layout is the same for both HTTP and local
 // databases. The database  index is located at the root of the
-// database, and contains a list of all of the vulnerable packages
+// database, and contains a list of all of the vulnerable modules
 // documented in the databse and the time the most recent vulnerability
 // was added. The index file is called indx.json, and has the
 // following format:
 //
 //   map[string]time.Time (osv.DBIndex)
 //
-// Each vulnerable package is represented by an individual JSON file
-// which contains all of the vulnerabilities in that package. The path
-// for each package file is simply the import path of the package,
-// i.e. vulnerabilities in golang.org/x/crypto/ssh are contained in the
-// golang.org/x/crypto/ssh.json file. The per-package JSON files have
+// Each vulnerable module is represented by an individual JSON file
+// which contains all of the vulnerabilities in that module. The path
+// for each module file is simply the module path of the module,
+// i.e. vulnerabilities in golang.org/x/crypto are contained in the
+// golang.org/x/crypto.json file. The per-module JSON files have
 // the following format:
 //
 //   []osv.Entry
 //
 // A single client.Client can be used to access multiple vulnerability
-// databases. When looking up vulnerable packages each database is
+// databases. When looking up vulnerable modules, each database is
 // consulted, and results are merged together.
 //
-// TODO: allow filtering private packages, possibly at a database level?
+// TODO: allow filtering private modules, possibly at a database level?
 // (e.g. I may want to use multiple databases, but only lookup a specific
-// package in a subset of them)
+// module in a subset of them)
 package client
 
 import (
@@ -46,8 +46,6 @@
 	"golang.org/x/vulndb/osv"
 )
 
-type dbIndex struct{}
-
 type source interface {
 	Get([]string) ([]*osv.Entry, error)
 	Index() (osv.DBIndex, error)
@@ -57,9 +55,9 @@
 	dir string
 }
 
-func (ls *localSource) Get(packages []string) ([]*osv.Entry, error) {
+func (ls *localSource) Get(modules []string) ([]*osv.Entry, error) {
 	var entries []*osv.Entry
-	for _, p := range packages {
+	for _, p := range modules {
 		content, err := ioutil.ReadFile(filepath.Join(ls.dir, p+".json"))
 		if os.IsNotExist(err) {
 			continue
@@ -149,7 +147,7 @@
 	return index, nil
 }
 
-func (hs *httpSource) Get(packages []string) ([]*osv.Entry, error) {
+func (hs *httpSource) Get(modules []string) ([]*osv.Entry, error) {
 	var entries []*osv.Entry
 
 	index, err := hs.Index()
@@ -158,7 +156,7 @@
 	}
 
 	var stillNeed []string
-	for _, p := range packages {
+	for _, p := range modules {
 		lastModified, present := index[p]
 		if !present {
 			continue
@@ -202,7 +200,7 @@
 			return nil, err
 		}
 		// TODO: we may want to check that the returned entries actually match
-		// the package we asked about, so that the cache cannot be poisoned
+		// the module we asked about, so that the cache cannot be poisoned
 		entries = append(entries, e...)
 
 		if hs.cache != nil {
@@ -254,11 +252,11 @@
 	return c, nil
 }
 
-func (c *Client) Get(packages []string) ([]*osv.Entry, error) {
+func (c *Client) Get(modules []string) ([]*osv.Entry, error) {
 	var entries []*osv.Entry
 	// probably should be parallelized
 	for _, s := range c.sources {
-		e, err := s.Get(packages)
+		e, err := s.Get(modules)
 		if err != nil {
 			return nil, err // be failure tolerant?
 		}
diff --git a/cmd/gendb/main.go b/cmd/gendb/main.go
index a38fc56..e7f25e1 100644
--- a/cmd/gendb/main.go
+++ b/cmd/gendb/main.go
@@ -41,21 +41,21 @@
 }
 
 func main() {
-	tomlDir := flag.String("reports", "reports", "Directory containing toml reports")
+	yamlDir := flag.String("reports", "reports", "Directory containing yaml reports")
 	jsonDir := flag.String("out", "out", "Directory to write JSON database to")
 	flag.Parse()
 
-	tomlFiles, err := ioutil.ReadDir(*tomlDir)
+	yamlFiles, err := ioutil.ReadDir(*yamlDir)
 	if err != nil {
-		fail(fmt.Sprintf("can't read %q: %s", *tomlDir, err))
+		fail(fmt.Sprintf("can't read %q: %s", *yamlDir, err))
 	}
 
 	jsonVulns := map[string][]osv.Entry{}
-	for _, f := range tomlFiles {
+	for _, f := range yamlFiles {
 		if !strings.HasSuffix(f.Name(), ".yaml") {
 			continue
 		}
-		content, err := ioutil.ReadFile(filepath.Join(*tomlDir, f.Name()))
+		content, err := ioutil.ReadFile(filepath.Join(*yamlDir, f.Name()))
 		if err != nil {
 			fail(fmt.Sprintf("can't read %q: %s", f.Name(), err))
 		}
@@ -77,8 +77,8 @@
 		// TODO(rolandshoemaker): once the HTML representation is ready this should be
 		// the link to the HTML page.
 		linkName := fmt.Sprintf("%s%s.yaml", dbURL, name)
-		for _, e := range osv.Generate(name, linkName, vuln) {
-			jsonVulns[e.Package.Name] = append(jsonVulns[e.Package.Name], e)
+		for path, e := range osv.Generate(name, linkName, vuln) {
+			jsonVulns[path] = append(jsonVulns[path], e...)
 		}
 	}
 
@@ -98,7 +98,7 @@
 			fail(fmt.Sprintf("failed to write %q: %s", outPath+".json", err))
 		}
 		for _, v := range vulns {
-			if v.Modified.After(index[path]) {
+			if v.Modified.After(index[path]) || v.Published.After(index[path]) {
 				index[path] = v.Modified
 			}
 		}
diff --git a/go.mod b/go.mod
index dc3e07a..7fb71f3 100644
--- a/go.mod
+++ b/go.mod
@@ -5,6 +5,7 @@
 require (
 	github.com/google/go-cmp v0.5.4
 	golang.org/x/mod v0.4.1
-	golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect
 	gopkg.in/yaml.v2 v2.4.0
 )
+
+require golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 // indirect
diff --git a/osv/json.go b/osv/json.go
index c634d98..590b230 100644
--- a/osv/json.go
+++ b/osv/json.go
@@ -161,7 +161,7 @@
 	EcosystemSpecific GoSpecific  `json:"ecosystem_specific"`
 }
 
-func Generate(id string, url string, r report.Report) []Entry {
+func Generate(id string, url string, r report.Report) map[string][]Entry {
 	importPath := r.Module
 	if r.Package != "" {
 		importPath = r.Package
@@ -203,7 +203,12 @@
 		entry.Aliases = []string{r.CVE}
 	}
 
-	entries := []Entry{entry}
+	entries := map[string][]Entry{}
+	modulePath := r.Module
+	if r.Stdlib {
+		modulePath = "stdlib"
+	}
+	entries[modulePath] = []Entry{entry}
 
 	// It would be better if this was just a recursive thing maybe?
 	for _, additional := range r.AdditionalPackages {
@@ -216,7 +221,11 @@
 		entryCopy.EcosystemSpecific.Symbols = additional.Symbols
 		entryCopy.Affects = generateAffects(additional.Versions)
 
-		entries = append(entries, entryCopy)
+		modulePath := additional.Module
+		if r.Stdlib {
+			modulePath = "stdlib"
+		}
+		entries[modulePath] = append(entries[modulePath], entryCopy)
 	}
 
 	return entries
diff --git a/osv/json_test.go b/osv/json_test.go
index c306db0..20c3026 100644
--- a/osv/json_test.go
+++ b/osv/json_test.go
@@ -45,82 +45,86 @@
 		},
 	}
 
-	want := []Entry{
-		{
-			ID: "GO-1991-0001",
-			Package: Package{
-				Name:      "example.com/vulnerable/v2",
-				Ecosystem: "Go",
-			},
-			Details: "It's a real bad one, I'll tell you that",
-			Affects: Affects{
-				Ranges: []AffectsRange{
-					{
-						Type:  TypeSemver,
-						Fixed: "2.1.1",
-					},
-					{
-						Type:       TypeSemver,
-						Introduced: "2.3.4",
-						Fixed:      "2.3.5",
-					},
-					{
-						Type:       TypeSemver,
-						Introduced: "2.5.0",
+	want := map[string][]Entry{
+		"example.com/vulnerable/v2": []Entry{
+			{
+				ID: "GO-1991-0001",
+				Package: Package{
+					Name:      "example.com/vulnerable/v2",
+					Ecosystem: "Go",
+				},
+				Details: "It's a real bad one, I'll tell you that",
+				Affects: Affects{
+					Ranges: []AffectsRange{
+						{
+							Type:  TypeSemver,
+							Fixed: "2.1.1",
+						},
+						{
+							Type:       TypeSemver,
+							Introduced: "2.3.4",
+							Fixed:      "2.3.5",
+						},
+						{
+							Type:       TypeSemver,
+							Introduced: "2.5.0",
+						},
 					},
 				},
-			},
-			References: []Reference{
-				Reference{Type: "FIX", URL: "pr"},
-				Reference{Type: "FIX", URL: "commit"},
-				Reference{Type: "WEB", URL: "issue-a"},
-				Reference{Type: "WEB", URL: "issue-b"},
-			},
-			Aliases: []string{"CVE-0000-0000"},
-			EcosystemSpecific: GoSpecific{
-				Symbols: []string{"A", "B.b"},
-				GOOS:    []string{"windows"},
-				GOARCH:  []string{"arm64"},
-				URL:     "https://vulns.golang.org/GO-1991-0001.html",
+				References: []Reference{
+					Reference{Type: "FIX", URL: "pr"},
+					Reference{Type: "FIX", URL: "commit"},
+					Reference{Type: "WEB", URL: "issue-a"},
+					Reference{Type: "WEB", URL: "issue-b"},
+				},
+				Aliases: []string{"CVE-0000-0000"},
+				EcosystemSpecific: GoSpecific{
+					Symbols: []string{"A", "B.b"},
+					GOOS:    []string{"windows"},
+					GOARCH:  []string{"arm64"},
+					URL:     "https://vulns.golang.org/GO-1991-0001.html",
+				},
 			},
 		},
-		{
+		"vanity.host/vulnerable": []Entry{
+			{
 
-			ID: "GO-1991-0001",
-			Package: Package{
-				Name:      "vanity.host/vulnerable/package",
-				Ecosystem: "Go",
-			},
-			Details: "It's a real bad one, I'll tell you that",
-			Affects: Affects{
-				Ranges: []AffectsRange{
-					{
-						Type:  TypeSemver,
-						Fixed: "2.1.1",
-					},
-					{
-						Type:       TypeSemver,
-						Introduced: "2.3.4",
-						Fixed:      "2.3.5",
-					},
-					{
-						Type:       TypeSemver,
-						Introduced: "2.5.0",
+				ID: "GO-1991-0001",
+				Package: Package{
+					Name:      "vanity.host/vulnerable/package",
+					Ecosystem: "Go",
+				},
+				Details: "It's a real bad one, I'll tell you that",
+				Affects: Affects{
+					Ranges: []AffectsRange{
+						{
+							Type:  TypeSemver,
+							Fixed: "2.1.1",
+						},
+						{
+							Type:       TypeSemver,
+							Introduced: "2.3.4",
+							Fixed:      "2.3.5",
+						},
+						{
+							Type:       TypeSemver,
+							Introduced: "2.5.0",
+						},
 					},
 				},
-			},
-			References: []Reference{
-				Reference{Type: "FIX", URL: "pr"},
-				Reference{Type: "FIX", URL: "commit"},
-				Reference{Type: "WEB", URL: "issue-a"},
-				Reference{Type: "WEB", URL: "issue-b"},
-			},
-			Aliases: []string{"CVE-0000-0000"},
-			EcosystemSpecific: GoSpecific{
-				Symbols: []string{"b", "A.b"},
-				GOOS:    []string{"windows"},
-				GOARCH:  []string{"arm64"},
-				URL:     "https://vulns.golang.org/GO-1991-0001.html",
+				References: []Reference{
+					Reference{Type: "FIX", URL: "pr"},
+					Reference{Type: "FIX", URL: "commit"},
+					Reference{Type: "WEB", URL: "issue-a"},
+					Reference{Type: "WEB", URL: "issue-b"},
+				},
+				Aliases: []string{"CVE-0000-0000"},
+				EcosystemSpecific: GoSpecific{
+					Symbols: []string{"b", "A.b"},
+					GOOS:    []string{"windows"},
+					GOARCH:  []string{"arm64"},
+					URL:     "https://vulns.golang.org/GO-1991-0001.html",
+				},
 			},
 		},
 	}
diff --git a/reports/GO-2020-0025.yaml b/reports/GO-2020-0025.yaml
index 2641a76..5c216a0 100644
--- a/reports/GO-2020-0025.yaml
+++ b/reports/GO-2020-0025.yaml
@@ -4,9 +4,10 @@
     symbols:
       - tgzExtractor.Extract
       - zipExtractor.Extract
+    versions:
+      - fixed: v0.0.0-20180523222229-09b5706aa936
 versions:
   - fixed: v0.0.0-20180523222229-09b5706aa936
-  - fixed: v0.0.0-20180523222229-09b5706aa936
 description: |
   Due to improper path santization, archives containing relative file
   paths can cause files to be written (or overwritten) outside of the
diff --git a/template b/template
index 80035c7..2cb53a7 100644
--- a/template
+++ b/template
@@ -1,23 +1,17 @@
-module = ""
-package = ""
-
-description = """
-
-"""
-
-cve = ""
-
-credit = ""
-
-symbols = [""]
-
-published = ""
-
-[[versions]]
-introduced = ""
-fixed = ""
-
-[links]
-commit = ""
-pr = ""
-context = [""]
\ No newline at end of file
+module: 
+package: 
+versions:
+  - introduced: 
+  - fixed: 
+description: |
+  
+cve: 
+credit: 
+symbols:
+  - 
+published: 
+links:
+  commit: 
+  pr: 
+  context:
+    - 
\ No newline at end of file