internal/client: allow flat list of OSV as db

Allows a user of govulncheck to pass in a folder containing OSV JSON
files as a database.

Under the hood, we index the files and create an in-memory index. The
bulk of this logic is in a new type, hybridSource.

This is currently a hidden, experimental feature.

Change-Id: Ic1cc5f02badf2e3a51024eb3e6e9e3d7cf69b576
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/494405
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Julie Qiu <julieqiu@google.com>
Run-TryBot: Tatiana Bradley <tatianabradley@google.com>
diff --git a/internal/client/client.go b/internal/client/client.go
index e10cc1d..236c9d1 100644
--- a/internal/client/client.go
+++ b/internal/client/client.go
@@ -94,7 +94,15 @@
 		return &Client{source: newLocalSource(dir)}, nil
 	}
 
-	return nil, errUnknownSchema
+	// If the DB doesn't follow the v1 schema,
+	// attempt to interpret it as a flat list of OSV files.
+	// This is currently a "hidden" feature, so don't output the
+	// specific error if this fails.
+	src, err := newHybridSource(dir)
+	if err != nil {
+		return nil, errUnknownSchema
+	}
+	return &Client{source: src}, nil
 }
 
 func toDir(uri *url.URL) (string, error) {
diff --git a/internal/client/client_test.go b/internal/client/client_test.go
index c3ff09b..a04dfd5 100644
--- a/internal/client/client_test.go
+++ b/internal/client/client_test.go
@@ -26,6 +26,8 @@
 	testLegacyVulndbFileURL = localURL(testLegacyVulndb)
 	testVulndb              = filepath.Join("testdata", "vulndb-v1")
 	testVulndbFileURL       = localURL(testVulndb)
+	testFlatVulndb          = filepath.Join("testdata", "vulndb-v1", "ID")
+	testFlatVulndbFileURL   = localURL(testFlatVulndb)
 	testIDs                 = []string{
 		"GO-2021-0159",
 		"GO-2022-0229",
@@ -124,6 +126,17 @@
 		}
 	})
 
+	t.Run("local/flat", func(t *testing.T) {
+		src := testFlatVulndbFileURL
+		c, err := NewClient(src, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if c == nil {
+			t.Errorf("NewClient(%s) = nil, want instantiated *Client", src)
+		}
+	})
+
 	t.Run("local/legacy", func(t *testing.T) {
 		src := testLegacyVulndbFileURL
 		_, err := NewClient(src, nil)
@@ -304,6 +317,15 @@
 		test(t, fc)
 	})
 
+	t.Run("hybrid", func(t *testing.T) {
+		fc, err := NewClient(testFlatVulndbFileURL, nil)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		test(t, fc)
+	})
+
 	t.Run("in-memory", func(t *testing.T) {
 		testEntries, err := entries(testIDs)
 		if err != nil {
diff --git a/internal/client/index.go b/internal/client/index.go
new file mode 100644
index 0000000..5f2d0c0
--- /dev/null
+++ b/internal/client/index.go
@@ -0,0 +1,120 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package client
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+
+	"golang.org/x/vuln/internal/osv"
+	isem "golang.org/x/vuln/internal/semver"
+)
+
+// indexFromDir returns a raw index created from a directory
+// containing OSV entries.
+// It skips any non-JSON files but errors if any of the JSON files
+// cannot be unmarshaled into OSV, or have a filename other than <ID>.json.
+// Files are read by base name relative to dir, i.e. a flat layout is assumed.
+func indexFromDir(dir string) (map[string][]byte, error) {
+	idx := newIndex()
+	f := os.DirFS(dir)
+
+	if err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
+		// WalkDir passes a nil d when the initial stat of dir fails, so
+		// err must be checked before d is touched.
+		if err != nil {
+			return err
+		}
+		fname := d.Name()
+		if d.IsDir() || filepath.Ext(fname) != ".json" {
+			return nil
+		}
+
+		b, err := fs.ReadFile(f, fname)
+		if err != nil {
+			return err
+		}
+		var entry osv.Entry
+		if err := json.Unmarshal(b, &entry); err != nil {
+			return err
+		}
+		if fname != entry.ID+".json" {
+			return fmt.Errorf("OSV entries must have filename of the form <ID>.json, got %s", fname)
+		}
+
+		idx.add(&entry)
+		return nil
+	}); err != nil {
+		return nil, err
+	}
+
+	return idx.raw()
+}
+
+// indexFromEntries returns a raw index created from the given OSV entries.
+func indexFromEntries(entries []*osv.Entry) (map[string][]byte, error) {
+	built := newIndex()
+	for n := range entries {
+		built.add(entries[n])
+	}
+
+	return built.raw()
+}
+
+// index accumulates database-level and per-module metadata for OSV entries.
+type index struct {
+	db      *dbMeta
+	modules modulesIndex
+}
+
+// newIndex returns an empty index with its db metadata and modules map allocated.
+func newIndex() *index {
+	meta, byPath := &dbMeta{}, make(map[string]*moduleMeta)
+	return &index{db: meta, modules: byPath}
+}
+
+// add records entry in the index: it advances the db's Modified time if
+// entry is newer, and appends a moduleVuln for each module entry affects.
+func (i *index) add(entry *osv.Entry) {
+	if entry.Modified.After(i.db.Modified) {
+		i.db.Modified = entry.Modified
+	}
+	for _, a := range entry.Affected {
+		p := a.Module.Path
+		m, found := i.modules[p]
+		if !found {
+			// First vulnerability seen for this module path.
+			m = &moduleMeta{Path: p, Vulns: []moduleVuln{}}
+			i.modules[p] = m
+		}
+		vuln := moduleVuln{
+			ID:       entry.ID,
+			Modified: entry.Modified,
+			Fixed:    isem.LatestFixedVersion(a.Ranges),
+		}
+		m.Vulns = append(m.Vulns, vuln)
+	}
+}
+
+// raw serializes the index into a map from endpoint to JSON bytes,
+// holding the db metadata and the modules index.
+func (i *index) raw() (map[string][]byte, error) {
+	dbJSON, err := json.Marshal(i.db)
+	if err != nil {
+		return nil, err
+	}
+	modulesJSON, err := json.Marshal(i.modules)
+	if err != nil {
+		return nil, err
+	}
+
+	return map[string][]byte{
+		dbEndpoint:      dbJSON,
+		modulesEndpoint: modulesJSON,
+	}, nil
+}
diff --git a/internal/client/schema.go b/internal/client/schema.go
index ffb94a8..0b84237 100644
--- a/internal/client/schema.go
+++ b/internal/client/schema.go
@@ -5,7 +5,9 @@
 package client
 
 import (
+	"encoding/json"
 	"path"
+	"sort"
 	"time"
 )
 
@@ -54,3 +56,22 @@
 	// vulnerability, in SemVer 2.0.0 format, with no leading "v" prefix.
 	Fixed string `json:"fixed,omitempty"`
 }
+
+// modulesIndex represents an in-memory modules index.
+type modulesIndex map[string]*moduleMeta
+
+// MarshalJSON encodes m as a JSON array of modules ordered by path,
+// sorting each module's Vulns in place by ID.
+func (m modulesIndex) MarshalJSON() ([]byte, error) {
+	sorted := make([]*moduleMeta, 0, len(m))
+	for _, meta := range m {
+		sort.SliceStable(meta.Vulns, func(a, b int) bool {
+			return meta.Vulns[a].ID < meta.Vulns[b].ID
+		})
+		sorted = append(sorted, meta)
+	}
+	sort.SliceStable(sorted, func(a, b int) bool {
+		return sorted[a].Path < sorted[b].Path
+	})
+	return json.Marshal(sorted)
+}
diff --git a/internal/client/source.go b/internal/client/source.go
index 89af764..3e47b09 100644
--- a/internal/client/source.go
+++ b/internal/client/source.go
@@ -13,11 +13,10 @@
 	"io/fs"
 	"net/http"
 	"os"
-	"sort"
+	"path/filepath"
 
 	"golang.org/x/vuln/internal/derrors"
 	"golang.org/x/vuln/internal/osv"
-	isem "golang.org/x/vuln/internal/semver"
 )
 
 type source interface {
@@ -85,34 +84,46 @@
 	return fs.ReadFile(ls.fs, endpoint+".json")
 }
 
-// newInMemorySource creates a new in-memory source.
+// newHybridSource indexes the OSV JSON files in dir and returns a source
+// that serves that index from memory and the entries themselves from dir.
+func newHybridSource(dir string) (*hybridSource, error) {
+	raw, err := indexFromDir(dir)
+	if err != nil {
+		return nil, err
+	}
+	idx := &inMemorySource{data: raw}
+	entries := &localSource{fs: os.DirFS(dir)}
+	return &hybridSource{index: idx, osv: entries}, nil
+}
+
+// hybridSource reads OSV entries from a local file system, but reads
+// indexes from an in-memory map.
+type hybridSource struct {
+	index *inMemorySource // serves endpoints under indexDir
+	osv   *localSource    // serves all other endpoints from files on disk
+}
+
+// get returns the raw contents of endpoint: endpoints directly under indexDir
+// are served from the in-memory index, all others from the local OSV files.
+func (hs *hybridSource) get(ctx context.Context, endpoint string) (_ []byte, err error) {
+	// Deferred: called inline, err would still be nil with nothing to wrap.
+	defer derrors.Wrap(&err, "get(%s)", endpoint)
+
+	dir, file := filepath.Split(endpoint)
+	if filepath.Dir(dir) == indexDir {
+		return hs.index.get(ctx, endpoint)
+	}
+	return hs.osv.get(ctx, file)
+}
+
+// newInMemorySource creates a new in-memory source from OSV entries.
 // Adapted from x/vulndb/internal/database.go.
 func newInMemorySource(entries []*osv.Entry) (*inMemorySource, error) {
-	data := make(map[string][]byte)
-	db := dbMeta{}
-	modulesMap := make(map[string]*moduleMeta)
+	data, err := indexFromEntries(entries)
+	if err != nil {
+		return nil, err
+	}
+
 	for _, entry := range entries {
-		if entry.ID == "" {
-			return nil, fmt.Errorf("entry %v has no ID", entry)
-		}
-		if entry.Modified.After(db.Modified) {
-			db.Modified = entry.Modified
-		}
-		for _, affected := range entry.Affected {
-			modulePath := affected.Module.Path
-			if _, ok := modulesMap[modulePath]; !ok {
-				modulesMap[modulePath] = &moduleMeta{
-					Path:  modulePath,
-					Vulns: []moduleVuln{},
-				}
-			}
-			module := modulesMap[modulePath]
-			module.Vulns = append(module.Vulns, moduleVuln{
-				ID:       entry.ID,
-				Modified: entry.Modified,
-				Fixed:    isem.LatestFixedVersion(affected.Ranges),
-			})
-		}
 		b, err := json.Marshal(entry)
 		if err != nil {
 			return nil, err
@@ -120,31 +131,6 @@
 		data[entryEndpoint(entry.ID)] = b
 	}
 
-	b, err := json.Marshal(db)
-	if err != nil {
-		return nil, err
-	}
-	data[dbEndpoint] = b
-
-	// Add the modules endpoint.
-	modules := make([]*moduleMeta, 0, len(modulesMap))
-	for _, module := range modulesMap {
-		modules = append(modules, module)
-	}
-	sort.SliceStable(modules, func(i, j int) bool {
-		return modules[i].Path < modules[j].Path
-	})
-	for _, module := range modules {
-		sort.SliceStable(module.Vulns, func(i, j int) bool {
-			return module.Vulns[i].ID < module.Vulns[j].ID
-		})
-	}
-	b, err = json.Marshal(modules)
-	if err != nil {
-		return nil, err
-	}
-	data[modulesEndpoint] = b
-
 	return &inMemorySource{data: data}, nil
 }
 
diff --git a/internal/client/source_test.go b/internal/client/source_test.go
index a6933c3..df2d92b 100644
--- a/internal/client/source_test.go
+++ b/internal/client/source_test.go
@@ -67,4 +67,13 @@
 
 		test(t, ms)
 	})
+
+	t.Run("hybrid", func(t *testing.T) {
+		hs, err := newHybridSource(testFlatVulndb)
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		test(t, hs)
+	})
 }