internal/vulncheck: add binary abstraction data structure

This is minimal information needed for govulncheck to analyze binaries.
This will be jsonized into the follow-up CLs.

Change-Id: I52f71ad66ea7e5004d4ced388c488eff5e686fd6
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/540235
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Ian Cottrell <iancottrell@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Zvonimir Pavlinovic <zpavlinovic@google.com>
diff --git a/internal/vulncheck/binary.go b/internal/vulncheck/binary.go
index e1cf99d..e637a50 100644
--- a/internal/vulncheck/binary.go
+++ b/internal/vulncheck/binary.go
@@ -12,7 +12,9 @@
 	"fmt"
 	"io"
 	"runtime/debug"
+	"sort"
 
+	"golang.org/x/tools/go/packages"
 	"golang.org/x/vuln/internal"
 	"golang.org/x/vuln/internal/client"
 	"golang.org/x/vuln/internal/govulncheck"
@@ -23,24 +25,50 @@
 // Binary detects presence of vulnerable symbols in exe and
 // emits findings to exe.
 func Binary(ctx context.Context, handler govulncheck.Handler, exe io.ReaderAt, cfg *govulncheck.Config, client *client.Client) error {
-	vr, err := binary(ctx, handler, exe, cfg, client)
+	bin, err := createBin(exe)
+	if err != nil {
+		return err
+	}
+
+	vr, err := binary(ctx, handler, bin, cfg, client)
 	if err != nil {
 		return err
 	}
 	return emitBinaryResult(handler, vr, binaryCallstacks(vr))
 }
 
-// binary detects presence of vulnerable symbols in exe.
-// The Calls, Imports, and Requires fields on Result will be empty.
-func binary(ctx context.Context, handler govulncheck.Handler, exe io.ReaderAt, cfg *govulncheck.Config, client *client.Client) (*Result, error) {
+// bin is an abstraction of Go binary containing
+// minimal information needed by govulncheck.
+type bin struct {
+	Modules    []*packages.Module `json:"modules,omitempty"`
+	PkgSymbols []buildinfo.Symbol `json:"pkgSymbols,omitempty"`
+	GoVersion  string             `json:"goVersion,omitempty"`
+	GOOS       string             `json:"goos,omitempty"`
+	GOARCH     string             `json:"goarch,omitempty"`
+}
+
+func createBin(exe io.ReaderAt) (*bin, error) {
 	mods, packageSymbols, bi, err := buildinfo.ExtractPackagesAndSymbols(exe)
 	if err != nil {
 		return nil, fmt.Errorf("could not parse provided binary: %v", err)
 	}
 
-	graph := NewPackageGraph(bi.GoVersion)
-	graph.AddModules(mods...)
-	mods = append(mods, graph.GetModule(internal.GoStdModulePath))
+	return &bin{
+		Modules:    mods,
+		PkgSymbols: packageSymbols,
+		GoVersion:  bi.GoVersion,
+		GOOS:       findSetting("GOOS", bi),
+		GOARCH:     findSetting("GOARCH", bi),
+	}, nil
+}
+
+// binary detects presence of vulnerable symbols in bin.
+// It does not compute call graphs so the corresponding
+// info in Result will be empty.
+func binary(ctx context.Context, handler govulncheck.Handler, bin *bin, cfg *govulncheck.Config, client *client.Client) (*Result, error) {
+	graph := NewPackageGraph(bin.GoVersion)
+	graph.AddModules(bin.Modules...)
+	mods := append(bin.Modules, graph.GetModule(internal.GoStdModulePath))
 
 	mv, err := FetchVulnerabilities(ctx, client, mods)
 	if err != nil {
@@ -52,21 +80,27 @@
 		return nil, err
 	}
 
-	goos := findSetting("GOOS", bi)
-	goarch := findSetting("GOARCH", bi)
-	if goos == "" || goarch == "" {
-		fmt.Printf("warning: failed to extract build system specification GOOS: %s GOARCH: %s\n", goos, goarch)
+	if bin.GOOS == "" || bin.GOARCH == "" {
+		fmt.Printf("warning: failed to extract build system specification GOOS: %s GOARCH: %s\n", bin.GOOS, bin.GOARCH)
 	}
-	affVulns := affectingVulnerabilities(mv, goos, goarch)
+	affVulns := affectingVulnerabilities(mv, bin.GOOS, bin.GOARCH)
 
 	result := &Result{}
-	if packageSymbols == nil {
+	if len(bin.PkgSymbols) == 0 {
 		// The binary exe is stripped. We currently cannot detect inlined
 		// symbols for stripped binaries (see #57764), so we report
 		// vulnerabilities at the go.mod-level precision.
 		addRequiresOnlyVulns(result, graph, affVulns)
 	} else {
-		for pkg, symbols := range packageSymbols {
+		// Group symbols per package to avoid querying vulns all over again.
+		pkgSymbols := make(map[string][]string)
+		for _, sym := range bin.PkgSymbols {
+			pkgSymbols[sym.Pkg] = append(pkgSymbols[sym.Pkg], sym.Name)
+		}
+
+		for pkg, symbols := range pkgSymbols {
+			// sort symbols for deterministic results
+			sort.SliceStable(symbols, func(i, j int) bool { return symbols[i] < symbols[j] })
 			if !cfg.ScanLevel.WantSymbols() {
 				addImportsOnlyVulns(result, graph, pkg, symbols, affVulns)
 			} else {
diff --git a/internal/vulncheck/binary_test.go b/internal/vulncheck/binary_test.go
index fc4c9bf..66ea343 100644
--- a/internal/vulncheck/binary_test.go
+++ b/internal/vulncheck/binary_test.go
@@ -10,7 +10,6 @@
 import (
 	"context"
 	"fmt"
-	"io"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -22,7 +21,6 @@
 	"golang.org/x/vuln/internal/semver"
 	"golang.org/x/vuln/internal/test"
 	"golang.org/x/vuln/internal/testenv"
-	"golang.org/x/vuln/internal/vulncheck/internal/buildinfo"
 )
 
 // TODO: we build binary programatically, so what if the underlying tool chain changes?
@@ -107,11 +105,15 @@
 		t.Fatalf("failed to build the binary %v %v", err, string(out))
 	}
 
-	bin, err := os.Open(filepath.Join(e.Config.Dir, "entry"))
+	exe, err := os.Open(filepath.Join(e.Config.Dir, "entry"))
 	if err != nil {
-		t.Fatalf("failed to access the binary %v", err)
+		t.Fatal(err)
 	}
-	defer bin.Close()
+	defer exe.Close()
+	bin, err := createBin(exe)
+	if err != nil {
+		t.Fatal(err)
+	}
 
 	c, err := newTestClient()
 	if err != nil {
@@ -125,7 +127,7 @@
 		t.Fatal(err)
 	}
 
-	goversion := getGoVersion(bin)
+	goversion := goVersion(bin)
 	// In importsOnly mode, vulnerable symbols
 	// {avuln.VulnData.Vuln1, avuln.VulnData.Vuln2, bvuln.Vuln}
 	// should be detected.
@@ -161,9 +163,8 @@
 	compareVulns(t, wantVulns, res)
 }
 
-func getGoVersion(exe io.ReaderAt) string {
-	_, _, bi, _ := buildinfo.ExtractPackagesAndSymbols(exe)
-	return semver.GoTagToSemver(bi.GoVersion)
+func goVersion(bin *bin) string {
+	return semver.GoTagToSemver(bin.GoVersion)
 }
 
 // Test58509 is supposed to test issue #58509 where a whole
@@ -226,11 +227,15 @@
 				t.Fatalf("failed to build the binary %v %v", err, string(out))
 			}
 
-			bin, err := os.Open(filepath.Join(e.Config.Dir, "entry"))
+			exe, err := os.Open(filepath.Join(e.Config.Dir, "entry"))
 			if err != nil {
-				t.Fatalf("failed to access the binary %v", err)
+				t.Fatal(err)
 			}
-			defer bin.Close()
+			defer exe.Close()
+			bin, err := createBin(exe)
+			if err != nil {
+				t.Fatal(err)
+			}
 
 			c, err := newTestClient()
 			if err != nil {
diff --git a/internal/vulncheck/internal/buildinfo/additions_scan.go b/internal/vulncheck/internal/buildinfo/additions_scan.go
index 9790cfa..d718252 100644
--- a/internal/vulncheck/internal/buildinfo/additions_scan.go
+++ b/internal/vulncheck/internal/buildinfo/additions_scan.go
@@ -17,7 +17,6 @@
 	"io"
 	"net/url"
 	"runtime/debug"
-	"sort"
 	"strings"
 
 	"golang.org/x/tools/go/packages"
@@ -41,12 +40,17 @@
 	return packagesModules
 }
 
+type Symbol struct {
+	Pkg  string `json:"pkg,omitempty"`
+	Name string `json:"name,omitempty"`
+}
+
 // ExtractPackagesAndSymbols extracts symbols, packages, modules from
 // bin as well as bin's metadata.
 //
 // If the symbol table is not available, such as in the case of stripped
 // binaries, returns module and binary info but without the symbol info.
-func ExtractPackagesAndSymbols(bin io.ReaderAt) ([]*packages.Module, map[string][]string, *debug.BuildInfo, error) {
+func ExtractPackagesAndSymbols(bin io.ReaderAt) ([]*packages.Module, []Symbol, *debug.BuildInfo, error) {
 	bi, err := buildinfo.Read(bin)
 	if err != nil {
 		return nil, nil, nil, err
@@ -87,11 +91,7 @@
 		return nil, nil, nil, err
 	}
 
-	type pkgSymbol struct {
-		pkg string
-		sym string
-	}
-	pkgSyms := make(map[pkgSymbol]bool)
+	pkgSyms := make(map[Symbol]bool)
 	for _, f := range tab.Funcs {
 		if f.Func == nil {
 			continue
@@ -100,7 +100,7 @@
 		if err != nil {
 			return nil, nil, nil, err
 		}
-		pkgSyms[pkgSymbol{pkgName, symName}] = true
+		pkgSyms[Symbol{pkgName, symName}] = true
 
 		// Collect symbols that were inlined in f.
 		it, err := lineTab.InlineTree(&f, value, base, r)
@@ -112,20 +112,16 @@
 			if err != nil {
 				return nil, nil, nil, err
 			}
-			pkgSyms[pkgSymbol{pkgName, symName}] = true
+			pkgSyms[Symbol{pkgName, symName}] = true
 		}
 	}
 
-	packageSymbols := make(map[string][]string)
-	for p := range pkgSyms {
-		packageSymbols[p.pkg] = append(packageSymbols[p.pkg], p.sym)
-	}
-	// Sort symbols per pkg for deterministic results.
-	for _, syms := range packageSymbols {
-		sort.Strings(syms)
+	var syms []Symbol
+	for ps := range pkgSyms {
+		syms = append(syms, ps)
 	}
 
-	return debugModulesToPackagesModules(bi.Deps), packageSymbols, bi, nil
+	return debugModulesToPackagesModules(bi.Deps), syms, bi, nil
 }
 
 func parseName(s *gosym.Sym) (pkg, sym string, err error) {
diff --git a/internal/vulncheck/internal/buildinfo/additions_scan_test.go b/internal/vulncheck/internal/buildinfo/additions_scan_test.go
index 7e238ce..cd80972 100644
--- a/internal/vulncheck/internal/buildinfo/additions_scan_test.go
+++ b/internal/vulncheck/internal/buildinfo/additions_scan_test.go
@@ -9,6 +9,7 @@
 
 import (
 	"os"
+	"sort"
 	"testing"
 
 	"github.com/google/go-cmp/cmp"
@@ -56,10 +57,29 @@
 			if err != nil {
 				t.Fatal(err)
 			}
-			got := syms["main"]
-			want := []string{"f", "g", "main"}
-			if !cmp.Equal(got, want) {
-				t.Errorf("\ngot  %q\nwant %q", got, want)
+
+			got := mainSortedSymbols(syms)
+			want := []Symbol{
+				{"main", "f"},
+				{"main", "g"},
+				{"main", "main"},
+			}
+
+			if diff := cmp.Diff(want, got); diff != "" {
+				t.Errorf("(-want,+got):%s", diff)
 			}
 		})
 }
+
+// mainSortedSymbols gets symbols for "main" package and
+// sorts them for testing purposes.
+func mainSortedSymbols(syms []Symbol) []Symbol {
+	var s []Symbol
+	for _, ps := range syms {
+		if ps.Pkg == "main" {
+			s = append(s, ps)
+		}
+	}
+	sort.SliceStable(s, func(i, j int) bool { return s[i].Pkg+"."+s[i].Name < s[j].Pkg+"."+s[j].Name })
+	return s
+}
diff --git a/internal/vulncheck/internal/buildinfo/additions_stripped_test.go b/internal/vulncheck/internal/buildinfo/additions_stripped_test.go
index ce10de6..eed90a0 100644
--- a/internal/vulncheck/internal/buildinfo/additions_stripped_test.go
+++ b/internal/vulncheck/internal/buildinfo/additions_stripped_test.go
@@ -59,10 +59,16 @@
 			if err != nil {
 				t.Fatal(err)
 			}
-			got := syms["main"]
-			want := []string{"f", "g", "main"}
-			if !cmp.Equal(got, want) {
-				t.Errorf("\ngot  %q\nwant %q", got, want)
+
+			got := mainSortedSymbols(syms)
+			want := []Symbol{
+				{"main", "f"},
+				{"main", "g"},
+				{"main", "main"},
+			}
+
+			if diff := cmp.Diff(want, got); diff != "" {
+				t.Errorf("(-want,+got):%s", diff)
 			}
 		})
 }