vulncheck/internal/binscan: extract inlined function names from binaries

Obtain the names of inlined functions from the binary by using
internal/gosym's ability to read inline tree information.

For golang/go#51412.

Change-Id: I1f3c1a2122939293c5a6f3e382f4b7bf32fccfeb
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/398756
Run-TryBot: Jonathan Amsterdam <jba@google.com>
Reviewed-by: Zvonimir Pavlinovic <zpavlinovic@google.com>
Reviewed-by: Roland Shoemaker <roland@golang.org>
diff --git a/vulncheck/internal/binscan/exe.go b/vulncheck/internal/binscan/exe.go
index fc2f862..4d09f9f 100644
--- a/vulncheck/internal/binscan/exe.go
+++ b/vulncheck/internal/binscan/exe.go
@@ -31,6 +31,8 @@
 	DataStart() uint64
 
 	PCLNTab() ([]byte, uint64)
+
+	SymbolInfo(name string) (uint64, uint64, io.ReaderAt, error)
 }
 
 // openExe returns reader r as an exe.
@@ -110,6 +112,40 @@
 	return 0
 }
 
+func (x *elfExe) SymbolInfo(name string) (uint64, uint64, io.ReaderAt, error) {
+	sym := x.lookupSymbol(name)
+	if sym == nil {
+		return 0, 0, nil, fmt.Errorf("no symbol %q", name)
+	}
+	prog := x.progContaining(sym.Value)
+	if prog == nil {
+		return 0, 0, nil, fmt.Errorf("no Prog containing value %d for %q", sym.Value, name)
+	}
+	return sym.Value, prog.Vaddr, prog.ReaderAt, nil
+}
+
+func (x *elfExe) lookupSymbol(name string) *elf.Symbol {
+	syms, err := x.f.Symbols()
+	if err != nil {
+		return nil
+	}
+	for _, s := range syms {
+		if s.Name == name {
+			return &s
+		}
+	}
+	return nil
+}
+
+func (x *elfExe) progContaining(addr uint64) *elf.Prog {
+	for _, p := range x.f.Progs {
+		if addr >= p.Vaddr && addr < p.Vaddr+p.Filesz {
+			return p
+		}
+	}
+	return nil
+}
+
 const go12magic = 0xfffffffb
 const go116magic = 0xfffffffa
 
@@ -223,6 +259,25 @@
 	return 0
 }
 
+// SymbolInfo returns name's offset in its section, a zero base and the section reader, or an error.
+func (x *peExe) SymbolInfo(name string) (uint64, uint64, io.ReaderAt, error) {
+	sym := x.lookupSymbol(name)
+	if sym == nil || sym.SectionNumber < 1 || int(sym.SectionNumber) > len(x.f.Sections) {
+		return 0, 0, nil, fmt.Errorf("no symbol %q", name) // absent, or not in any section
+	}
+	// In PE, the symbol's value is the offset from the section start.
+	return uint64(sym.Value), 0, x.f.Sections[sym.SectionNumber-1].ReaderAt, nil
+}
+
+func (x *peExe) lookupSymbol(name string) *pe.Symbol {
+	for _, s := range x.f.Symbols {
+		if s.Name == name {
+			return s
+		}
+	}
+	return nil
+}
+
 func (x *peExe) PCLNTab() ([]byte, uint64) {
 	var textOffset uint64
 	for _, section := range x.f.Sections {
@@ -252,7 +307,6 @@
 	if _, err := x.r.ReadAt(pclntab, offset); err != nil {
 		return nil, 0
 	}
-
 	return pclntab, textOffset
 }
 
@@ -304,6 +358,37 @@
 	return 0
 }
 
+func (x *machoExe) SymbolInfo(name string) (uint64, uint64, io.ReaderAt, error) {
+	sym := x.lookupSymbol(name)
+	if sym == nil {
+		return 0, 0, nil, fmt.Errorf("no symbol %q", name)
+	}
+	seg := x.segmentContaining(sym.Value)
+	if seg == nil {
+		return 0, 0, nil, fmt.Errorf("no Segment containing value %d for %q", sym.Value, name)
+	}
+	return sym.Value, seg.Addr, seg.ReaderAt, nil
+}
+
+func (x *machoExe) lookupSymbol(name string) *macho.Symbol {
+	for i := 0; x.f.Symtab != nil && i < len(x.f.Symtab.Syms); i++ { // Symtab is nil without a symbol table
+		if s := x.f.Symtab.Syms[i]; s.Name == name {
+			return &s
+		}
+	}
+	return nil
+}
+
+func (x *machoExe) segmentContaining(addr uint64) *macho.Segment {
+	for _, load := range x.f.Loads {
+		seg, ok := load.(*macho.Segment)
+		if ok && seg.Addr <= addr && addr <= seg.Addr+seg.Filesz-1 && seg.Name != "__PAGEZERO" {
+			return seg
+		}
+	}
+	return nil
+}
+
 func (x *machoExe) PCLNTab() ([]byte, uint64) {
 	var textOffset uint64
 	text := x.f.Section("__text")
diff --git a/vulncheck/internal/binscan/scan.go b/vulncheck/internal/binscan/scan.go
index 954d097..05efad6 100644
--- a/vulncheck/internal/binscan/scan.go
+++ b/vulncheck/internal/binscan/scan.go
@@ -42,10 +42,8 @@
 	return packagesModules
 }
 
-// ExtractPackagesAndSymbols extracts the symbols, packages, and their associated module versions
-// from a Go binary. Stripped binaries are not supported.
-//
-// TODO(#51412): detect inlined symbols too
+// ExtractPackagesAndSymbols extracts the symbols, packages, and their
+// associated module versions from a Go binary.
 func ExtractPackagesAndSymbols(bin io.ReaderAt) ([]*packages.Module, map[string][]string, error) {
 	bi, err := buildinfo.Read(bin)
 	if err != nil {
@@ -77,25 +75,46 @@
 		if f.Func == nil {
 			continue
 		}
-		symName := f.Func.BaseName()
-		if r := f.Func.ReceiverName(); r != "" {
-			if strings.HasPrefix(r, "(*") {
-				r = strings.Trim(r, "(*)")
-			}
-			symName = fmt.Sprintf("%s.%s", r, symName)
-		}
-
-		pkgName := f.Func.PackageName()
-		if pkgName == "" {
-			continue
-		}
-		pkgName, err := url.PathUnescape(pkgName)
+		pkgName, symName, err := parseName(f.Func.Sym)
 		if err != nil {
 			return nil, nil, err
 		}
-
 		packageSymbols[pkgName] = append(packageSymbols[pkgName], symName)
+		value, base, r, err := x.SymbolInfo("go.func.*")
+		if err != nil {
+			return nil, nil, fmt.Errorf("reading go.func.*: %v", err)
+		}
+		it, err := lineTab.InlineTree(&f, value, base, r)
+		if err != nil {
+			return nil, nil, fmt.Errorf("InlineTree: %v", err)
+		}
+		for _, ic := range it {
+			pkgName, symName, err := parseName(&gosym.Sym{Name: ic.Name})
+			if err != nil {
+				return nil, nil, err
+			}
+			packageSymbols[pkgName] = append(packageSymbols[pkgName], symName)
+		}
 	}
 
 	return debugModulesToPackagesModules(bi.Deps), packageSymbols, nil
 }
+
+func parseName(s *gosym.Sym) (pkg, sym string, err error) {
+	symName := s.BaseName()
+	if r := s.ReceiverName(); r != "" {
+		if strings.HasPrefix(r, "(*") {
+			r = strings.Trim(r, "(*)")
+		}
+		symName = fmt.Sprintf("%s.%s", r, symName)
+	}
+
+	pkgName := s.PackageName()
+	if pkgName != "" {
+		pkgName, err = url.PathUnescape(pkgName)
+		if err != nil {
+			return "", "", err
+		}
+	}
+	return pkgName, symName, nil
+}
diff --git a/vulncheck/internal/binscan/scan_test.go b/vulncheck/internal/binscan/scan_test.go
index c5cb210..9c9cb83 100644
--- a/vulncheck/internal/binscan/scan_test.go
+++ b/vulncheck/internal/binscan/scan_test.go
@@ -16,21 +16,25 @@
 )
 
 func TestExtractPackagesAndSymbols(t *testing.T) {
-	binary, done := buildtest.GoBuild(t, "testdata")
-	defer done()
+	for _, goos := range []string{"linux", "darwin", "windows"} {
+		t.Run(goos, func(t *testing.T) {
+			binary, done := buildtest.GoBuild(t, "testdata", "GOOS", goos)
+			defer done()
 
-	f, err := os.Open(binary)
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer f.Close()
-	_, syms, err := ExtractPackagesAndSymbols(f)
-	if err != nil {
-		t.Fatal(err)
-	}
-	got := syms["main"]
-	want := []string{"main"}
-	if !cmp.Equal(got, want) {
-		t.Errorf("\ngot  %q\nwant %q", got, want)
+			f, err := os.Open(binary)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer f.Close()
+			_, syms, err := ExtractPackagesAndSymbols(f)
+			if err != nil {
+				t.Fatal(err)
+			}
+			got := syms["main"]
+			want := []string{"main", "f", "g"}
+			if !cmp.Equal(got, want) {
+				t.Errorf("\ngot  %q\nwant %q", got, want)
+			}
+		})
 	}
 }
diff --git a/vulncheck/internal/binscan/testdata/main.go b/vulncheck/internal/binscan/testdata/main.go
index 7009b8e..0bd37df 100644
--- a/vulncheck/internal/binscan/testdata/main.go
+++ b/vulncheck/internal/binscan/testdata/main.go
@@ -8,6 +8,6 @@
 	g()
 }
 
-func g() int {
-	return 1
+func g() {
+	println(1)
 }
diff --git a/vulncheck/internal/buildtest/buildtest.go b/vulncheck/internal/buildtest/buildtest.go
index e676576..e57b784 100644
--- a/vulncheck/internal/buildtest/buildtest.go
+++ b/vulncheck/internal/buildtest/buildtest.go
@@ -7,23 +7,45 @@
 package buildtest
 
 import (
+	"fmt"
 	"os"
 	"os/exec"
 	"path/filepath"
 	"runtime"
+	"strings"
 	"testing"
 )
 
-// GoBuild runs "go build" on dir using the additional environment
-// variables in env. Each element of env should be of the form
-// "VAR=VALUE".
+var unsupportedGoosGoarch = map[string]bool{
+	"darwin/386": true,
+}
+
+// GoBuild runs "go build" on dir using the additional environment variables in
+// envVarVals, which should be an alternating list of variables and values.
 // It returns the path to the resulting binary, and a function
 // to call when finished with the binary.
-func GoBuild(t *testing.T, dir string, env ...string) (binaryPath string, cleanup func()) {
+func GoBuild(t *testing.T, dir string, envVarVals ...string) (binaryPath string, cleanup func()) {
 	switch runtime.GOOS {
 	case "android", "js", "ios":
 		t.Skipf("skipping on OS without 'go build' %s", runtime.GOOS)
 	}
+
+	if len(envVarVals)%2 != 0 {
+		t.Fatal("last args should be alternating variables and values")
+	}
+	var env []string
+	if len(envVarVals) > 0 {
+		env = os.Environ()
+		for i := 0; i < len(envVarVals); i += 2 {
+			env = append(env, fmt.Sprintf("%s=%s", envVarVals[i], envVarVals[i+1]))
+		}
+	}
+
+	gg := lookupEnv("GOOS", env, runtime.GOOS) + "/" + lookupEnv("GOARCH", env, runtime.GOARCH)
+	if unsupportedGoosGoarch[gg] {
+		t.Skipf("skipping unsupported GOOS/GOARCH pair %s", gg)
+	}
+
 	tmpDir, err := os.MkdirTemp("", "buildtest")
 	if err != nil {
 		t.Fatal(err)
@@ -40,9 +62,7 @@
 	}
 	cmd := exec.Command(goCommandPath, "build", "-o", binaryPath)
 	cmd.Dir = dir
-	if len(env) > 0 {
-		cmd.Env = append(os.Environ(), env...)
-	}
+	cmd.Env = env
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
 	if err := cmd.Run(); err != nil {
@@ -50,3 +70,19 @@
 	}
 	return binaryPath, func() { os.RemoveAll(tmpDir) }
 }
+
+// lookupEnv looks for name in env, a list of "VAR=VALUE" strings. It returns the
+// last value given for name (the one os/exec uses), or defaultValue if none.
+func lookupEnv(name string, env []string, defaultValue string) string {
+	for j := len(env) - 1; j >= 0; j-- {
+		i := strings.IndexByte(env[j], '=')
+		if i < 0 {
+			// malformed env entry; just ignore it
+			continue
+		}
+		if name == env[j][:i] {
+			return env[j][i+1:]
+		}
+	}
+	return defaultValue
+}
diff --git a/vulncheck/internal/gosym/pclntab_test.go b/vulncheck/internal/gosym/pclntab_test.go
index 8d21bb4..7a412dd 100644
--- a/vulncheck/internal/gosym/pclntab_test.go
+++ b/vulncheck/internal/gosym/pclntab_test.go
@@ -33,7 +33,7 @@
 		t.Skipf("skipping in short mode on non-Linux system %s", runtime.GOARCH)
 	}
 
-	return buildtest.GoBuild(t, "testdata", "GOOS=linux")
+	return buildtest.GoBuild(t, "testdata", "GOOS", "linux")
 }
 
 // skipIfNotELF skips the test if we are not running on an ELF system.