vulncheck: add vulnerability detection for binaries

The binscan package has been mostly copied from internal/audit. The only
change is to use io.ReaderAt as an input binary instead of a file path.

Cherry-picked: https://go-review.googlesource.com/c/exp/+/363013

Change-Id: If0fdfef87a57463cd7007a6b8fd2c97c427df752
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/395042
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/vulncheck/binary.go b/vulncheck/binary.go
new file mode 100644
index 0000000..65ea431
--- /dev/null
+++ b/vulncheck/binary.go
@@ -0,0 +1,47 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vulncheck
+
+import (
+	"io"
+
+	"golang.org/x/vuln/vulncheck/internal/binscan"
+)
+
+// Binary detects presence of vulnerable symbols in exe. Only Result.Vulns is
+// set: the imports, require, and call graph are all unavailable (nil).
+func Binary(exe io.ReaderAt, cfg *Config) (*Result, error) {
+	modules, packageSymbols, err := binscan.ExtractPackagesAndSymbols(exe)
+	if err != nil {
+		return nil, err
+	}
+	modVulns, err := fetchVulnerabilities(cfg.Client, modules) // cfg is dereferenced here, so it must be non-nil
+	if err != nil {
+		return nil, err
+	}
+
+	result := &Result{}
+	for pkg, symbols := range packageSymbols {
+		for _, symbol := range symbols {
+			for _, osv := range modVulns.VulnsForSymbol(pkg, symbol) {
+				for _, affected := range osv.Affected {
+					if affected.Package.Name != pkg { // only entries for the package being examined
+						continue
+					}
+					for _, symbol := range affected.EcosystemSpecific.Symbols { // NOTE(review): shadows the outer symbol, so every listed symbol is reported, not only the one found in the binary — confirm intended
+						vuln := &Vuln{
+							OSV:     osv,
+							Symbol:  symbol,
+							PkgPath: pkg,
+							// TODO(zpavlinovic): infer mod path from PkgPath and modules?
+						}
+						result.Vulns = append(result.Vulns, vuln)
+					}
+				}
+			}
+		}
+	}
+	return result, nil
+}
diff --git a/vulncheck/binary_test.go b/vulncheck/binary_test.go
new file mode 100644
index 0000000..ffcce1d
--- /dev/null
+++ b/vulncheck/binary_test.go
@@ -0,0 +1,7 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vulncheck
+
+// TODO(zpavlinovic): add tests.
diff --git a/vulncheck/internal/binscan/exe.go b/vulncheck/internal/binscan/exe.go
new file mode 100644
index 0000000..37399e1
--- /dev/null
+++ b/vulncheck/internal/binscan/exe.go
@@ -0,0 +1,349 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binscan
+
+// This file is a somewhat modified version of cmd/go/internal/version/exe.go
+// that adds functionality for extracting the PCLN table.
+
+import (
+	"bytes"
+	"debug/elf"
+	"debug/macho"
+	"debug/pe"
+	"encoding/binary"
+	"fmt"
+
+	// "internal/xcoff"
+	"io"
+)
+
+// An exe is a generic interface to an OS executable (ELF, Mach-O, PE, XCOFF).
+type exe interface {
+	// ReadData reads and returns up to size bytes starting at virtual address addr.
+	ReadData(addr, size uint64) ([]byte, error)
+
+	// DataStart returns the writable data segment start address.
+	DataStart() uint64
+
+	PCLNTab() ([]byte, uint64)
+}
+
+// openExe returns reader r as an exe.
+func openExe(r io.ReaderAt) (exe, error) {
+	data := make([]byte, 16)
+	if _, err := r.ReadAt(data, 0); err != nil {
+		return nil, err
+	}
+	if bytes.HasPrefix(data, []byte("\x7FELF")) {
+		e, err := elf.NewFile(r)
+		if err != nil {
+			return nil, err
+		}
+		return &elfExe{e}, nil
+	}
+	if bytes.HasPrefix(data, []byte("MZ")) {
+		e, err := pe.NewFile(r)
+		if err != nil {
+			return nil, err
+		}
+		return &peExe{r, e}, nil
+	}
+	if bytes.HasPrefix(data, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(data[1:], []byte("\xFA\xED\xFE")) {
+		e, err := macho.NewFile(r)
+		if err != nil {
+			return nil, err
+		}
+		return &machoExe{e}, nil
+	}
+	// TODO(rolandshoemaker): we cannot support XCOFF files due to the usage of internal/xcoff.
+	// Once this code is moved into the stdlib, this support can be re-enabled.
+	// if bytes.HasPrefix(data, []byte{0x01, 0xDF}) || bytes.HasPrefix(data, []byte{0x01, 0xF7}) {
+	// 	e, err := xcoff.NewFile(r)
+	// 	if err != nil {
+	// 		return nil, err
+	// 	}
+	// 	return &xcoffExe{e}, nil
+
+	// }
+	return nil, fmt.Errorf("unrecognized executable format")
+}
+
+// elfExe is the ELF implementation of the exe interface.
+type elfExe struct {
+	f *elf.File
+}
+
+func (x *elfExe) ReadData(addr, size uint64) ([]byte, error) {
+	for _, prog := range x.f.Progs {
+		if prog.Vaddr <= addr && addr <= prog.Vaddr+prog.Filesz-1 {
+			n := prog.Vaddr + prog.Filesz - addr
+			if n > size {
+				n = size
+			}
+			data := make([]byte, n)
+			_, err := prog.ReadAt(data, int64(addr-prog.Vaddr))
+			if err != nil {
+				return nil, err
+			}
+			return data, nil
+		}
+	}
+	return nil, fmt.Errorf("address not mapped")
+}
+
+func (x *elfExe) DataStart() uint64 {
+	for _, s := range x.f.Sections {
+		if s.Name == ".go.buildinfo" {
+			return s.Addr
+		}
+	}
+	for _, p := range x.f.Progs {
+		if p.Type == elf.PT_LOAD && p.Flags&(elf.PF_X|elf.PF_W) == elf.PF_W {
+			return p.Vaddr
+		}
+	}
+	return 0
+}
+
+const go12magic = 0xfffffffb
+const go116magic = 0xfffffffa
+
+func (x *elfExe) PCLNTab() ([]byte, uint64) {
+	var offset uint64
+	text := x.f.Section(".text")
+	if text != nil {
+		offset = text.Offset
+	}
+	pclntab := x.f.Section(".gopclntab")
+	if pclntab == nil {
+		pclntab = x.f.Section(".data.rel.ro.gopclntab")
+		if pclntab == nil {
+			pclntab = x.f.Section(".data.rel.ro")
+			if pclntab == nil {
+				return nil, 0
+			}
+			// Possibly the PCLN table has been stuck in the .data.rel.ro section, but without
+			// its own section header. We can search for the start by looking for the four
+			// byte magic and the go magic.
+			b, err := pclntab.Data()
+			if err != nil {
+				return nil, 0
+			}
+			// TODO(rolandshoemaker): I'm not sure if the 16 byte increment during the search is
+			// actually correct. During testing it worked, but that may be because I got lucky
+			// with the binary I was using, and we need to do four byte jumps to exhaustively
+			// search the section?
+			for i := 0; i < len(b); i += 16 {
+				if len(b)-i > 16 && b[i+4] == 0 && b[i+5] == 0 &&
+					(b[i+6] == 1 || b[i+6] == 2 || b[i+6] == 4) &&
+					(b[i+7] == 4 || b[i+7] == 8) {
+					// Also check for the go magic
+					leMagic := binary.LittleEndian.Uint32(b[i:])
+					beMagic := binary.BigEndian.Uint32(b[i:])
+					switch {
+					case leMagic == go12magic:
+						fallthrough
+					case beMagic == go12magic:
+						fallthrough
+					case leMagic == go116magic:
+						fallthrough
+					case beMagic == go116magic:
+						return b[i:], offset
+					}
+				}
+			}
+		}
+	}
+	b, err := pclntab.Data()
+	if err != nil {
+		return nil, 0
+	}
+	return b, offset
+}
+
+// peExe is the PE (Windows Portable Executable) implementation of the exe interface.
+type peExe struct {
+	r io.ReaderAt
+	f *pe.File
+}
+
+func (x *peExe) imageBase() uint64 {
+	switch oh := x.f.OptionalHeader.(type) {
+	case *pe.OptionalHeader32:
+		return uint64(oh.ImageBase)
+	case *pe.OptionalHeader64:
+		return oh.ImageBase
+	}
+	return 0
+}
+
+func (x *peExe) ReadData(addr, size uint64) ([]byte, error) {
+	addr -= x.imageBase()
+	for _, sect := range x.f.Sections {
+		if uint64(sect.VirtualAddress) <= addr && addr <= uint64(sect.VirtualAddress+sect.Size-1) {
+			n := uint64(sect.VirtualAddress+sect.Size) - addr
+			if n > size {
+				n = size
+			}
+			data := make([]byte, n)
+			_, err := sect.ReadAt(data, int64(addr-uint64(sect.VirtualAddress)))
+			if err != nil {
+				return nil, err
+			}
+			return data, nil
+		}
+	}
+	return nil, fmt.Errorf("address not mapped")
+}
+
+func (x *peExe) DataStart() uint64 {
+	// Assume data is first writable section.
+	const (
+		IMAGE_SCN_CNT_CODE               = 0x00000020
+		IMAGE_SCN_CNT_INITIALIZED_DATA   = 0x00000040
+		IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080
+		IMAGE_SCN_MEM_EXECUTE            = 0x20000000
+		IMAGE_SCN_MEM_READ               = 0x40000000
+		IMAGE_SCN_MEM_WRITE              = 0x80000000
+		IMAGE_SCN_MEM_DISCARDABLE        = 0x2000000
+		IMAGE_SCN_LNK_NRELOC_OVFL        = 0x1000000
+		IMAGE_SCN_ALIGN_32BYTES          = 0x600000
+	)
+	for _, sect := range x.f.Sections {
+		if sect.VirtualAddress != 0 && sect.Size != 0 &&
+			sect.Characteristics&^IMAGE_SCN_ALIGN_32BYTES == IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_MEM_READ|IMAGE_SCN_MEM_WRITE {
+			return uint64(sect.VirtualAddress) + x.imageBase()
+		}
+	}
+	return 0
+}
+
+// PCLNTab returns the bytes between the runtime.pclntab and runtime.epclntab symbols and the
+// file offset of .text. It returns (nil, 0) if the symbols or their section are missing or invalid.
+func (x *peExe) PCLNTab() ([]byte, uint64) {
+	var textOffset uint64
+	for _, section := range x.f.Sections {
+		if section.Name == ".text" {
+			textOffset = uint64(section.Offset)
+			break
+		}
+	}
+	var start, end int64
+	var section int
+	for _, symbol := range x.f.Symbols {
+		if symbol.Name == "runtime.pclntab" {
+			start = int64(symbol.Value)
+			section = int(symbol.SectionNumber - 1)
+		} else if symbol.Name == "runtime.epclntab" {
+			end = int64(symbol.Value)
+			break
+		}
+	}
+	if start == 0 || end <= start || section < 0 || section >= len(x.f.Sections) {
+		return nil, 0
+	}
+	offset := int64(x.f.Sections[section].Offset) + start
+	size := end - start
+	pclntab := make([]byte, size)
+	if _, err := x.r.ReadAt(pclntab, offset); err != nil {
+		return nil, 0
+	}
+	return pclntab, textOffset
+}
+
+// machoExe is the Mach-O (Apple macOS/iOS) implementation of the exe interface.
+type machoExe struct {
+	f *macho.File
+}
+
+func (x *machoExe) ReadData(addr, size uint64) ([]byte, error) {
+	for _, load := range x.f.Loads {
+		seg, ok := load.(*macho.Segment)
+		if !ok {
+			continue
+		}
+		if seg.Addr <= addr && addr <= seg.Addr+seg.Filesz-1 {
+			if seg.Name == "__PAGEZERO" {
+				continue
+			}
+			n := seg.Addr + seg.Filesz - addr
+			if n > size {
+				n = size
+			}
+			data := make([]byte, n)
+			_, err := seg.ReadAt(data, int64(addr-seg.Addr))
+			if err != nil {
+				return nil, err
+			}
+			return data, nil
+		}
+	}
+	return nil, fmt.Errorf("address not mapped")
+}
+
+func (x *machoExe) DataStart() uint64 {
+	// Look for section named "__go_buildinfo".
+	for _, sec := range x.f.Sections {
+		if sec.Name == "__go_buildinfo" {
+			return sec.Addr
+		}
+	}
+	// Try the first non-empty writable segment.
+	const RW = 3
+	for _, load := range x.f.Loads {
+		seg, ok := load.(*macho.Segment)
+		if ok && seg.Addr != 0 && seg.Filesz != 0 && seg.Prot == RW && seg.Maxprot == RW {
+			return seg.Addr
+		}
+	}
+	return 0
+}
+
+func (x *machoExe) PCLNTab() ([]byte, uint64) {
+	var textOffset uint64
+	text := x.f.Section("__text")
+	if text != nil {
+		textOffset = uint64(text.Offset)
+	}
+	pclntab := x.f.Section("__gopclntab")
+	if pclntab == nil {
+		return nil, 0
+	}
+	b, err := pclntab.Data()
+	if err != nil {
+		return nil, 0
+	}
+	return b, textOffset
+}
+
+// TODO(rolandshoemaker): we cannot support XCOFF files due to the usage of internal/xcoff.
+// Once this code is moved into the stdlib, this support can be re-enabled.
+
+// // xcoffExe is the XCOFF (AIX eXtended COFF) implementation of the exe interface.
+// type xcoffExe struct {
+// 	f  *xcoff.File
+// }
+//
+// func (x *xcoffExe) ReadData(addr, size uint64) ([]byte, error) {
+// 	for _, sect := range x.f.Sections {
+// 		if uint64(sect.VirtualAddress) <= addr && addr <= uint64(sect.VirtualAddress+sect.Size-1) {
+// 			n := uint64(sect.VirtualAddress+sect.Size) - addr
+// 			if n > size {
+// 				n = size
+// 			}
+// 			data := make([]byte, n)
+// 			_, err := sect.ReadAt(data, int64(addr-uint64(sect.VirtualAddress)))
+// 			if err != nil {
+// 				return nil, err
+// 			}
+// 			return data, nil
+// 		}
+// 	}
+// 	return nil, fmt.Errorf("address not mapped")
+// }
+//
+// func (x *xcoffExe) DataStart() uint64 {
+// 	return x.f.SectionByType(xcoff.STYP_DATA).VirtualAddress
+// }
diff --git a/vulncheck/internal/binscan/scan.go b/vulncheck/internal/binscan/scan.go
new file mode 100644
index 0000000..750079a
--- /dev/null
+++ b/vulncheck/internal/binscan/scan.go
@@ -0,0 +1,244 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package binscan contains methods for parsing Go binary files for the purpose
+// of extracting module dependency and symbol table information.
+package binscan
+
+// Code in this package is derived from src/cmd/go/internal/version/version.go
+// and cmd/go/internal/version/exe.go.
+
+import (
+	"bytes"
+	"debug/gosym"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"io"
+	"net/url"
+	"runtime/debug"
+	"strings"
+
+	"golang.org/x/tools/go/packages"
+)
+
+// buildInfoMagic, findVers, and readString are copied from
+// cmd/go/internal/version
+
+// The build info blob left by the linker is identified by
+// a 16-byte header, consisting of buildInfoMagic (14 bytes),
+// the binary's pointer size (1 byte),
+// and whether the binary is big endian (1 byte).
+var buildInfoMagic = []byte("\xff Go buildinf:")
+
+// findVers locates the build info blob in the executable x and returns the
+// embedded module information, or "" if the blob or its strings cannot be read.
+func findVers(x exe) string {
+	// Read up to 64kB starting at DataStart to find the build info blob.
+	text := x.DataStart()
+	data, err := x.ReadData(text, 64*1024)
+	if err != nil {
+		return ""
+	}
+	for ; !bytes.HasPrefix(data, buildInfoMagic); data = data[32:] { // advance in 32-byte steps until the magic matches
+		if len(data) < 32 { // not enough bytes left for another step: give up
+			return ""
+		}
+	}
+
+	// Decode the blob.
+	ptrSize := int(data[14]) // header byte following the 14-byte magic
+	bigEndian := data[15] != 0
+	var bo binary.ByteOrder
+	if bigEndian {
+		bo = binary.BigEndian
+	} else {
+		bo = binary.LittleEndian
+	}
+	var readPtr func([]byte) uint64
+	if ptrSize == 4 {
+		readPtr = func(b []byte) uint64 { return uint64(bo.Uint32(b)) }
+	} else {
+		readPtr = bo.Uint64
+	}
+	vers := readString(x, ptrSize, readPtr, readPtr(data[16:]))
+	if vers == "" { // vers is only used as a validity check; it is not returned
+		return ""
+	}
+	mod := readString(x, ptrSize, readPtr, readPtr(data[16+ptrSize:]))
+	if len(mod) >= 33 && mod[len(mod)-17] == '\n' {
+		// Strip module framing.
+		mod = mod[16 : len(mod)-16]
+	} else {
+		mod = ""
+	}
+	return mod
+}
+
+// readString returns the string at address addr in the executable x.
+func readString(x exe, ptrSize int, readPtr func([]byte) uint64, addr uint64) string {
+	hdr, err := x.ReadData(addr, uint64(2*ptrSize))
+	if err != nil || len(hdr) < 2*ptrSize {
+		return ""
+	}
+	dataAddr := readPtr(hdr)
+	dataLen := readPtr(hdr[ptrSize:])
+	data, err := x.ReadData(dataAddr, dataLen)
+	if err != nil || uint64(len(data)) < dataLen {
+		return ""
+	}
+	return string(data)
+}
+
+// readBuildInfo is copied from runtime/debug
+func readBuildInfo(data string) (*debug.BuildInfo, bool) {
+	if len(data) == 0 {
+		return nil, false
+	}
+
+	const (
+		pathLine = "path\t"
+		modLine  = "mod\t"
+		depLine  = "dep\t"
+		repLine  = "=>\t"
+	)
+
+	readEntryFirstLine := func(elem []string) (debug.Module, bool) {
+		if len(elem) != 2 && len(elem) != 3 {
+			return debug.Module{}, false
+		}
+		sum := ""
+		if len(elem) == 3 {
+			sum = elem[2]
+		}
+		return debug.Module{
+			Path:    elem[0],
+			Version: elem[1],
+			Sum:     sum,
+		}, true
+	}
+
+	var (
+		info = &debug.BuildInfo{}
+		last *debug.Module
+		line string
+		ok   bool
+	)
+	// Reverse of cmd/go/internal/modload.PackageBuildInfo
+	for len(data) > 0 {
+		i := strings.IndexByte(data, '\n')
+		if i < 0 {
+			break
+		}
+		line, data = data[:i], data[i+1:]
+		switch {
+		case strings.HasPrefix(line, pathLine):
+			elem := line[len(pathLine):]
+			info.Path = elem
+		case strings.HasPrefix(line, modLine):
+			elem := strings.Split(line[len(modLine):], "\t")
+			last = &info.Main
+			*last, ok = readEntryFirstLine(elem)
+			if !ok {
+				return nil, false
+			}
+		case strings.HasPrefix(line, depLine):
+			elem := strings.Split(line[len(depLine):], "\t")
+			last = new(debug.Module)
+			info.Deps = append(info.Deps, last)
+			*last, ok = readEntryFirstLine(elem)
+			if !ok {
+				return nil, false
+			}
+		case strings.HasPrefix(line, repLine):
+			elem := strings.Split(line[len(repLine):], "\t")
+			if len(elem) != 3 {
+				return nil, false
+			}
+			if last == nil {
+				return nil, false
+			}
+			last.Replace = &debug.Module{
+				Path:    elem[0],
+				Version: elem[1],
+				Sum:     elem[2],
+			}
+			last = nil
+		}
+	}
+	return info, true
+}
+
+func debugModulesToPackagesModules(debugModules []*debug.Module) []*packages.Module {
+	packagesModules := make([]*packages.Module, len(debugModules))
+	for i, mod := range debugModules {
+		packagesModules[i] = &packages.Module{
+			Path:    mod.Path,
+			Version: mod.Version,
+		}
+		if mod.Replace != nil {
+			packagesModules[i].Replace = &packages.Module{
+				Path:    mod.Replace.Path,
+				Version: mod.Replace.Version,
+			}
+		}
+	}
+	return packagesModules
+}
+
+// ExtractPackagesAndSymbols returns the module dependencies recorded in the Go binary bin and a map
+// from package path to the function symbols found in its PCLN table. Stripped binaries are not supported.
+func ExtractPackagesAndSymbols(bin io.ReaderAt) ([]*packages.Module, map[string][]string, error) {
+	x, err := openExe(bin)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	pclntab, textOffset := x.PCLNTab()
+	if pclntab == nil {
+		// TODO(roland): if we have build information, but not PCLN table, we should be able to
+		// fall back to much higher granularity vulnerability checking.
+		return nil, nil, errors.New("unable to load the PCLN table")
+	}
+	lineTab := gosym.NewLineTable(pclntab, textOffset)
+	if lineTab == nil {
+		return nil, nil, errors.New("invalid line table")
+	}
+	tab, err := gosym.NewTable(nil, lineTab)
+	if err != nil {
+		return nil, nil, err
+	}
+	// Group function symbols by package path; methods are recorded as "Receiver.Method".
+	packageSymbols := map[string][]string{}
+	for _, f := range tab.Funcs {
+		if f.Func == nil {
+			continue
+		}
+		symName := f.Func.BaseName()
+		if r := f.Func.ReceiverName(); r != "" {
+			if strings.HasPrefix(r, "(*") {
+				r = strings.Trim(r, "(*)")
+			}
+			symName = fmt.Sprintf("%s.%s", r, symName)
+		}
+
+		pkgName := f.Func.PackageName()
+		if pkgName == "" {
+			continue
+		}
+		pkgName, err := url.PathUnescape(pkgName)
+		if err != nil {
+			return nil, nil, err
+		}
+
+		packageSymbols[pkgName] = append(packageSymbols[pkgName], symName)
+	}
+	// err is nil here, so a build info failure needs its own error rather than a silent empty result.
+	bi, ok := readBuildInfo(findVers(x))
+	if !ok {
+		return nil, nil, errors.New("unable to extract build information")
+	}
+
+	return debugModulesToPackagesModules(bi.Deps), packageSymbols, nil
+}
diff --git a/vulncheck/internal/binscan/scan_test.go b/vulncheck/internal/binscan/scan_test.go
new file mode 100644
index 0000000..b87554c
--- /dev/null
+++ b/vulncheck/internal/binscan/scan_test.go
@@ -0,0 +1,7 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binscan
+
+// TODO(zpavlinovic): add tests.