vulncheck: add vulnerability detection for binaries
The binscan package has been mostly copied from internal/audit. The only
change is to use io.ReaderAt as an input binary instead of a file path.
Cherry-picked: https://go-review.googlesource.com/c/exp/+/363013
Change-Id: If0fdfef87a57463cd7007a6b8fd2c97c427df752
Reviewed-on: https://go-review.googlesource.com/c/vuln/+/395042
Trust: Julie Qiu <julie@golang.org>
Run-TryBot: Julie Qiu <julie@golang.org>
Reviewed-by: Jonathan Amsterdam <jba@google.com>
diff --git a/vulncheck/binary.go b/vulncheck/binary.go
new file mode 100644
index 0000000..65ea431
--- /dev/null
+++ b/vulncheck/binary.go
@@ -0,0 +1,47 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vulncheck
+
+import (
+ "io"
+
+ "golang.org/x/vuln/vulncheck/internal/binscan"
+)
+
+// Binary detects presence of vulnerable symbols in exe. cfg must be non-nil.
+// The imports, require, and call graph are all unavailable (nil).
+func Binary(exe io.ReaderAt, cfg *Config) (*Result, error) {
+	modules, packageSymbols, err := binscan.ExtractPackagesAndSymbols(exe)
+	if err != nil {
+		return nil, err
+	}
+	modVulns, err := fetchVulnerabilities(cfg.Client, modules)
+	if err != nil {
+		return nil, err
+	}
+
+	result := &Result{}
+	for pkg, symbols := range packageSymbols {
+		for _, symbol := range symbols {
+			for _, osv := range modVulns.VulnsForSymbol(pkg, symbol) {
+				for _, affected := range osv.Affected {
+					if affected.Package.Name != pkg {
+						continue
+					}
+					// Report the symbol found in the binary, once per OSV entry.
+					vuln := &Vuln{
+						OSV:     osv,
+						Symbol:  symbol,
+						PkgPath: pkg,
+						// TODO(zpavlinovic): infer mod path from PkgPath and modules?
+					}
+					result.Vulns = append(result.Vulns, vuln)
+					break
+				}
+			}
+		}
+	}
+	return result, nil
+}
diff --git a/vulncheck/binary_test.go b/vulncheck/binary_test.go
new file mode 100644
index 0000000..ffcce1d
--- /dev/null
+++ b/vulncheck/binary_test.go
@@ -0,0 +1,7 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package vulncheck
+
+// TODO(zpavlinovic): add tests.
diff --git a/vulncheck/internal/binscan/exe.go b/vulncheck/internal/binscan/exe.go
new file mode 100644
index 0000000..37399e1
--- /dev/null
+++ b/vulncheck/internal/binscan/exe.go
@@ -0,0 +1,349 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binscan
+
+// This file is a somewhat modified version of cmd/go/internal/version/exe.go
+// that adds functionality for extracting the PCLN table.
+
+import (
+ "bytes"
+ "debug/elf"
+ "debug/macho"
+ "debug/pe"
+ "encoding/binary"
+ "fmt"
+
+ // "internal/xcoff"
+ "io"
+)
+
+// An exe is a generic interface to an OS executable (ELF, Mach-O, PE, XCOFF).
+type exe interface {
+ // ReadData reads and returns up to size bytes starting at virtual address addr.
+ ReadData(addr, size uint64) ([]byte, error)
+
+ // DataStart returns the writable data segment start address.
+ DataStart() uint64
+
+ PCLNTab() ([]byte, uint64)
+}
+
+// openExe returns reader r as an exe.
+func openExe(r io.ReaderAt) (exe, error) {
+ data := make([]byte, 16)
+ if _, err := r.ReadAt(data, 0); err != nil {
+ return nil, err
+ }
+ if bytes.HasPrefix(data, []byte("\x7FELF")) {
+ e, err := elf.NewFile(r)
+ if err != nil {
+ return nil, err
+ }
+ return &elfExe{e}, nil
+ }
+ if bytes.HasPrefix(data, []byte("MZ")) {
+ e, err := pe.NewFile(r)
+ if err != nil {
+ return nil, err
+ }
+ return &peExe{r, e}, nil
+ }
+ if bytes.HasPrefix(data, []byte("\xFE\xED\xFA")) || bytes.HasPrefix(data[1:], []byte("\xFA\xED\xFE")) {
+ e, err := macho.NewFile(r)
+ if err != nil {
+ return nil, err
+ }
+ return &machoExe{e}, nil
+ }
+ // TODO(rolandshoemaker): we cannot support XCOFF files due to the usage of internal/xcoff.
+ // Once this code is moved into the stdlib, this support can be re-enabled.
+ // if bytes.HasPrefix(data, []byte{0x01, 0xDF}) || bytes.HasPrefix(data, []byte{0x01, 0xF7}) {
+ // e, err := xcoff.NewFile(r)
+ // if err != nil {
+ // return nil, err
+ // }
+ // return &xcoffExe{e}, nil
+
+ // }
+ return nil, fmt.Errorf("unrecognized executable format")
+}
+
+// elfExe is the ELF implementation of the exe interface.
+type elfExe struct {
+ f *elf.File
+}
+
+func (x *elfExe) ReadData(addr, size uint64) ([]byte, error) {
+ for _, prog := range x.f.Progs {
+ if prog.Vaddr <= addr && addr <= prog.Vaddr+prog.Filesz-1 {
+ n := prog.Vaddr + prog.Filesz - addr
+ if n > size {
+ n = size
+ }
+ data := make([]byte, n)
+ _, err := prog.ReadAt(data, int64(addr-prog.Vaddr))
+ if err != nil {
+ return nil, err
+ }
+ return data, nil
+ }
+ }
+ return nil, fmt.Errorf("address not mapped")
+}
+
+func (x *elfExe) DataStart() uint64 {
+ for _, s := range x.f.Sections {
+ if s.Name == ".go.buildinfo" {
+ return s.Addr
+ }
+ }
+ for _, p := range x.f.Progs {
+ if p.Type == elf.PT_LOAD && p.Flags&(elf.PF_X|elf.PF_W) == elf.PF_W {
+ return p.Vaddr
+ }
+ }
+ return 0
+}
+
+const go12magic = 0xfffffffb
+const go116magic = 0xfffffffa
+
+func (x *elfExe) PCLNTab() ([]byte, uint64) {
+ var offset uint64
+ text := x.f.Section(".text")
+ if text != nil {
+ offset = text.Offset
+ }
+ pclntab := x.f.Section(".gopclntab")
+ if pclntab == nil {
+ pclntab = x.f.Section(".data.rel.ro.gopclntab")
+ if pclntab == nil {
+ pclntab = x.f.Section(".data.rel.ro")
+ if pclntab == nil {
+ return nil, 0
+ }
+ // Possibly the PCLN table has been stuck in the .data.rel.ro section, but without
+ // its own section header. We can search for the start by looking for the four
+ // byte magic and the go magic.
+ b, err := pclntab.Data()
+ if err != nil {
+ return nil, 0
+ }
+ // TODO(rolandshoemaker): I'm not sure if the 16 byte increment during the search is
+ // actually correct. During testing it worked, but that may be because I got lucky
+ // with the binary I was using, and we need to do four byte jumps to exhaustively
+ // search the section?
+ for i := 0; i < len(b); i += 16 {
+ if len(b)-i > 16 && b[i+4] == 0 && b[i+5] == 0 &&
+ (b[i+6] == 1 || b[i+6] == 2 || b[i+6] == 4) &&
+ (b[i+7] == 4 || b[i+7] == 8) {
+ // Also check for the go magic
+ leMagic := binary.LittleEndian.Uint32(b[i:])
+ beMagic := binary.BigEndian.Uint32(b[i:])
+ switch {
+ case leMagic == go12magic:
+ fallthrough
+ case beMagic == go12magic:
+ fallthrough
+ case leMagic == go116magic:
+ fallthrough
+ case beMagic == go116magic:
+ return b[i:], offset
+ }
+ }
+ }
+ }
+ }
+ b, err := pclntab.Data()
+ if err != nil {
+ return nil, 0
+ }
+ return b, offset
+}
+
+// peExe is the PE (Windows Portable Executable) implementation of the exe interface.
+type peExe struct {
+ r io.ReaderAt
+ f *pe.File
+}
+
+func (x *peExe) imageBase() uint64 {
+ switch oh := x.f.OptionalHeader.(type) {
+ case *pe.OptionalHeader32:
+ return uint64(oh.ImageBase)
+ case *pe.OptionalHeader64:
+ return oh.ImageBase
+ }
+ return 0
+}
+
+func (x *peExe) ReadData(addr, size uint64) ([]byte, error) {
+ addr -= x.imageBase()
+ for _, sect := range x.f.Sections {
+ if uint64(sect.VirtualAddress) <= addr && addr <= uint64(sect.VirtualAddress+sect.Size-1) {
+ n := uint64(sect.VirtualAddress+sect.Size) - addr
+ if n > size {
+ n = size
+ }
+ data := make([]byte, n)
+ _, err := sect.ReadAt(data, int64(addr-uint64(sect.VirtualAddress)))
+ if err != nil {
+ return nil, err
+ }
+ return data, nil
+ }
+ }
+ return nil, fmt.Errorf("address not mapped")
+}
+
+func (x *peExe) DataStart() uint64 {
+ // Assume data is first writable section.
+ const (
+ IMAGE_SCN_CNT_CODE = 0x00000020
+ IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040
+ IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080
+ IMAGE_SCN_MEM_EXECUTE = 0x20000000
+ IMAGE_SCN_MEM_READ = 0x40000000
+ IMAGE_SCN_MEM_WRITE = 0x80000000
+ IMAGE_SCN_MEM_DISCARDABLE = 0x2000000
+ IMAGE_SCN_LNK_NRELOC_OVFL = 0x1000000
+ IMAGE_SCN_ALIGN_32BYTES = 0x600000
+ )
+ for _, sect := range x.f.Sections {
+ if sect.VirtualAddress != 0 && sect.Size != 0 &&
+ sect.Characteristics&^IMAGE_SCN_ALIGN_32BYTES == IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_MEM_READ|IMAGE_SCN_MEM_WRITE {
+ return uint64(sect.VirtualAddress) + x.imageBase()
+ }
+ }
+ return 0
+}
+
+// PCLNTab returns the PCLN table and the .text file offset, or nil, 0 on failure.
+func (x *peExe) PCLNTab() ([]byte, uint64) {
+	var textOffset uint64
+	for _, section := range x.f.Sections {
+		if section.Name == ".text" {
+			textOffset = uint64(section.Offset)
+			break
+		}
+	}
+	var start, end int64
+	var section int
+	for _, symbol := range x.f.Symbols {
+		if symbol.Name == "runtime.pclntab" {
+			start = int64(symbol.Value)
+			section = int(symbol.SectionNumber - 1)
+		} else if symbol.Name == "runtime.epclntab" {
+			end = int64(symbol.Value)
+			break
+		}
+	}
+	// Reject missing symbols and malformed values that would otherwise panic below.
+	if start == 0 || end < start || section < 0 || section >= len(x.f.Sections) {
+		return nil, 0
+	}
+	offset := int64(x.f.Sections[section].Offset) + start
+	size := end - start
+	pclntab := make([]byte, size)
+	if _, err := x.r.ReadAt(pclntab, offset); err != nil {
+		return nil, 0
+	}
+	return pclntab, textOffset
+}
+
+// machoExe is the Mach-O (Apple macOS/iOS) implementation of the exe interface.
+type machoExe struct {
+ f *macho.File
+}
+
+func (x *machoExe) ReadData(addr, size uint64) ([]byte, error) {
+ for _, load := range x.f.Loads {
+ seg, ok := load.(*macho.Segment)
+ if !ok {
+ continue
+ }
+ if seg.Addr <= addr && addr <= seg.Addr+seg.Filesz-1 {
+ if seg.Name == "__PAGEZERO" {
+ continue
+ }
+ n := seg.Addr + seg.Filesz - addr
+ if n > size {
+ n = size
+ }
+ data := make([]byte, n)
+ _, err := seg.ReadAt(data, int64(addr-seg.Addr))
+ if err != nil {
+ return nil, err
+ }
+ return data, nil
+ }
+ }
+ return nil, fmt.Errorf("address not mapped")
+}
+
+func (x *machoExe) DataStart() uint64 {
+ // Look for section named "__go_buildinfo".
+ for _, sec := range x.f.Sections {
+ if sec.Name == "__go_buildinfo" {
+ return sec.Addr
+ }
+ }
+ // Try the first non-empty writable segment.
+ const RW = 3
+ for _, load := range x.f.Loads {
+ seg, ok := load.(*macho.Segment)
+ if ok && seg.Addr != 0 && seg.Filesz != 0 && seg.Prot == RW && seg.Maxprot == RW {
+ return seg.Addr
+ }
+ }
+ return 0
+}
+
+func (x *machoExe) PCLNTab() ([]byte, uint64) {
+ var textOffset uint64
+ text := x.f.Section("__text")
+ if text != nil {
+ textOffset = uint64(text.Offset)
+ }
+ pclntab := x.f.Section("__gopclntab")
+ if pclntab == nil {
+ return nil, 0
+ }
+ b, err := pclntab.Data()
+ if err != nil {
+ return nil, 0
+ }
+ return b, textOffset
+}
+
+// TODO(rolandshoemaker): we cannot support XCOFF files due to the usage of internal/xcoff.
+// Once this code is moved into the stdlib, this support can be re-enabled.
+
+// // xcoffExe is the XCOFF (AIX eXtended COFF) implementation of the exe interface.
+// type xcoffExe struct {
+// f *xcoff.File
+// }
+//
+// func (x *xcoffExe) ReadData(addr, size uint64) ([]byte, error) {
+// for _, sect := range x.f.Sections {
+// if uint64(sect.VirtualAddress) <= addr && addr <= uint64(sect.VirtualAddress+sect.Size-1) {
+// n := uint64(sect.VirtualAddress+sect.Size) - addr
+// if n > size {
+// n = size
+// }
+// data := make([]byte, n)
+// _, err := sect.ReadAt(data, int64(addr-uint64(sect.VirtualAddress)))
+// if err != nil {
+// return nil, err
+// }
+// return data, nil
+// }
+// }
+// return nil, fmt.Errorf("address not mapped")
+// }
+//
+// func (x *xcoffExe) DataStart() uint64 {
+// return x.f.SectionByType(xcoff.STYP_DATA).VirtualAddress
+// }
diff --git a/vulncheck/internal/binscan/scan.go b/vulncheck/internal/binscan/scan.go
new file mode 100644
index 0000000..750079a
--- /dev/null
+++ b/vulncheck/internal/binscan/scan.go
@@ -0,0 +1,244 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package binscan contains methods for parsing Go binary files for the purpose
+// of extracting module dependency and symbol table information.
+package binscan
+
+// Code in this package is derived from src/cmd/go/internal/version/version.go
+// and cmd/go/internal/version/exe.go.
+
+import (
+ "bytes"
+ "debug/gosym"
+ "encoding/binary"
+ "errors"
+ "fmt"
+ "io"
+ "net/url"
+ "runtime/debug"
+ "strings"
+
+ "golang.org/x/tools/go/packages"
+)
+
+// buildInfoMagic, findVers, and readString are copied from
+// cmd/go/internal/version
+
+// The build info blob left by the linker is identified by
+// a 16-byte header, consisting of buildInfoMagic (14 bytes),
+// the binary's pointer size (1 byte),
+// and whether the binary is big endian (1 byte).
+var buildInfoMagic = []byte("\xff Go buildinf:")
+
+// findVers finds the build info blob in the executable x and returns its
+// module information, or "" if the blob cannot be found or decoded.
+func findVers(x exe) string {
+	// Read the first 64kB of the data segment to find the build info blob.
+	start := x.DataStart()
+	data, err := x.ReadData(start, 64*1024)
+	if err != nil {
+		return ""
+	}
+	for ; len(data) < 32 || !bytes.HasPrefix(data, buildInfoMagic); data = data[32:] {
+		if len(data) < 32 {
+			return ""
+		}
+	}
+
+	// Decode the blob; the loop above guarantees at least 32 bytes here.
+	ptrSize := int(data[14])
+	var bo binary.ByteOrder = binary.LittleEndian
+	if data[15] != 0 {
+		bo = binary.BigEndian
+	}
+	var readPtr func([]byte) uint64
+	switch ptrSize {
+	case 4:
+		readPtr = func(b []byte) uint64 { return uint64(bo.Uint32(b)) }
+	case 8:
+		readPtr = bo.Uint64
+	default:
+		return "" // corrupt header: unsupported pointer size
+	}
+	vers := readString(x, ptrSize, readPtr, readPtr(data[16:]))
+	if vers == "" {
+		return ""
+	}
+	mod := readString(x, ptrSize, readPtr, readPtr(data[16+ptrSize:]))
+	if len(mod) >= 33 && mod[len(mod)-17] == '\n' {
+		// Strip module framing.
+		mod = mod[16 : len(mod)-16]
+	} else {
+		mod = ""
+	}
+	return mod
+}
+
+// readString returns the string at address addr in the executable x.
+func readString(x exe, ptrSize int, readPtr func([]byte) uint64, addr uint64) string {
+ hdr, err := x.ReadData(addr, uint64(2*ptrSize))
+ if err != nil || len(hdr) < 2*ptrSize {
+ return ""
+ }
+ dataAddr := readPtr(hdr)
+ dataLen := readPtr(hdr[ptrSize:])
+ data, err := x.ReadData(dataAddr, dataLen)
+ if err != nil || uint64(len(data)) < dataLen {
+ return ""
+ }
+ return string(data)
+}
+
+// readBuildInfo is copied from runtime/debug
+func readBuildInfo(data string) (*debug.BuildInfo, bool) {
+ if len(data) == 0 {
+ return nil, false
+ }
+
+ const (
+ pathLine = "path\t"
+ modLine = "mod\t"
+ depLine = "dep\t"
+ repLine = "=>\t"
+ )
+
+ readEntryFirstLine := func(elem []string) (debug.Module, bool) {
+ if len(elem) != 2 && len(elem) != 3 {
+ return debug.Module{}, false
+ }
+ sum := ""
+ if len(elem) == 3 {
+ sum = elem[2]
+ }
+ return debug.Module{
+ Path: elem[0],
+ Version: elem[1],
+ Sum: sum,
+ }, true
+ }
+
+ var (
+ info = &debug.BuildInfo{}
+ last *debug.Module
+ line string
+ ok bool
+ )
+ // Reverse of cmd/go/internal/modload.PackageBuildInfo
+ for len(data) > 0 {
+ i := strings.IndexByte(data, '\n')
+ if i < 0 {
+ break
+ }
+ line, data = data[:i], data[i+1:]
+ switch {
+ case strings.HasPrefix(line, pathLine):
+ elem := line[len(pathLine):]
+ info.Path = elem
+ case strings.HasPrefix(line, modLine):
+ elem := strings.Split(line[len(modLine):], "\t")
+ last = &info.Main
+ *last, ok = readEntryFirstLine(elem)
+ if !ok {
+ return nil, false
+ }
+ case strings.HasPrefix(line, depLine):
+ elem := strings.Split(line[len(depLine):], "\t")
+ last = new(debug.Module)
+ info.Deps = append(info.Deps, last)
+ *last, ok = readEntryFirstLine(elem)
+ if !ok {
+ return nil, false
+ }
+ case strings.HasPrefix(line, repLine):
+ elem := strings.Split(line[len(repLine):], "\t")
+ if len(elem) != 3 {
+ return nil, false
+ }
+ if last == nil {
+ return nil, false
+ }
+ last.Replace = &debug.Module{
+ Path: elem[0],
+ Version: elem[1],
+ Sum: elem[2],
+ }
+ last = nil
+ }
+ }
+ return info, true
+}
+
+func debugModulesToPackagesModules(debugModules []*debug.Module) []*packages.Module {
+ packagesModules := make([]*packages.Module, len(debugModules))
+ for i, mod := range debugModules {
+ packagesModules[i] = &packages.Module{
+ Path: mod.Path,
+ Version: mod.Version,
+ }
+ if mod.Replace != nil {
+ packagesModules[i].Replace = &packages.Module{
+ Path: mod.Replace.Path,
+ Version: mod.Replace.Version,
+ }
+ }
+ }
+ return packagesModules
+}
+
+// ExtractPackagesAndSymbols extracts the symbols, packages, and their associated module versions
+// from a Go binary. It fails if the PCLN table or build info is missing; stripped binaries are not supported.
+func ExtractPackagesAndSymbols(bin io.ReaderAt) ([]*packages.Module, map[string][]string, error) {
+	x, err := openExe(bin)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	pclntab, textOffset := x.PCLNTab()
+	if pclntab == nil {
+		// TODO(roland): if we have build information, but not PCLN table, we should be able to
+		// fall back to much higher granularity vulnerability checking.
+		return nil, nil, errors.New("unable to load the PCLN table")
+	}
+	lineTab := gosym.NewLineTable(pclntab, textOffset)
+	if lineTab == nil {
+		return nil, nil, errors.New("invalid line table")
+	}
+	tab, err := gosym.NewTable(nil, lineTab)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	packageSymbols := map[string][]string{}
+	for _, f := range tab.Funcs {
+		if f.Func == nil {
+			continue
+		}
+		symName := f.Func.BaseName()
+		if r := f.Func.ReceiverName(); r != "" {
+			if strings.HasPrefix(r, "(*") {
+				r = strings.Trim(r, "(*)")
+			}
+			symName = fmt.Sprintf("%s.%s", r, symName)
+		}
+
+		pkgName := f.Func.PackageName()
+		if pkgName == "" {
+			continue
+		}
+		pkgName, err := url.PathUnescape(pkgName)
+		if err != nil {
+			return nil, nil, err
+		}
+
+		packageSymbols[pkgName] = append(packageSymbols[pkgName], symName)
+	}
+
+	bi, ok := readBuildInfo(findVers(x))
+	if !ok {
+		return nil, nil, errors.New("unable to extract the build information")
+	}
+
+	return debugModulesToPackagesModules(bi.Deps), packageSymbols, nil
+}
diff --git a/vulncheck/internal/binscan/scan_test.go b/vulncheck/internal/binscan/scan_test.go
new file mode 100644
index 0000000..b87554c
--- /dev/null
+++ b/vulncheck/internal/binscan/scan_test.go
@@ -0,0 +1,7 @@
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package binscan
+
+// TODO(zpavlinovic): add tests.