blob: ad19ad083581da1daaa0f8ce39edeb7239640b0c [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package core is used to process ELF core dump files. You can
// open a core dump file and read from addresses in the process that
// dumped core, called the "inferior". Some ancillary information
// about the inferior is also provided, like architecture and OS
// thread state.
//
// There's nothing Go-specific about this library; it could
// just as easily be used to read a C++ core dump. See ../gocore
// for the next layer up, a Go-specific core dump reader.
//
// The Read* operations all panic with an error (the builtin Go type)
// if the inferior is not readable at the address requested.
package core
import (
"bytes"
"debug/dwarf"
"debug/elf" // TODO: use golang.org/x/debug/elf instead?
"encoding/binary"
"fmt"
"os"
"path/filepath"
"sort"
"strings"
"syscall"
)
// TODO: add these to debug/elf?
const (
	// _NT_FILE is the note type read by readFileMappings to recover the
	// file-backed mappings of the inferior. The value is the ASCII bytes
	// "FILE".
	_NT_FILE elf.NType = 0x46494c45
	// _NT_AUXV is the note type read by readEntryPoint to locate the
	// inferior's entry point.
	_NT_AUXV elf.NType = 0x6 // auxv
)
// A Process represents the state of the process that core dumped.
//
// A Process is built by Core; its fields are only read afterwards through the
// accessor methods defined in this file.
type Process struct {
	meta          metadata           // basic metadata about the core
	entryPoint    Address            // entry point found in the NT_AUXV note (0 if the note is absent)
	staticBase    uint64             // Offset at which the executable was loaded in memory. 0 when binary is not-PIE.
	args          string             // first part of args retrieved from NT_PRPSINFO
	threads       []*Thread          // os threads (TODO: map from pid?)
	memory        splicedMemory      // virtual address mappings
	pageTable     pageTable4         // for fast address->mapping lookups
	syms          map[string]Address // symbols (could be empty if executable is stripped)
	symErr        error              // an error encountered while reading symbols
	dwarf         *dwarf.Data        // debugging info (could be nil)
	dwarfErr      error              // an error encountered while reading DWARF
	dwarfLoc      []byte             // .debug_loc section
	dwarfLocLists []byte             // .debug_loclists section
	dwarfAddr     []byte             // .debug_addr section
	warnings      []string           // warnings generated during loading
}
// metadata holds the architecture-level facts about a core file that the
// rest of the package needs in order to decode it.
type metadata struct {
	arch         string           // amd64, ...
	ptrSize      int64            // 4 or 8
	logPtrSize   uint             // 2 or 3
	byteOrder    binary.ByteOrder // byte order of the inferior
	littleEndian bool             // redundant with byteOrder
}

// newMetadata derives the metadata of the inferior from the ELF header of the
// core file.
//
// It returns an error if coreElf is not a core file (ET_CORE), or if its ELF
// class or machine type is not one this package knows about.
func newMetadata(coreElf *elf.File) (metadata, error) {
	if coreElf.Type != elf.ET_CORE {
		return metadata{}, fmt.Errorf("not a core file")
	}

	var meta metadata
	switch coreElf.Class {
	case elf.ELFCLASS32:
		meta.ptrSize = 4
		meta.logPtrSize = 2
	case elf.ELFCLASS64:
		meta.ptrSize = 8
		meta.logPtrSize = 3
	default:
		return metadata{}, fmt.Errorf("unknown elf class %s", coreElf.Class)
	}

	meta.byteOrder = coreElf.ByteOrder
	// We also compute explicitly what byte order the inferior is.
	// Just using p.byteOrder to decode fields makes any arguments passed to it
	// escape to the heap. We use explicit binary.{Little,Big}Endian.UintXX
	// calls when we want to avoid heap-allocating the buffer.
	meta.littleEndian = meta.byteOrder.String() == "LittleEndian"

	switch coreElf.Machine {
	case elf.EM_386:
		meta.arch = "386"
	case elf.EM_X86_64:
		meta.arch = "amd64"
	case elf.EM_ARM:
		meta.arch = "arm"
	case elf.EM_AARCH64:
		meta.arch = "arm64"
	case elf.EM_MIPS:
		meta.arch = "mips"
	case elf.EM_MIPS_RS3_LE:
		meta.arch = "mipsle"
		// TODO: value for mips64?
	case elf.EM_PPC64:
		// The machine type is shared; the byte order tells the two apart.
		if meta.littleEndian {
			meta.arch = "ppc64le"
		} else {
			meta.arch = "ppc64"
		}
	case elf.EM_S390:
		meta.arch = "s390x"
	default:
		// Error strings carry no trailing newline; callers wrap them.
		return metadata{}, fmt.Errorf("unknown arch %s", coreElf.Machine)
	}
	return meta, nil
}
// Mappings returns a list of virtual memory mappings for p.
// The returned slice is p's own slice, not a copy.
func (p *Process) Mappings() []*Mapping {
	return p.memory.mappings
}
// Readable reports whether the address a is readable.
//
// An address counts as readable when the page table has a mapping for it.
// NOTE(review): unlike ReadableN, this does not test the mapping's Read
// permission bit — confirm whether that difference is intended.
func (p *Process) Readable(a Address) bool {
	return p.pageTable.findMapping(a) != nil
}
// ReadableN reports whether the n bytes starting at address a are readable.
//
// The range may span several mappings; each one it touches must exist and
// carry the Read permission.
func (p *Process) ReadableN(a Address, n int64) bool {
	addr, remaining := a, n
	for {
		m := p.pageTable.findMapping(addr)
		if m == nil {
			return false
		}
		if m.perm&Read == 0 {
			return false
		}
		// Bytes available from addr to the end of this mapping.
		avail := m.max.Sub(addr)
		if remaining <= avail {
			return true
		}
		// Continue with whatever follows this mapping.
		remaining -= avail
		addr = addr.Add(avail)
	}
}
// Writeable reports whether the address a was writeable (by the inferior at
// the time of the core dump). Unmapped addresses are not writeable.
func (p *Process) Writeable(a Address) bool {
	if m := p.pageTable.findMapping(a); m != nil {
		return m.perm&Write != 0
	}
	return false
}
// Threads returns information about each OS thread in the inferior.
// The returned slice is p's own slice, not a copy.
func (p *Process) Threads() []*Thread {
	return p.threads
}
// Arch returns the architecture name of the inferior, such as "amd64".
func (p *Process) Arch() string {
	return p.meta.arch
}
// PtrSize returns the size in bytes of a pointer in the inferior (4 or 8).
func (p *Process) PtrSize() int64 {
	return p.meta.ptrSize
}
// LogPtrSize returns the base-2 logarithm of the inferior's pointer size in
// bytes (2 for 4-byte pointers, 3 for 8-byte pointers).
func (p *Process) LogPtrSize() uint {
	return p.meta.logPtrSize
}
// ByteOrder returns the byte order of the inferior, as recorded in the ELF
// header of the core file.
func (p *Process) ByteOrder() binary.ByteOrder {
	return p.meta.byteOrder
}
// DWARF returns the DWARF debugging information (which may be nil), along
// with any error encountered while reading DWARF data.
func (p *Process) DWARF() (*dwarf.Data, error) {
	return p.dwarf, p.dwarfErr
}
// DWARFLoc returns the raw contents of the .debug_loc section, along with any
// error encountered while reading DWARF data.
func (p *Process) DWARFLoc() ([]byte, error) {
	return p.dwarfLoc, p.dwarfErr
}
// DWARFLocLists returns the raw contents of the .debug_loclists section, along
// with any error encountered while reading DWARF data.
func (p *Process) DWARFLocLists() ([]byte, error) {
	return p.dwarfLocLists, p.dwarfErr
}
// DWARFAddr returns the raw contents of the .debug_addr section, along with
// any error encountered while reading DWARF data.
func (p *Process) DWARFAddr() ([]byte, error) {
	return p.dwarfAddr, p.dwarfErr
}
// Symbols returns a mapping from symbol name to inferior address, along with
// any error encountered while reading the symbol information.
// (There may be both an error and some returned symbols.)
// Symbols might not be available with core files from stripped binaries.
// The returned map is p's own map, not a copy.
func (p *Process) Symbols() (map[string]Address, error) {
	return p.syms, p.symErr
}
// StaticBase returns the offset at which the main executable was loaded in
// memory. For example, it should be used when dereferencing DWARF locations.
// It is 0 when the executable is not position-independent.
func (p *Process) StaticBase() uint64 {
	return p.staticBase
}
// mapFile returns the length bytes of the file referred to by fd that start
// at offset. Core uses it to obtain the contents of file-backed mappings.
//
// This default always fails. It is a variable, presumably so that a
// platform-specific file can install a real implementation — TODO confirm.
var mapFile = func(fd int, offset int64, length int) (data []byte, err error) {
	return nil, fmt.Errorf("file mapping is not implemented yet")
}
// Core takes the path to a core file and returns a Process that
// represents the state of the inferior that generated the core file.
//
// base is the base directory from which files in the core can be found.
//
// exePath is the path of the main executable. If "", the path will be
// determined from the core itself.
//
// Core returns an error if the core or the executable cannot be opened or
// parsed, if the entry points of the two are inconsistent, or if a mapping
// cannot be loaded. Non-fatal problems (missing mapped files, missing data)
// are collected as warnings on the returned Process; problems reading symbols
// or DWARF are stored and reported by Symbols and DWARF respectively.
func Core(corePath, base, exePath string) (*Process, error) {
	coreFile, err := os.Open(corePath)
	if err != nil {
		return nil, fmt.Errorf("failed to open core file: %v", err)
	}
	defer coreFile.Close()
	coreElf, err := elf.NewFile(coreFile)
	if err != nil {
		return nil, fmt.Errorf("failed to parse core: %v", err)
	}

	meta, err := newMetadata(coreElf)
	if err != nil {
		return nil, fmt.Errorf("error reading metadata: %v", err)
	}

	notes, err := readCoreNotes(coreFile, coreElf)
	if err != nil {
		return nil, err
	}

	entryPoint := readEntryPoint(meta, notes)
	fileMappings := readFileMappings(meta, notes)
	origExePath := findExe(fileMappings, entryPoint)

	// An explicit exePath wins; otherwise fall back to the executable named
	// in the core, resolved relative to base.
	exeName := exePath
	if exeName == "" {
		exeName = filepath.Join(base, origExePath)
	}
	exeFile, err := os.Open(exeName)
	if err != nil {
		return nil, fmt.Errorf("failed to open executable file: %v", err)
	}
	defer exeFile.Close()
	exeElf, err := elf.NewFile(exeFile)
	if err != nil {
		return nil, fmt.Errorf("failed to parse executable: %v", err)
	}

	// Validate before subtracting so staticBase never holds a wrapped value.
	if exeElf.Entry > uint64(entryPoint) {
		return nil, fmt.Errorf("malformed binary or core, core entry point (%d) - executable entry point (%d) is < 0", entryPoint, exeElf.Entry)
	}
	staticBase := uint64(entryPoint) - exeElf.Entry // If not PIE, this is 0.

	// The base memory layout is defined by the binary itself. Additional
	// mappings from the core layer on top. This ordering is important to
	// ensure that dirty data/bss pages from the core take priority over
	// the initial state from the binary.
	mem := readExecMappings(exeFile, exeElf, staticBase)
	addCoreMappings(&mem, coreFile, coreElf)

	// Add os.File references to mappings of files.
	warnings := updateMappingFiles(&mem, fileMappings, base, exeFile, origExePath)

	threads := readThreads(meta, notes)
	args, err := readArgs(meta, notes)
	if err != nil {
		return nil, fmt.Errorf("error reading args: %v", err)
	}

	syms, symErr := readSymbols(staticBase, exeElf)
	if symErr != nil {
		symErr = fmt.Errorf("%v: from file %s", symErr, exeFile.Name())
	}

	dwarfData, dwarfErr := exeElf.DWARF()
	if dwarfErr != nil {
		dwarfErr = fmt.Errorf("error reading DWARF info from %s: %v", exeFile.Name(), dwarfErr)
	}

	// Note that we expect to find .debug_loc for DWARF V4 binaries
	// and .debug_loclists + .debug_addr for DWARF V5. If C code is
	// mixed in, we may see both sections, since you're allowed to mix
	// different DWARF versions by compilation unit.
	var dwarfLoc, dwarfLocLists, dwarfAddr []byte
	toRead := []struct {
		name    string
		payload *[]byte
	}{
		{name: ".debug_loc", payload: &dwarfLoc},
		{name: ".debug_loclists", payload: &dwarfLocLists},
		{name: ".debug_addr", payload: &dwarfAddr},
	}
	for _, secitem := range toRead {
		section := exeElf.Section(secitem.name)
		if section == nil {
			continue
		}
		payload, err := section.Data()
		// Only the first DWARF-related error is kept.
		if err != nil && dwarfErr == nil {
			dwarfErr = fmt.Errorf("error reading DWARF %s section from %s: %v", secitem.name, exeFile.Name(), err)
		}
		*(secitem.payload) = payload
	}

	// Sort then merge mappings, just to clean up a bit.
	mappings := mem.mappings
	sort.Slice(mappings, func(i, j int) bool {
		return mappings[i].min < mappings[j].min
	})
	// Merge in place. merged never grows past the element being read, so
	// reusing the backing array is safe, and an empty mapping list is
	// handled without special-casing.
	merged := mappings[:0]
	for _, m := range mappings {
		if len(merged) > 0 {
			k := merged[len(merged)-1]
			if m.min == k.max &&
				m.perm == k.perm &&
				m.f == k.f &&
				m.off == k.off+k.Size() {
				k.max = m.max
				// TODO: also check origF?
				continue
			}
		}
		merged = append(merged, m)
	}
	mem.mappings = merged

	// Memory map all the mappings.
	hostPageSize := int64(syscall.Getpagesize())
	for _, m := range mem.mappings {
		size := m.max.Sub(m.min)
		if m.f == nil {
			// We don't have any source for this data.
			// Could be a mapped file that we couldn't find.
			// Could be a mapping madvised as MADV_DONTDUMP.
			// Pretend this is read-as-zero.
			// The other option is to just throw away
			// the mapping (and thus make Read*s of this
			// mapping fail).
			warnings = append(warnings,
				fmt.Sprintf("Missing data at addresses [%x %x]. Assuming all zero.", m.min, m.max))
			// TODO: this allocation could be large.
			// Use mmap to avoid real backing store for all those zeros, or
			// perhaps split the mapping up into chunks and share the zero contents among them.
			m.contents = make([]byte, size)
			continue
		}
		if m.perm&Write != 0 && m.f != coreFile {
			warnings = append(warnings,
				fmt.Sprintf("Writeable data at [%x %x] missing from core. Using possibly stale backup source %s.", m.min, m.max, m.f.Name()))
		}
		// Data in core file might not be aligned enough for the host.
		// Expand memory range so we can map full pages.
		minOff := m.off
		maxOff := m.off + size
		minOff -= minOff % hostPageSize
		if maxOff%hostPageSize != 0 {
			maxOff += hostPageSize - maxOff%hostPageSize
		}

		// Read data from file.
		data, err := mapFile(int(m.f.Fd()), minOff, int(maxOff-minOff))
		if err != nil {
			return nil, fmt.Errorf("can't memory map %s at %x: %s", m.f.Name(), minOff, err)
		}

		// Trim any data we mapped but don't need.
		start := m.off - minOff
		if int64(len(data)) < start+size {
			return nil, fmt.Errorf("short memory map of %s at %x: got %d bytes, need %d", m.f.Name(), minOff, len(data), start+size)
		}
		data = data[start:]
		data = data[:size]

		m.contents = data
	}

	// Build page table for mapping lookup.
	var pageTable pageTable4
	for _, m := range mem.mappings {
		if err := pageTable.addMapping(m); err != nil {
			return nil, err
		}
	}

	p := &Process{
		meta:          meta,
		entryPoint:    entryPoint,
		staticBase:    staticBase,
		args:          args,
		threads:       threads,
		memory:        mem,
		pageTable:     pageTable,
		syms:          syms,
		symErr:        symErr,
		dwarf:         dwarfData,
		dwarfErr:      dwarfErr,
		dwarfLoc:      dwarfLoc,
		dwarfLocLists: dwarfLocLists,
		dwarfAddr:     dwarfAddr,
		warnings:      warnings,
	}
	return p, nil
}
// readExecMappings builds the memory mappings described by the loadable
// (PT_LOAD) segments of the executable. staticBase is the offset at which the
// executable was loaded in memory.
func readExecMappings(exeFile *os.File, exeElf *elf.File, staticBase uint64) splicedMemory {
	var mem splicedMemory
	for _, prog := range exeElf.Progs {
		if prog.Type != elf.PT_LOAD {
			continue
		}
		addProgMappings(&mem, prog, exeFile, staticBase)
	}
	return mem
}
// addCoreMappings layers the loadable (PT_LOAD) segments of the core file on
// top of mem. Core segments are added with no static base offset.
func addCoreMappings(mem *splicedMemory, coreFile *os.File, coreElf *elf.File) {
	for i := range coreElf.Progs {
		segment := coreElf.Progs[i]
		if segment.Type != elf.PT_LOAD {
			continue
		}
		addProgMappings(mem, segment, coreFile, 0)
	}
}
// addProgMappings adds memory mappings for prog (from file f) to mem.
// staticBase is added to the p_vaddr [1].
//
// Segments with no permission bits set are dropped.
//
// [1]: https://man7.org/linux/man-pages/man5/elf.5.html
func addProgMappings(mem *splicedMemory, prog *elf.Prog, f *os.File, staticBase uint64) {
	lo := Address(prog.Vaddr).Add(int64(staticBase))
	hi := lo.Add(int64(prog.Memsz))

	// Translate the ELF segment flags into our permission bits.
	var perm Perm
	for _, bit := range []struct {
		flag elf.ProgFlag
		perm Perm
	}{
		{elf.PF_R, Read},
		{elf.PF_W, Write},
		{elf.PF_X, Exec},
	} {
		if prog.Flags&bit.flag != 0 {
			perm |= bit.perm
		}
	}
	if perm == 0 {
		// TODO: keep these nothing-mapped mappings?
		return
	}

	if prog.Filesz == 0 {
		// No bytes in the file back this segment.
		mem.Add(lo, hi, perm, nil, 0)
	} else {
		// Data backing this mapping is in the file f.
		mem.Add(lo, hi, perm, f, int64(prog.Off))
	}
	if prog.Filesz < prog.Memsz {
		// We only have partial data for this mapping in the file.
		// Trim the mapping and allocate an anonymous mapping for the remainder.
		mem.Add(lo.Add(int64(prog.Filesz)), hi, perm, nil, 0)
	}
}
// noteMap is a set of raw ELF note values.
//
// The value is a slice of byte-slice note descriptors, in the order they
// appear in the ELF.
type noteMap map[elf.NType][][]byte

// readCoreNotes returns contents of all CORE ELF notes from the core file.
//
// Every PT_NOTE segment of coreElf is read from coreFile and parsed as a
// sequence of notes. Notes whose name is not "CORE" are skipped. An error is
// returned if a segment cannot be read or if a note header, name or
// descriptor runs past the end of its segment.
func readCoreNotes(coreFile *os.File, coreElf *elf.File) (noteMap, error) {
	// Each note starts with three 32-bit words: namesz, descsz and type.
	const headerSize = 12

	// advance skips n bytes rounded up to the 4-byte alignment notes are
	// padded to. The padding after the final field of a segment may be
	// missing, so running off the end simply yields an empty slice.
	advance := func(b []byte, n uint64) []byte {
		padded := (n + 3) &^ 3
		if padded > uint64(len(b)) {
			return b[len(b):]
		}
		return b[padded:]
	}

	order := coreElf.ByteOrder
	notes := make(noteMap)
	for _, prog := range coreElf.Progs {
		if prog.Type != elf.PT_NOTE {
			continue
		}
		b := make([]byte, prog.Filesz)
		if _, err := coreFile.ReadAt(b, int64(prog.Off)); err != nil {
			return nil, fmt.Errorf("error reading notes at offset %d: %v", prog.Off, err)
		}
		for len(b) > 0 {
			if len(b) < headerSize {
				return nil, fmt.Errorf("truncated note header in segment at offset %d", prog.Off)
			}
			// Sizes are widened to 64 bits so the alignment arithmetic
			// cannot overflow.
			namesz := uint64(order.Uint32(b[0:4]))
			descsz := uint64(order.Uint32(b[4:8]))
			typ := elf.NType(order.Uint32(b[8:12]))
			b = b[headerSize:]

			if namesz > uint64(len(b)) {
				return nil, fmt.Errorf("note name of %d bytes overruns segment at offset %d", namesz, prog.Off)
			}
			// namesz counts the terminating NUL; a nameless note has
			// namesz == 0.
			name := b[:namesz]
			if n := len(name); n > 0 && name[n-1] == 0 {
				name = name[:n-1]
			}
			b = advance(b, namesz)

			if descsz > uint64(len(b)) {
				return nil, fmt.Errorf("note descriptor of %d bytes overruns segment at offset %d", descsz, prog.Off)
			}
			// Cap the capacity so an append by a caller cannot overwrite
			// the bytes of the following note.
			desc := b[:descsz:descsz]
			b = advance(b, descsz)

			if string(name) != "CORE" {
				continue
			}
			notes[typ] = append(notes[typ], desc)
		}
	}
	return notes, nil
}
// readEntryPoint returns the entry point of the inferior as recorded in the
// auxiliary vector (NT_AUXV note) of the core.
//
// It returns 0 if there is no NT_AUXV note, or if the vector ends (at AT_NULL
// or at the end of the note) without an AT_ENTRY entry.
func readEntryPoint(meta metadata, notes noteMap) Address {
	// Auxiliary vector tags, see getauxval(3).
	const (
		atNull  = 0 // end of vector
		atEntry = 9 // entry point of the program
	)
	if len(notes[_NT_AUXV]) == 0 {
		return 0
	}
	// We don't expect multiple NT_AUXV notes. Just use the first.
	desc := notes[_NT_AUXV][0]

	// Each entry is a (tag, value) pair of pointer-sized words.
	word := 8
	if meta.ptrSize == 4 {
		word = 4
	}
	readWord := func(b []byte) uint64 {
		if word == 4 {
			return uint64(meta.byteOrder.Uint32(b))
		}
		return meta.byteOrder.Uint64(b)
	}

	for len(desc) >= 2*word {
		tag := readWord(desc)
		val := readWord(desc[word:])
		desc = desc[2*word:]
		switch tag {
		case atEntry:
			return Address(val)
		case atNull:
			// Anything after the terminator is not part of the vector.
			return 0
		}
	}
	return 0
}
// readFileMappings decodes the NT_FILE note of the core into the list of
// file-backed mappings of the inferior.
//
// The note consists of a count and a page size, then count (start, end, file
// offset in pages) triples, then count NUL-terminated file names. All numbers
// are read as 64-bit words.
//
// It returns nil if there is no NT_FILE note, or if the note is too short to
// hold the header or the number of entries it claims.
func readFileMappings(meta metadata, notes noteMap) []namedMapping {
	// TODO: 4 instead of 8 for 32-bit machines?
	const (
		word      = 8
		entrySize = 3 * word
	)
	if len(notes[_NT_FILE]) == 0 {
		return nil
	}
	// We don't expect multiple NT_FILE notes. Just use the first.
	desc := notes[_NT_FILE][0]
	if len(desc) < 2*word {
		return nil
	}
	count := meta.byteOrder.Uint64(desc)
	pagesize := meta.byteOrder.Uint64(desc[word:])
	desc = desc[2*word:]

	// Comparing by division keeps count*entrySize from overflowing.
	if count > uint64(len(desc))/entrySize {
		return nil
	}
	tableLen := count * entrySize
	filenames := string(desc[tableLen:])
	desc = desc[:tableLen]

	var mappings []namedMapping
	if count > 0 {
		mappings = make([]namedMapping, 0, count)
	}
	for i := uint64(0); i < count; i++ {
		start := Address(meta.byteOrder.Uint64(desc))
		end := Address(meta.byteOrder.Uint64(desc[word:]))
		off := int64(meta.byteOrder.Uint64(desc[2*word:]) * pagesize)
		desc = desc[entrySize:]

		// Names are NUL-terminated; tolerate a missing final terminator.
		var name string
		if j := strings.IndexByte(filenames, 0); j >= 0 {
			name = filenames[:j]
			filenames = filenames[j+1:]
		} else {
			name = filenames
			filenames = ""
		}
		mappings = append(mappings, namedMapping{
			min: start,
			max: end,
			f:   name,
			off: off,
		})
	}
	return mappings
}
// findExe returns the name of the file whose mapping covers entryPoint, or ""
// when no mapping in mappings contains it.
func findExe(mappings []namedMapping, entryPoint Address) string {
	for i := range mappings {
		m := mappings[i]
		if entryPoint < m.min || entryPoint >= m.max {
			continue
		}
		return m.f
	}
	// TODO: add heuristic for "first executable mapping" if entry point
	// isn't available? But why wouldn't the entry point be available?
	return ""
}
// updateMappingFiles attaches os.File references to the mappings in mem that
// are covered by an entry of fileMappings, and returns a warning for every
// mapping whose file could not be opened.
//
// base is the base directory from which files in fileMappings can be found.
//
// exeFile is the reference to the executable, which is named origExePath in
// fileMappings.
func updateMappingFiles(mem *splicedMemory, fileMappings []namedMapping, base string, exeFile *os.File, origExePath string) []string {
	// opened remembers the outcome of opening a file, so each name is
	// opened at most once. The executable is already open.
	type opened struct {
		f   *os.File
		err error
	}
	cache := map[string]*opened{
		origExePath: {f: exeFile},
	}
	lookup := func(name string) (*os.File, error) {
		if hit, ok := cache[name]; ok {
			return hit.f, hit.err
		}
		f, err := os.Open(filepath.Join(base, name))
		cache[name] = &opened{f: f, err: err}
		return f, err
	}

	var warnings []string
	for _, fm := range fileMappings {
		// TODO: this is O(n^2). Shouldn't be a big problem in practice.
		// Split at both ends so no mapping straddles the file region.
		mem.splitMappingsAt(fm.min)
		mem.splitMappingsAt(fm.max)
		for _, m := range mem.mappings {
			if m.max <= fm.min || m.min >= fm.max {
				continue
			}
			// m should now be entirely in [min,max]
			if m.min < fm.min || m.max > fm.max {
				panic("mapping overlapping end of file region")
			}

			f, err := lookup(fm.f)
			if err != nil {
				// Can't find mapped file.
				// We don't want to make this a hard error because there are
				// lots of possible missing files that probably aren't critical,
				// like a random shared library.
				warnings = append(warnings, fmt.Sprintf("Missing data for addresses [%x %x] because of failure to %s. Assuming all zero.", m.min, m.max, err))
			}

			fileOff := fm.off + m.min.Sub(fm.min)
			if m.f != nil {
				// Data is both in the core file and in a mapped file.
				// The mapped file may be stale (even if it is readonly now,
				// it may have been writeable at some point).
				// Keep the file+offset just for printing.
				m.origF = f
				m.origOff = fileOff
				continue
			}
			m.f = f
			m.off = fileOff
		}
	}
	return warnings
}
// readArgs extracts the leading part of the program arguments from the
// NT_PRPSINFO note of the core.
//
// It returns "" when the note is absent or when the architecture is not
// amd64, and an error when the note cannot be decoded.
func readArgs(meta metadata, notes noteMap) (string, error) {
	infos := notes[elf.NT_PRPSINFO]
	if len(infos) == 0 {
		return "", nil
	}
	if meta.arch != "amd64" {
		// TODO: return error?
		return "", nil
	}

	// We don't expect multiple NT_PRPSINFO notes. Just use the first.
	var info linuxPrPsInfo
	if err := binary.Read(bytes.NewReader(infos[0]), binary.LittleEndian, &info); err != nil {
		return "", fmt.Errorf("error decoding prpsinfo: %v", err)
	}
	// The field is fixed-width; drop the NUL and space padding.
	return strings.Trim(string(info.Args[:]), "\x00 "), nil
}
// readThreads builds one Thread per NT_PRSTATUS note of the core.
//
// Only amd64 notes are decoded (pid and general-purpose registers); for other
// architectures an empty Thread is returned per note. An amd64 note that is
// too short to contain the register set is skipped rather than decoded.
func readThreads(meta metadata, notes noteMap) []*Thread {
	// Linux
	//   sys/procfs.h:
	//     struct elf_prstatus {
	//       ...
	//       pid_t	pr_pid;
	//       ...
	//       elf_gregset_t pr_reg;	/* GP registers */
	//       ...
	//     };
	//     typedef struct elf_prstatus prstatus_t;
	// Register numberings are listed in sys/user.h.
	// prstatus layout will probably be different for each arch/os combo.
	const (
		amd64PidOff  = 32  // offsetof(prstatus_t, pr_pid)
		amd64PidSize = 4   // sizeof(pid_t)
		amd64RegOff  = 112 // offsetof(prstatus_t, pr_reg)
		amd64RegSize = 216 // sizeof(elf_gregset_t)
	)
	// amd64 registers in the order they are stored in pr_reg, 8 bytes each.
	amd64Regs := [amd64RegSize / 8]string{
		"r15", "r14", "r13", "r12", "rbp", "rbx", "r11", "r10", "r9", "r8",
		"rax", "rcx", "rdx", "rsi", "rdi", "orig_rax", "rip", "cs", "eflags",
		"rsp", "ss", "fs_base", "gs_base", "ds", "es", "fs", "gs",
	}

	var threads []*Thread
	for _, desc := range notes[elf.NT_PRSTATUS] {
		t := &Thread{}
		switch meta.arch {
		default:
			// TODO: return error here?
		case "amd64":
			if len(desc) < amd64RegOff+amd64RegSize {
				// Truncated note: there is no register state to report.
				continue
			}
			t.pid = uint64(meta.byteOrder.Uint32(desc[amd64PidOff : amd64PidOff+amd64PidSize]))
			reg := desc[amd64RegOff : amd64RegOff+amd64RegSize]
			t.regs = make([]Register, 0, len(amd64Regs))
			for i, name := range amd64Regs {
				value := meta.byteOrder.Uint64(reg[8*i:])
				t.regs = append(t.regs, Register{Name: name, Value: value})
				switch name {
				case "rip":
					t.pc = Address(value)
				case "rsp":
					t.sp = Address(value)
				}
			}

			// TODO: NT_FPREGSET for floating-point registers.
			//
			// This will be a bit awkward with the notes map, as
			// the NT_FPREGSET notes are implicitly associated with
			// the thread described by the previous NT_PRSTATUS
			// rather than directly denoting which thread they
			// belong to.
		}
		threads = append(threads, t)
	}
	return threads
}
// readSymbols returns the symbols of the executable's symbol table
// (SHT_SYMTAB), keyed by name, with staticBase added to every address.
//
// When the symbols cannot be read, an empty (non-nil) map is returned
// together with the error.
//
// TODO(aktau): Should we read symbols from the files underlying all available
// executable mappings? This used to be done (see e.g.:
// https://go.dev/cl/137375) but currently viewcore supports PIE and mixed
// binaries without needing to read multiple files.
//
// NOTE: The core file itself does not contain a symbols section (SHT_SYMTAB),
// so we don't read from it.
func readSymbols(staticBase uint64, exeElf *elf.File) (map[string]Address, error) {
	table := map[string]Address{}
	elfSyms, err := exeElf.Symbols()
	if err != nil {
		return table, fmt.Errorf("can't read symbols from main executable: %v", err)
	}
	offset := int64(staticBase)
	for i := range elfSyms {
		table[elfSyms[i].Name] = Address(elfSyms[i].Value).Add(offset)
	}
	return table, nil
}
// Warnings returns the warnings generated while loading the core, such as
// mappings whose data could not be found.
func (p *Process) Warnings() []string {
	return p.warnings
}
// Args returns the initial part of the program arguments, as retrieved from
// the NT_PRPSINFO note of the core. It may be empty.
func (p *Process) Args() string {
	return p.args
}
// ELF/Linux types
// linuxPrPsInfo is the info embedded in NT_PRPSINFO.
//
// readArgs decodes it with binary.Read, so the order and sizes of the fields,
// including the explicit padding, define the byte layout that is expected in
// the note. Only Args is used by this file.
type linuxPrPsInfo struct {
	State                uint8
	Sname                int8
	Zomb                 uint8
	Nice                 int8
	_                    [4]uint8 // explicit padding before the 8-byte Flag field
	Flag                 uint64
	Uid, Gid             uint32
	Pid, Ppid, Pgrp, Sid int32
	Fname                [16]uint8 // filename of executables
	Args                 [80]uint8 // first part of program args
}