blob: 14ad2452245571f7175d53e5001d31cf3a9af633 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pe implements access to PE (Microsoft Windows Portable Executable) files.
package pe
import (
"bytes"
"compress/zlib"
"debug/dwarf"
"encoding/binary"
"fmt"
"io"
"os"
"strings"
)
// Avoid use of post-Go 1.4 io features, to make safe for toolchain bootstrap.
// seekStart is the whence value passed to Seek in this file to position
// relative to the start of the data; it has the same value as io.SeekStart.
const seekStart = 0
// A File represents an open PE file.
// All exported fields are filled in by NewFile.
type File struct {
FileHeader // COFF file header, decoded little-endian by NewFile
OptionalHeader interface{} // of type *OptionalHeader32 or *OptionalHeader64; nil when the file header's SizeOfOptionalHeader is 0
Sections []*Section // one entry per section header, in the order the headers appear in the file
Symbols []*Symbol // COFF symbols with auxiliary symbol records removed
COFFSymbols []COFFSymbol // all COFF symbols (including auxiliary symbol records)
StringTable StringTable // COFF string table, used to resolve section and symbol names
closer io.Closer // set by Open so that Close can release the underlying file; nil when NewFile was called directly
}
// Open opens the named file using os.Open and prepares it for use as a PE binary.
// The returned File owns the underlying os.File; call Close to release it.
// If the file cannot be parsed as a PE binary, the os.File is closed before
// the parse error is returned.
func Open(name string) (*File, error) {
	osFile, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	peFile, err := NewFile(osFile)
	if err == nil {
		// Remember the os.File so that Close can release it later.
		peFile.closer = osFile
		return peFile, nil
	}
	// Parsing failed: the parse error is the one worth reporting, so any
	// error from closing the file is deliberately dropped.
	osFile.Close()
	return nil, err
}
// Close closes the File.
// If the File was created using NewFile directly instead of Open,
// Close has no effect.
// Calling Close again after a first call is a no-op that returns nil.
func (f *File) Close() error {
	if f.closer == nil {
		return nil
	}
	err := f.closer.Close()
	// Forget the closer so that a repeated Close does nothing.
	f.closer = nil
	return err
}
// TODO(brainman): add Load function, as a replacement for NewFile, that does not call removeAuxSymbols (for performance)

// NewFile creates a new File for accessing a PE binary in an underlying reader.
//
// It reads, in this order: the DOS header and PE signature (when the data
// starts with "MZ"), the COFF file header, the string table, the symbol
// table, the optional header, the section headers and finally the
// relocations of every section. The first failure aborts parsing and is
// returned with a nil *File.
//
// NewFile does not take ownership of r; Close on the returned File has no
// effect unless the File was obtained from Open.
func NewFile(r io.ReaderAt) (*File, error) {
f := new(File)
// sr is a seekable view over all of r; the sequential decoders below share
// its read position.
sr := io.NewSectionReader(r, 0, 1<<63-1)
// The read must fill all 96 bytes; io.ReaderAt reports an error for a short
// read, so inputs shorter than that are rejected here.
var dosheader [96]byte
if _, err := r.ReadAt(dosheader[0:], 0); err != nil {
return nil, err
}
// base is the offset of the COFF file header within r.
var base int64
if dosheader[0] == 'M' && dosheader[1] == 'Z' {
// The 32-bit little-endian value at offset 0x3c of the DOS header is used
// as the offset of the 4-byte "PE\0\0" signature.
signoff := int64(binary.LittleEndian.Uint32(dosheader[0x3c:]))
var sign [4]byte
// NOTE(review): the error from this ReadAt is ignored. Bytes of sign that
// were not read stay zero, so a failed read is only caught by the
// signature comparison or by the header reads that follow.
r.ReadAt(sign[:], signoff)
if !(sign[0] == 'P' && sign[1] == 'E' && sign[2] == 0 && sign[3] == 0) {
return nil, fmt.Errorf("Invalid PE COFF file signature of %v.", sign)
}
// The file header follows the signature directly.
base = signoff + 4
} else {
// No "MZ" magic: the file header is expected at offset 0.
base = int64(0)
}
sr.Seek(base, seekStart)
if err := binary.Read(sr, binary.LittleEndian, &f.FileHeader); err != nil {
return nil, err
}
// Only the machine values listed in the case are accepted; its empty body
// simply leaves the switch. Anything else is rejected.
switch f.FileHeader.Machine {
case IMAGE_FILE_MACHINE_UNKNOWN, IMAGE_FILE_MACHINE_ARMNT, IMAGE_FILE_MACHINE_AMD64, IMAGE_FILE_MACHINE_I386:
default:
return nil, fmt.Errorf("Unrecognised COFF file header machine value of 0x%x.", f.FileHeader.Machine)
}
var err error
// Read string table.
f.StringTable, err = readStringTable(&f.FileHeader, sr)
if err != nil {
return nil, err
}
// Read symbol table.
f.COFFSymbols, err = readCOFFSymbols(&f.FileHeader, sr)
if err != nil {
return nil, err
}
f.Symbols, err = removeAuxSymbols(f.COFFSymbols, f.StringTable)
if err != nil {
return nil, err
}
// Seek past file header.
// The helpers above were handed sr, so its position is set explicitly here
// rather than assumed.
_, err = sr.Seek(base+int64(binary.Size(f.FileHeader)), seekStart)
if err != nil {
return nil, fmt.Errorf("failure to seek past the file header: %v", err)
}
// Read optional header.
f.OptionalHeader, err = readOptionalHeader(sr, f.FileHeader.SizeOfOptionalHeader)
if err != nil {
return nil, err
}
// Process sections.
// The section headers are read sequentially from the position that
// readOptionalHeader left sr at.
f.Sections = make([]*Section, f.FileHeader.NumberOfSections)
for i := 0; i < int(f.FileHeader.NumberOfSections); i++ {
sh := new(SectionHeader32)
if err := binary.Read(sr, binary.LittleEndian, sh); err != nil {
return nil, err
}
// fullName is given the string table so it can resolve the stored name.
name, err := sh.fullName(f.StringTable)
if err != nil {
return nil, err
}
s := new(Section)
s.SectionHeader = SectionHeader{
Name: name,
VirtualSize: sh.VirtualSize,
VirtualAddress: sh.VirtualAddress,
Size: sh.SizeOfRawData,
Offset: sh.PointerToRawData,
PointerToRelocations: sh.PointerToRelocations,
PointerToLineNumbers: sh.PointerToLineNumbers,
NumberOfRelocations: sh.NumberOfRelocations,
NumberOfLineNumbers: sh.NumberOfLineNumbers,
Characteristics: sh.Characteristics,
}
// A section without raw data in the file is backed by a reader that
// yields zeros instead of by r.
r2 := r
if sh.PointerToRawData == 0 { // .bss must have all 0s
r2 = zeroReaderAt{}
}
s.sr = io.NewSectionReader(r2, int64(s.SectionHeader.Offset), int64(s.SectionHeader.Size))
s.ReaderAt = s.sr
f.Sections[i] = s
}
// Relocations are read in a second pass, after every section header has
// been consumed from sr.
for i := range f.Sections {
var err error
f.Sections[i].Relocs, err = readRelocs(&f.Sections[i].SectionHeader, sr)
if err != nil {
return nil, err
}
}
return f, nil
}
// zeroReaderAt is an io.ReaderAt whose content is an endless run of zero bytes.
type zeroReaderAt struct{}

// ReadAt fills all of p with zeros, whatever the value of off, and reports
// len(p) bytes read with a nil error.
func (zeroReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
	// n doubles as the loop index, so it equals len(p) when the loop ends.
	for n = 0; n < len(p); n++ {
		p[n] = 0
	}
	return n, nil
}
// getString returns the NUL-terminated string that begins at offset start
// of section. The boolean result is false, and the string empty, when start
// lies outside section or when no terminating zero byte follows start.
func getString(section []byte, start int) (string, bool) {
	if start < 0 || start >= len(section) {
		return "", false
	}
	tail := section[start:]
	for i, c := range tail {
		if c == 0 {
			return string(tail[:i]), true
		}
	}
	// Ran off the end of the data without finding a terminator.
	return "", false
}
// Section returns the first section with the given name, or nil if no such
// section exists.
// Sections are searched in the order they are stored in f.Sections, and
// names are compared exactly.
func (f *File) Section(name string) *Section {
	for i := range f.Sections {
		if sec := f.Sections[i]; sec.Name == name {
			return sec
		}
	}
	return nil
}
// DWARF returns the DWARF debug information for the PE file.
//
// It collects the contents of the "abbrev", "info", "str", "line" and
// "ranges" debug sections, named with either a ".debug_" or a ".zdebug_"
// prefix, and hands them to dwarf.New. Every "types" debug section is then
// added with AddTypes. Section contents that start with "ZLIB" followed by
// an 8-byte big-endian length are decompressed with zlib first.
func (f *File) DWARF() (*dwarf.Data, error) {
	// suffixOf returns the part of the section name after the debug prefix,
	// or "" when the section is not a debug section.
	suffixOf := func(sec *Section) string {
		for _, prefix := range [...]string{".debug_", ".zdebug_"} {
			if strings.HasPrefix(sec.Name, prefix) {
				return sec.Name[len(prefix):]
			}
		}
		return ""
	}

	// load returns the usable contents of sec, decompressed if necessary.
	load := func(sec *Section) ([]byte, error) {
		raw, err := sec.Data()
		// A read error is only fatal when it left the data short.
		if err != nil && uint32(len(raw)) < sec.Size {
			return nil, err
		}
		// Drop the part of the raw data that lies beyond the virtual size.
		if vs := sec.VirtualSize; vs > 0 && vs < sec.Size {
			raw = raw[:vs]
		}
		if len(raw) < 12 || string(raw[:4]) != "ZLIB" {
			return raw, nil
		}
		// Compressed layout: "ZLIB", 8-byte big-endian uncompressed length,
		// then the zlib stream.
		size := binary.BigEndian.Uint64(raw[4:12])
		out := make([]byte, size)
		zr, err := zlib.NewReader(bytes.NewBuffer(raw[12:]))
		if err != nil {
			return nil, err
		}
		if _, err = io.ReadFull(zr, out); err != nil {
			return nil, err
		}
		if err = zr.Close(); err != nil {
			return nil, err
		}
		return out, nil
	}

	// There are many other DWARF sections, but these
	// are the ones the debug/dwarf package uses.
	// Don't bother loading others.
	wanted := map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil}
	for _, sec := range f.Sections {
		key := suffixOf(sec)
		if key == "" {
			continue
		}
		if _, known := wanted[key]; !known {
			continue
		}
		contents, err := load(sec)
		if err != nil {
			return nil, err
		}
		wanted[key] = contents
	}

	d, err := dwarf.New(wanted["abbrev"], nil, nil, wanted["info"], wanted["line"], nil, wanted["ranges"], wanted["str"])
	if err != nil {
		return nil, err
	}

	// Look for DWARF4 .debug_types sections.
	for i, sec := range f.Sections {
		if suffixOf(sec) != "types" {
			continue
		}
		contents, err := load(sec)
		if err != nil {
			return nil, err
		}
		// The section index keeps the names of several types sections apart.
		if err = d.AddTypes(fmt.Sprintf("types-%d", i), contents); err != nil {
			return nil, err
		}
	}
	return d, nil
}
// TODO(brainman): document ImportDirectory once we decide what to do with it.
//
// ImportDirectory holds one entry of the import directory table. The five
// exported fields are decoded by ImportedSymbols from a 20-byte
// little-endian record, in the order they are declared here.
type ImportDirectory struct {
OriginalFirstThunk uint32 // address of the list of imported symbols for this entry; a zero value ends the table
TimeDateStamp uint32
ForwarderChain uint32
Name uint32 // address of the NUL-terminated name of the imported DLL
FirstThunk uint32
dll string // not part of the on-disk record; presumably meant to hold the DLL name resolved from Name
}
// ImportedSymbols returns the names of all symbols
// referred to by the binary f that are expected to be
// satisfied by other libraries at dynamic load time.
// It does not return weak symbols.
//
// Each entry has the form "symbol:dll". Symbols imported by ordinal are
// skipped. ImportedSymbols returns nil, nil when f has no optional header,
// when the optional header has no import data directory entry, or when no
// section contains the import directory table. It returns an error if the
// section data cannot be read or if a table offset taken from the file lies
// outside the data of the section that holds the import directory.
func (f *File) ImportedSymbols() ([]string, error) {
	// Choose between PE32 and PE32+ from the concrete optional header type
	// rather than from f.Machine, so that a mismatch between the two cannot
	// cause a failed type assertion.
	var (
		pe64     bool
		ddLength uint32        // number of data directory entries
		idd      DataDirectory // import data directory entry
	)
	switch oh := f.OptionalHeader.(type) {
	case *OptionalHeader32:
		ddLength = oh.NumberOfRvaAndSizes
		idd = oh.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
	case *OptionalHeader64:
		pe64 = true
		ddLength = oh.NumberOfRvaAndSizes
		idd = oh.DataDirectory[IMAGE_DIRECTORY_ENTRY_IMPORT]
	default:
		// No optional header, so there is no import directory to look at.
		return nil, nil
	}

	// check that the length of data directory entries is large
	// enough to include the imports directory.
	if ddLength < IMAGE_DIRECTORY_ENTRY_IMPORT+1 {
		return nil, nil
	}

	// figure out which section contains the import directory table.
	// The distance from the section start is compared against the section
	// size because s.VirtualAddress+s.VirtualSize could overflow uint32.
	var ds *Section
	for _, s := range f.Sections {
		if s.VirtualAddress <= idd.VirtualAddress && idd.VirtualAddress-s.VirtualAddress < s.VirtualSize {
			ds = s
			break
		}
	}

	// didn't find a section, so no import libraries were found
	if ds == nil {
		return nil, nil
	}

	// The section contents are read once and shared by every lookup below;
	// they are never modified.
	data, err := ds.Data()
	if err != nil {
		return nil, err
	}

	// seek to the virtual address specified in the import data directory
	dirOff := idd.VirtualAddress - ds.VirtualAddress
	if int64(dirOff) > int64(len(data)) {
		return nil, fmt.Errorf("import directory offset 0x%x is outside section %q", dirOff, ds.Name)
	}
	dir := data[dirOff:]

	// start decoding the import directory: a run of 20-byte records ended
	// by one whose OriginalFirstThunk is zero.
	var ida []ImportDirectory
	for len(dir) >= 20 {
		dt := ImportDirectory{
			OriginalFirstThunk: binary.LittleEndian.Uint32(dir[0:4]),
			TimeDateStamp:      binary.LittleEndian.Uint32(dir[4:8]),
			ForwarderChain:     binary.LittleEndian.Uint32(dir[8:12]),
			Name:               binary.LittleEndian.Uint32(dir[12:16]),
			FirstThunk:         binary.LittleEndian.Uint32(dir[16:20]),
		}
		dir = dir[20:]
		if dt.OriginalFirstThunk == 0 {
			break
		}
		ida = append(ida, dt)
	}

	// Entries of the lookup table are 8 bytes wide for PE32+ and 4 bytes
	// wide for PE32.
	thunkSize := 4
	if pe64 {
		thunkSize = 8
	}

	var all []string
	for _, dt := range ida {
		// A DLL name that cannot be resolved is reported as the empty string.
		dll, _ := getString(data, int(dt.Name-ds.VirtualAddress))

		// seek to OriginalFirstThunk
		thunkOff := dt.OriginalFirstThunk - ds.VirtualAddress
		if int64(thunkOff) > int64(len(data)) {
			return nil, fmt.Errorf("import lookup table offset 0x%x is outside section %q", thunkOff, ds.Name)
		}
		thunks := data[thunkOff:]

		// Stop when fewer bytes than one whole entry remain, so a truncated
		// table cannot cause an out-of-range read.
		for len(thunks) >= thunkSize {
			var (
				va      uint64
				ordinal bool // the top bit of the entry marks an import by ordinal
			)
			if pe64 {
				va = binary.LittleEndian.Uint64(thunks)
				ordinal = va&0x8000000000000000 != 0
			} else {
				va = uint64(binary.LittleEndian.Uint32(thunks))
				ordinal = va&0x80000000 != 0
			}
			thunks = thunks[thunkSize:]
			if va == 0 {
				break
			}
			if ordinal {
				// TODO add dynimport ordinal support.
				continue
			}
			// The name starts 2 bytes after the address stored in the
			// entry. A name that cannot be resolved is reported as empty.
			fn, _ := getString(data, int(uint32(va)-ds.VirtualAddress+2))
			all = append(all, fn+":"+dll)
		}
	}
	return all, nil
}
// ImportedLibraries returns the names of all libraries
// referred to by the binary f that are expected to be
// linked with the binary at dynamic link time.
//
// It is not implemented yet and always returns a nil slice and a nil error.
func (f *File) ImportedLibraries() ([]string, error) {
	// TODO
	// cgo -dynimport don't use this for windows PE, so just return.
	var libs []string
	return libs, nil
}
// FormatError is unused.
// The type is retained for compatibility.
type FormatError struct{}

// Error implements the error interface. The message is fixed and does not
// depend on the receiver.
func (*FormatError) Error() string {
	return "unknown error"
}
// readOptionalHeader accepts an io.ReadSeeker pointing to optional header in the PE file
// and its size as seen in the file header.
// It parses the given size of bytes and returns optional header. It infers whether the
// bytes being parsed refer to 32 bit or 64 bit version of optional header.
//
// The result is nil when sz is 0, a *OptionalHeader32 when the magic is 0x10b
// and a *OptionalHeader64 when the magic is 0x20b. Any other magic, a size
// too small for the detected variant, or a failed read yields an error.
func readOptionalHeader(r io.ReadSeeker, sz uint16) (interface{}, error) {
// If optional header size is 0, return empty optional header.
if sz == 0 {
return nil, nil
}
var (
// First couple of bytes in option header state its type.
// We need to read them first to determine the type and
// validity of optional header.
ohMagic uint16
ohMagicSz = binary.Size(ohMagic)
)
// If optional header size is greater than 0 but less than its magic size, return error.
if sz < uint16(ohMagicSz) {
return nil, fmt.Errorf("optional header size is less than optional header magic size")
}
// read decodes the next little-endian value from r into data. It stores any
// failure in err and reports whether the read succeeded, which lets the
// field reads below be chained with || and stop at the first failure.
var err error
read := func(data interface{}) bool {
err = binary.Read(r, binary.LittleEndian, data)
return err == nil
}
if !read(&ohMagic) {
return nil, fmt.Errorf("failure to read optional header magic: %v", err)
}
switch ohMagic {
case 0x10b: // PE32
var (
oh32 OptionalHeader32
// There can be 0 or more data directories. So the minimum size of optional
// header is calculated by subtracting oh32.DataDirectory size from oh32 size.
oh32MinSz = binary.Size(oh32) - binary.Size(oh32.DataDirectory)
)
if sz < uint16(oh32MinSz) {
return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) of PE32 optional header", sz, oh32MinSz)
}
// Init oh32 fields
// Magic was already consumed from r above; the remaining fields are read
// one by one in the order they are laid out in the file.
oh32.Magic = ohMagic
if !read(&oh32.MajorLinkerVersion) ||
!read(&oh32.MinorLinkerVersion) ||
!read(&oh32.SizeOfCode) ||
!read(&oh32.SizeOfInitializedData) ||
!read(&oh32.SizeOfUninitializedData) ||
!read(&oh32.AddressOfEntryPoint) ||
!read(&oh32.BaseOfCode) ||
!read(&oh32.BaseOfData) ||
!read(&oh32.ImageBase) ||
!read(&oh32.SectionAlignment) ||
!read(&oh32.FileAlignment) ||
!read(&oh32.MajorOperatingSystemVersion) ||
!read(&oh32.MinorOperatingSystemVersion) ||
!read(&oh32.MajorImageVersion) ||
!read(&oh32.MinorImageVersion) ||
!read(&oh32.MajorSubsystemVersion) ||
!read(&oh32.MinorSubsystemVersion) ||
!read(&oh32.Win32VersionValue) ||
!read(&oh32.SizeOfImage) ||
!read(&oh32.SizeOfHeaders) ||
!read(&oh32.CheckSum) ||
!read(&oh32.Subsystem) ||
!read(&oh32.DllCharacteristics) ||
!read(&oh32.SizeOfStackReserve) ||
!read(&oh32.SizeOfStackCommit) ||
!read(&oh32.SizeOfHeapReserve) ||
!read(&oh32.SizeOfHeapCommit) ||
!read(&oh32.LoaderFlags) ||
!read(&oh32.NumberOfRvaAndSizes) {
return nil, fmt.Errorf("failure to read PE32 optional header: %v", err)
}
// Whatever remains of sz after the fixed part must be exactly the data
// directories; readDataDirectories checks this against NumberOfRvaAndSizes.
dd, err := readDataDirectories(r, sz-uint16(oh32MinSz), oh32.NumberOfRvaAndSizes)
if err != nil {
return nil, err
}
// copy transfers at most as many entries as oh32.DataDirectory can hold.
copy(oh32.DataDirectory[:], dd)
return &oh32, nil
case 0x20b: // PE32+
var (
oh64 OptionalHeader64
// There can be 0 or more data directories. So the minimum size of optional
// header is calculated by subtracting oh64.DataDirectory size from oh64 size.
oh64MinSz = binary.Size(oh64) - binary.Size(oh64.DataDirectory)
)
if sz < uint16(oh64MinSz) {
return nil, fmt.Errorf("optional header size(%d) is less minimum size (%d) for PE32+ optional header", sz, oh64MinSz)
}
// Init oh64 fields
// Same scheme as for PE32, except that no BaseOfData field is read
// between BaseOfCode and ImageBase.
oh64.Magic = ohMagic
if !read(&oh64.MajorLinkerVersion) ||
!read(&oh64.MinorLinkerVersion) ||
!read(&oh64.SizeOfCode) ||
!read(&oh64.SizeOfInitializedData) ||
!read(&oh64.SizeOfUninitializedData) ||
!read(&oh64.AddressOfEntryPoint) ||
!read(&oh64.BaseOfCode) ||
!read(&oh64.ImageBase) ||
!read(&oh64.SectionAlignment) ||
!read(&oh64.FileAlignment) ||
!read(&oh64.MajorOperatingSystemVersion) ||
!read(&oh64.MinorOperatingSystemVersion) ||
!read(&oh64.MajorImageVersion) ||
!read(&oh64.MinorImageVersion) ||
!read(&oh64.MajorSubsystemVersion) ||
!read(&oh64.MinorSubsystemVersion) ||
!read(&oh64.Win32VersionValue) ||
!read(&oh64.SizeOfImage) ||
!read(&oh64.SizeOfHeaders) ||
!read(&oh64.CheckSum) ||
!read(&oh64.Subsystem) ||
!read(&oh64.DllCharacteristics) ||
!read(&oh64.SizeOfStackReserve) ||
!read(&oh64.SizeOfStackCommit) ||
!read(&oh64.SizeOfHeapReserve) ||
!read(&oh64.SizeOfHeapCommit) ||
!read(&oh64.LoaderFlags) ||
!read(&oh64.NumberOfRvaAndSizes) {
return nil, fmt.Errorf("failure to read PE32+ optional header: %v", err)
}
// Whatever remains of sz after the fixed part must be exactly the data
// directories; readDataDirectories checks this against NumberOfRvaAndSizes.
dd, err := readDataDirectories(r, sz-uint16(oh64MinSz), oh64.NumberOfRvaAndSizes)
if err != nil {
return nil, err
}
// copy transfers at most as many entries as oh64.DataDirectory can hold.
copy(oh64.DataDirectory[:], dd)
return &oh64, nil
default:
return nil, fmt.Errorf("optional header has unexpected Magic of 0x%x", ohMagic)
}
}
// readDataDirectories accepts an io.ReadSeeker pointing to data directories in the PE file,
// its size and number of data directories as seen in optional header.
// It parses the given size of bytes and returns given number of data directories.
//
// sz must be exactly n times the encoded size of one DataDirectory;
// otherwise an error is returned and nothing is read from r.
func readDataDirectories(r io.ReadSeeker, sz uint16, n uint32) ([]DataDirectory, error) {
	ddSz := uint64(binary.Size(DataDirectory{}))
	// n comes straight from the file. Multiplying in 64 bits keeps a huge n
	// from wrapping around to a product that happens to equal sz, which
	// would let the check pass and make the allocation below enormous.
	if uint64(sz) != uint64(n)*ddSz {
		return nil, fmt.Errorf("size of data directories(%d) is inconsistent with number of data directories(%d)", sz, n)
	}
	dd := make([]DataDirectory, n)
	if err := binary.Read(r, binary.LittleEndian, dd); err != nil {
		return nil, fmt.Errorf("failure to read data directories: %v", err)
	}
	return dd, nil
}