| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| /* |
| Package macho implements access to Mach-O object files. |
| |
| # Security |
| |
| This package is not designed to be hardened against adversarial inputs, and is |
| outside the scope of https://go.dev/security/policy. In particular, only basic |
| validation is done when parsing object files. As such, care should be taken when |
| parsing untrusted inputs, as parsing malformed files may consume significant |
| resources, or cause panics. |
| */ |
| package macho |
| |
| // High level access to low level data structures. |
| |
| import ( |
| "bytes" |
| "compress/zlib" |
| "debug/dwarf" |
| "encoding/binary" |
| "fmt" |
| "internal/saferio" |
| "io" |
| "os" |
| "strings" |
| ) |
| |
// A File represents an open Mach-O file.
// The exported fields are populated by NewFile while it decodes the file
// header and the load commands.
type File struct {
	FileHeader
	ByteOrder binary.ByteOrder // byte order NewFile derived from the magic number
	Loads     []Load           // load commands, in the order they were decoded
	Sections  []*Section       // sections of every segment command, in the order they were decoded

	Symtab   *Symtab   // set when a LoadCmdSymtab command is decoded, otherwise nil
	Dysymtab *Dysymtab // set when a LoadCmdDysymtab command is decoded, otherwise nil

	closer io.Closer // set by Open to the underlying file; released by Close
}
| |
// A Load represents any Mach-O load command.
type Load interface {
	// Raw returns the bytes of the load command. The implementations in
	// this file get it from the LoadBytes they embed.
	Raw() []byte
}
| |
// A LoadBytes is the uninterpreted bytes of a Mach-O load command.
type LoadBytes []byte

// Raw returns the command bytes themselves. The result shares its backing
// array with b; no copy is made.
func (b LoadBytes) Raw() []byte {
	return []byte(b)
}
| |
// A SegmentHeader is the header for a Mach-O 32-bit or 64-bit load segment command.
// NewFile fills it from a Segment32 or a Segment64, widening the 32-bit
// address, size and offset fields to uint64 so both variants share one type.
type SegmentHeader struct {
	Cmd     LoadCmd // command type read from the load command header
	Len     uint32  // size in bytes of the whole load command
	Name    string  // segment name, cut at the first NUL byte
	Addr    uint64  // copied from the segment command
	Memsz   uint64  // copied from the segment command
	Offset  uint64  // file offset at which the segment's data starts
	Filesz  uint64  // number of bytes of segment data in the file
	Maxprot uint32  // copied from the segment command
	Prot    uint32  // copied from the segment command
	Nsect   uint32  // number of section headers following the segment header
	Flag    uint32  // copied from the segment command
}
| |
// A Segment represents a Mach-O 32-bit or 64-bit load segment command.
// LoadBytes holds the raw command and SegmentHeader its decoded header.
type Segment struct {
	LoadBytes
	SegmentHeader

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	sr *io.SectionReader // window [Offset, Offset+Filesz) of the file; also stored in ReaderAt
}
| |
| // Data reads and returns the contents of the segment. |
| func (s *Segment) Data() ([]byte, error) { |
| return saferio.ReadDataAt(s.sr, s.Filesz, 0) |
| } |
| |
| // Open returns a new ReadSeeker reading the segment. |
| func (s *Segment) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) } |
| |
// A SectionHeader holds the decoded header of a Mach-O section.
// NewFile fills it from a Section32 or a Section64, widening the 32-bit
// address and size to uint64 so both variants share one type.
type SectionHeader struct {
	Name   string // section name, cut at the first NUL byte
	Seg    string // name of the owning segment, cut at the first NUL byte
	Addr   uint64 // copied from the section header
	Size   uint64 // number of bytes of section data
	Offset uint32 // file offset at which the section's data starts
	Align  uint32 // copied from the section header
	Reloff uint32 // file offset of the section's relocation entries
	Nreloc uint32 // number of relocation entries (8 bytes each in the file)
	Flags  uint32 // copied from the section header
}
| |
// A Reloc represents a Mach-O relocation.
// pushSection decodes one Reloc from each 8-byte relocation entry; an entry
// whose first word has the top bit set is decoded as a scattered relocation.
type Reloc struct {
	Addr  uint32
	Value uint32
	// when Scattered == false && Extern == true, Value is the symbol number.
	// when Scattered == false && Extern == false, Value is the section number.
	// when Scattered == true, Value is the value that this reloc refers to.
	Type      uint8
	Len       uint8 // 0=byte, 1=word, 2=long, 3=quad
	Pcrel     bool
	Extern    bool // valid if Scattered == false
	Scattered bool
}
| |
// A Section represents a single Mach-O section: its decoded header, its
// relocation entries, and read access to its contents.
type Section struct {
	SectionHeader
	Relocs []Reloc // decoded relocation entries; left nil when Nreloc is 0

	// Embed ReaderAt for ReadAt method.
	// Do not embed SectionReader directly
	// to avoid having Read and Seek.
	// If a client wants Read and Seek it must use
	// Open() to avoid fighting over the seek offset
	// with other clients.
	io.ReaderAt
	sr *io.SectionReader // window [Offset, Offset+Size) of the file; also stored in ReaderAt
}
| |
| // Data reads and returns the contents of the Mach-O section. |
| func (s *Section) Data() ([]byte, error) { |
| return saferio.ReadDataAt(s.sr, s.Size, 0) |
| } |
| |
| // Open returns a new ReadSeeker reading the Mach-O section. |
| func (s *Section) Open() io.ReadSeeker { return io.NewSectionReader(s.sr, 0, 1<<63-1) } |
| |
// A Dylib represents a Mach-O load dynamic library command.
type Dylib struct {
	LoadBytes             // raw bytes of the load command
	Name           string // NUL-terminated string found at the command's name offset
	Time           uint32 // copied from the command header
	CurrentVersion uint32 // copied from the command header
	CompatVersion  uint32 // copied from the command header
}
| |
// A Symtab represents a Mach-O symbol table command.
//
// NOTE(review): parseSymtab, as visible in this file, fills in only
// LoadBytes and Syms; the embedded SymtabCmd appears to be left at its zero
// value. Confirm before relying on its fields.
type Symtab struct {
	LoadBytes
	SymtabCmd
	Syms []Symbol // decoded symbol table entries, in file order
}
| |
// A Dysymtab represents a Mach-O dynamic symbol table command.
// LoadBytes holds the raw command and DysymtabCmd its decoded header.
type Dysymtab struct {
	LoadBytes
	DysymtabCmd
	IndirectSyms []uint32 // indices into Symtab.Syms
}
| |
// A Rpath represents a Mach-O rpath command.
type Rpath struct {
	LoadBytes        // raw bytes of the load command
	Path      string // NUL-terminated string found at the command's path offset
}
| |
// A Symbol is a Mach-O 32-bit or 64-bit symbol table entry.
// Entries from 32-bit files have their value widened to uint64.
type Symbol struct {
	// Name is the symbol's name from the string table. When the name
	// contains a "." and starts with "_", that leading underscore is
	// stripped (see issue 33808).
	Name  string
	Type  uint8
	Sect  uint8
	Desc  uint16
	Value uint64
}
| |
| /* |
| * Mach-O reader |
| */ |
| |
// FormatError is returned by some operations if the data does
// not have the correct format for an object file.
type FormatError struct {
	off int64  // byte offset of the offending record
	msg string // description of the problem
	val any    // optional offending value; omitted from the message when nil
}

// Error renders the message, the offending value in single quotes when one
// was recorded, and the record's byte offset in hexadecimal.
func (e *FormatError) Error() string {
	var detail string
	if e.val != nil {
		detail = fmt.Sprintf(" '%v'", e.val)
	}
	return e.msg + detail + fmt.Sprintf(" in record at byte %#x", e.off)
}
| |
| // Open opens the named file using os.Open and prepares it for use as a Mach-O binary. |
| func Open(name string) (*File, error) { |
| f, err := os.Open(name) |
| if err != nil { |
| return nil, err |
| } |
| ff, err := NewFile(f) |
| if err != nil { |
| f.Close() |
| return nil, err |
| } |
| ff.closer = f |
| return ff, nil |
| } |
| |
| // Close closes the File. |
| // If the File was created using NewFile directly instead of Open, |
| // Close has no effect. |
| func (f *File) Close() error { |
| var err error |
| if f.closer != nil { |
| err = f.closer.Close() |
| f.closer = nil |
| } |
| return err |
| } |
| |
| // NewFile creates a new File for accessing a Mach-O binary in an underlying reader. |
| // The Mach-O binary is expected to start at position 0 in the ReaderAt. |
| func NewFile(r io.ReaderAt) (*File, error) { |
| f := new(File) |
| sr := io.NewSectionReader(r, 0, 1<<63-1) |
| |
| // Read and decode Mach magic to determine byte order, size. |
| // Magic32 and Magic64 differ only in the bottom bit. |
| var ident [4]byte |
| if _, err := r.ReadAt(ident[0:], 0); err != nil { |
| return nil, err |
| } |
| be := binary.BigEndian.Uint32(ident[0:]) |
| le := binary.LittleEndian.Uint32(ident[0:]) |
| switch Magic32 &^ 1 { |
| case be &^ 1: |
| f.ByteOrder = binary.BigEndian |
| f.Magic = be |
| case le &^ 1: |
| f.ByteOrder = binary.LittleEndian |
| f.Magic = le |
| default: |
| return nil, &FormatError{0, "invalid magic number", nil} |
| } |
| |
| // Read entire file header. |
| if err := binary.Read(sr, f.ByteOrder, &f.FileHeader); err != nil { |
| return nil, err |
| } |
| |
| // Then load commands. |
| offset := int64(fileHeaderSize32) |
| if f.Magic == Magic64 { |
| offset = fileHeaderSize64 |
| } |
| dat, err := saferio.ReadDataAt(r, uint64(f.Cmdsz), offset) |
| if err != nil { |
| return nil, err |
| } |
| c := saferio.SliceCap((*Load)(nil), uint64(f.Ncmd)) |
| if c < 0 { |
| return nil, &FormatError{offset, "too many load commands", nil} |
| } |
| f.Loads = make([]Load, 0, c) |
| bo := f.ByteOrder |
| for i := uint32(0); i < f.Ncmd; i++ { |
| // Each load command begins with uint32 command and length. |
| if len(dat) < 8 { |
| return nil, &FormatError{offset, "command block too small", nil} |
| } |
| cmd, siz := LoadCmd(bo.Uint32(dat[0:4])), bo.Uint32(dat[4:8]) |
| if siz < 8 || siz > uint32(len(dat)) { |
| return nil, &FormatError{offset, "invalid command block size", nil} |
| } |
| var cmddat []byte |
| cmddat, dat = dat[0:siz], dat[siz:] |
| offset += int64(siz) |
| var s *Segment |
| switch cmd { |
| default: |
| f.Loads = append(f.Loads, LoadBytes(cmddat)) |
| |
| case LoadCmdRpath: |
| var hdr RpathCmd |
| b := bytes.NewReader(cmddat) |
| if err := binary.Read(b, bo, &hdr); err != nil { |
| return nil, err |
| } |
| l := new(Rpath) |
| if hdr.Path >= uint32(len(cmddat)) { |
| return nil, &FormatError{offset, "invalid path in rpath command", hdr.Path} |
| } |
| l.Path = cstring(cmddat[hdr.Path:]) |
| l.LoadBytes = LoadBytes(cmddat) |
| f.Loads = append(f.Loads, l) |
| |
| case LoadCmdDylib: |
| var hdr DylibCmd |
| b := bytes.NewReader(cmddat) |
| if err := binary.Read(b, bo, &hdr); err != nil { |
| return nil, err |
| } |
| l := new(Dylib) |
| if hdr.Name >= uint32(len(cmddat)) { |
| return nil, &FormatError{offset, "invalid name in dynamic library command", hdr.Name} |
| } |
| l.Name = cstring(cmddat[hdr.Name:]) |
| l.Time = hdr.Time |
| l.CurrentVersion = hdr.CurrentVersion |
| l.CompatVersion = hdr.CompatVersion |
| l.LoadBytes = LoadBytes(cmddat) |
| f.Loads = append(f.Loads, l) |
| |
| case LoadCmdSymtab: |
| var hdr SymtabCmd |
| b := bytes.NewReader(cmddat) |
| if err := binary.Read(b, bo, &hdr); err != nil { |
| return nil, err |
| } |
| strtab := make([]byte, hdr.Strsize) |
| if _, err := r.ReadAt(strtab, int64(hdr.Stroff)); err != nil { |
| return nil, err |
| } |
| var symsz int |
| if f.Magic == Magic64 { |
| symsz = 16 |
| } else { |
| symsz = 12 |
| } |
| symdat, err := saferio.ReadDataAt(r, uint64(hdr.Nsyms)*uint64(symsz), int64(hdr.Symoff)) |
| if err != nil { |
| return nil, err |
| } |
| st, err := f.parseSymtab(symdat, strtab, cmddat, &hdr, offset) |
| if err != nil { |
| return nil, err |
| } |
| f.Loads = append(f.Loads, st) |
| f.Symtab = st |
| |
| case LoadCmdDysymtab: |
| var hdr DysymtabCmd |
| b := bytes.NewReader(cmddat) |
| if err := binary.Read(b, bo, &hdr); err != nil { |
| return nil, err |
| } |
| if hdr.Iundefsym > uint32(len(f.Symtab.Syms)) { |
| return nil, &FormatError{offset, fmt.Sprintf( |
| "undefined symbols index in dynamic symbol table command is greater than symbol table length (%d > %d)", |
| hdr.Iundefsym, len(f.Symtab.Syms)), nil} |
| } else if hdr.Iundefsym+hdr.Nundefsym > uint32(len(f.Symtab.Syms)) { |
| return nil, &FormatError{offset, fmt.Sprintf( |
| "number of undefined symbols after index in dynamic symbol table command is greater than symbol table length (%d > %d)", |
| hdr.Iundefsym+hdr.Nundefsym, len(f.Symtab.Syms)), nil} |
| } |
| dat := make([]byte, hdr.Nindirectsyms*4) |
| if _, err := r.ReadAt(dat, int64(hdr.Indirectsymoff)); err != nil { |
| return nil, err |
| } |
| x := make([]uint32, hdr.Nindirectsyms) |
| if err := binary.Read(bytes.NewReader(dat), bo, x); err != nil { |
| return nil, err |
| } |
| st := new(Dysymtab) |
| st.LoadBytes = LoadBytes(cmddat) |
| st.DysymtabCmd = hdr |
| st.IndirectSyms = x |
| f.Loads = append(f.Loads, st) |
| f.Dysymtab = st |
| |
| case LoadCmdSegment: |
| var seg32 Segment32 |
| b := bytes.NewReader(cmddat) |
| if err := binary.Read(b, bo, &seg32); err != nil { |
| return nil, err |
| } |
| s = new(Segment) |
| s.LoadBytes = cmddat |
| s.Cmd = cmd |
| s.Len = siz |
| s.Name = cstring(seg32.Name[0:]) |
| s.Addr = uint64(seg32.Addr) |
| s.Memsz = uint64(seg32.Memsz) |
| s.Offset = uint64(seg32.Offset) |
| s.Filesz = uint64(seg32.Filesz) |
| s.Maxprot = seg32.Maxprot |
| s.Prot = seg32.Prot |
| s.Nsect = seg32.Nsect |
| s.Flag = seg32.Flag |
| f.Loads = append(f.Loads, s) |
| for i := 0; i < int(s.Nsect); i++ { |
| var sh32 Section32 |
| if err := binary.Read(b, bo, &sh32); err != nil { |
| return nil, err |
| } |
| sh := new(Section) |
| sh.Name = cstring(sh32.Name[0:]) |
| sh.Seg = cstring(sh32.Seg[0:]) |
| sh.Addr = uint64(sh32.Addr) |
| sh.Size = uint64(sh32.Size) |
| sh.Offset = sh32.Offset |
| sh.Align = sh32.Align |
| sh.Reloff = sh32.Reloff |
| sh.Nreloc = sh32.Nreloc |
| sh.Flags = sh32.Flags |
| if err := f.pushSection(sh, r); err != nil { |
| return nil, err |
| } |
| } |
| |
| case LoadCmdSegment64: |
| var seg64 Segment64 |
| b := bytes.NewReader(cmddat) |
| if err := binary.Read(b, bo, &seg64); err != nil { |
| return nil, err |
| } |
| s = new(Segment) |
| s.LoadBytes = cmddat |
| s.Cmd = cmd |
| s.Len = siz |
| s.Name = cstring(seg64.Name[0:]) |
| s.Addr = seg64.Addr |
| s.Memsz = seg64.Memsz |
| s.Offset = seg64.Offset |
| s.Filesz = seg64.Filesz |
| s.Maxprot = seg64.Maxprot |
| s.Prot = seg64.Prot |
| s.Nsect = seg64.Nsect |
| s.Flag = seg64.Flag |
| f.Loads = append(f.Loads, s) |
| for i := 0; i < int(s.Nsect); i++ { |
| var sh64 Section64 |
| if err := binary.Read(b, bo, &sh64); err != nil { |
| return nil, err |
| } |
| sh := new(Section) |
| sh.Name = cstring(sh64.Name[0:]) |
| sh.Seg = cstring(sh64.Seg[0:]) |
| sh.Addr = sh64.Addr |
| sh.Size = sh64.Size |
| sh.Offset = sh64.Offset |
| sh.Align = sh64.Align |
| sh.Reloff = sh64.Reloff |
| sh.Nreloc = sh64.Nreloc |
| sh.Flags = sh64.Flags |
| if err := f.pushSection(sh, r); err != nil { |
| return nil, err |
| } |
| } |
| } |
| if s != nil { |
| if int64(s.Offset) < 0 { |
| return nil, &FormatError{offset, "invalid section offset", s.Offset} |
| } |
| if int64(s.Filesz) < 0 { |
| return nil, &FormatError{offset, "invalid section file size", s.Filesz} |
| } |
| s.sr = io.NewSectionReader(r, int64(s.Offset), int64(s.Filesz)) |
| s.ReaderAt = s.sr |
| } |
| } |
| return f, nil |
| } |
| |
| func (f *File) parseSymtab(symdat, strtab, cmddat []byte, hdr *SymtabCmd, offset int64) (*Symtab, error) { |
| bo := f.ByteOrder |
| c := saferio.SliceCap((*Symbol)(nil), uint64(hdr.Nsyms)) |
| if c < 0 { |
| return nil, &FormatError{offset, "too many symbols", nil} |
| } |
| symtab := make([]Symbol, 0, c) |
| b := bytes.NewReader(symdat) |
| for i := 0; i < int(hdr.Nsyms); i++ { |
| var n Nlist64 |
| if f.Magic == Magic64 { |
| if err := binary.Read(b, bo, &n); err != nil { |
| return nil, err |
| } |
| } else { |
| var n32 Nlist32 |
| if err := binary.Read(b, bo, &n32); err != nil { |
| return nil, err |
| } |
| n.Name = n32.Name |
| n.Type = n32.Type |
| n.Sect = n32.Sect |
| n.Desc = n32.Desc |
| n.Value = uint64(n32.Value) |
| } |
| if n.Name >= uint32(len(strtab)) { |
| return nil, &FormatError{offset, "invalid name in symbol table", n.Name} |
| } |
| // We add "_" to Go symbols. Strip it here. See issue 33808. |
| name := cstring(strtab[n.Name:]) |
| if strings.Contains(name, ".") && name[0] == '_' { |
| name = name[1:] |
| } |
| symtab = append(symtab, Symbol{ |
| Name: name, |
| Type: n.Type, |
| Sect: n.Sect, |
| Desc: n.Desc, |
| Value: n.Value, |
| }) |
| } |
| st := new(Symtab) |
| st.LoadBytes = LoadBytes(cmddat) |
| st.Syms = symtab |
| return st, nil |
| } |
| |
// relocInfo is the 8-byte on-disk form of a relocation entry, read by
// pushSection and then unpacked into a Reloc.
type relocInfo struct {
	Addr   uint32 // first word; the top bit marks a scattered relocation
	Symnum uint32 // second word; packed bit fields, or the value for a scattered relocation
}
| |
| func (f *File) pushSection(sh *Section, r io.ReaderAt) error { |
| f.Sections = append(f.Sections, sh) |
| sh.sr = io.NewSectionReader(r, int64(sh.Offset), int64(sh.Size)) |
| sh.ReaderAt = sh.sr |
| |
| if sh.Nreloc > 0 { |
| reldat, err := saferio.ReadDataAt(r, uint64(sh.Nreloc)*8, int64(sh.Reloff)) |
| if err != nil { |
| return err |
| } |
| b := bytes.NewReader(reldat) |
| |
| bo := f.ByteOrder |
| |
| sh.Relocs = make([]Reloc, sh.Nreloc) |
| for i := range sh.Relocs { |
| rel := &sh.Relocs[i] |
| |
| var ri relocInfo |
| if err := binary.Read(b, bo, &ri); err != nil { |
| return err |
| } |
| |
| if ri.Addr&(1<<31) != 0 { // scattered |
| rel.Addr = ri.Addr & (1<<24 - 1) |
| rel.Type = uint8((ri.Addr >> 24) & (1<<4 - 1)) |
| rel.Len = uint8((ri.Addr >> 28) & (1<<2 - 1)) |
| rel.Pcrel = ri.Addr&(1<<30) != 0 |
| rel.Value = ri.Symnum |
| rel.Scattered = true |
| } else { |
| switch bo { |
| case binary.LittleEndian: |
| rel.Addr = ri.Addr |
| rel.Value = ri.Symnum & (1<<24 - 1) |
| rel.Pcrel = ri.Symnum&(1<<24) != 0 |
| rel.Len = uint8((ri.Symnum >> 25) & (1<<2 - 1)) |
| rel.Extern = ri.Symnum&(1<<27) != 0 |
| rel.Type = uint8((ri.Symnum >> 28) & (1<<4 - 1)) |
| case binary.BigEndian: |
| rel.Addr = ri.Addr |
| rel.Value = ri.Symnum >> 8 |
| rel.Pcrel = ri.Symnum&(1<<7) != 0 |
| rel.Len = uint8((ri.Symnum >> 5) & (1<<2 - 1)) |
| rel.Extern = ri.Symnum&(1<<4) != 0 |
| rel.Type = uint8(ri.Symnum & (1<<4 - 1)) |
| default: |
| panic("unreachable") |
| } |
| } |
| } |
| } |
| |
| return nil |
| } |
| |
// cstring returns the bytes of b up to, but not including, the first NUL
// byte as a string. If b contains no NUL byte, all of b is returned.
func cstring(b []byte) string {
	if end := bytes.IndexByte(b, 0); end >= 0 {
		return string(b[:end])
	}
	return string(b)
}
| |
| // Segment returns the first Segment with the given name, or nil if no such segment exists. |
| func (f *File) Segment(name string) *Segment { |
| for _, l := range f.Loads { |
| if s, ok := l.(*Segment); ok && s.Name == name { |
| return s |
| } |
| } |
| return nil |
| } |
| |
| // Section returns the first section with the given name, or nil if no such |
| // section exists. |
| func (f *File) Section(name string) *Section { |
| for _, s := range f.Sections { |
| if s.Name == name { |
| return s |
| } |
| } |
| return nil |
| } |
| |
| // DWARF returns the DWARF debug information for the Mach-O file. |
| func (f *File) DWARF() (*dwarf.Data, error) { |
| dwarfSuffix := func(s *Section) string { |
| switch { |
| case strings.HasPrefix(s.Name, "__debug_"): |
| return s.Name[8:] |
| case strings.HasPrefix(s.Name, "__zdebug_"): |
| return s.Name[9:] |
| default: |
| return "" |
| } |
| |
| } |
| sectionData := func(s *Section) ([]byte, error) { |
| b, err := s.Data() |
| if err != nil && uint64(len(b)) < s.Size { |
| return nil, err |
| } |
| |
| if len(b) >= 12 && string(b[:4]) == "ZLIB" { |
| dlen := binary.BigEndian.Uint64(b[4:12]) |
| dbuf := make([]byte, dlen) |
| r, err := zlib.NewReader(bytes.NewBuffer(b[12:])) |
| if err != nil { |
| return nil, err |
| } |
| if _, err := io.ReadFull(r, dbuf); err != nil { |
| return nil, err |
| } |
| if err := r.Close(); err != nil { |
| return nil, err |
| } |
| b = dbuf |
| } |
| return b, nil |
| } |
| |
| // There are many other DWARF sections, but these |
| // are the ones the debug/dwarf package uses. |
| // Don't bother loading others. |
| var dat = map[string][]byte{"abbrev": nil, "info": nil, "str": nil, "line": nil, "ranges": nil} |
| for _, s := range f.Sections { |
| suffix := dwarfSuffix(s) |
| if suffix == "" { |
| continue |
| } |
| if _, ok := dat[suffix]; !ok { |
| continue |
| } |
| b, err := sectionData(s) |
| if err != nil { |
| return nil, err |
| } |
| dat[suffix] = b |
| } |
| |
| d, err := dwarf.New(dat["abbrev"], nil, nil, dat["info"], dat["line"], nil, dat["ranges"], dat["str"]) |
| if err != nil { |
| return nil, err |
| } |
| |
| // Look for DWARF4 .debug_types sections and DWARF5 sections. |
| for i, s := range f.Sections { |
| suffix := dwarfSuffix(s) |
| if suffix == "" { |
| continue |
| } |
| if _, ok := dat[suffix]; ok { |
| // Already handled. |
| continue |
| } |
| |
| b, err := sectionData(s) |
| if err != nil { |
| return nil, err |
| } |
| |
| if suffix == "types" { |
| err = d.AddTypes(fmt.Sprintf("types-%d", i), b) |
| } else { |
| err = d.AddSection(".debug_"+suffix, b) |
| } |
| if err != nil { |
| return nil, err |
| } |
| } |
| |
| return d, nil |
| } |
| |
| // ImportedSymbols returns the names of all symbols |
| // referred to by the binary f that are expected to be |
| // satisfied by other libraries at dynamic load time. |
| func (f *File) ImportedSymbols() ([]string, error) { |
| if f.Dysymtab == nil || f.Symtab == nil { |
| return nil, &FormatError{0, "missing symbol table", nil} |
| } |
| |
| st := f.Symtab |
| dt := f.Dysymtab |
| var all []string |
| for _, s := range st.Syms[dt.Iundefsym : dt.Iundefsym+dt.Nundefsym] { |
| all = append(all, s.Name) |
| } |
| return all, nil |
| } |
| |
| // ImportedLibraries returns the paths of all libraries |
| // referred to by the binary f that are expected to be |
| // linked with the binary at dynamic link time. |
| func (f *File) ImportedLibraries() ([]string, error) { |
| var all []string |
| for _, l := range f.Loads { |
| if lib, ok := l.(*Dylib); ok { |
| all = append(all, lib.Name) |
| } |
| } |
| return all, nil |
| } |