| // Copyright 2014 Google Inc. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| // Package elfexec provides utility routines to examine ELF binaries. |
| package elfexec |
| |
| import ( |
| "bufio" |
| "debug/elf" |
| "encoding/binary" |
| "fmt" |
| "io" |
| ) |
| |
| const ( |
| maxNoteSize = 1 << 20 // in bytes |
| noteTypeGNUBuildID = 3 |
| ) |
| |
| // elfNote is the payload of a Note Section in an ELF file. |
| type elfNote struct { |
| Name string // Contents of the "name" field, omitting the trailing zero byte. |
| Desc []byte // Contents of the "desc" field. |
| Type uint32 // Contents of the "type" field. |
| } |
| |
| // parseNotes returns the notes from a SHT_NOTE section or PT_NOTE segment. |
| func parseNotes(reader io.Reader, alignment int, order binary.ByteOrder) ([]elfNote, error) { |
| r := bufio.NewReader(reader) |
| |
| // padding returns the number of bytes required to pad the given size to an |
| // alignment boundary. |
| padding := func(size int) int { |
| return ((size + (alignment - 1)) &^ (alignment - 1)) - size |
| } |
| |
| var notes []elfNote |
| for { |
| noteHeader := make([]byte, 12) // 3 4-byte words |
| if _, err := io.ReadFull(r, noteHeader); err == io.EOF { |
| break |
| } else if err != nil { |
| return nil, err |
| } |
| namesz := order.Uint32(noteHeader[0:4]) |
| descsz := order.Uint32(noteHeader[4:8]) |
| typ := order.Uint32(noteHeader[8:12]) |
| |
| if uint64(namesz) > uint64(maxNoteSize) { |
| return nil, fmt.Errorf("note name too long (%d bytes)", namesz) |
| } |
| var name string |
| if namesz > 0 { |
| // Documentation differs as to whether namesz is meant to include the |
| // trailing zero, but everyone agrees that name is null-terminated. |
| // So we'll just determine the actual length after the fact. |
| var err error |
| name, err = r.ReadString('\x00') |
| if err == io.EOF { |
| return nil, fmt.Errorf("missing note name (want %d bytes)", namesz) |
| } else if err != nil { |
| return nil, err |
| } |
| namesz = uint32(len(name)) |
| name = name[:len(name)-1] |
| } |
| |
| // Drop padding bytes until the desc field. |
| for n := padding(len(noteHeader) + int(namesz)); n > 0; n-- { |
| if _, err := r.ReadByte(); err == io.EOF { |
| return nil, fmt.Errorf( |
| "missing %d bytes of padding after note name", n) |
| } else if err != nil { |
| return nil, err |
| } |
| } |
| |
| if uint64(descsz) > uint64(maxNoteSize) { |
| return nil, fmt.Errorf("note desc too long (%d bytes)", descsz) |
| } |
| desc := make([]byte, int(descsz)) |
| if _, err := io.ReadFull(r, desc); err == io.EOF { |
| return nil, fmt.Errorf("missing desc (want %d bytes)", len(desc)) |
| } else if err != nil { |
| return nil, err |
| } |
| |
| notes = append(notes, elfNote{Name: name, Desc: desc, Type: typ}) |
| |
| // Drop padding bytes until the next note or the end of the section, |
| // whichever comes first. |
| for n := padding(len(desc)); n > 0; n-- { |
| if _, err := r.ReadByte(); err == io.EOF { |
| // We hit the end of the section before an alignment boundary. |
| // This can happen if this section is at the end of the file or the next |
| // section has a smaller alignment requirement. |
| break |
| } else if err != nil { |
| return nil, err |
| } |
| } |
| } |
| return notes, nil |
| } |
| |
| // GetBuildID returns the GNU build-ID for an ELF binary. |
| // |
| // If no build-ID was found but the binary was read without error, it returns |
| // (nil, nil). |
| func GetBuildID(binary io.ReaderAt) ([]byte, error) { |
| f, err := elf.NewFile(binary) |
| if err != nil { |
| return nil, err |
| } |
| |
| findBuildID := func(notes []elfNote) ([]byte, error) { |
| var buildID []byte |
| for _, note := range notes { |
| if note.Name == "GNU" && note.Type == noteTypeGNUBuildID { |
| if buildID == nil { |
| buildID = note.Desc |
| } else { |
| return nil, fmt.Errorf("multiple build ids found, don't know which to use") |
| } |
| } |
| } |
| return buildID, nil |
| } |
| |
| for _, p := range f.Progs { |
| if p.Type != elf.PT_NOTE { |
| continue |
| } |
| notes, err := parseNotes(p.Open(), int(p.Align), f.ByteOrder) |
| if err != nil { |
| return nil, err |
| } |
| if b, err := findBuildID(notes); b != nil || err != nil { |
| return b, err |
| } |
| } |
| for _, s := range f.Sections { |
| if s.Type != elf.SHT_NOTE { |
| continue |
| } |
| notes, err := parseNotes(s.Open(), int(s.Addralign), f.ByteOrder) |
| if err != nil { |
| return nil, err |
| } |
| if b, err := findBuildID(notes); b != nil || err != nil { |
| return b, err |
| } |
| } |
| return nil, nil |
| } |
| |
| // GetBase determines the base address to subtract from virtual |
| // address to get symbol table address. For an executable, the base |
| // is 0. Otherwise, it's a shared library, and the base is the |
| // address where the mapping starts. The kernel is special, and may |
| // use the address of the _stext symbol as the mmap start. _stext |
| // offset can be obtained with `nm vmlinux | grep _stext` |
| func GetBase(fh *elf.FileHeader, loadSegment *elf.ProgHeader, stextOffset *uint64, start, limit, offset uint64) (uint64, error) { |
| const ( |
| pageSize = 4096 |
| // PAGE_OFFSET for PowerPC64, see arch/powerpc/Kconfig in the kernel sources. |
| pageOffsetPpc64 = 0xc000000000000000 |
| ) |
| |
| if start == 0 && offset == 0 && (limit == ^uint64(0) || limit == 0) { |
| // Some tools may introduce a fake mapping that spans the entire |
| // address space. Assume that the address has already been |
| // adjusted, so no additional base adjustment is necessary. |
| return 0, nil |
| } |
| |
| switch fh.Type { |
| case elf.ET_EXEC: |
| if loadSegment == nil { |
| // Assume fixed-address executable and so no adjustment. |
| return 0, nil |
| } |
| if stextOffset == nil && start > 0 && start < 0x8000000000000000 { |
| // A regular user-mode executable. Compute the base offset using same |
| // arithmetics as in ET_DYN case below, see the explanation there. |
| // Ideally, the condition would just be "stextOffset == nil" as that |
| // represents the address of _stext symbol in the vmlinux image. Alas, |
| // the caller may skip reading it from the binary (it's expensive to scan |
| // all the symbols) and so it may be nil even for the kernel executable. |
| // So additionally check that the start is within the user-mode half of |
| // the 64-bit address space. |
| return start - offset + loadSegment.Off - loadSegment.Vaddr, nil |
| } |
| // Various kernel heuristics and cases follow. |
| if loadSegment.Vaddr == start-offset { |
| return offset, nil |
| } |
| if start == 0 && limit != 0 { |
| // ChromeOS remaps its kernel to 0. Nothing else should come |
| // down this path. Empirical values: |
| // VADDR=0xffffffff80200000 |
| // stextOffset=0xffffffff80200198 |
| if stextOffset != nil { |
| return -*stextOffset, nil |
| } |
| return -loadSegment.Vaddr, nil |
| } |
| if start >= loadSegment.Vaddr && limit > start && (offset == 0 || offset == pageOffsetPpc64 || offset == start) { |
| // Some kernels look like: |
| // VADDR=0xffffffff80200000 |
| // stextOffset=0xffffffff80200198 |
| // Start=0xffffffff83200000 |
| // Limit=0xffffffff84200000 |
| // Offset=0 (0xc000000000000000 for PowerPC64) (== Start for ASLR kernel) |
| // So the base should be: |
| if stextOffset != nil && (start%pageSize) == (*stextOffset%pageSize) { |
| // perf uses the address of _stext as start. Some tools may |
| // adjust for this before calling GetBase, in which case the page |
| // alignment should be different from that of stextOffset. |
| return start - *stextOffset, nil |
| } |
| |
| return start - loadSegment.Vaddr, nil |
| } else if start%pageSize != 0 && stextOffset != nil && *stextOffset%pageSize == start%pageSize { |
| // ChromeOS remaps its kernel to 0 + start%pageSize. Nothing |
| // else should come down this path. Empirical values: |
| // start=0x198 limit=0x2f9fffff offset=0 |
| // VADDR=0xffffffff81000000 |
| // stextOffset=0xffffffff81000198 |
| return start - *stextOffset, nil |
| } |
| |
| return 0, fmt.Errorf("don't know how to handle EXEC segment: %v start=0x%x limit=0x%x offset=0x%x", *loadSegment, start, limit, offset) |
| case elf.ET_REL: |
| if offset != 0 { |
| return 0, fmt.Errorf("don't know how to handle mapping.Offset") |
| } |
| return start, nil |
| case elf.ET_DYN: |
| // The process mapping information, start = start of virtual address range, |
| // and offset = offset in the executable file of the start address, tells us |
| // that a runtime virtual address x maps to a file offset |
| // fx = x - start + offset. |
| if loadSegment == nil { |
| return start - offset, nil |
| } |
| // The program header, if not nil, indicates the offset in the file where |
| // the executable segment is located (loadSegment.Off), and the base virtual |
| // address where the first byte of the segment is loaded |
| // (loadSegment.Vaddr). A file offset fx maps to a virtual (symbol) address |
| // sx = fx - loadSegment.Off + loadSegment.Vaddr. |
| // |
| // Thus, a runtime virtual address x maps to a symbol address |
| // sx = x - start + offset - loadSegment.Off + loadSegment.Vaddr. |
| return start - offset + loadSegment.Off - loadSegment.Vaddr, nil |
| } |
| return 0, fmt.Errorf("don't know how to handle FileHeader.Type %v", fh.Type) |
| } |
| |
| // FindTextProgHeader finds the program segment header containing the .text |
| // section or nil if the segment cannot be found. |
| func FindTextProgHeader(f *elf.File) *elf.ProgHeader { |
| for _, s := range f.Sections { |
| if s.Name == ".text" { |
| // Find the LOAD segment containing the .text section. |
| for _, p := range f.Progs { |
| if p.Type == elf.PT_LOAD && p.Flags&elf.PF_X != 0 && s.Addr >= p.Vaddr && s.Addr < p.Vaddr+p.Memsz { |
| return &p.ProgHeader |
| } |
| } |
| } |
| } |
| return nil |
| } |
| |
| // FindProgHeaderForMapping returns the loadable program segment header that is |
| // fully contained in the runtime mapping with file offset pgoff and memory size |
| // memsz, or an error if the segment cannot be determined. The function returns |
| // a nil program header and no error if the ELF binary has no loadable segments. |
| func FindProgHeaderForMapping(f *elf.File, pgoff, memsz uint64) (*elf.ProgHeader, error) { |
| var headers []*elf.ProgHeader |
| loadables := 0 |
| for _, p := range f.Progs { |
| if p.Type == elf.PT_LOAD && pgoff <= p.Off && p.Off+p.Memsz <= pgoff+memsz { |
| headers = append(headers, &p.ProgHeader) |
| } |
| if p.Type == elf.PT_LOAD { |
| loadables++ |
| } |
| } |
| if len(headers) == 1 { |
| return headers[0], nil |
| } |
| // Some ELF files don't contain any program segments, e.g. .ko loadable kernel |
| // modules. Don't return an error in such cases. |
| if loadables == 0 { |
| return nil, nil |
| } |
| if len(headers) == 0 { |
| return nil, fmt.Errorf("no program header matches file offset %x and memory size %x", pgoff, memsz) |
| } |
| |
| // Segments are mapped page aligned. In some cases, segments may be smaller |
| // than a page, which causes the next segment to start at a file offset that |
| // is logically on the same page if we were to align file offsets by page. |
| // Example: |
| // LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000 |
| // 0x00000000000006fc 0x00000000000006fc R E 0x200000 |
| // LOAD 0x0000000000000e10 0x0000000000600e10 0x0000000000600e10 |
| // 0x0000000000000230 0x0000000000000238 RW 0x200000 |
| // |
| // In this case, perf records the following mappings for this executable: |
| // 0 0 [0xc0]: PERF_RECORD_MMAP2 87867/87867: [0x400000(0x1000) @ 0 00:3c 512041 0]: r-xp exename |
| // 0 0 [0xc0]: PERF_RECORD_MMAP2 87867/87867: [0x600000(0x2000) @ 0 00:3c 512041 0]: rw-p exename |
| // |
| // Both mappings have file offset 0. The first mapping is one page length and |
| // it can include only the first loadable segment. Due to page alignment, the |
| // second mapping starts also at file offset 0, and it spans two pages. It can |
| // include both the first and the second loadable segments. We must return the |
| // correct program header to compute the correct base offset. |
| // |
| // We cannot use the mapping protections to distinguish between segments, |
| // because protections are not passed through to this function. |
| // We cannot use the start address to differentiate between segments, because |
| // with ASLR, the mapping start address can be any value. |
| // |
| // We use a heuristic to compute the minimum mapping size required for a |
| // segment, assuming mappings are 4k page aligned, and return the segment that |
| // matches the given mapping size. |
| const pageSize = 4096 |
| |
| // The memory size based heuristic makes sense only if the mapping size is a |
| // multiple of 4k page size. |
| if memsz%pageSize != 0 { |
| return nil, fmt.Errorf("mapping size = %x and %d segments match the passed in mapping", memsz, len(headers)) |
| } |
| |
| // Return an error if no segment, or multiple segments match the size, so we can debug. |
| var ph *elf.ProgHeader |
| pageMask := ^uint64(pageSize - 1) |
| for _, h := range headers { |
| wantSize := (h.Vaddr+h.Memsz+pageSize-1)&pageMask - (h.Vaddr & pageMask) |
| if wantSize != memsz { |
| continue |
| } |
| if ph != nil { |
| return nil, fmt.Errorf("found second program header (%#v) that matches memsz %x, first program header is %#v", *h, memsz, *ph) |
| } |
| ph = h |
| } |
| if ph == nil { |
| return nil, fmt.Errorf("found %d matching program headers, but none matches mapping size %x", len(headers), memsz) |
| } |
| return ph, nil |
| } |