ogle: use the DWARF PC/line table instead of the gosym one
New code in debug/dwarf/line.go to interpret the .debug_line section.
LGTM=nigeltao
R=nigeltao
https://golang.org/cl/89060043
diff --git a/debug/dwarf/line.go b/debug/dwarf/line.go
new file mode 100644
index 0000000..5a23922
--- /dev/null
+++ b/debug/dwarf/line.go
@@ -0,0 +1,421 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dwarf
+
+// Mapping from PC to lines.
+// http://www.dwarfstd.org/doc/DWARF4.pdf Section 6.2 page 108
+
+// TODO: Convert the I/O to use the buffer interface defined in buf.go.
+// TODO: Find a way to test this properly.
+
+import (
+ "encoding/binary"
+ "fmt"
+)
+
+// PCToLine returns the file and line number corresponding to the PC value.
+// If a correspondence cannot be found, err will be non-nil and file and line
+// will be zero. A missing line table and a file index that falls outside the
+// unit's file table are reported as errors as well.
+// The file name is returned exactly as stored in the line table header; it is
+// not joined with its include directory.
+// TODO: Return a function descriptor as well.
+func (d *Data) PCToLine(pc uint64) (file string, line int, err error) {
+	if len(d.line) == 0 {
+		// Without line table data nothing can be resolved. Say so instead
+		// of handing back an empty file name together with a nil error.
+		return "", 0, fmt.Errorf("DWARF: no PC/line table available")
+	}
+	var m lineMachine
+	// Each iteration evaluates one compilation unit and yields the offset at
+	// which the next one starts.
+	for offset := 0; offset < len(d.line); {
+		var found bool
+		offset, found, err = m.evalCompilationUnit(d.line, offset, pc)
+		if err != nil {
+			return "", 0, err
+		}
+		if !found {
+			continue
+		}
+		// The file register comes straight from the table; make sure it
+		// really names an entry before indexing with it.
+		if m.file >= uint64(len(m.prologue.file)) {
+			return "", 0, fmt.Errorf("DWARF: file index %d out of range for PC %#x", m.file, pc)
+		}
+		return m.prologue.file[m.file].name, int(m.line), nil
+	}
+	return "", 0, fmt.Errorf("no source line defined for PC %#x", pc)
+}
+
+// Standard opcodes. Figure 37, page 178.
+// The values are consecutive, starting at 0x01.
+// If an opcode >= lineMachine.prologue.opcodeBase, it is a special
+// opcode rather than the opcode defined in this table.
+const (
+	lineStdCopy             = iota + 1 // 0x01
+	lineStdAdvancePC                   // 0x02
+	lineStdAdvanceLine                 // 0x03
+	lineStdSetFile                     // 0x04
+	lineStdSetColumn                   // 0x05
+	lineStdNegateStmt                  // 0x06
+	lineStdSetBasicBlock               // 0x07
+	lineStdConstAddPC                  // 0x08
+	lineStdFixedAdvancePC              // 0x09
+	lineStdSetPrologueEnd              // 0x0a
+	lineStdSetEpilogueBegin            // 0x0b
+	lineStdSetISA                      // 0x0c
+)
+
+// Extended opcodes. Figure 38, page 179.
+// An extended opcode follows the lineStartExtendedOpcode byte and a length.
+const (
+	lineStartExtendedOpcode = iota // 0x00; not defined as a named constant in the spec.
+	lineExtEndSequence             // 0x01
+	lineExtSetAddress              // 0x02
+	lineExtDefineFile              // 0x03
+	lineExtSetDiscriminator        // 0x04; new in version 4.
+
+	// First and last opcode of the user-defined range.
+	lineExtLoUser = 0x80
+	lineExtHiUser = 0xff
+)
+
+// linePrologue holds the information stored in the prologue of the line
+// table for a single compilation unit. Also called the header.
+// Section 6.2.4, page 112.
+// The fields are filled in by lineMachine.parseLinePrologue and are listed
+// in the order in which that function reads them from the table.
+type linePrologue struct {
+ unitLength int // 32-bit value read from the first four bytes of the unit
+ version int // 16-bit version number; selects whether maxOpsPerInstruction is present
+ headerLength int // 32-bit value read after the version; stored but not otherwise used in this file
+ minInstructionLength int // one byte; scales every address advance
+ maxOpsPerInstruction int // one byte, read only when version >= 4; otherwise set to 1
+ defaultIsStmt bool // initial value of the machine's isStmt register
+ lineBase int // signed byte; smallest line advance a special opcode can encode
+ lineRange int // one byte; divisor splitting a special opcode into address and line advances
+ opcodeBase byte // opcodes >= opcodeBase are special opcodes
+ stdOpcodeLengths []byte // opcodeBase-1 bytes copied from the header, one per standard opcode
+ include []string // entry 0 is empty; means current directory
+ file []lineFile // entry 0 is empty.
+}
+
+// lineFile represents a file name stored in the PC/line table, usually the prologue.
+// In the prologue each entry is a NUL-terminated name followed by three
+// ULEB128 values, which are stored here in the order they are read.
+type lineFile struct {
+ name string // file name as stored in the table, without the terminating NUL
+ index int // index into include directories
+ time int // implementation-defined time of last modification
+ length int // length in bytes, 0 if not available.
+}
+
+// lineMachine holds the registers evaluated during execution of the PC/line mapping engine.
+// Section 6.2.2, page 109.
+// The reset method puts the registers into their initial state: file and
+// line are 1, isStmt is the prologue's defaultIsStmt, and all other
+// registers are zero or false.
+type lineMachine struct {
+ // The program-counter value corresponding to a machine instruction generated by the compiler.
+ address uint64
+
+ // An unsigned integer representing the index of an operation within a VLIW
+ // instruction. The index of the first operation is 0. For non-VLIW
+ // architectures, this register will always be 0.
+ // The address and op_index registers, taken together, form an operation
+ // pointer that can reference any individual operation with the instruction
+ // stream.
+ opIndex uint64
+
+ // An unsigned integer indicating the identity of the source file corresponding to a machine instruction.
+ // It is used as an index into prologue.file.
+ file uint64
+
+ // An unsigned integer indicating a source line number. Lines are numbered
+ // beginning at 1. The compiler may emit the value 0 in cases where an
+ // instruction cannot be attributed to any source line.
+ line uint64
+
+ // An unsigned integer indicating a column number within a source line.
+ // Columns are numbered beginning at 1. The value 0 is reserved to indicate
+ // that a statement begins at the “left edge” of the line.
+ column uint64
+
+ // A boolean indicating that the current instruction is a recommended
+ // breakpoint location. A recommended breakpoint location is intended to
+ // “represent” a line, a statement and/or a semantically distinct subpart of a
+ // statement.
+ isStmt bool
+
+ // A boolean indicating that the current instruction is the beginning of a basic
+ // block.
+ basicBlock bool
+
+ // A boolean indicating that the current address is that of the first byte after
+ // the end of a sequence of target machine instructions. end_sequence
+ // terminates a sequence of lines; therefore other information in the same
+ // row is not meaningful.
+ endSequence bool
+
+ // A boolean indicating that the current address is one (of possibly many)
+ // where execution should be suspended for an entry breakpoint of a
+ // function.
+ prologueEnd bool
+
+ // A boolean indicating that the current address is one (of possibly many)
+ // where execution should be suspended for an exit breakpoint of a function.
+ epilogueBegin bool
+
+ // An unsigned integer whose value encodes the applicable instruction set
+ // architecture for the current instruction.
+ // The encoding of instruction sets should be shared by all users of a given
+ // architecture. It is recommended that this encoding be defined by the ABI
+ // authoring committee for each architecture.
+ isa uint64
+
+ // An unsigned integer identifying the block to which the current instruction
+ // belongs. Discriminator values are assigned arbitrarily by the DWARF
+ // producer and serve to distinguish among multiple blocks that may all be
+ // associated with the same source file, line, and column. Where only one
+ // block exists for a given source position, the discriminator value should be
+ // zero.
+ discriminator uint64
+
+ // The prologue for the current compilation unit.
+ // Not an actual register, but stored here for cleanliness.
+ prologue linePrologue
+}
+
+// parseLinePrologue parses the prologue/header describing the compilation
+// unit in the line table starting at the specified offset and stores the
+// result in m.prologue.
+// It returns the offset of the first byte after the header's file table,
+// which is where the caller starts evaluating the line number program.
+// On error the returned offset is 0.
+// Every read is checked against the extent of the unit, so a truncated or
+// corrupt table produces an error rather than an index-out-of-range panic.
+func (m *lineMachine) parseLinePrologue(data []byte, offset int) (int, error) {
+	// TODO: Assumes little endian
+	m.prologue = linePrologue{}
+	errShort := fmt.Errorf("DWARF: truncated PC/line header")
+
+	if offset < 0 || len(data)-offset < 4 {
+		return 0, errShort
+	}
+	m.prologue.unitLength = int(binary.LittleEndian.Uint32(data[offset:]))
+	offset += 4
+	// unitLength counts the bytes after the length field, so it has to fit
+	// in what is left of the table beyond this unit's own length field, not
+	// merely in the table as a whole.
+	if m.prologue.unitLength < 0 || m.prologue.unitLength > len(data)-offset {
+		return 0, fmt.Errorf("DWARF: bad PC/line header length")
+	}
+	// Confine all further reads to this unit.
+	data = data[:offset+m.prologue.unitLength]
+
+	// version (2 bytes), header length (4 bytes), minimum instruction length (1 byte).
+	if len(data)-offset < 2+4+1 {
+		return 0, errShort
+	}
+	m.prologue.version = int(binary.LittleEndian.Uint16(data[offset:]))
+	offset += 2
+	m.prologue.headerLength = int(binary.LittleEndian.Uint32(data[offset:]))
+	offset += 4
+	m.prologue.minInstructionLength = int(data[offset])
+	offset++
+
+	// The maximum-operations byte is only present from version 4 on.
+	m.prologue.maxOpsPerInstruction = 1
+	if m.prologue.version >= 4 {
+		if offset >= len(data) {
+			return 0, errShort
+		}
+		m.prologue.maxOpsPerInstruction = int(data[offset])
+		offset++
+	}
+
+	// default_is_stmt, line_base, line_range, opcode_base: one byte each.
+	if len(data)-offset < 4 {
+		return 0, errShort
+	}
+	m.prologue.defaultIsStmt = data[offset] != 0
+	m.prologue.lineBase = int(int8(data[offset+1]))
+	m.prologue.lineRange = int(data[offset+2])
+	m.prologue.opcodeBase = data[offset+3]
+	offset += 4
+
+	// Both values are used as divisors when opcodes are evaluated.
+	if m.prologue.lineRange == 0 || m.prologue.maxOpsPerInstruction == 0 {
+		return 0, fmt.Errorf("DWARF: bad PC/line header: zero line range or maximum operations per instruction")
+	}
+	// opcodeBase-1 is computed below; a zero opcodeBase would wrap around.
+	if m.prologue.opcodeBase == 0 {
+		return 0, fmt.Errorf("DWARF: bad PC/line header: zero opcode base")
+	}
+	numStd := int(m.prologue.opcodeBase) - 1
+	if len(data)-offset < numStd {
+		return 0, errShort
+	}
+	m.prologue.stdOpcodeLengths = make([]byte, numStd)
+	copy(m.prologue.stdOpcodeLengths, data[offset:])
+	offset += numStd
+
+	// readString consumes a NUL-terminated string at offset. It reports
+	// false if the unit ends before the terminating NUL.
+	readString := func() (string, bool) {
+		start := offset
+		for offset < len(data) && data[offset] != 0 {
+			offset++
+		}
+		if offset >= len(data) {
+			return "", false
+		}
+		s := string(data[start:offset])
+		offset++ // terminal NUL
+		return s, true
+	}
+
+	// Includes: a list of strings closed by an empty string.
+	m.prologue.include = make([]string, 1) // First entry is empty; file index entries are 1-indexed.
+	for {
+		dir, ok := readString()
+		if !ok {
+			return 0, errShort
+		}
+		if dir == "" {
+			break
+		}
+		m.prologue.include = append(m.prologue.include, dir)
+	}
+
+	// Files: a list of (name, directory index, time, length) entries closed
+	// by an empty name.
+	m.prologue.file = make([]lineFile, 1, 10) // entries are 1-indexed in line number program.
+	for {
+		name, ok := readString()
+		if !ok {
+			return 0, errShort
+		}
+		if name == "" {
+			break
+		}
+		var fields [3]uint64 // directory index, modification time, length
+		for i := range fields {
+			if offset >= len(data) {
+				return 0, errShort
+			}
+			v, w := uleb128(data[offset:])
+			fields[i] = v
+			offset += w
+		}
+		m.prologue.file = append(m.prologue.file, lineFile{
+			name:   name,
+			index:  int(fields[0]),
+			time:   int(fields[1]),
+			length: int(fields[2]),
+		})
+	}
+	return offset, nil
+}
+
+// specialOpcode applies a special opcode to the machine's registers.
+// Special opcodes, page 117.
+// The opcode is rebased against prologue.opcodeBase. The quotient of the
+// rebased value by lineRange is the operation advance applied to address and
+// opIndex; the remainder plus lineBase is the signed amount added to line.
+// basicBlock, prologueEnd, epilogueBegin and discriminator are then cleared.
+func (m *lineMachine) specialOpcode(opcode byte) {
+	p := &m.prologue
+	adjusted := int(opcode - p.opcodeBase)
+	opAdvance := adjusted / p.lineRange
+
+	// Move the operation pointer (address plus opIndex).
+	totalOps := int(m.opIndex) + opAdvance
+	m.address += uint64(p.minInstructionLength * (totalOps / p.maxOpsPerInstruction))
+	m.opIndex = (m.opIndex + uint64(opAdvance)) % uint64(p.maxOpsPerInstruction)
+
+	// Move the line register.
+	m.line += uint64(p.lineBase + adjusted%p.lineRange)
+
+	m.basicBlock, m.prologueEnd, m.epilogueBegin = false, false, false
+	m.discriminator = 0
+}
+
+// evalCompilationUnit scans the compilation unit starting at the specified offset to see if it contains the PC.
+// It returns when it finds the PC or at the end of the compilation unit.
+//
+// A row of the line table covers the addresses from its own address up to,
+// but not including, the address of the next row of the same sequence. The
+// PC is therefore tested against the previously emitted row each time a new
+// row is emitted (by a copy opcode, a special opcode or end_sequence).
+//
+// The return values are the offset where it stops, whether the PC was found,
+// and an error for malformed or unsupported input. If the PC was found, the
+// machine's address, file, line and column registers hold the matching row.
+// If it was not found, the returned offset is the end of the unit, which is
+// where the next unit's header starts.
+func (m *lineMachine) evalCompilationUnit(data []byte, startOffset int, pc uint64) (int, bool, error) {
+	offset, err := m.parseLinePrologue(data, startOffset)
+	if err != nil {
+		return offset, false, err
+	}
+	// unitLength counts the bytes following its own 4-byte field; use it to
+	// find where this unit ends so evaluation never runs into the next one.
+	if m.prologue.unitLength < 0 || m.prologue.unitLength > len(data)-startOffset-4 {
+		return offset, false, fmt.Errorf("DWARF: bad PC/line header length")
+	}
+	end := startOffset + 4 + m.prologue.unitLength
+	if offset > end {
+		return offset, false, fmt.Errorf("DWARF: PC/line header overruns its unit")
+	}
+	// Both values are used as divisors below and in specialOpcode.
+	if m.prologue.lineRange == 0 || m.prologue.maxOpsPerInstruction == 0 {
+		return offset, false, fmt.Errorf("DWARF: bad PC/line header: zero line range or maximum operations per instruction")
+	}
+	unit := data[:end]
+
+	truncated := func() (int, bool, error) {
+		return offset, false, fmt.Errorf("DWARF: truncated PC/line program")
+	}
+	// readULEB consumes one ULEB128 operand; it reports false at end of unit.
+	readULEB := func() (uint64, bool) {
+		if offset >= len(unit) {
+			return 0, false
+		}
+		v, wid := uleb128(unit[offset:])
+		offset += wid
+		return v, true
+	}
+	// advancePC moves the operation pointer (address, opIndex) forward by
+	// the given operation advance.
+	advancePC := func(advance uint64) {
+		maxOps := uint64(m.prologue.maxOpsPerInstruction)
+		ops := m.opIndex + advance
+		m.address += uint64(m.prologue.minInstructionLength) * (ops / maxOps)
+		m.opIndex = ops % maxOps
+	}
+
+	// prev is the most recently emitted row of the current sequence.
+	type position struct{ address, file, line, column uint64 }
+	var prev position
+	havePrev := false
+	// emitRow records the current registers as a new row. It reports true,
+	// after restoring the previous row's registers, if that previous row
+	// covers pc.
+	emitRow := func() bool {
+		if havePrev && prev.address <= pc && pc < m.address {
+			m.address, m.file, m.line, m.column = prev.address, prev.file, prev.line, prev.column
+			m.endSequence = false
+			return true
+		}
+		prev = position{m.address, m.file, m.line, m.column}
+		// An end_sequence row only marks where the sequence stops; it does
+		// not describe any instruction itself.
+		havePrev = !m.endSequence
+		return false
+	}
+
+	m.reset()
+	for offset < len(unit) {
+		op := unit[offset]
+		offset++
+		if op >= m.prologue.opcodeBase {
+			m.specialOpcode(op)
+			if emitRow() {
+				return offset, true, nil
+			}
+			continue
+		}
+		switch op {
+		case lineStartExtendedOpcode:
+			if offset >= len(unit) {
+				return offset, false, fmt.Errorf("DWARF: short extended opcode (1)")
+			}
+			size, wid := uleb128(unit[offset:])
+			offset += wid
+			// size covers the opcode byte plus its operands.
+			if size == 0 || size > uint64(len(unit)-offset) {
+				return offset, false, fmt.Errorf("DWARF: short extended opcode (2)")
+			}
+			next := offset + int(size) // first byte after this instruction
+			op = unit[offset]
+			offset++
+			switch op {
+			case lineExtEndSequence:
+				m.endSequence = true
+				if emitRow() {
+					return next, true, nil
+				}
+				// A unit may hold several sequences; start afresh and
+				// keep going until the end of the unit.
+				m.reset()
+			case lineExtSetAddress:
+				// TODO: Assumes little-endian.
+				switch next - offset {
+				case 4:
+					m.address = uint64(binary.LittleEndian.Uint32(unit[offset:]))
+				case 8:
+					m.address = binary.LittleEndian.Uint64(unit[offset:])
+				default:
+					return offset, false, fmt.Errorf("DWARF: unsupported address size %d in set_address op", next-offset)
+				}
+				m.opIndex = 0
+			case lineExtDefineFile:
+				return offset, false, fmt.Errorf("DWARF: unimplemented define_file op")
+			case lineExtSetDiscriminator:
+				if offset >= next {
+					return truncated()
+				}
+				m.discriminator, _ = uleb128(unit[offset:next])
+			default:
+				// Unknown extended opcode: its length prefix is enough
+				// to step over it.
+			}
+			offset = next
+		case lineStdCopy:
+			if emitRow() {
+				return offset, true, nil
+			}
+			m.discriminator = 0
+			m.basicBlock = false
+			m.prologueEnd = false
+			m.epilogueBegin = false
+		case lineStdAdvancePC:
+			advance, ok := readULEB()
+			if !ok {
+				return truncated()
+			}
+			advancePC(advance)
+		case lineStdAdvanceLine:
+			if offset >= len(unit) {
+				return truncated()
+			}
+			advance, wid := sleb128(unit[offset:])
+			offset += wid
+			m.line = uint64(int64(m.line) + advance)
+		case lineStdSetFile:
+			index, ok := readULEB()
+			if !ok {
+				return truncated()
+			}
+			m.file = index
+		case lineStdSetColumn:
+			column, ok := readULEB()
+			if !ok {
+				return truncated()
+			}
+			m.column = column
+		case lineStdNegateStmt:
+			m.isStmt = !m.isStmt
+		case lineStdSetBasicBlock:
+			m.basicBlock = true
+		case lineStdConstAddPC:
+			// Advance the address by the amount special opcode 255 would,
+			// but leave the line register alone and emit no row.
+			advancePC(uint64(255-m.prologue.opcodeBase) / uint64(m.prologue.lineRange))
+		case lineStdFixedAdvancePC:
+			if len(unit)-offset < 2 {
+				return truncated()
+			}
+			m.address += uint64(binary.LittleEndian.Uint16(unit[offset:]))
+			m.opIndex = 0
+			offset += 2
+		case lineStdSetPrologueEnd:
+			m.prologueEnd = true
+		case lineStdSetEpilogueBegin:
+			m.epilogueBegin = true
+		case lineStdSetISA:
+			isa, ok := readULEB()
+			if !ok {
+				return truncated()
+			}
+			m.isa = isa
+		default:
+			// A standard opcode this code does not know (opcodeBase is
+			// larger than the table above). The header lists how many
+			// ULEB128 operands each standard opcode takes; skip them.
+			idx := int(op) - 1
+			if idx >= len(m.prologue.stdOpcodeLengths) {
+				return offset, false, fmt.Errorf("DWARF: unknown standard opcode %#x", op)
+			}
+			for n := int(m.prologue.stdOpcodeLengths[idx]); n > 0; n-- {
+				if _, ok := readULEB(); !ok {
+					return truncated()
+				}
+			}
+		}
+	}
+	// The program ended without covering pc.
+	return end, false, nil
+}
+
+// reset sets the machine's registers to the initial state. Page 111.
+// file and line start at 1, isStmt takes the prologue's defaultIsStmt, and
+// every other register is zero or false. The parsed prologue is kept.
+func (m *lineMachine) reset() {
+	*m = lineMachine{
+		file:     1,
+		line:     1,
+		isStmt:   m.prologue.defaultIsStmt,
+		prologue: m.prologue,
+	}
+}
+
+// uleb128 decodes a varint-encoded unsigned integer from the start of v.
+// Each byte contributes its low seven bits, least significant group first;
+// a clear high bit marks the last byte.
+// It returns the value and the number of bytes consumed. For empty input it
+// returns 0, 0. If v ends before a terminating byte is seen, the bytes that
+// are present are decoded and length is len(v).
+// Bits shifted beyond the 64th are discarded.
+// TODO: use the buffer interface.
+func uleb128(v []uint8) (u uint64, length int) {
+	var shift uint
+	for i, x := range v {
+		u |= (uint64(x) & 0x7F) << shift
+		shift += 7
+		if x&0x80 == 0 {
+			return u, i + 1
+		}
+	}
+	// Empty or unterminated input: only len(v) bytes were available.
+	return u, len(v)
+}
+
+// sleb128 decodes a varint-encoded signed integer from the start of v.
+// Each byte contributes its low seven bits, least significant group first;
+// a clear high bit marks the last byte, and bit 0x40 of that last byte is
+// the sign, which is extended through the remaining high bits of the result.
+// It returns the value and the number of bytes consumed. For empty input it
+// returns 0, 0. If v ends before a terminating byte is seen, the bytes that
+// are present are decoded without sign extension and length is len(v).
+// TODO: use the buffer interface.
+func sleb128(v []uint8) (s int64, length int) {
+	var shift uint
+	for i, x := range v {
+		s |= (int64(x) & 0x7F) << shift
+		shift += 7
+		if x&0x80 == 0 {
+			if x&0x40 != 0 {
+				// Negative: set every bit above the ones decoded so far.
+				s |= int64(-1) << shift
+			}
+			return s, i + 1
+		}
+	}
+	// Empty or unterminated input: only len(v) bytes were available.
+	return s, len(v)
+}
diff --git a/debug/elf/file.go b/debug/elf/file.go
index 134b28b..65c6b89 100644
--- a/debug/elf/file.go
+++ b/debug/elf/file.go
@@ -575,7 +575,8 @@
// There are many other DWARF sections, but these
// are the required ones, and the debug/dwarf package
// does not use the others, so don't bother loading them.
- var names = [...]string{"abbrev", "info", "str"}
+ // r: added line.
+ var names = [...]string{"abbrev", "info", "line", "str"}
var dat [len(names)][]byte
for i, name := range names {
name = ".debug_" + name
@@ -604,8 +605,8 @@
}
}
- abbrev, info, str := dat[0], dat[1], dat[2]
- d, err := dwarf.New(abbrev, nil, nil, info, nil, nil, nil, str)
+ abbrev, info, line, str := dat[0], dat[1], dat[2], dat[3]
+ d, err := dwarf.New(abbrev, nil, nil, info, line, nil, nil, str)
if err != nil {
return nil, err
}
diff --git a/program/server/server.go b/program/server/server.go
index 57824a1..bf7150b 100644
--- a/program/server/server.go
+++ b/program/server/server.go
@@ -144,7 +144,7 @@
}
// parseElf returns the gosym.Table representation of the old symbol tables.
-// TODO: Delete this once we know how to get PC/line data out of DWARF.
+// TODO: Delete this once we know how to get PC/SPoff data out of DWARF.
func parseElf(f *elf.File) (*gosym.Table, error) {
symdat, err := f.Section(".gosymtab").Data() // TODO unused.
if err != nil {
@@ -426,9 +426,9 @@
if err != nil {
return nil, err
}
- file, line, ok := s.lookupSource(addr)
- if !ok {
- return nil, fmt.Errorf("no PC/line data for: %q", expr)
+ file, line, err := s.lookupSource(addr)
+ if err != nil {
+ return nil, err
}
return []string{fmt.Sprintf("%s:%d", file, line)}, nil
@@ -448,12 +448,13 @@
return nil, fmt.Errorf("bad expression syntax: %q", expr)
}
-func (s *Server) lookupSource(pc uint64) (file string, line int, ok bool) {
-	if s.table == nil {
-		return
-	}
-	file, line, fn := s.table.PCToLine(pc)
-	return file, line, fn != nil
+// lookupSource returns the file and line that the DWARF data associates
+// with pc. It returns an error if no DWARF data is available or if PCToLine
+// cannot resolve pc, so callers never see an empty location with a nil error.
+func (s *Server) lookupSource(pc uint64) (file string, line int, err error) {
+	if s.dwarfData == nil {
+		return "", 0, fmt.Errorf("no DWARF data available to look up PC %#x", pc)
+	}
+	// TODO: The gosym equivalent also returns the relevant Func. Do that when
+	// DWARF has the same facility.
+	return s.dwarfData.PCToLine(pc)
}
// evalAddress takes a simple expression, either a symbol or hex value,