ogle: use the DWARF PC/line table instead of the gosym one New code in debug/dwarf/line.go to interpret the .debug_line section. LGTM=nigeltao R=nigeltao https://golang.org/cl/89060043

commit: 52775c4a90778c8795e130a81899b1b5cb473bd7 [log] [tgz]
author: Rob Pike <r@golang.org> Fri Apr 18 10:38:40 2014 -0700
committer: Rob Pike <r@golang.org> Fri Apr 18 10:38:40 2014 -0700
tree: 8b2e0a71b691dbf3fd91cf66f2263e075f42b526
parent: 416736215d983d9df75718ec406fc47d305f0cc5 [diff]
diff --git a/debug/dwarf/line.go b/debug/dwarf/line.go
new file mode 100644
index 0000000..5a23922
--- /dev/null
+++ b/debug/dwarf/line.go

@@ -0,0 +1,421 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package dwarf
+
+// Mapping from PC to lines.
+// http://www.dwarfstd.org/doc/DWARF4.pdf Section 6.2 page 108
+
+// TODO: Convert the I/O to use the buffer interface defined in buf.go.
+// TODO: Find a way to test this properly.
+
+import (
+	"encoding/binary"
+	"fmt"
+)
+
+// PCToLine returns the file and line number corresponding to the PC value.
+// If a correspondence cannot be found, ok will be false.
+// TODO: Return a function descriptor as well.
+func (d *Data) PCToLine(pc uint64) (file string, line int, err error) {
+	if len(d.line) == 0 {
+		return
+	}
+	var m lineMachine
+	for offset := 0; offset < len(d.line); {
+		var found bool
+		offset, found, err = m.evalCompilationUnit(d.line, offset, pc)
+		if err != nil {
+			return "", 0, err
+		}
+		if found {
+			return m.prologue.file[m.file].name, int(m.line), nil
+		}
+	}
+	return "", 0, fmt.Errorf("no source line defined for PC %#x", pc)
+}
+
+// Standard opcodes. Figure 37, page 178.
+// If an opcode >= lineMachine.prologue.opcodeBase, it is a special
+// opcode rather than the opcode defined in this table.
+const (
+	lineStdCopy             = 0x01
+	lineStdAdvancePC        = 0x02
+	lineStdAdvanceLine      = 0x03
+	lineStdSetFile          = 0x04
+	lineStdSetColumn        = 0x05
+	lineStdNegateStmt       = 0x06
+	lineStdSetBasicBlock    = 0x07
+	lineStdConstAddPC       = 0x08
+	lineStdFixedAdvancePC   = 0x09
+	lineStdSetPrologueEnd   = 0x0a
+	lineStdSetEpilogueBegin = 0x0b
+	lineStdSetISA           = 0x0c
+)
+
+// Extended opcodes. Figure 38, page 179.
+const (
+	lineStartExtendedOpcode = 0x00 // Not defined as a named constant in the spec.
+	lineExtEndSequence      = 0x01
+	lineExtSetAddress       = 0x02
+	lineExtDefineFile       = 0x03
+	lineExtSetDiscriminator = 0x04 // New in version 4.
+	lineExtLoUser           = 0x80
+	lineExtHiUser           = 0xff
+)
+
+// linePrologue holds the information stored in the prologue of the line
+// table for a single compilation unit. Also called the header.
+// Section 6.2.4, page 112.
+type linePrologue struct {
+	unitLength           int
+	version              int
+	headerLength         int
+	minInstructionLength int
+	maxOpsPerInstruction int
+	defaultIsStmt        bool
+	lineBase             int
+	lineRange            int
+	opcodeBase           byte
+	stdOpcodeLengths     []byte
+	include              []string   // entry 0 is empty; means current directory
+	file                 []lineFile // entry 0 is empty.
+}
+
+// lineFile represents a file name stored in the PC/line table, usually the prologue.
+type lineFile struct {
+	name   string
+	index  int // index into include directories
+	time   int // implementation-defined time of last modification
+	length int // length in bytes, 0 if not available.
+}
+
+// lineMachine holds the registers evaluated during executing of the PC/line mapping engine.
+// Section 6.2.2, page 109.
+type lineMachine struct {
+	// The program-counter value corresponding to a machine instruction generated by the compiler.
+	address uint64
+
+	// An unsigned integer representing the index of an operation within a VLIW
+	// instruction. The index of the first operation is 0. For non-VLIW
+	// architectures, this register will always be 0.
+	// The address and op_index registers, taken together, form an operation
+	// pointer that can reference any individual operation with the instruction
+	// stream.
+	opIndex uint64
+
+	// An unsigned integer indicating the identity of the source file corresponding to a machine instruction.
+	file uint64
+
+	// An unsigned integer indicating a source line number. Lines are numbered
+	// beginning at 1. The compiler may emit the value 0 in cases where an
+	// instruction cannot be attributed to any source line.
+	line uint64
+
+	// An unsigned integer indicating a column number within a source line.
+	// Columns are numbered beginning at 1. The value 0 is reserved to indicate
+	// that a statement begins at the “left edge” of the line.
+	column uint64
+
+	// A boolean indicating that the current instruction is a recommended
+	// breakpoint location. A recommended breakpoint location is intended to
+	// “represent” a line, a statement and/or a semantically distinct subpart of a
+	// statement.
+	isStmt bool
+
+	// A boolean indicating that the current instruction is the beginning of a basic
+	// block.
+	basicBlock bool
+
+	// A boolean indicating that the current address is that of the first byte after
+	// the end of a sequence of target machine instructions. end_sequence
+	// terminates a sequence of lines; therefore other information in the same
+	// row is not meaningful.
+	endSequence bool
+
+	// A boolean indicating that the current address is one (of possibly many)
+	// where execution should be suspended for an entry breakpoint of a
+	// function.
+	prologueEnd bool
+
+	// A boolean indicating that the current address is one (of possibly many)
+	// where execution should be suspended for an exit breakpoint of a function.
+	epilogueBegin bool
+
+	// An unsigned integer whose value encodes the applicable instruction set
+	// architecture for the current instruction.
+	// The encoding of instruction sets should be shared by all users of a given
+	// architecture. It is recommended that this encoding be defined by the ABI
+	// authoring committee for each architecture.
+	isa uint64
+
+	// An unsigned integer identifying the block to which the current instruction
+	// belongs. Discriminator values are assigned arbitrarily by the DWARF
+	// producer and serve to distinguish among multiple blocks that may all be
+	// associated with the same source file, line, and column. Where only one
+	// block exists for a given source position, the discriminator value should be
+	// zero.
+	discriminator uint64
+
+	// The prologue for the current compilation unit.
+	// Not an actual register, but stored here for cleanlineness.
+	prologue linePrologue
+}
+
+// parseLinePrologue parses the prologue/header describing the compilation
+// unit in the line table starting at the specified offset.
+func (m *lineMachine) parseLinePrologue(data []byte, offset int) (int, error) {
+	// TODO: Assumes little endian
+	m.prologue = linePrologue{}
+	m.prologue.unitLength = int(binary.LittleEndian.Uint32(data[offset:]))
+	if m.prologue.unitLength > len(data)-4 {
+		return 0, fmt.Errorf("DWARF: bad PC/line header length")
+	}
+	offset += 4
+	m.prologue.version = int(binary.LittleEndian.Uint16(data[offset:]))
+	offset += 2
+	m.prologue.headerLength = int(binary.LittleEndian.Uint32(data[offset:]))
+	offset += 4
+	m.prologue.minInstructionLength = int(data[offset])
+	offset += 1
+	if m.prologue.version >= 4 {
+		m.prologue.maxOpsPerInstruction = int(data[offset])
+		offset += 1
+	} else {
+		m.prologue.maxOpsPerInstruction = 1
+	}
+	m.prologue.defaultIsStmt = data[offset] != 0
+	offset += 1
+	m.prologue.lineBase = int(int8(data[offset]))
+	offset += 1
+	m.prologue.lineRange = int(data[offset])
+	offset += 1
+	m.prologue.opcodeBase = data[offset]
+	offset += 1
+	m.prologue.stdOpcodeLengths = make([]byte, m.prologue.opcodeBase-1)
+	copy(m.prologue.stdOpcodeLengths, data[offset:])
+	offset += int(m.prologue.opcodeBase - 1)
+	m.prologue.include = make([]string, 1) // First entry is empty; file index entries are 1-indexed.
+	// Includes
+	for {
+		if data[offset] == 0 {
+			offset++
+			break
+		}
+		startOfName := offset
+		for data[offset] != 0 {
+			offset++
+		}
+		m.prologue.include = append(m.prologue.include, string(data[startOfName:offset]))
+		offset++ // terminal NUL
+	}
+	// Files
+	m.prologue.file = make([]lineFile, 1, 10) // entries are 1-indexed in line number program.
+	for {
+		if data[offset] == 0 {
+			offset++
+			break
+		}
+		startOfName := offset
+		for data[offset] != 0 {
+			offset++
+		}
+		name := data[startOfName:offset]
+		offset++ // terminal NUL
+		index, w := uleb128(data[offset:])
+		offset += w
+		time, w := uleb128(data[offset:])
+		offset += w
+		length, w := uleb128(data[offset:])
+		offset += w
+		f := lineFile{
+			name:   string(name),
+			index:  int(index),
+			time:   int(time),
+			length: int(length),
+		}
+		m.prologue.file = append(m.prologue.file, f)
+	}
+	return offset, nil
+}
+
+// Special opcodes, page 117.
+func (m *lineMachine) specialOpcode(opcode byte) {
+	adjustedOpcode := int(opcode - m.prologue.opcodeBase)
+	advance := adjustedOpcode / m.prologue.lineRange
+	delta := (int(m.opIndex) + advance) / m.prologue.maxOpsPerInstruction
+	m.address += uint64(m.prologue.minInstructionLength * delta)
+	m.opIndex = (m.opIndex + uint64(advance)) % uint64(m.prologue.maxOpsPerInstruction)
+	lineAdvance := m.prologue.lineBase + (adjustedOpcode % m.prologue.lineRange)
+	m.line += uint64(lineAdvance)
+	m.basicBlock = false
+	m.prologueEnd = false
+	m.epilogueBegin = false
+	m.discriminator = 0
+}
+
+// evalCompilationUnit scans the compilation unit starting at the specified offset to see if it contains the PC.
+// It returns when it finds the PC or at the end of the compilation unit.
+// The return values are the offset where it stops and whether the PC was found; if so,
+// the machine's registers contain the relevant information.
+func (m *lineMachine) evalCompilationUnit(data []byte, startOffset int, pc uint64) (int, bool, error) {
+	offset, err := m.parseLinePrologue(data, startOffset)
+	if err != nil {
+		return offset, false, err
+	}
+	m.reset()
+	for offset < len(data) {
+		op := data[offset]
+		offset++
+		if op >= m.prologue.opcodeBase {
+			m.specialOpcode(op)
+			continue
+		}
+		switch op {
+		case lineStartExtendedOpcode:
+			if len(data) == 0 {
+				return offset, false, fmt.Errorf("DWARF: short extended opcode (1)")
+			}
+			size, wid := uleb128(data[offset:])
+			if uint64(len(data)) < size {
+				return offset, false, fmt.Errorf("DWARF: short extended opcode (2)")
+			}
+			offset += int(wid)
+			op = data[offset]
+			offset++
+			switch op {
+			case lineExtEndSequence:
+				m.endSequence = true
+				m.reset()
+				return offset, false, nil
+			case lineExtSetAddress:
+				var addr uint64
+				// TODO: Assumes little-endian.
+				switch size {
+				case 1 + 4: // TODO: How should we do this?
+					addr = uint64(binary.LittleEndian.Uint32(data[offset:]))
+					offset += 4
+				case 1 + 8:
+					addr = binary.LittleEndian.Uint64(data[offset:])
+					offset += 8
+				}
+				m.address = addr
+				m.opIndex = 0
+			case lineExtDefineFile:
+				return offset, false, fmt.Errorf("DWARF: unimplemented define_file op")
+			case lineExtSetDiscriminator:
+				discriminator, wid := uleb128(data[offset:])
+				m.line = discriminator
+				offset += wid
+			default:
+				return offset, false, fmt.Errorf("DWARF: unknown extended opcode %#x", op)
+			}
+		case lineStdCopy:
+			m.discriminator = 0
+			m.basicBlock = false
+			m.prologueEnd = false
+			m.epilogueBegin = false
+			if m.address >= pc {
+				// TODO: if m.address > pc, is this one step too far?
+				return offset, true, nil
+			}
+		case lineStdAdvancePC:
+			advance, wid := uleb128(data[offset:])
+			delta := (int(m.opIndex) + int(advance)) / m.prologue.maxOpsPerInstruction
+			m.address += uint64(m.prologue.minInstructionLength * delta)
+			m.opIndex = (m.opIndex + uint64(advance)) % uint64(m.prologue.maxOpsPerInstruction)
+			offset += wid
+			m.basicBlock = false
+			m.prologueEnd = false
+			m.epilogueBegin = false
+			m.discriminator = 0
+		case lineStdAdvanceLine:
+			advance, wid := sleb128(data[offset:])
+			m.line = uint64(int64(m.line) + advance)
+			offset += wid
+		case lineStdSetFile:
+			index, wid := uleb128(data[offset:])
+			m.file = index
+			offset += wid
+		case lineStdSetColumn:
+			column, wid := uleb128(data[offset:])
+			m.column = column
+			offset += wid
+		case lineStdNegateStmt:
+			m.isStmt = !m.isStmt
+		case lineStdSetBasicBlock:
+			m.basicBlock = true
+		case lineStdFixedAdvancePC:
+			delta := binary.LittleEndian.Uint16(data[offset:])
+			m.address += uint64(delta)
+			m.opIndex = 0
+			offset += 2
+		case lineStdSetPrologueEnd:
+			m.prologueEnd = true
+		case lineStdSetEpilogueBegin:
+			m.epilogueBegin = true
+		case lineStdSetISA:
+			isa, wid := uleb128(data[offset:])
+			m.isa = isa
+			offset += wid
+		case lineStdConstAddPC:
+			// TODO: Is this right? Seems crazy - why not just use 255 as a special opcode?
+			m.specialOpcode(255)
+		default:
+			panic("not reached")
+		}
+	}
+	panic("not reached")
+}
+
+// reset sets the machine's registers to the initial state. Page 111.
+func (m *lineMachine) reset() {
+	m.address = 0
+	m.opIndex = 0
+	m.file = 1
+	m.line = 1
+	m.column = 0
+	m.isStmt = m.prologue.defaultIsStmt
+	m.basicBlock = false
+	m.endSequence = false
+	m.prologueEnd = false
+	m.epilogueBegin = false
+	m.isa = 0
+	m.discriminator = 0
+}
+
+// uleb128 decodes a varint-encoded unsigned integer.
+// TODO: use the buffer interface.
+func uleb128(v []uint8) (u uint64, length int) {
+	var shift uint
+	var x byte
+	for length, x = range v {
+		u |= (uint64(x) & 0x7F) << shift
+		shift += 7
+		if x&0x80 == 0 {
+			break
+		}
+	}
+	return u, length + 1
+}
+
+// sleb128 decodes a varint-encoded signed integer.
+// TODO: use the buffer interface.
+func sleb128(v []uint8) (s int64, length int) {
+	var shift uint
+	var sign int64 = -1
+	var x byte
+	for length, x = range v {
+		s |= (int64(x) & 0x7F) << shift
+		shift += 7
+		sign <<= 7
+		if x&0x80 == 0 {
+			if x&0x40 != 0 {
+				s |= sign
+			}
+			break
+		}
+	}
+	return s, length + 1
+}

diff --git a/debug/elf/file.go b/debug/elf/file.go
index 134b28b..65c6b89 100644
--- a/debug/elf/file.go
+++ b/debug/elf/file.go

@@ -575,7 +575,8 @@
 	// There are many other DWARF sections, but these
 	// are the required ones, and the debug/dwarf package
 	// does not use the others, so don't bother loading them.
-	var names = [...]string{"abbrev", "info", "str"}
+	// r: added line.
+	var names = [...]string{"abbrev", "info", "line", "str"}
 	var dat [len(names)][]byte
 	for i, name := range names {
 		name = ".debug_" + name
@@ -604,8 +605,8 @@
 		}
 	}
 
-	abbrev, info, str := dat[0], dat[1], dat[2]
-	d, err := dwarf.New(abbrev, nil, nil, info, nil, nil, nil, str)
+	abbrev, info, line, str := dat[0], dat[1], dat[2], dat[3]
+	d, err := dwarf.New(abbrev, nil, nil, info, line, nil, nil, str)
 	if err != nil {
 		return nil, err
 	}

diff --git a/program/server/server.go b/program/server/server.go
index 57824a1..bf7150b 100644
--- a/program/server/server.go
+++ b/program/server/server.go

@@ -144,7 +144,7 @@
 }
 
 // parseElf returns the gosym.Table representation of the old symbol tables.
-// TODO: Delete this once we know how to get PC/line data out of DWARF.
+// TODO: Delete this once we know how to get PC/SPoff data out of DWARF.
 func parseElf(f *elf.File) (*gosym.Table, error) {
 	symdat, err := f.Section(".gosymtab").Data() // TODO unused.
 	if err != nil {
@@ -426,9 +426,9 @@
 		if err != nil {
 			return nil, err
 		}
-		file, line, ok := s.lookupSource(addr)
-		if !ok {
-			return nil, fmt.Errorf("no PC/line data for: %q", expr)
+		file, line, err := s.lookupSource(addr)
+		if err != nil {
+			return nil, err
 		}
 		return []string{fmt.Sprintf("%s:%d", file, line)}, nil
 
@@ -448,12 +448,13 @@
 	return nil, fmt.Errorf("bad expression syntax: %q", expr)
 }
 
-func (s *Server) lookupSource(pc uint64) (file string, line int, ok bool) {
-	if s.table == nil {
+func (s *Server) lookupSource(pc uint64) (file string, line int, err error) {
+	if s.dwarfData == nil {
 		return
 	}
-	file, line, fn := s.table.PCToLine(pc)
-	return file, line, fn != nil
+	// TODO: The gosym equivalent also returns the relevant Func. Do that when
+	// DWARF has the same facility.
+	return s.dwarfData.PCToLine(pc)
 }
 
 // evalAddress takes a simple expression, either a symbol or hex value,
commit	52775c4a90778c8795e130a81899b1b5cb473bd7	[log] [tgz]
author	Rob Pike <r@golang.org>	Fri Apr 18 10:38:40 2014 -0700
committer	Rob Pike <r@golang.org>	Fri Apr 18 10:38:40 2014 -0700
tree	8b2e0a71b691dbf3fd91cf66f2263e075f42b526
parent	416736215d983d9df75718ec406fc47d305f0cc5 [diff]