cmd/compile: generate debug_lines in compiler

This is mostly a copy-paste jobs from the linker to generate the debug
information in the compiler instead of the linker. The new data is
inserted into the debug line numbers symbol defined in CL 188238.

Generating the debug information BEFORE deadcode results in one subtle
difference, and that is that the state machine needs to be reset at the
end of every function's debug line table. The reasoning is that
generating the table AFTER dead code allows the producer and consumer of
the table to agree on the state of the state machine, and since these
blocks will (eventually) be concatenated in the linker, we don't KNOW
the state of the state machine unless we reset it. So,
generateDebugLinesSymbol resets the state machine at the end of every
function.

Right now, we don't do anything with this line information, or the file
table -- we just populate the symbols.

Change-Id: If9103eda6cc5f1f7a11e7e1a97184a060a4ad7fb
Reviewed-on: https://go-review.googlesource.com/c/go/+/188317
Run-TryBot: Jeremy Faller <jeremy@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Austin Clements <austin@google.com>
diff --git a/src/cmd/internal/obj/dwarf.go b/src/cmd/internal/obj/dwarf.go
new file mode 100644
index 0000000..ebe69f8
--- /dev/null
+++ b/src/cmd/internal/obj/dwarf.go
@@ -0,0 +1,249 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Writes dwarf information to object files.
+
+package obj
+
+import (
+	"cmd/internal/dwarf"
+	"fmt"
+)
+
+// Generate a sequence of opcodes that is as short as possible.
+// See section 6.2.5
+const (
+	LINE_BASE   = -4
+	LINE_RANGE  = 10
+	PC_RANGE    = (255 - OPCODE_BASE) / LINE_RANGE
+	OPCODE_BASE = 11
+)
+
+// generateDebugLinesSymbol fills the debug lines symbol of a given function.
+//
+// It's worth noting that this function doesn't generate the full debug_lines
+// DWARF section, saving that for the linker. This function just generates the
+// state machine part of debug_lines. The full table is generated by the
+// linker.  Also, we use the file numbers from the full package (not just the
+// function in question) when generating the state machine. We do this so we
+// don't have to do a fixup on the indices when writing the full section.
+func (ctxt *Link) generateDebugLinesSymbol(s, lines *LSym) {
+	dctxt := dwCtxt{ctxt}
+
+	// The Pcfile table is used to generate the debug_lines section, and the file
+	// indices for that data could differ from the files we write out for the
+	// debug_lines section. Here we generate a LUT between those two indices.
+	fileNums := make(map[int32]int64)
+	for i, filename := range s.Func.Pcln.File {
+		if symbolIndex := ctxt.PosTable.FileIndex(filename); symbolIndex >= 0 {
+			fileNums[int32(i)] = int64(symbolIndex) + 1
+		} else {
+			panic(fmt.Sprintf("First time we've seen filename: %q", filename))
+		}
+	}
+
+	// Set up the debug_lines state machine.
+	// NB: This state machine is reset to this state when we've finished
+	// generating the line table. See below.
+	// TODO: Once delve can support multiple DW_LNS_end_statements, we don't have
+	// to do this.
+	is_stmt := uint8(1)
+	pc := s.Func.Text.Pc
+	line := 1
+	file := 1
+
+	dctxt.AddUint8(lines, 0) // start extended opcode
+	dwarf.Uleb128put(dctxt, lines, 1+int64(ctxt.Arch.PtrSize))
+	dctxt.AddUint8(lines, dwarf.DW_LNE_set_address)
+	dctxt.AddAddress(lines, nil, pc)
+
+	// Generate the actual line information.
+	// We use the pcline and pcfile to generate this section, and it's suboptimal.
+	// Likely better would be to generate this dirrectly from the progs and not
+	// parse those tables.
+	// TODO: Generate from the progs if it's faster.
+	pcfile := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
+	pcline := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
+	pcstmt := NewPCIter(uint32(ctxt.Arch.Arch.MinLC))
+	pcfile.Init(s.Func.Pcln.Pcfile.P)
+	pcline.Init(s.Func.Pcln.Pcline.P)
+	var pctostmtData Pcdata
+	funcpctab(ctxt, &pctostmtData, s, "pctostmt", pctostmt, nil)
+	pcstmt.Init(pctostmtData.P)
+	var thispc uint32
+
+	for !pcfile.Done && !pcline.Done {
+		// Only changed if it advanced
+		if int32(file) != pcfile.Value {
+			dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
+			dwarf.Uleb128put(dctxt, lines, fileNums[pcfile.Value])
+			file = int(pcfile.Value)
+		}
+
+		// Only changed if it advanced
+		if is_stmt != uint8(pcstmt.Value) {
+			new_stmt := uint8(pcstmt.Value)
+			switch new_stmt &^ 1 {
+			case PrologueEnd:
+				dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_set_prologue_end))
+			case EpilogueBegin:
+				// TODO if there is a use for this, add it.
+				// Don't forget to increase OPCODE_BASE by 1 and add entry for standard_opcode_lengths[11]
+				panic("unsupported EpilogueBegin")
+			}
+			new_stmt &= 1
+			if is_stmt != new_stmt {
+				is_stmt = new_stmt
+				dctxt.AddUint8(lines, uint8(dwarf.DW_LNS_negate_stmt))
+			}
+		}
+
+		// putpcldelta makes a row in the DWARF matrix, always, even if line is unchanged.
+		putpclcdelta(ctxt, dctxt, lines, uint64(s.Func.Text.Pc+int64(thispc)-pc), int64(pcline.Value)-int64(line))
+
+		pc = s.Func.Text.Pc + int64(thispc)
+		line = int(pcline.Value)
+
+		// Take the minimum step forward for the three iterators
+		thispc = pcfile.NextPC
+		if pcline.NextPC < thispc {
+			thispc = pcline.NextPC
+		}
+		if !pcstmt.Done && pcstmt.NextPC < thispc {
+			thispc = pcstmt.NextPC
+		}
+
+		if pcfile.NextPC == thispc {
+			pcfile.Next()
+		}
+		if !pcstmt.Done && pcstmt.NextPC == thispc {
+			pcstmt.Next()
+		}
+		if pcline.NextPC == thispc {
+			pcline.Next()
+		}
+	}
+
+	// Because these symbols will be concatenated together by the linker, we need
+	// to reset the state machine that controls the debug symbols. The fields in
+	// the state machine that need to be reset are:
+	//   file = 1
+	//   line = 1
+	//   column = 0
+	//   is_stmt = set in header, we assume true
+	//   basic_block = false
+	// Careful readers of the DWARF specification will note that we don't reset
+	// the address of the state machine -- but this will happen at the beginning
+	// of the NEXT block of opcodes. (See the SetAddress call above.)
+	dctxt.AddUint8(lines, dwarf.DW_LNS_set_file)
+	dwarf.Uleb128put(dctxt, lines, 1)
+	dctxt.AddUint8(lines, dwarf.DW_LNS_advance_line)
+	dwarf.Sleb128put(dctxt, lines, int64(1-line))
+	if is_stmt != 1 {
+		dctxt.AddUint8(lines, dwarf.DW_LNS_negate_stmt)
+	}
+	dctxt.AddUint8(lines, dwarf.DW_LNS_copy)
+}
+
+func putpclcdelta(linkctxt *Link, dctxt dwCtxt, s *LSym, deltaPC uint64, deltaLC int64) {
+	// Choose a special opcode that minimizes the number of bytes needed to
+	// encode the remaining PC delta and LC delta.
+	var opcode int64
+	if deltaLC < LINE_BASE {
+		if deltaPC >= PC_RANGE {
+			opcode = OPCODE_BASE + (LINE_RANGE * PC_RANGE)
+		} else {
+			opcode = OPCODE_BASE + (LINE_RANGE * int64(deltaPC))
+		}
+	} else if deltaLC < LINE_BASE+LINE_RANGE {
+		if deltaPC >= PC_RANGE {
+			opcode = OPCODE_BASE + (deltaLC - LINE_BASE) + (LINE_RANGE * PC_RANGE)
+			if opcode > 255 {
+				opcode -= LINE_RANGE
+			}
+		} else {
+			opcode = OPCODE_BASE + (deltaLC - LINE_BASE) + (LINE_RANGE * int64(deltaPC))
+		}
+	} else {
+		if deltaPC <= PC_RANGE {
+			opcode = OPCODE_BASE + (LINE_RANGE - 1) + (LINE_RANGE * int64(deltaPC))
+			if opcode > 255 {
+				opcode = 255
+			}
+		} else {
+			// Use opcode 249 (pc+=23, lc+=5) or 255 (pc+=24, lc+=1).
+			//
+			// Let x=deltaPC-PC_RANGE.  If we use opcode 255, x will be the remaining
+			// deltaPC that we need to encode separately before emitting 255.  If we
+			// use opcode 249, we will need to encode x+1.  If x+1 takes one more
+			// byte to encode than x, then we use opcode 255.
+			//
+			// In all other cases x and x+1 take the same number of bytes to encode,
+			// so we use opcode 249, which may save us a byte in encoding deltaLC,
+			// for similar reasons.
+			switch deltaPC - PC_RANGE {
+			// PC_RANGE is the largest deltaPC we can encode in one byte, using
+			// DW_LNS_const_add_pc.
+			//
+			// (1<<16)-1 is the largest deltaPC we can encode in three bytes, using
+			// DW_LNS_fixed_advance_pc.
+			//
+			// (1<<(7n))-1 is the largest deltaPC we can encode in n+1 bytes for
+			// n=1,3,4,5,..., using DW_LNS_advance_pc.
+			case PC_RANGE, (1 << 7) - 1, (1 << 16) - 1, (1 << 21) - 1, (1 << 28) - 1,
+				(1 << 35) - 1, (1 << 42) - 1, (1 << 49) - 1, (1 << 56) - 1, (1 << 63) - 1:
+				opcode = 255
+			default:
+				opcode = OPCODE_BASE + LINE_RANGE*PC_RANGE - 1 // 249
+			}
+		}
+	}
+	if opcode < OPCODE_BASE || opcode > 255 {
+		panic(fmt.Sprintf("produced invalid special opcode %d", opcode))
+	}
+
+	// Subtract from deltaPC and deltaLC the amounts that the opcode will add.
+	deltaPC -= uint64((opcode - OPCODE_BASE) / LINE_RANGE)
+	deltaLC -= (opcode-OPCODE_BASE)%LINE_RANGE + LINE_BASE
+
+	// Encode deltaPC.
+	if deltaPC != 0 {
+		if deltaPC <= PC_RANGE {
+			// Adjust the opcode so that we can use the 1-byte DW_LNS_const_add_pc
+			// instruction.
+			opcode -= LINE_RANGE * int64(PC_RANGE-deltaPC)
+			if opcode < OPCODE_BASE {
+				panic(fmt.Sprintf("produced invalid special opcode %d", opcode))
+			}
+			dctxt.AddUint8(s, dwarf.DW_LNS_const_add_pc)
+		} else if (1<<14) <= deltaPC && deltaPC < (1<<16) {
+			dctxt.AddUint8(s, dwarf.DW_LNS_fixed_advance_pc)
+			dctxt.AddUint16(s, uint16(deltaPC))
+		} else {
+			dctxt.AddUint8(s, dwarf.DW_LNS_advance_pc)
+			dwarf.Uleb128put(dctxt, s, int64(deltaPC))
+		}
+	}
+
+	// Encode deltaLC.
+	if deltaLC != 0 {
+		dctxt.AddUint8(s, dwarf.DW_LNS_advance_line)
+		dwarf.Sleb128put(dctxt, s, deltaLC)
+	}
+
+	// Output the special opcode.
+	dctxt.AddUint8(s, uint8(opcode))
+}
+
+// createDebugLinesFileTable creates a new symbol holding the list of files
+// in our package.
+func (ctxt *Link) createDebugLinesFileTable() {
+	dctxt := dwCtxt{ctxt}
+
+	fileLUT := ctxt.PosTable.DebugLinesFileTable()
+	s := ctxt.dwarfFileTableSymbol()
+	for _, file := range fileLUT {
+		dctxt.AddString(s, file)
+	}
+}
diff --git a/src/cmd/internal/obj/objfile.go b/src/cmd/internal/obj/objfile.go
index 65803ae..863989c 100644
--- a/src/cmd/internal/obj/objfile.go
+++ b/src/cmd/internal/obj/objfile.go
@@ -460,6 +460,13 @@
 	ls := s.(*LSym)
 	ls.WriteInt(c.Link, ls.Size, size, i)
 }
+func (c dwCtxt) AddUint16(s dwarf.Sym, i uint16) {
+	c.AddInt(s, 2, int64(i))
+}
+func (c dwCtxt) AddUint8(s dwarf.Sym, i uint8) {
+	b := []byte{byte(i)}
+	c.AddBytes(s, b)
+}
 func (c dwCtxt) AddBytes(s dwarf.Sym, b []byte) {
 	ls := s.(*LSym)
 	ls.WriteBytes(c.Link, ls.Size, b)
@@ -577,7 +584,7 @@
 // TEXT symbol 's'. The various DWARF symbols must already have been
 // initialized in InitTextSym.
 func (ctxt *Link) populateDWARF(curfn interface{}, s *LSym, myimportpath string) {
-	info, loc, ranges, absfunc, _, _ := ctxt.dwarfSym(s)
+	info, loc, ranges, absfunc, _, lines := ctxt.dwarfSym(s)
 	if info.Size != 0 {
 		ctxt.Diag("makeFuncDebugEntry double process %v", s)
 	}
@@ -617,6 +624,8 @@
 	if err != nil {
 		ctxt.Diag("emitting DWARF for %s failed: %v", s.Name, err)
 	}
+	// Fill in the debug lines symbol.
+	ctxt.generateDebugLinesSymbol(s, lines)
 }
 
 // DwarfIntConst creates a link symbol for an integer constant with the
@@ -632,6 +641,23 @@
 	dwarf.PutIntConst(dwCtxt{ctxt}, s, ctxt.Lookup(dwarf.InfoPrefix+typename), myimportpath+"."+name, val)
 }
 
+// dwarfFileTableSymbol creates (or finds) the symbol for holding the line table for this package.
+//
+// The symbol WILL NOT be unique at the per package/archive level. For example,
+// when writing a package archive, we'll write this symbol for the Go code, and
+// one for each assembly file in the package. As such, we can't treat this
+// symbol the same when we read in the object files in the linker. This symbol
+// won't make it to the symbol table, and compilation units will keep track of
+// it.
+// TODO: Actually save this to the object file, and read it back in the linker.
+func (ctxt *Link) dwarfFileTableSymbol() *LSym {
+	s := ctxt.LookupInit(dwarf.DebugLinesPrefix+".package", func(s *LSym) {
+		s.Type = objabi.SDWARFLINES
+		//ctxt.Data = append(ctxt.Data, s)
+	})
+	return s
+}
+
 func (ctxt *Link) DwarfAbstractFunc(curfn interface{}, s *LSym, myimportpath string) {
 	absfn := ctxt.DwFixups.AbsFuncDwarfSym(s)
 	if absfn.Size != 0 {
diff --git a/src/cmd/internal/obj/plist.go b/src/cmd/internal/obj/plist.go
index ad08d15..7ecf96e 100644
--- a/src/cmd/internal/obj/plist.go
+++ b/src/cmd/internal/obj/plist.go
@@ -137,7 +137,7 @@
 	ctxt.Text = append(ctxt.Text, s)
 
 	// Set up DWARF entries for s.
-	info, loc, ranges, _, isstmt, _ := ctxt.dwarfSym(s)
+	info, loc, ranges, _, isstmt, lines := ctxt.dwarfSym(s)
 	info.Type = objabi.SDWARFINFO
 	info.Set(AttrDuplicateOK, s.DuplicateOK())
 	if loc != nil {
@@ -151,6 +151,9 @@
 	isstmt.Type = objabi.SDWARFMISC
 	isstmt.Set(AttrDuplicateOK, s.DuplicateOK())
 	ctxt.Data = append(ctxt.Data, isstmt)
+	lines.Type = objabi.SDWARFLINES
+	lines.Set(AttrDuplicateOK, s.DuplicateOK())
+	ctxt.Data = append(ctxt.Data, lines)
 }
 
 func (ctxt *Link) Globl(s *LSym, size int64, flag int) {
diff --git a/src/cmd/internal/objabi/symkind.go b/src/cmd/internal/objabi/symkind.go
index 0e763e4..7549163 100644
--- a/src/cmd/internal/objabi/symkind.go
+++ b/src/cmd/internal/objabi/symkind.go
@@ -59,8 +59,8 @@
 	SDWARFINFO
 	SDWARFRANGE
 	SDWARFLOC
-	SDWARFMISC
 	SDWARFLINES
+	SDWARFMISC
 	// ABI alias. An ABI alias symbol is an empty symbol with a
 	// single relocation with 0 size that references the native
 	// function implementation symbol.
diff --git a/src/cmd/internal/src/xpos.go b/src/cmd/internal/src/xpos.go
index d845433..da90ccd 100644
--- a/src/cmd/internal/src/xpos.go
+++ b/src/cmd/internal/src/xpos.go
@@ -109,6 +109,7 @@
 type PosTable struct {
 	baseList []*PosBase
 	indexMap map[*PosBase]int
+	nameMap  map[string]int // Maps file symbol name to index for debug information.
 }
 
 // XPos returns the corresponding XPos for the given pos,
@@ -121,12 +122,16 @@
 		t.baseList = append(t.baseList, nil)
 		m = map[*PosBase]int{nil: 0}
 		t.indexMap = m
+		t.nameMap = make(map[string]int)
 	}
 	i, ok := m[pos.base]
 	if !ok {
 		i = len(t.baseList)
 		t.baseList = append(t.baseList, pos.base)
 		t.indexMap[pos.base] = i
+		if _, ok := t.nameMap[pos.base.symFilename]; !ok {
+			t.nameMap[pos.base.symFilename] = len(t.nameMap)
+		}
 	}
 	return XPos{int32(i), pos.lico}
 }
@@ -140,3 +145,23 @@
 	}
 	return Pos{base, p.lico}
 }
+
+// FileIndex returns the index of the given filename(symbol) in the PosTable, or -1 if not found.
+func (t *PosTable) FileIndex(filename string) int {
+	if v, ok := t.nameMap[filename]; ok {
+		return v
+	}
+	return -1
+}
+
+// DebugLinesFiles returns the file table for the debug_lines DWARF section.
+func (t *PosTable) DebugLinesFileTable() []string {
+	// Create a LUT of the global package level file indices. This table is what
+	// is written in the debug_lines header, the file[N] will be referenced as
+	// N+1 in the debug_lines table.
+	fileLUT := make([]string, len(t.nameMap))
+	for str, i := range t.nameMap {
+		fileLUT[i] = str
+	}
+	return fileLUT
+}
diff --git a/src/cmd/link/internal/objfile/objfile.go b/src/cmd/link/internal/objfile/objfile.go
index b6bb864..840914f 100644
--- a/src/cmd/link/internal/objfile/objfile.go
+++ b/src/cmd/link/internal/objfile/objfile.go
@@ -416,7 +416,8 @@
 			// from the spot where the wrapper is needed.
 			whitelist := (strings.HasPrefix(dup.Name, "go.info.go.interface") ||
 				strings.HasPrefix(dup.Name, "go.info.go.builtin") ||
-				strings.HasPrefix(dup.Name, "go.isstmt.go.builtin"))
+				strings.HasPrefix(dup.Name, "go.isstmt.go.builtin") ||
+				strings.HasPrefix(dup.Name, "go.debuglines"))
 			if !whitelist {
 				r.strictDupMsgs++
 			}