[dev.link] cmd/{compile,link}: remove pcdata tables from pclntab_old

Move the pctables out of pclntab_old. Creates a new generator symbol,
runtime.pctab, which holds all the deduplicated pctables. Also, tightens
up some of the types in runtime.

Darwin, cmd/compile statistics:

alloc/op
Pclntab_GC                   26.4MB ± 0%    13.8MB ± 0%
allocs/op
Pclntab_GC                    89.9k ± 0%     86.4k ± 0%
liveB
Pclntab_GC                    25.5M ± 0%     24.2M ± 0%

No significant change in binary size.

Change-Id: I1560fd4421f8a210f8d4b508fbc54e1780e338f9
Reviewed-on: https://go-review.googlesource.com/c/go/+/248332
Run-TryBot: Jeremy Faller <jeremy@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/link/internal/ld/data.go b/src/cmd/link/internal/ld/data.go
index a551d46..2aecbfb 100644
--- a/src/cmd/link/internal/ld/data.go
+++ b/src/cmd/link/internal/ld/data.go
@@ -1925,6 +1925,7 @@
 	ldr.SetSymSect(ldr.LookupOrCreateSym("runtime.funcnametab", 0), sect)
 	ldr.SetSymSect(ldr.LookupOrCreateSym("runtime.cutab", 0), sect)
 	ldr.SetSymSect(ldr.LookupOrCreateSym("runtime.filetab", 0), sect)
+	ldr.SetSymSect(ldr.LookupOrCreateSym("runtime.pctab", 0), sect)
 	ldr.SetSymSect(ldr.LookupOrCreateSym("runtime.pclntab_old", 0), sect)
 	ldr.SetSymSect(ldr.LookupOrCreateSym("runtime.epclntab", 0), sect)
 	if ctxt.HeadType == objabi.Haix {
@@ -2511,6 +2512,7 @@
 	ctxt.defineInternal("runtime.funcnametab", sym.SRODATA)
 	ctxt.defineInternal("runtime.cutab", sym.SRODATA)
 	ctxt.defineInternal("runtime.filetab", sym.SRODATA)
+	ctxt.defineInternal("runtime.pctab", sym.SRODATA)
 	ctxt.defineInternal("runtime.pclntab_old", sym.SRODATA)
 	ctxt.xdefine("runtime.epclntab", sym.SRODATA, int64(pclntab.Vaddr+pclntab.Length))
 	ctxt.xdefine("runtime.noptrdata", sym.SNOPTRDATA, int64(noptr.Vaddr))
diff --git a/src/cmd/link/internal/ld/pcln.go b/src/cmd/link/internal/ld/pcln.go
index e9fd593..576f1c3 100644
--- a/src/cmd/link/internal/ld/pcln.go
+++ b/src/cmd/link/internal/ld/pcln.go
@@ -43,6 +43,7 @@
 	findfunctab loader.Sym
 	cutab       loader.Sym
 	filetab     loader.Sym
+	pctab       loader.Sym
 
 	// The number of functions + number of TEXT sections - 1. This is such an
 	// unexpected value because platforms that have more than one TEXT section
@@ -273,10 +274,11 @@
 		off = writeSymOffset(off, state.funcnametab)
 		off = writeSymOffset(off, state.cutab)
 		off = writeSymOffset(off, state.filetab)
+		off = writeSymOffset(off, state.pctab)
 		off = writeSymOffset(off, state.pclntab)
 	}
 
-	size := int64(8 + 6*ctxt.Arch.PtrSize)
+	size := int64(8 + 7*ctxt.Arch.PtrSize)
 	state.pcheader = state.addGeneratedSym(ctxt, "runtime.pcheader", size, writeHeader)
 }
 
@@ -463,6 +465,68 @@
 	return cuOffsets
 }
 
+// generatePctab creates the runtime.pctab variable, holding all the
+// deduplicated pcdata.
+func (state *pclntab) generatePctab(ctxt *Link, container loader.Bitmap) {
+	ldr := ctxt.loader
+
+	// Pctab offsets of 0 are considered invalid in the runtime. We respect
+	// that by just padding a single byte at the beginning of runtime.pctab,
+	// that way no real offsets can be zero.
+	size := int64(1)
+
+	// Walk the functions, finding offset to store each pcdata.
+	seen := make(map[loader.Sym]struct{})
+	saveOffset := func(pcSym loader.Sym) {
+		if _, ok := seen[pcSym]; !ok {
+			datSize := ldr.SymSize(pcSym)
+			if datSize != 0 {
+				ldr.SetSymValue(pcSym, size)
+			} else {
+				// Invalid PC data, record as zero.
+				ldr.SetSymValue(pcSym, 0)
+			}
+			size += datSize
+			seen[pcSym] = struct{}{}
+		}
+	}
+	for _, s := range ctxt.Textp {
+		if !emitPcln(ctxt, s, container) {
+			continue
+		}
+		fi := ldr.FuncInfo(s)
+		if !fi.Valid() {
+			continue
+		}
+		fi.Preload()
+
+		pcSyms := []loader.Sym{fi.Pcsp(), fi.Pcfile(), fi.Pcline()}
+		for _, pcSym := range pcSyms {
+			saveOffset(pcSym)
+		}
+		for _, pcSym := range fi.Pcdata() {
+			saveOffset(pcSym)
+		}
+		if fi.NumInlTree() > 0 {
+			saveOffset(fi.Pcinline())
+		}
+	}
+
+	// TODO: There is no reason we need a generator for this variable, and it
+	// could be moved to a carrier symbol. However, carrier symbols containing
+	// carrier symbols don't work yet (as of Aug 2020). Once this is fixed,
+	// runtime.pctab could just be a carrier sym.
+	writePctab := func(ctxt *Link, s loader.Sym) {
+		ldr := ctxt.loader
+		sb := ldr.MakeSymbolUpdater(s)
+		for sym := range seen {
+			sb.SetBytesAt(ldr.SymValue(sym), ldr.Data(sym))
+		}
+	}
+
+	state.pctab = state.addGeneratedSym(ctxt, "runtime.pctab", size, writePctab)
+}
+
 // pclntab initializes the pclntab symbol with
 // runtime function and file name information.
 
@@ -494,6 +558,9 @@
 	//      runtime.filetab
 	//        []null terminated filename strings
 	//
+	//      runtime.pctab
+	//        []byte of deduplicated pc data.
+	//
 	//      runtime.pclntab_old
 	//        function table, alternating PC and offset to func struct [each entry thearch.ptrsize bytes]
 	//        end PC [thearch.ptrsize bytes]
@@ -514,6 +581,7 @@
 	state.generatePCHeader(ctxt)
 	state.generateFuncnametab(ctxt, container)
 	cuOffsets := state.generateFilenameTabs(ctxt, compUnits, container)
+	state.generatePctab(ctxt, container)
 
 	funcdataBytes := int64(0)
 	ldr.SetCarrierSym(state.pclntab, state.carrier)
@@ -525,21 +593,6 @@
 
 	ftab.Grow(int64(state.nfunc)*2*int64(ctxt.Arch.PtrSize) + int64(ctxt.Arch.PtrSize) + 4)
 
-	szHint := len(ctxt.Textp) * 2
-	pctaboff := make(map[string]uint32, szHint)
-	writepctab := func(off int32, p []byte) int32 {
-		start, ok := pctaboff[string(p)]
-		if !ok {
-			if len(p) > 0 {
-				start = uint32(len(ftab.Data()))
-				ftab.AddBytes(p)
-			}
-			pctaboff[string(p)] = start
-		}
-		newoff := int32(ftab.SetUint32(ctxt.Arch, int64(off), start))
-		return newoff
-	}
-
 	setAddr := (*loader.SymbolBuilder).SetAddrPlus
 	if ctxt.IsExe() && ctxt.IsInternal() {
 		// Internal linking static executable. At this point the function
@@ -555,10 +608,6 @@
 		}
 	}
 
-	pcsp := sym.Pcdata{}
-	pcfile := sym.Pcdata{}
-	pcline := sym.Pcdata{}
-	pcdata := []sym.Pcdata{}
 	funcdata := []loader.Sym{}
 	funcdataoff := []int64{}
 
@@ -583,18 +632,13 @@
 		}
 		prevFunc = s
 
-		pcsp.P = pcsp.P[:0]
-		pcline.P = pcline.P[:0]
-		pcfile.P = pcfile.P[:0]
-		pcdata = pcdata[:0]
+		var numPCData int32
 		funcdataoff = funcdataoff[:0]
 		funcdata = funcdata[:0]
 		fi := ldr.FuncInfo(s)
 		if fi.Valid() {
 			fi.Preload()
-			for _, dataSym := range fi.Pcdata() {
-				pcdata = append(pcdata, sym.Pcdata{P: ldr.Data(dataSym)})
-			}
+			numPCData = int32(len(fi.Pcdata()))
 			nfd := fi.NumFuncdataoff()
 			for i := uint32(0); i < nfd; i++ {
 				funcdataoff = append(funcdataoff, fi.Funcdataoff(int(i)))
@@ -602,15 +646,12 @@
 			funcdata = fi.Funcdata(funcdata)
 		}
 
+		writeInlPCData := false
 		if fi.Valid() && fi.NumInlTree() > 0 {
-
-			if len(pcdata) <= objabi.PCDATA_InlTreeIndex {
-				// Create inlining pcdata table.
-				newpcdata := make([]sym.Pcdata, objabi.PCDATA_InlTreeIndex+1)
-				copy(newpcdata, pcdata)
-				pcdata = newpcdata
+			writeInlPCData = true
+			if numPCData <= objabi.PCDATA_InlTreeIndex {
+				numPCData = objabi.PCDATA_InlTreeIndex + 1
 			}
-
 			if len(funcdataoff) <= objabi.FUNCDATA_InlTree {
 				// Create inline tree funcdata.
 				newfuncdata := make([]loader.Sym, objabi.FUNCDATA_InlTree+1)
@@ -635,7 +676,7 @@
 		// fixed size of struct, checked below
 		off := funcstart
 
-		end := funcstart + int32(ctxt.Arch.PtrSize) + 3*4 + 6*4 + int32(len(pcdata))*4 + int32(len(funcdata))*int32(ctxt.Arch.PtrSize)
+		end := funcstart + int32(ctxt.Arch.PtrSize) + 3*4 + 6*4 + numPCData*4 + int32(len(funcdata))*int32(ctxt.Arch.PtrSize)
 		if len(funcdata) > 0 && (end&int32(ctxt.Arch.PtrSize-1) != 0) {
 			end += 4
 		}
@@ -664,23 +705,21 @@
 		off = int32(ftab.SetUint32(ctxt.Arch, int64(off), deferreturn))
 
 		cu := ldr.SymUnit(s)
-		if fi.Valid() {
-			pcsp = sym.Pcdata{P: ldr.Data(fi.Pcsp())}
-			pcfile = sym.Pcdata{P: ldr.Data(fi.Pcfile())}
-			pcline = sym.Pcdata{P: ldr.Data(fi.Pcline())}
-		}
 
 		if fi.Valid() && fi.NumInlTree() > 0 {
 			its := oldState.genInlTreeSym(cu, fi, ctxt.Arch, state)
 			funcdata[objabi.FUNCDATA_InlTree] = its
-			pcdata[objabi.PCDATA_InlTreeIndex] = sym.Pcdata{P: ldr.Data(fi.Pcinline())}
 		}
 
 		// pcdata
-		off = writepctab(off, pcsp.P)
-		off = writepctab(off, pcfile.P)
-		off = writepctab(off, pcline.P)
-		off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(len(pcdata))))
+		if fi.Valid() {
+			off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(ldr.SymValue(fi.Pcsp()))))
+			off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(ldr.SymValue(fi.Pcfile()))))
+			off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(ldr.SymValue(fi.Pcline()))))
+		} else {
+			off += 12
+		}
+		off = int32(ftab.SetUint32(ctxt.Arch, int64(off), uint32(numPCData)))
 
 		// Store the offset to compilation unit's file table.
 		cuIdx := ^uint32(0)
@@ -700,9 +739,17 @@
 
 		// nfuncdata must be the final entry.
 		off = int32(ftab.SetUint8(ctxt.Arch, int64(off), uint8(len(funcdata))))
-		for i := range pcdata {
-			off = writepctab(off, pcdata[i].P)
+
+		// Output the pcdata.
+		if fi.Valid() {
+			for i, pcSym := range fi.Pcdata() {
+				ftab.SetUint32(ctxt.Arch, int64(off+int32(i*4)), uint32(ldr.SymValue(pcSym)))
+			}
+			if writeInlPCData {
+				ftab.SetUint32(ctxt.Arch, int64(off+objabi.PCDATA_InlTreeIndex*4), uint32(ldr.SymValue(fi.Pcinline())))
+			}
 		}
+		off += numPCData * 4
 
 		// funcdata, must be pointer-aligned and we're only int32-aligned.
 		// Missing funcdata will be 0 (nil pointer).
@@ -724,7 +771,7 @@
 		}
 
 		if off != end {
-			ctxt.Errorf(s, "bad math in functab: funcstart=%d off=%d but end=%d (npcdata=%d nfuncdata=%d ptrsize=%d)", funcstart, off, end, len(pcdata), len(funcdata), ctxt.Arch.PtrSize)
+			ctxt.Errorf(s, "bad math in functab: funcstart=%d off=%d but end=%d (npcdata=%d nfuncdata=%d ptrsize=%d)", funcstart, off, end, numPCData, len(funcdata), ctxt.Arch.PtrSize)
 			errorexit()
 		}
 
diff --git a/src/cmd/link/internal/ld/symtab.go b/src/cmd/link/internal/ld/symtab.go
index d05b98f..520aaa4 100644
--- a/src/cmd/link/internal/ld/symtab.go
+++ b/src/cmd/link/internal/ld/symtab.go
@@ -627,6 +627,10 @@
 	moduledata.AddAddr(ctxt.Arch, pcln.filetab)
 	moduledata.AddUint(ctxt.Arch, uint64(ldr.SymSize(pcln.filetab)))
 	moduledata.AddUint(ctxt.Arch, uint64(ldr.SymSize(pcln.filetab)))
+	// The pctab slice
+	moduledata.AddAddr(ctxt.Arch, pcln.pctab)
+	moduledata.AddUint(ctxt.Arch, uint64(ldr.SymSize(pcln.pctab)))
+	moduledata.AddUint(ctxt.Arch, uint64(ldr.SymSize(pcln.pctab)))
 	// The pclntab slice
 	moduledata.AddAddr(ctxt.Arch, pcln.pclntab)
 	moduledata.AddUint(ctxt.Arch, uint64(ldr.SymSize(pcln.pclntab)))
diff --git a/src/cmd/link/internal/loader/symbolbuilder.go b/src/cmd/link/internal/loader/symbolbuilder.go
index e14d89a..c0c723d 100644
--- a/src/cmd/link/internal/loader/symbolbuilder.go
+++ b/src/cmd/link/internal/loader/symbolbuilder.go
@@ -336,6 +336,15 @@
 	return r
 }
 
+func (sb *SymbolBuilder) SetBytesAt(off int64, b []byte) int64 {
+	datLen := int64(len(b))
+	if off+datLen > int64(len(sb.data)) {
+		panic("attempt to write past end of buffer")
+	}
+	copy(sb.data[off:off+datLen], b)
+	return off + datLen
+}
+
 func (sb *SymbolBuilder) addSymRef(tgt Sym, add int64, typ objabi.RelocType, rsize int) int64 {
 	if sb.kind == 0 {
 		sb.kind = sym.SDATA
diff --git a/src/cmd/link/internal/sym/symbol.go b/src/cmd/link/internal/sym/symbol.go
index 1a4165e..70cf36a 100644
--- a/src/cmd/link/internal/sym/symbol.go
+++ b/src/cmd/link/internal/sym/symbol.go
@@ -33,7 +33,3 @@
 	}
 	return ^obj.ABI(0), false
 }
-
-type Pcdata struct {
-	P []byte
-}
diff --git a/src/debug/gosym/pclntab.go b/src/debug/gosym/pclntab.go
index 21edddd..a72f984 100644
--- a/src/debug/gosym/pclntab.go
+++ b/src/debug/gosym/pclntab.go
@@ -58,6 +58,7 @@
 	functab     []byte
 	nfunctab    uint32
 	filetab     []byte
+	pctab       []byte // points to the pctables.
 	nfiletab    uint32
 	funcNames   map[uint32]string // cache the function names
 	strings     map[uint32]string // interned substrings of Data, keyed by offset
@@ -235,6 +236,8 @@
 		offset = t.uintptr(t.Data[8+4*t.ptrsize:])
 		t.filetab = t.Data[offset:]
 		offset = t.uintptr(t.Data[8+5*t.ptrsize:])
+		t.pctab = t.Data[offset:]
+		offset = t.uintptr(t.Data[8+6*t.ptrsize:])
 		t.funcdata = t.Data[offset:]
 		t.functab = t.Data[offset:]
 		functabsize := t.nfunctab*2*t.ptrsize + t.ptrsize
@@ -244,6 +247,7 @@
 		t.funcdata = t.Data
 		t.funcnametab = t.Data
 		t.functab = t.Data[8+t.ptrsize:]
+		t.pctab = t.Data
 		functabsize := t.nfunctab*2*t.ptrsize + t.ptrsize
 		fileoff := t.binary.Uint32(t.functab[functabsize:])
 		t.functab = t.functab[:functabsize]
@@ -373,7 +377,7 @@
 // off is the offset to the beginning of the pc-value table,
 // and entry is the start PC for the corresponding function.
 func (t *LineTable) pcvalue(off uint32, entry, targetpc uint64) int32 {
-	p := t.funcdata[off:]
+	p := t.pctab[off:]
 
 	val := int32(-1)
 	pc := entry
@@ -396,8 +400,8 @@
 		return 0
 	}
 
-	fp := t.funcdata[filetab:]
-	fl := t.funcdata[linetab:]
+	fp := t.pctab[filetab:]
+	fl := t.pctab[linetab:]
 	fileVal := int32(-1)
 	filePC := entry
 	lineVal := int32(-1)
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
index 5a79c7e..755c409 100644
--- a/src/runtime/runtime2.go
+++ b/src/runtime/runtime2.go
@@ -800,10 +800,10 @@
 	args        int32  // in/out args size
 	deferreturn uint32 // offset of start of a deferreturn call instruction from entry, if any.
 
-	pcsp      int32
-	pcfile    int32
-	pcln      int32
-	npcdata   int32
+	pcsp      uint32
+	pcfile    uint32
+	pcln      uint32
+	npcdata   uint32
 	cuOffset  uint32  // runtime.cutab offset of this function's CU
 	funcID    funcID  // set for certain special runtime functions
 	_         [2]byte // pad
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index fbd9315..0610f75 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -345,6 +345,7 @@
 	funcnameOffset uintptr // offset to the funcnametab variable from pcHeader
 	cuOffset       uintptr // offset to the cutab variable from pcHeader
 	filetabOffset  uintptr // offset to the filetab variable from pcHeader
+	pctabOffset    uintptr // offset to the pctab varible from pcHeader
 	pclnOffset     uintptr // offset to the pclntab variable from pcHeader
 }
 
@@ -358,6 +359,7 @@
 	funcnametab  []byte
 	cutab        []uint32
 	filetab      []byte
+	pctab        []byte
 	pclntable    []byte
 	ftab         []functab
 	findfunctab  uintptr
@@ -721,7 +723,7 @@
 type pcvalueCacheEnt struct {
 	// targetpc and off together are the key of this cache entry.
 	targetpc uintptr
-	off      int32
+	off      uint32
 	// val is the value of this cached pcvalue entry.
 	val int32
 }
@@ -736,7 +738,7 @@
 
 // Returns the PCData value, and the PC where this value starts.
 // TODO: the start PC is returned only when cache is nil.
-func pcvalue(f funcInfo, off int32, targetpc uintptr, cache *pcvalueCache, strict bool) (int32, uintptr) {
+func pcvalue(f funcInfo, off uint32, targetpc uintptr, cache *pcvalueCache, strict bool) (int32, uintptr) {
 	if off == 0 {
 		return -1, 0
 	}
@@ -770,7 +772,7 @@
 		return -1, 0
 	}
 	datap := f.datap
-	p := datap.pclntable[off:]
+	p := datap.pctab[off:]
 	pc := f.entry
 	prevpc := pc
 	val := int32(-1)
@@ -812,7 +814,7 @@
 
 	print("runtime: invalid pc-encoded table f=", funcname(f), " pc=", hex(pc), " targetpc=", hex(targetpc), " tab=", p, "\n")
 
-	p = datap.pclntable[off:]
+	p = datap.pctab[off:]
 	pc = f.entry
 	val = -1
 	for {
@@ -893,7 +895,7 @@
 // funcMaxSPDelta returns the maximum spdelta at any point in f.
 func funcMaxSPDelta(f funcInfo) int32 {
 	datap := f.datap
-	p := datap.pclntable[f.pcsp:]
+	p := datap.pctab[f.pcsp:]
 	pc := f.entry
 	val := int32(-1)
 	max := int32(0)
@@ -909,20 +911,20 @@
 	}
 }
 
-func pcdatastart(f funcInfo, table int32) int32 {
-	return *(*int32)(add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(table)*4))
+func pcdatastart(f funcInfo, table uint32) uint32 {
+	return *(*uint32)(add(unsafe.Pointer(&f.nfuncdata), unsafe.Sizeof(f.nfuncdata)+uintptr(table)*4))
 }
 
-func pcdatavalue(f funcInfo, table int32, targetpc uintptr, cache *pcvalueCache) int32 {
-	if table < 0 || table >= f.npcdata {
+func pcdatavalue(f funcInfo, table uint32, targetpc uintptr, cache *pcvalueCache) int32 {
+	if table >= f.npcdata {
 		return -1
 	}
 	r, _ := pcvalue(f, pcdatastart(f, table), targetpc, cache, true)
 	return r
 }
 
-func pcdatavalue1(f funcInfo, table int32, targetpc uintptr, cache *pcvalueCache, strict bool) int32 {
-	if table < 0 || table >= f.npcdata {
+func pcdatavalue1(f funcInfo, table uint32, targetpc uintptr, cache *pcvalueCache, strict bool) int32 {
+	if table >= f.npcdata {
 		return -1
 	}
 	r, _ := pcvalue(f, pcdatastart(f, table), targetpc, cache, strict)
@@ -931,8 +933,8 @@
 
 // Like pcdatavalue, but also return the start PC of this PCData value.
 // It doesn't take a cache.
-func pcdatavalue2(f funcInfo, table int32, targetpc uintptr) (int32, uintptr) {
-	if table < 0 || table >= f.npcdata {
+func pcdatavalue2(f funcInfo, table uint32, targetpc uintptr) (int32, uintptr) {
+	if table >= f.npcdata {
 		return -1, 0
 	}
 	return pcvalue(f, pcdatastart(f, table), targetpc, nil, true)