gocore: use dwarf->abi.Type map and type-walk deeper for direct ifaces

This change makes two major changes to typing the heap.

1. It matches up DWARF names to internal/abi.Type addresses via the
   DW_AT_Go_runtime_type attribute, which links the internal/abi.Type
   address to the DWARF entry for a type. This allows us to fully and
   unambiguously give every type an identity.

2. It fixes what appears to be a minor bug in the type-walk: direct
   interfaces that contain pointers (type is a *T and the data is also
   a *T or a [1]*T or a struct{ *T }) were passed on as interior
   pointers to the walk queue, but this could easily result in
   unmergable interior chunks and loss of type information. Instead, we
   know it's a pointer type then and there, and we can just walk it
   directly, queuing up the pointed-to type instead. This makes the
   type-walk just a little more precise.

Change-Id: I8842cdef5e92596c3c0b1365c5f5297604b83113
Reviewed-on: https://go-review.googlesource.com/c/debug/+/635737
Reviewed-by: Nicolas Hillegeer <aktau@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
Reviewed-by: Michael Pratt <mpratt@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
diff --git a/internal/core/process.go b/internal/core/process.go
index 22bb8b5..92bf9b3 100644
--- a/internal/core/process.go
+++ b/internal/core/process.go
@@ -273,7 +273,7 @@
 		return nil, fmt.Errorf("error reading args: %v", err)
 	}
 
-	syms, symErr := readSymbols(&mem, coreFile)
+	syms, symErr := readSymbols(&mem, coreFile, staticBase)
 
 	dwarf, dwarfErr := exeElf.DWARF()
 	if dwarfErr != nil {
@@ -735,7 +735,7 @@
 	return threads
 }
 
-func readSymbols(mem *splicedMemory, coreFile *os.File) (map[string]Address, error) {
+func readSymbols(mem *splicedMemory, coreFile *os.File, staticBase uint64) (map[string]Address, error) {
 	seen := map[*os.File]struct{}{
 		// Don't bother trying to read symbols from the core itself.
 		coreFile: struct{}{},
@@ -766,7 +766,7 @@
 			continue
 		}
 		for _, s := range syms {
-			allSyms[s.Name] = Address(s.Value)
+			allSyms[s.Name] = Address(s.Value).Add(int64(staticBase))
 		}
 	}
 
diff --git a/internal/gocore/dwarf.go b/internal/gocore/dwarf.go
index b9fead4..e3fc321 100644
--- a/internal/gocore/dwarf.go
+++ b/internal/gocore/dwarf.go
@@ -19,16 +19,22 @@
 )
 
 const (
-	AttrGoKind dwarf.Attr = 0x2900
+	AttrGoKind        dwarf.Attr = 0x2900
+	AttrGoRuntimeType dwarf.Attr = 0x2904
 )
 
-// read DWARF types from core dump.
-func readDWARFTypes(p *core.Process) (map[dwarf.Type]*Type, error) {
+func readDWARFTypes(p *core.Process) (map[dwarf.Type]*Type, map[core.Address]*Type, error) {
 	d, err := p.DWARF()
 	if err != nil {
-		return nil, fmt.Errorf("failed to read DWARF: %v", err)
+		return nil, nil, fmt.Errorf("failed to read DWARF: %v", err)
 	}
 	dwarfMap := make(map[dwarf.Type]*Type)
+	addrMap := make(map[core.Address]*Type)
+	syms, _ := p.Symbols() // It's OK to ignore the error. If we don't have symbols, that's OK; it's a soft error.
+	// It's OK if typBase is 0 and not present. We won't be able to type the heap, probably,
+	// but it may still be useful (though painful) to someone to try and debug the core, so don't
+	// error out here.
+	typBase := syms["runtime.types"]
 
 	// Make one of our own Types for each dwarf type.
 	r := d.Reader()
@@ -48,6 +54,17 @@
 			if goKind, ok := e.Val(AttrGoKind).(int64); ok {
 				t.goKind = reflect.Kind(goKind)
 			}
+			// Guard against typBase being zero. In reality, it's very unlikely for the DWARF
+			// to be present but typBase to be zero, but let's just be defensive. addrMap is
+			// only really necessary for typing the heap, which is "optional."
+			if typBase != 0 {
+				if offset, ok := e.Val(AttrGoRuntimeType).(uint64); ok {
+					// N.B. AttrGoRuntimeType is not defined for typedefs, so addrMap will
+					// always refer to the base type.
+					t.goAddr = typBase.Add(int64(offset))
+					addrMap[typBase.Add(int64(offset))] = t
+				}
+			}
 			dwarfMap[dt] = t
 			types = append(types, t)
 		}
@@ -83,7 +100,7 @@
 							Elem:  dwarfMap[arr.Type],
 						}
 					} else {
-						return nil, fmt.Errorf(
+						return nil, nil, fmt.Errorf(
 							"found a nil ftype for field %s.%s, type %s (%s) on ",
 							x.StructName, f.Name, f.Type, reflect.TypeOf(f.Type))
 					}
@@ -105,7 +122,7 @@
 		case *dwarf.TypedefType:
 			// handle these types in the loop below
 		default:
-			return nil, fmt.Errorf("unknown type %s %T", dt, dt)
+			return nil, nil, fmt.Errorf("unknown type %s %T", dt, dt)
 		}
 	}
 
@@ -164,7 +181,7 @@
 			t.Fields = nil
 		}
 	}
-	return dwarfMap, nil
+	return dwarfMap, addrMap, nil
 }
 
 func isNonGoCU(e *dwarf.Entry) bool {
@@ -347,8 +364,17 @@
 	return roots, nil
 }
 
+type dwarfVarKind int
+
+const (
+	dwarfVarUnknown dwarfVarKind = iota
+	dwarfParam
+	dwarfLocal
+)
+
 type dwarfVar struct {
 	lowPC, highPC core.Address
+	kind          dwarfVarKind
 	name          string
 	instr         []byte
 	typ           *Type
@@ -398,6 +424,13 @@
 		if e.Tag != dwarf.TagVariable && e.Tag != dwarf.TagFormalParameter {
 			continue
 		}
+		var kind dwarfVarKind
+		switch e.Tag {
+		case dwarf.TagFormalParameter:
+			kind = dwarfParam
+		case dwarf.TagVariable:
+			kind = dwarfLocal
+		}
 		aloc := e.AttrField(dwarf.AttrLocation)
 		if aloc == nil {
 			continue
@@ -440,6 +473,7 @@
 			vars[curfn] = append(vars[curfn], dwarfVar{
 				lowPC:  core.Address(e.LowPC + base),
 				highPC: core.Address(e.HighPC + base),
+				kind:   kind,
 				instr:  e.Instr,
 				name:   name,
 				typ:    dwarfTypeMap[dt],
diff --git a/internal/gocore/goroutine.go b/internal/gocore/goroutine.go
index 053e274..b3eebb6 100644
--- a/internal/gocore/goroutine.go
+++ b/internal/gocore/goroutine.go
@@ -88,15 +88,16 @@
 
 // A Func represents a Go function.
 type Func struct {
-	r         region // inferior region holding a runtime._func
-	module    *module
-	name      string
-	entry     core.Address
-	frameSize pcTab // map from pc to frame size at that pc
-	pcdata    []int32
-	funcdata  []core.Address
-	stackMap  pcTab // map from pc to stack map # (index into locals and args bitmaps)
-	closure   *Type // the type to use for closures of this function. Lazily allocated.
+	r              region // inferior region holding a runtime._func
+	module         *module
+	name           string
+	entry          core.Address
+	frameSize      pcTab // map from pc to frame size at that pc
+	pcdata         []int32
+	funcdata       []core.Address
+	stackMap       pcTab // map from pc to stack map # (index into locals and args bitmaps)
+	methodValueFor *Func // if != nil, this Func is a method value wrapper, and *Func is the method
+	closure        *Type // the type to use for closures of this function. Lazily allocated.
 }
 
 // Name returns the name of the function, as reported by DWARF.
diff --git a/internal/gocore/module.go b/internal/gocore/module.go
index 84994dc..214c7c0 100644
--- a/internal/gocore/module.go
+++ b/internal/gocore/module.go
@@ -22,6 +22,7 @@
 	n := ms.SliceLen()
 	var modules []*module
 	var fnTab funcTab
+	fnTab.byName = make(map[string]*Func)
 	for i := int64(0); i < n; i++ {
 		md := ms.SliceIndex(i).Deref()
 		modules = append(modules, readModule(md, &fnTab, rtTypeByName, rtConsts))
@@ -145,10 +146,12 @@
 
 type funcTab struct {
 	entries []funcTabEntry
+	byName  map[string]*Func
 }
 
 // add records that PCs in the range [min,max) map to function f.
 func (t *funcTab) add(min, max core.Address, f *Func) {
+	t.byName[f.name] = f
 	t.entries = append(t.entries, funcTabEntry{min: min, max: max, f: f})
 }
 
@@ -159,7 +162,7 @@
 	})
 }
 
-// Finds a Func for the given address.  Sort must have been called already.
+// find finds a Func for the given address. sort must have been called already.
 func (t *funcTab) find(pc core.Address) *Func {
 	n := sort.Search(len(t.entries), func(i int) bool {
 		return t.entries[i].max > pc
@@ -170,6 +173,11 @@
 	return t.entries[n].f
 }
 
+// findByName finds a Func for the given name.
+func (t *funcTab) findByName(name string) *Func {
+	return t.byName[name]
+}
+
 // a pcTab maps from an offset in a function to an int64.
 type pcTab struct {
 	entries []pcTabEntry
diff --git a/internal/gocore/process.go b/internal/gocore/process.go
index b00d04d..3a5b55c 100644
--- a/internal/gocore/process.go
+++ b/internal/gocore/process.go
@@ -45,12 +45,13 @@
 	// Maps core.Address to functions.
 	funcTab *funcTab
 
+	// DWARF variables in each function.
+	dwarfVars map[*Func][]dwarfVar
+
 	// Fundamental type mappings extracted from the core.
 	dwarfTypeMap map[dwarf.Type]*Type
-	rtTypeByName map[string]*Type
-
-	// rtTypeMap maps a core.Address to a *Type. Constructed incrementally, on-demand.
-	rtTypeMap map[core.Address]*Type
+	rtTypeByName map[string]*Type // Core runtime types only, from DWARF.
+	rtTypeMap    map[core.Address]*Type
 
 	// Memory usage breakdown.
 	stats *Statistic
@@ -79,24 +80,17 @@
 	p = &Process{proc: proc}
 
 	// Initialize everything that just depends on DWARF.
-	p.dwarfTypeMap, err = readDWARFTypes(proc)
+	p.dwarfTypeMap, p.rtTypeMap, err = readDWARFTypes(proc)
 	if err != nil {
 		return nil, err
 	}
 	p.rtTypeByName = make(map[string]*Type)
 	for dt, t := range p.dwarfTypeMap {
-		name := gocoreName(dt)
-		if _, ok := p.rtTypeByName[name]; ok {
-			// If a runtime type matches more than one DWARF type,
-			// pick one arbitrarily.
-			//
-			// This looks mostly harmless. DWARF has some redundant entries.
-			// For example, [32]uint8 appears twice.
-			//
-			// TODO(mknyszek): Investigate the reason for this duplication.
-			continue
+		// Make an index of some low-level types we'll need unambiguous access to.
+		if name := gocoreName(dt); strings.HasPrefix(name, "runtime.") || strings.HasPrefix(name, "internal/abi.") || strings.HasPrefix(name, "unsafe.") || !strings.Contains(name, ".") {
+			// Sometimes there's duplicate types in the DWARF. That's fine, they're always the same.
+			p.rtTypeByName[name] = t
 		}
-		p.rtTypeByName[name] = t
 	}
 	p.rtConsts, err = readConstants(proc)
 	if err != nil {
@@ -134,13 +128,13 @@
 	}
 
 	// Read stack and register variables from DWARF.
-	dwarfVars, err := readDWARFVars(proc, p.funcTab, p.dwarfTypeMap)
+	p.dwarfVars, err = readDWARFVars(proc, p.funcTab, p.dwarfTypeMap)
 	if err != nil {
 		return nil, err
 	}
 
 	// Read goroutines.
-	p.goroutines, err = readGoroutines(p, dwarfVars)
+	p.goroutines, err = readGoroutines(p, p.dwarfVars)
 	if err != nil {
 		return nil, err
 	}
@@ -177,18 +171,6 @@
 	return p.funcTab.find(pc)
 }
 
-func (p *Process) findType(name string) *Type {
-	typ := p.tryFindType(name)
-	if typ == nil {
-		panic("can't find type " + name)
-	}
-	return typ
-}
-
-func (p *Process) tryFindType(name string) *Type {
-	return p.rtTypeByName[name]
-}
-
 // arena is a summary of the size of components of a heapArena.
 type arena struct {
 	heapMin      core.Address
@@ -324,7 +306,7 @@
 	mallocHeaderSize := int64(p.rtConsts.get("runtime.mallocHeaderSize"))
 	maxSmallSize := int64(p.rtConsts.get("runtime.maxSmallSize"))
 
-	abiType := p.tryFindType("internal/abi.Type")
+	abiType := p.rtTypeByName["internal/abi.Type"]
 
 	// Process spans.
 	if pageSize%heapInfoSize != 0 {
@@ -420,7 +402,7 @@
 					&Root{
 						Name:  fmt.Sprintf("finalizer for %x", obj),
 						Addr:  sp.a,
-						Type:  p.findType("runtime.specialfinalizer"),
+						Type:  p.rtTypeByName["runtime.specialfinalizer"],
 						Frame: nil,
 					})
 				// TODO: these aren't really "globals", as they
@@ -502,7 +484,7 @@
 		case spanManual:
 			stats.manualSpanSize += spanSize
 			stats.manualAllocSize += spanSize
-			for x := core.Address(s.Field("manualFreeList").Cast(p.findType("uintptr")).Uintptr()); x != 0; x = p.proc.ReadPtr(x) {
+			for x := core.Address(s.Field("manualFreeList").Cast(p.rtTypeByName["uintptr"]).Uintptr()); x != 0; x = p.proc.ReadPtr(x) {
 				stats.manualAllocSize -= elemSize
 				stats.manualFreeSize += elemSize
 			}
@@ -753,7 +735,7 @@
 			r := &Root{
 				Name:  "unk",
 				Addr:  a,
-				Type:  p.findType("unsafe.Pointer"),
+				Type:  p.rtTypeByName["unsafe.Pointer"],
 				Frame: f,
 			}
 			f.roots = append(f.roots, r)
@@ -915,7 +897,7 @@
 		// runtime.getStackSize to detect errors when we are missing
 		// the stackmap.
 		if addr != 0 {
-			locals := region{p: p.proc, a: addr, typ: p.findType("runtime.stackmap")}
+			locals := region{p: p.proc, a: addr, typ: p.rtTypeByName["runtime.stackmap"]}
 			n := locals.Field("n").Int32()       // # of bitmaps
 			nbit := locals.Field("nbit").Int32() // # of bits per bitmap
 			idx, err := f.stackMap.find(off)
@@ -944,7 +926,7 @@
 	if x := int(p.rtConsts.get("internal/abi.FUNCDATA_ArgsPointerMaps")); x < len(f.funcdata) {
 		addr := f.funcdata[x]
 		if addr != 0 {
-			args := region{p: p.proc, a: addr, typ: p.findType("runtime.stackmap")}
+			args := region{p: p.proc, a: addr, typ: p.rtTypeByName["runtime.stackmap"]}
 			n := args.Field("n").Int32()       // # of bitmaps
 			nbit := args.Field("nbit").Int32() // # of bits per bitmap
 			idx, err := f.stackMap.find(off)
diff --git a/internal/gocore/type.go b/internal/gocore/type.go
index 7f66383..1d63740 100644
--- a/internal/gocore/type.go
+++ b/internal/gocore/type.go
@@ -7,7 +7,6 @@
 import (
 	"fmt"
 	"reflect"
-	"regexp"
 	"strings"
 
 	"golang.org/x/debug/internal/core"
@@ -20,10 +19,13 @@
 	Name string
 	Size int64
 	Kind Kind // common dwarf types.
-	// go-specific types obtained from AttrGoKind, such as string and slice.
+	// Go-specific types obtained from AttrGoKind, such as string and slice.
 	// Kind and gokind are not correspond one to one, both need to be preserved now.
 	// For example, slices are described in dwarf by a 3-field struct, so its Kind is Struct and its goKind is Slice.
 	goKind reflect.Kind
+	// Go-specific types obtained from AttrGoRuntimeType.
+	// May be nil if this type is not referenced by the DWARF.
+	goAddr core.Address
 
 	// Fields only valid for a subset of kinds.
 	Count  int64   // for kind == KindArray
@@ -142,7 +144,7 @@
 // findRuntimeType finds either abi.Type (Go 1.21+) or runtime._type.
 func (p *Process) findRuntimeType(a core.Address) runtimeType {
 	return runtimeType{
-		reg: region{p: p.proc, a: a, typ: p.tryFindType("internal/abi.Type")},
+		reg: region{p: p.proc, a: a, typ: p.rtTypeByName["internal/abi.Type"]},
 	}
 }
 
@@ -185,7 +187,7 @@
 // findItab finds either abi.ITab (Go 1.21+) or runtime.itab.
 func (p *Process) findItab() runtimeItab {
 	return runtimeItab{
-		typ: p.tryFindType("internal/abi.ITab"),
+		typ: p.rtTypeByName["internal/abi.ITab"],
 	}
 }
 
@@ -202,6 +204,7 @@
 	if t := p.rtTypeMap[a]; t != nil {
 		return t
 	}
+	// There's no corresponding DWARF type. Make our own.
 
 	// Read runtime._type.size
 	r := p.findRuntimeType(a)
@@ -250,48 +253,36 @@
 		}
 	}
 
-	// Find a Type that matches this type.
-	// (The matched type will be one constructed from DWARF info.)
-	// It must match name, size, and pointer bits.
-	t := p.rtTypeByName[name]
-	if t != nil && (size != t.Size || !equal(ptrs, t.ptrs())) {
-		t = nil
+	// Build the type from the name, size, and ptr/nonptr bits.
+	t := &Type{Name: name, Size: size, Kind: KindStruct}
+	n := t.Size / ptrSize
+
+	// Types to use for ptr/nonptr fields of runtime types which
+	// have no corresponding DWARF type.
+	ptr := p.rtTypeByName["unsafe.Pointer"]
+	nonptr := p.rtTypeByName["uintptr"]
+	if ptr == nil || nonptr == nil {
+		panic("ptr / nonptr standins missing")
 	}
-	if t == nil {
-		// There's no corresponding DWARF type.  Make our own.
-		t = &Type{Name: name, Size: size, Kind: KindStruct}
-		n := t.Size / ptrSize
 
-		// Types to use for ptr/nonptr fields of runtime types which
-		// have no corresponding DWARF type.
-		ptr := p.findType("unsafe.Pointer")
-		nonptr := p.findType("uintptr")
-		if ptr == nil || nonptr == nil {
-			panic("ptr / nonptr standins missing")
+	for i := int64(0); i < n; i++ {
+		typ := nonptr
+		if len(ptrs) > 0 && ptrs[0] == i*ptrSize {
+			typ = ptr
+			ptrs = ptrs[1:]
 		}
+		t.Fields = append(t.Fields, Field{
+			Name: fmt.Sprintf("f%d", i),
+			Off:  i * ptrSize,
+			Type: typ,
+		})
 
-		for i := int64(0); i < n; i++ {
-			typ := nonptr
-			if len(ptrs) > 0 && ptrs[0] == i*ptrSize {
-				typ = ptr
-				ptrs = ptrs[1:]
-			}
-			t.Fields = append(t.Fields, Field{
-				Name: fmt.Sprintf("f%d", i),
-				Off:  i * ptrSize,
-				Type: typ,
-			})
-
-		}
-		if t.Size%ptrSize != 0 {
-			// TODO: tail of <ptrSize data.
-		}
+	}
+	if t.Size%ptrSize != 0 {
+		// TODO: tail of <ptrSize data.
 	}
 
 	// Memoize.
-	if p.rtTypeMap == nil {
-		p.rtTypeMap = make(map[core.Address]*Type)
-	}
 	p.rtTypeMap[a] = t
 	return t
 }
@@ -585,7 +576,6 @@
 			// merged with the 0-offset typing.  TODO: make more use of this info.
 		}
 	}
-
 }
 
 type reader interface {
@@ -610,23 +600,8 @@
 	return fr.p.proc.ReadInt(a)
 }
 
-// Match wrapper function name for method value.
-// eg. main.(*Bar).func-fm, or main.Bar.func-fm.
-var methodRegexp = regexp.MustCompile(`(\w+)\.(\(\*)?(\w+)(\))?\.\w+\-fm$`)
-
-// Extract the type of the method value from its wrapper function name,
-// return the type named *main.Bar or main.Bar, in the previous cases.
-func extractTypeFromFunctionName(method string, p *Process) *Type {
-	if matches := methodRegexp.FindStringSubmatch(method); len(matches) == 5 {
-		var typeName string
-		if matches[2] == "(*" && matches[4] == ")" {
-			typeName = "*" + matches[1] + "." + matches[3]
-		} else {
-			typeName = matches[1] + "." + matches[3]
-		}
-		return p.rtTypeByName[typeName]
-	}
-	return nil
+func methodFromMethodValueWrapper(name string) (string, bool) {
+	return strings.CutSuffix(name, "-fm")
 }
 
 // ifaceIndir reports whether t is stored indirectly in an interface value.
@@ -683,12 +658,16 @@
 					}
 				}
 			}
-			if directTyp.Kind != KindFunc && directTyp.Kind != KindPtr {
-				panic(fmt.Sprintf("type of direct interface, originally %s (kind %s), isn't a pointer: %s (kind %s)", typ, typ.Kind, directTyp, directTyp.Kind))
+			if directTyp.Kind == KindPtr {
+				p.typeObject(data, directTyp, r, add)
+				break
 			}
-			break
+			if directTyp.Kind == KindFunc {
+				add(data, directTyp, 1)
+				break
+			}
+			panic(fmt.Sprintf("type of direct interface, originally %s (kind %s), isn't a pointer: %s (kind %s)", typ, typ.Kind, directTyp, directTyp.Kind))
 		}
-		add(data, directTyp, 1)
 	case KindString:
 		ptr := r.ReadPtr(a)
 		len := r.ReadInt(a.Add(ptrSize))
@@ -724,11 +703,20 @@
 			f.closure = ft
 		}
 		p.typeObject(closure, ft, r, add)
-		// handle the special case for method value.
+
+		// Handle the special case for method value.
 		// It's a single-entry closure laid out like {pc uintptr, x T}.
-		if typ := extractTypeFromFunctionName(f.name, p); typ != nil {
-			ptr := closure.Add(p.proc.PtrSize())
-			p.typeObject(ptr, typ, r, add)
+		if method, ok := methodFromMethodValueWrapper(f.name); ok {
+			mf := p.funcTab.findByName(method)
+			if mf != nil {
+				for _, v := range p.dwarfVars[mf] {
+					if v.kind == dwarfParam {
+						ptr := closure.Add(p.proc.PtrSize())
+						p.typeObject(ptr, v.typ, r, add)
+						break
+					}
+				}
+			}
 		}
 	case KindArray:
 		n := t.Elem.Size