gocore: add gocore reader Add a library that can extract a bunch of information from a Go core dump. Update golang/go#21356 Change-Id: I9997c80452a4ad205d0799f89b2e268b4a5a9e2f Reviewed-on: https://go-review.googlesource.com/73191 Reviewed-by: Austin Clements <austin@google.com>
diff --git a/gocore/dwarf.go b/gocore/dwarf.go new file mode 100644 index 0000000..6aa0fdd --- /dev/null +++ b/gocore/dwarf.go
@@ -0,0 +1,541 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "debug/dwarf" + "fmt" + "sort" + "strings" + + "golang.org/x/debug/core" +) + +// read DWARF types from core dump. +func (p *Process) readDWARFTypes() { + d, _ := p.proc.DWARF() + + // Make one of our own Types for each dwarf type. + r := d.Reader() + var types []*Type + for e, err := r.Next(); e != nil && err == nil; e, err = r.Next() { + switch e.Tag { + case dwarf.TagArrayType, dwarf.TagPointerType, dwarf.TagStructType, dwarf.TagBaseType, dwarf.TagSubroutineType, dwarf.TagTypedef: + dt, err := d.Type(e.Offset) + if err != nil { + continue + } + t := &Type{Name: gocoreName(dt), Size: dwarfSize(dt, p.proc.PtrSize())} + p.dwarfMap[dt] = t + types = append(types, t) + } + } + + // Fill in fields of types. Postponed until now so we're sure + // we have all the Types allocated and available. + for dt, t := range p.dwarfMap { + switch x := dt.(type) { + case *dwarf.ArrayType: + t.Kind = KindArray + t.Elem = p.dwarfMap[x.Type] + t.Count = x.Count + case *dwarf.PtrType: + t.Kind = KindPtr + // unsafe.Pointer has a void base type. 
+ if _, ok := x.Type.(*dwarf.VoidType); !ok { + t.Elem = p.dwarfMap[x.Type] + } + case *dwarf.StructType: + t.Kind = KindStruct + for _, f := range x.Field { + t.Fields = append(t.Fields, Field{Name: f.Name, Type: p.dwarfMap[f.Type], Off: f.ByteOffset}) + } + case *dwarf.BoolType: + t.Kind = KindBool + case *dwarf.IntType: + t.Kind = KindInt + case *dwarf.UintType: + t.Kind = KindUint + case *dwarf.FloatType: + t.Kind = KindFloat + case *dwarf.ComplexType: + t.Kind = KindComplex + case *dwarf.FuncType: + t.Kind = KindFunc + case *dwarf.TypedefType: + // handle these types in the loop below + default: + panic(fmt.Sprintf("unknown type %s %T", dt, dt)) + } + } + + // Detect strings & slices + for _, t := range types { + if t.Kind != KindStruct { + continue + } + if t.Name == "string" { // TODO: also "struct runtime.stringStructDWARF" ? + t.Kind = KindString + t.Elem = t.Fields[0].Type.Elem // TODO: check that it is always uint8. + t.Fields = nil + } + if len(t.Name) >= 9 && t.Name[:9] == "struct []" || + len(t.Name) >= 2 && t.Name[:2] == "[]" { + t.Kind = KindSlice + t.Elem = t.Fields[0].Type.Elem + t.Fields = nil + } + } + + // Copy info from base types into typedefs. + r = d.Reader() + for dt, t := range p.dwarfMap { + tt, ok := dt.(*dwarf.TypedefType) + if !ok { + continue + } + base := tt.Type + // Walk typedef chain until we reach a non-typedef type. + for { + if x, ok := base.(*dwarf.TypedefType); ok { + base = x.Type + continue + } + break + } + bt := p.dwarfMap[base] + + // Copy type info from base. Everything except the name. + name := t.Name + *t = *bt + t.Name = name + + // Detect some special types. If the base is some particular type, + // then the alias gets marked as special. + // We have aliases like: + // interface {} -> struct runtime.eface + // error -> struct runtime.iface + // Note: the base itself does not get marked as special. + // (Unlike strings and slices, where they do.) 
+ if bt.Name == "runtime.eface" { + t.Kind = KindEface + t.Fields = nil + } + if bt.Name == "runtime.iface" { + t.Kind = KindIface + t.Fields = nil + } + } + + // Make a runtime name -> Type map for existing DWARF types. + p.runtimeNameMap = map[string][]*Type{} + for dt, t := range p.dwarfMap { + name := runtimeName(dt) + p.runtimeNameMap[name] = append(p.runtimeNameMap[name], t) + } + + // Construct the runtime.specialfinalizer type. It won't be found + // in DWARF before 1.10 because it does not appear in the type of any variable. + // type specialfinalizer struct { + // special special + // fn *funcval + // nret uintptr + // fint *_type + // ot *ptrtype + // } + if p.runtimeNameMap["runtime.specialfinalizer"] == nil { + special := p.findType("runtime.special") + p.runtimeNameMap["runtime.specialfinalizer"] = []*Type{ + &Type{ + Name: "runtime.specialfinalizer", + Size: special.Size + 4*p.proc.PtrSize(), + Kind: KindStruct, + Fields: []Field{ + Field{ + Name: "special", + Off: 0, + Type: special, + }, + Field{ + Name: "fn", + Off: special.Size, + Type: p.findType("*runtime.funcval"), + }, + Field{ + Name: "nret", + Off: special.Size + p.proc.PtrSize(), + Type: p.findType("uintptr"), + }, + Field{ + Name: "fint", + Off: special.Size + 2*p.proc.PtrSize(), + Type: p.findType("*runtime._type"), + }, + Field{ + Name: "ot", + Off: special.Size + 3*p.proc.PtrSize(), + Type: p.findType("*runtime.ptrtype"), + }, + }, + }, + } + } +} + +// dwarfSize is used to compute the size of a DWARF type. +// dt.Size() is wrong when it returns a negative number. +// This function implements just enough to correct the bad behavior. +func dwarfSize(dt dwarf.Type, ptrSize int64) int64 { + s := dt.Size() + if s >= 0 { + return s + } + switch x := dt.(type) { + case *dwarf.FuncType: + return ptrSize // Fix for issue 21097. 
+ case *dwarf.ArrayType: + return x.Count * dwarfSize(x.Type, ptrSize) + case *dwarf.TypedefType: + return dwarfSize(x.Type, ptrSize) + default: + panic(fmt.Sprintf("unhandled: %T", x)) + } +} + +// gocoreName generates the name this package uses to refer to a dwarf type. +// This name differs from the dwarf name in that it stays closer to the Go name for the type. +// For instance (dwarf name -> gocoreName) +// struct runtime.siginfo -> runtime.siginfo +// *void -> unsafe.Pointer +// struct struct { runtime.signalLock uint32; runtime.hz int32 } -> struct { signalLock uint32; hz int32 } +func gocoreName(dt dwarf.Type) string { + switch x := dt.(type) { + case *dwarf.PtrType: + if _, ok := x.Type.(*dwarf.VoidType); ok { + return "unsafe.Pointer" + } + return "*" + gocoreName(x.Type) + case *dwarf.ArrayType: + return fmt.Sprintf("[%d]%s", x.Count, gocoreName(x.Type)) + case *dwarf.StructType: + if !strings.HasPrefix(x.StructName, "struct {") { + // This is a named type, return that name. + return x.StructName + } + // Build gocore name from the DWARF fields. + s := "struct {" + first := true + for _, f := range x.Field { + if !first { + s += ";" + } + name := f.Name + if i := strings.Index(name, "."); i >= 0 { + // Remove pkg path from field names. + name = name[i+1:] + } + s += fmt.Sprintf(" %s %s", name, gocoreName(f.Type)) + first = false + } + s += " }" + return s + default: + return dt.String() + } +} + +// Generate the name the runtime uses for a dwarf type. The DWARF generator +// and the runtime use slightly different names for the same underlying type. +func runtimeName(dt dwarf.Type) string { + switch x := dt.(type) { + case *dwarf.PtrType: + if _, ok := x.Type.(*dwarf.VoidType); ok { + return "unsafe.Pointer" + } + return "*" + runtimeName(x.Type) + case *dwarf.ArrayType: + return fmt.Sprintf("[%d]%s", x.Count, runtimeName(x.Type)) + case *dwarf.StructType: + if !strings.HasPrefix(x.StructName, "struct {") { + // This is a named type, return that name. 
+ return x.StructName + } + // Figure out which fields have anonymous names. + var anon []bool + for _, f := range strings.Split(x.StructName[8:len(x.StructName)-1], ";") { + f = strings.TrimSpace(f) + anon = append(anon, !strings.Contains(f, " ")) + // TODO: this isn't perfect. If the field type has a space in it, + // then this logic doesn't work. Need to search for keyword for + // field type, like "interface", "struct", ... + } + // Make sure anon is long enough. This probably never triggers. + for len(anon) < len(x.Field) { + anon = append(anon, false) + } + + // Build runtime name from the DWARF fields. + s := "struct {" + first := true + for _, f := range x.Field { + if !first { + s += ";" + } + name := f.Name + if i := strings.Index(name, "."); i >= 0 { + name = name[i+1:] + } + if anon[0] { + s += fmt.Sprintf(" %s", runtimeName(f.Type)) + } else { + s += fmt.Sprintf(" %s %s", name, runtimeName(f.Type)) + } + first = false + anon = anon[1:] + } + s += " }" + return s + default: + name := dt.String() + if i := strings.LastIndex(name, "/"); i >= 0 { + name = name[i+1:] // Runtime uses only last name in package path. + } + return name + } +} + +// readRuntimeConstants populates the p.rtConstants map. +func (p *Process) readRuntimeConstants() { + p.rtConstants = map[string]int64{} + + // Hardcoded values for Go 1.9. + // (Go did not have constants in DWARF before 1.10.) + m := p.rtConstants + m["_MSpanDead"] = 0 + m["_MSpanInUse"] = 1 + m["_MSpanManual"] = 2 + m["_MSpanFree"] = 3 + m["_Gidle"] = 0 + m["_Grunnable"] = 1 + m["_Grunning"] = 2 + m["_Gsyscall"] = 3 + m["_Gwaiting"] = 4 + m["_Gdead"] = 6 + m["_Gscan"] = 0x1000 + m["_PCDATA_StackMapIndex"] = 0 + m["_FUNCDATA_LocalsPointerMaps"] = 1 + m["_FUNCDATA_ArgsPointerMaps"] = 0 + m["tflagExtraStar"] = 1 << 1 + m["kindGCProg"] = 1 << 6 + m["kindDirectIface"] = 1 << 5 + m["_PageSize"] = 1 << 13 + m["_KindSpecialFinalizer"] = 1 + + // From 1.10, these constants are recorded in DWARF records. 
+ d, _ := p.proc.DWARF() + r := d.Reader() + for e, err := r.Next(); e != nil && err == nil; e, err = r.Next() { + if e.Tag != dwarf.TagConstant { + continue + } + name := e.AttrField(dwarf.AttrName).Val.(string) + if !strings.HasPrefix(name, "runtime.") { + continue + } + name = name[8:] + c := e.AttrField(dwarf.AttrConstValue) + if c == nil { + continue + } + p.rtConstants[name] = c.Val.(int64) + } +} + +const ( + _DW_OP_addr = 0x03 + _DW_OP_call_frame_cfa = 0x9c + _DW_OP_plus = 0x22 + _DW_OP_consts = 0x11 +) + +func (p *Process) readGlobals() { + d, _ := p.proc.DWARF() + r := d.Reader() + for e, err := r.Next(); e != nil && err == nil; e, err = r.Next() { + if e.Tag != dwarf.TagVariable { + continue + } + loc := e.AttrField(dwarf.AttrLocation).Val.([]byte) + if len(loc) == 0 || loc[0] != _DW_OP_addr { + continue + } + var a core.Address + if p.proc.PtrSize() == 8 { + a = core.Address(p.proc.ByteOrder().Uint64(loc[1:])) + } else { + a = core.Address(p.proc.ByteOrder().Uint32(loc[1:])) + } + if !p.proc.Writeable(a) { + // Read-only globals can't have heap pointers. + // TODO: keep roots around anyway? + continue + } + dt, err := d.Type(e.AttrField(dwarf.AttrType).Val.(dwarf.Offset)) + if err != nil { + panic(err) + } + if _, ok := dt.(*dwarf.UnspecifiedType); ok { + continue // Ignore markers like data/edata. + } + p.globals = append(p.globals, &Root{ + Name: e.AttrField(dwarf.AttrName).Val.(string), + Addr: a, + Type: p.dwarfMap[dt], + Frame: nil, + }) + } +} + +func (p *Process) readStackVars() { + type Var struct { + name string + off int64 + typ *Type + } + vars := map[*Func][]Var{} + var curfn *Func + d, _ := p.proc.DWARF() + r := d.Reader() + for e, err := r.Next(); e != nil && err == nil; e, err = r.Next() { + if e.Tag == dwarf.TagSubprogram { + min := core.Address(e.AttrField(dwarf.AttrLowpc).Val.(uint64)) + max := core.Address(e.AttrField(dwarf.AttrHighpc).Val.(uint64)) + f := p.funcTab.find(min) + if f == nil { + // some func Go doesn't know about. C? 
+ curfn = nil + } else { + if f.entry != min { + panic("dwarf and runtime don't agree about start of " + f.name) + } + if p.funcTab.find(max-1) != f { + panic("function ranges don't match for " + f.name) + } + curfn = f + } + continue + } + if e.Tag != dwarf.TagVariable && e.Tag != dwarf.TagFormalParameter { + continue + } + aloc := e.AttrField(dwarf.AttrLocation) + if aloc == nil { + continue + } + // Interpret locations of the form + // DW_OP_call_frame_cfa + // DW_OP_consts <off> + // DW_OP_plus + // (with possibly missing DW_OP_consts & DW_OP_plus for the zero offset.) + // TODO: handle other possible locations (e.g. register locations). + loc := aloc.Val.([]byte) + if len(loc) == 0 || loc[0] != _DW_OP_call_frame_cfa { + continue + } + loc = loc[1:] + var off int64 + if len(loc) != 0 && loc[0] == _DW_OP_consts { + loc = loc[1:] + var s uint + for len(loc) > 0 { + b := loc[0] + loc = loc[1:] + off += int64(b&0x7f) << s + s += 7 + if b&0x80 == 0 { + break + } + } + off = off << (64 - s) >> (64 - s) + if len(loc) == 0 || loc[0] != _DW_OP_plus { + continue + } + loc = loc[1:] + } + if len(loc) != 0 { + continue // more stuff we don't recognize + } + dt, err := d.Type(e.AttrField(dwarf.AttrType).Val.(dwarf.Offset)) + if err != nil { + panic(err) + } + name := e.AttrField(dwarf.AttrName).Val.(string) + vars[curfn] = append(vars[curfn], Var{name: name, off: off, typ: p.dwarfMap[dt]}) + } + + // Get roots from goroutine stacks. + for _, g := range p.goroutines { + for _, f := range g.frames { + // Start with all pointer slots as unnamed. + unnamed := map[core.Address]bool{} + for a := range f.Live { + unnamed[a] = true + } + // Emit roots for DWARF entries. + for _, v := range vars[f.f] { + r := &Root{ + Name: v.name, + Addr: f.max.Add(v.off), + Type: v.typ, + Frame: f, + } + f.roots = append(f.roots, r) + // Remove this variable from the set of unnamed pointers. 
+ for a := r.Addr; a < r.Addr.Add(r.Type.Size); a = a.Add(p.proc.PtrSize()) { + delete(unnamed, a) + } + } + // Emit roots for unnamed pointer slots in the frame. + // Make deterministic by sorting first. + s := make([]core.Address, 0, len(unnamed)) + for a := range unnamed { + s = append(s, a) + } + sort.Slice(s, func(i, j int) bool { return s[i] < s[j] }) + for _, a := range s { + r := &Root{ + Name: "unk", + Addr: a, + Type: p.findType("unsafe.Pointer"), + Frame: f, + } + f.roots = append(f.roots, r) + } + } + } +} + +/* Dwarf encoding notes + +type XXX sss + +translates to a dwarf type pkg.XXX of the type of sss (uint, float, ...) + +exception: if sss is a struct or array, then we get two types, the "unnamed" and "named" type. +The unnamed type is a dwarf struct type with name "struct pkg.XXX" or a dwarf array type with +name [N]elem. +Then there is a typedef with pkg.XXX pointing to "struct pkg.XXX" or [N]elem. + +For structures, lowercase field names are prepended with the package name (pkg path?). + +type XXX interface{} +pkg.XXX is a typedef to "struct runtime.eface" +type XXX interface{f()} +pkg.XXX is a typedef to "struct runtime.iface" + +Sometimes there is even a chain of identically-named typedefs. I have no idea why. +main.XXX -> main.XXX -> struct runtime.iface + +*/
diff --git a/gocore/gocore_test.go b/gocore/gocore_test.go new file mode 100644 index 0000000..ab6d980 --- /dev/null +++ b/gocore/gocore_test.go
@@ -0,0 +1,165 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "reflect" + "testing" + + "golang.org/x/debug/core" +) + +// loadExample loads a simple core file which resulted from running the +// following program on linux/amd64 with go 1.9.0 (the earliest supported runtime): +// package main +// func main() { +// _ = *(*int)(nil) +// } +func loadExample(t *testing.T) *Process { + c, err := core.Core("testdata/core", "testdata") + if err != nil { + t.Fatalf("can't load test core file: %s", err) + } + p, err := Core(c, FlagTypes|FlagReverse) + if err != nil { + t.Fatalf("can't parse Go core: %s", err) + } + return p +} + +func TestObjects(t *testing.T) { + p := loadExample(t) + n := 0 + p.ForEachObject(func(x Object) bool { + n++ + return true + }) + if n != 104 { + t.Errorf("#objects = %d, want 104", n) + } +} + +func TestRoots(t *testing.T) { + p := loadExample(t) + n := 0 + p.ForEachRoot(func(r *Root) bool { + n++ + return true + }) + if n != 257 { + t.Errorf("#roots = %d, want 257", n) + } +} + +// TestConfig checks the configuration accessors. +func TestConfig(t *testing.T) { + p := loadExample(t) + if v := p.BuildVersion(); v != "go1.9" { + t.Errorf("version=%s, wanted go1.9", v) + } + if n := p.Stats().Size; n != 2732032 { + t.Errorf("all stats=%d, want 2732032", n) + } +} + +func TestFindFunc(t *testing.T) { + p := loadExample(t) + a := core.Address(0x404000) + f := p.FindFunc(a) + if f == nil { + t.Errorf("can't find function at %x", a) + return + } + if n := f.Name(); n != "runtime.recvDirect" { + t.Errorf("funcname(%x)=%s, want runtime.recvDirect", a, n) + } +} + +func TestTypes(t *testing.T) { + p := loadExample(t) + // Check the type of a few objects. 
+ for _, s := range [...]struct { + addr core.Address + size int64 + kind Kind + name string + repeat int64 + }{ + {0xc420000480, 384, KindStruct, "runtime.g", 1}, + {0xc42000a020, 32, KindPtr, "*runtime.g", 4}, + {0xc420082000, 96, KindStruct, "hchan<bool>", 1}, + {0xc420062000, 64, KindStruct, "runtime._defer", 1}, + } { + x, i := p.FindObject(s.addr) + if x == 0 { + t.Errorf("can't find object at %x", s.addr) + continue + } + if i != 0 { + t.Errorf("offset(%x)=%d, want 0", s.addr, i) + } + if p.Size(x) != s.size { + t.Errorf("size(%x)=%d, want %d", s.addr, p.Size(x), s.size) + } + typ, repeat := p.Type(x) + if typ.Kind != s.kind { + t.Errorf("kind(%x)=%s, want %s", s.addr, typ.Kind, s.kind) + } + if typ.Name != s.name { + t.Errorf("name(%x)=%s, want %s", s.addr, typ.Name, s.name) + } + if repeat != s.repeat { + t.Errorf("repeat(%x)=%d, want %d", s.addr, repeat, s.repeat) + } + + y, i := p.FindObject(s.addr + 1) + if y != x { + t.Errorf("can't find object at %x", s.addr+1) + } + if i != 1 { + t.Errorf("offset(%x)=%d, want 1", s.addr, i) + } + } +} + +func TestReverse(t *testing.T) { + p := loadExample(t) + + // Build the pointer map. + // m[x]=y means address x has a pointer to address y. + m1 := map[core.Address]core.Address{} + p.ForEachObject(func(x Object) bool { + p.ForEachPtr(x, func(i int64, y Object, j int64) bool { + m1[p.Addr(x).Add(i)] = p.Addr(y).Add(j) + return true + }) + return true + }) + p.ForEachRoot(func(r *Root) bool { + p.ForEachRootPtr(r, func(i int64, y Object, j int64) bool { + m1[r.Addr.Add(i)] = p.Addr(y).Add(j) + return true + }) + return true + }) + + // Build the same, with reverse entries. 
+ m2 := map[core.Address]core.Address{} + p.ForEachObject(func(y Object) bool { + p.ForEachReversePtr(y, func(x Object, r *Root, i, j int64) bool { + if r != nil { + m2[r.Addr.Add(i)] = p.Addr(y).Add(j) + } else { + m2[p.Addr(x).Add(i)] = p.Addr(y).Add(j) + } + return true + }) + return true + }) + + if !reflect.DeepEqual(m1, m2) { + t.Errorf("forward and reverse edges don't match") + } +}
diff --git a/gocore/goroutine.go b/gocore/goroutine.go new file mode 100644 index 0000000..b4891eb --- /dev/null +++ b/gocore/goroutine.go
@@ -0,0 +1,108 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "golang.org/x/debug/core" +) + +type Goroutine struct { + r region // inferior region holding the runtime.g + stackSize int64 // current stack allocation + frames []*Frame + + // TODO: defers, in-progress panics +} + +// Stack returns the total allocated stack for g. +func (g *Goroutine) Stack() int64 { + return g.stackSize +} + +// Addr returns the address of the runtime.g that identifies this goroutine. +func (g *Goroutine) Addr() core.Address { + return g.r.a +} + +// Frames returns the list of frames on the stack of the Goroutine. +// The first frame is the most recent one. +// This list is post-optimization, so any inlined calls, tail calls, etc. +// will not appear. +func (g *Goroutine) Frames() []*Frame { + return g.frames +} + +// A Frame represents the local variables of a single Go function invocation. +// (Note that in the presence of inlining, a Frame may contain local variables +// for more than one Go function invocation.) +type Frame struct { + parent *Frame + f *Func // function whose activation record this frame is + pc core.Address // resumption point + min, max core.Address // extent of stack frame + + // Set of locations that contain a live pointer. Note that this set + // may contain locations outside the frame (in particular, the args + // for the frame). + Live map[core.Address]bool + + roots []*Root // GC roots in this frame + + // TODO: keep vars from dwarf around? +} + +// Func returns the function for which this frame is an activation record. +func (f *Frame) Func() *Func { + return f.f +} + +// Min returns the minimum address of this frame. +func (f *Frame) Min() core.Address { + return f.min +} + +// Max returns the maximum address of this frame. 
+func (f *Frame) Max() core.Address { + return f.max +} + +// PC returns the program counter of the next instruction to be executed by this frame. +func (f *Frame) PC() core.Address { + return f.pc +} + +// Roots returns a list of all the garbage collection roots in the frame. +func (f *Frame) Roots() []*Root { + return f.roots +} + +// Parent returns the parent frame of f, or nil if it is the top of the stack. +func (f *Frame) Parent() *Frame { + return f.parent +} + +// A Func represents a Go function. +type Func struct { + r region // inferior region holding a runtime._func + module *module + name string + entry core.Address + frameSize pcTab // map from pc to frame size at that pc + pcdata []int32 + funcdata []core.Address + stackMap pcTab // map from pc to stack map # (index into locals and args bitmaps) + closure *Type // the type to use for closures of this function. Lazily allocated. +} + +// Name returns the name of the function, as reported by DWARF. +// Names are opaque; do not depend on the format of the returned name. +func (f *Func) Name() string { + return f.name +} + +// Entry returns the address of the entry point of f. +func (f *Func) Entry() core.Address { + return f.entry +}
diff --git a/gocore/module.go b/gocore/module.go new file mode 100644 index 0000000..93b5451 --- /dev/null +++ b/gocore/module.go
@@ -0,0 +1,192 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "fmt" + "sort" + + "golang.org/x/debug/core" +) + +type module struct { + r region // inferior region holding a runtime.moduledata + types, etypes core.Address // range that holds all the runtime._type data in this module +} + +func (p *Process) readModules() { + // Note: the cast is necessary for cores generated by Go 1.9, where + // runtime.moduledata is just an unsafe.Pointer. + ms := p.rtGlobals["modulesSlice"].Cast("*[]*runtime.moduledata").Deref() + n := ms.SliceLen() + for i := int64(0); i < n; i++ { + md := ms.SliceIndex(i).Deref() + p.modules = append(p.modules, p.readModule(md)) + } + p.funcTab.sort() +} + +func (p *Process) readModule(r region) *module { + m := &module{r: r} + m.types = core.Address(r.Field("types").Uintptr()) + m.etypes = core.Address(r.Field("etypes").Uintptr()) + + // Read the pc->function table + pcln := r.Field("pclntable") + ftab := r.Field("ftab") + n := ftab.SliceLen() - 1 // last slot is a dummy, just holds entry + for i := int64(0); i < n; i++ { + ft := ftab.SliceIndex(i) + min := core.Address(ft.Field("entry").Uintptr()) + max := core.Address(ftab.SliceIndex(i + 1).Field("entry").Uintptr()) + fr := pcln.SliceIndex(int64(ft.Field("funcoff").Uintptr())).Cast("runtime._func") + f := m.readFunc(fr, pcln) + if f.entry != min { + panic(fmt.Errorf("entry %x and min %x don't match for %s", f.entry, min, f.name)) + } + p.funcTab.add(min, max, f) + } + + return m +} + +// readFunc parses a runtime._func and returns a *Func. +// r must have type runtime._func. +// pcln must have type []byte and represent the module's pcln table region. 
+func (m *module) readFunc(r region, pcln region) *Func { + f := &Func{module: m, r: r} + f.entry = core.Address(r.Field("entry").Uintptr()) + f.name = r.p.proc.ReadCString(pcln.SliceIndex(int64(r.Field("nameoff").Int32())).a) + f.frameSize.read(r.p.proc, pcln.SliceIndex(int64(r.Field("pcsp").Int32())).a) + + // Parse pcdata and funcdata, which are laid out beyond the end of the _func. + a := r.a.Add(int64(r.p.findType("runtime._func").Size)) + n := r.Field("npcdata").Int32() + for i := int32(0); i < n; i++ { + f.pcdata = append(f.pcdata, r.p.proc.ReadInt32(a)) + a = a.Add(4) + } + a = a.Align(r.p.proc.PtrSize()) + n = r.Field("nfuncdata").Int32() + for i := int32(0); i < n; i++ { + f.funcdata = append(f.funcdata, r.p.proc.ReadPtr(a)) + a = a.Add(r.p.proc.PtrSize()) + } + + // Read pcln tables we need. + if stackmap := int(r.p.rtConstants["_PCDATA_StackMapIndex"]); stackmap < len(f.pcdata) { + f.stackMap.read(r.p.proc, pcln.SliceIndex(int64(f.pcdata[stackmap])).a) + } else { + f.stackMap.setEmpty() + } + + return f +} + +type funcTabEntry struct { + min, max core.Address + f *Func +} + +type funcTab struct { + entries []funcTabEntry +} + +// add records that PCs in the range [min,max) map to function f. +func (t *funcTab) add(min, max core.Address, f *Func) { + t.entries = append(t.entries, funcTabEntry{min: min, max: max, f: f}) +} + +// sort must be called after all the adds, but before any find. +func (t *funcTab) sort() { + sort.Slice(t.entries, func(i, j int) bool { + return t.entries[i].min < t.entries[j].min + }) +} + +// Finds a Func for the given address. Sort must have been called already. +func (t *funcTab) find(pc core.Address) *Func { + n := sort.Search(len(t.entries), func(i int) bool { + return t.entries[i].max > pc + }) + if n == len(t.entries) || pc < t.entries[n].min || pc >= t.entries[n].max { + return nil + } + return t.entries[n].f +} + +// a pcTab maps from an offset in a function to an int64. 
+type pcTab struct { + entries []pcTabEntry +} + +type pcTabEntry struct { + bytes int64 // # of bytes this entry covers + val int64 // value over that range of bytes +} + +// read parses a pctab from the core file at address data. +func (t *pcTab) read(core *core.Process, data core.Address) { + var pcQuantum int64 + switch core.Arch() { + case "386", "amd64", "amd64p32": + pcQuantum = 1 + case "s390x": + pcQuantum = 2 + case "arm", "arm64", "mips", "mipsle", "mips64", "mips64le", "ppc64", "ppc64le": + pcQuantum = 4 + default: + panic("unknown architecture " + core.Arch()) + } + val := int64(-1) + first := true + for { + // Advance value. + v, n := readVarint(core, data) + if v == 0 && !first { + return + } + data = data.Add(n) + if v&1 != 0 { + val += ^(v >> 1) + } else { + val += v >> 1 + } + + // Advance pc. + v, n = readVarint(core, data) + data = data.Add(n) + t.entries = append(t.entries, pcTabEntry{bytes: v * pcQuantum, val: val}) + first = false + } +} + +func (t *pcTab) setEmpty() { + t.entries = []pcTabEntry{{bytes: 1<<63 - 1, val: -1}} +} + +func (t *pcTab) find(off int64) int64 { + for _, e := range t.entries { + if off < e.bytes { + return e.val + } + off -= e.bytes + } + panic("can't find pctab entry") +} + +// readVarint reads a varint from the core file. +// val is the value, n is the number of bytes consumed. +func readVarint(core *core.Process, a core.Address) (val, n int64) { + for { + b := core.ReadUint8(a) + val |= int64(b&0x7f) << uint(n*7) + n++ + a++ + if b&0x80 == 0 { + return + } + } +}
diff --git a/gocore/object.go b/gocore/object.go new file mode 100644 index 0000000..6990e33 --- /dev/null +++ b/gocore/object.go
@@ -0,0 +1,339 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "math/bits" + "strings" + + "golang.org/x/debug/core" +) + +// An Object represents a single object in the Go heap. +type Object core.Address + +// markObjects finds all the live objects in the heap and marks them +// in the p.heapInfo mark fields. +func (p *Process) markObjects() { + ptrSize := p.proc.PtrSize() + + // number of live objects found so far + n := 0 + // total size of live objects + var live int64 + + var q []Object + + // Function to call when we find a new pointer. + add := func(x core.Address) { + h := p.findHeapInfo(x) + if h == nil || h.base == 0 { // not in heap or not in a valid span + // Invalid spans can happen with intra-stack pointers. + return + } + // Round down to object start. + x = h.base.Add(x.Sub(h.base) / h.size * h.size) + // Object start may map to a different info. Reload. + h = p.findHeapInfo(x) + // Find mark bit + b := uint64(x) % heapInfoSize / 8 + if h.mark&(uint64(1)<<b) != 0 { // already found + return + } + h.mark |= uint64(1) << b + n++ + live += h.size + q = append(q, Object(x)) + } + + // Start with scanning all the roots. + // Note that we don't just use the DWARF roots, just in case DWARF isn't complete. + // Instead we use exactly what the runtime uses. 
+ + // Goroutine roots + for _, g := range p.goroutines { + for _, f := range g.frames { + for a := range f.Live { + add(p.proc.ReadPtr(a)) + } + } + } + + // Global roots + for _, m := range p.modules { + for _, s := range [2]string{"data", "bss"} { + min := core.Address(m.r.Field(s).Uintptr()) + max := core.Address(m.r.Field("e" + s).Uintptr()) + gc := m.r.Field("gc" + s + "mask").Field("bytedata").Address() + num := max.Sub(min) / ptrSize + for i := int64(0); i < num; i++ { + if p.proc.ReadUint8(gc.Add(i/8))>>uint(i%8)&1 != 0 { + add(p.proc.ReadPtr(min.Add(i * ptrSize))) + } + } + } + } + + // Finalizers + for _, r := range p.globals { + if !strings.HasPrefix(r.Name, "finalizer for ") { + continue + } + for _, f := range r.Type.Fields { + if f.Type.Kind == KindPtr { + add(p.proc.ReadPtr(r.Addr.Add(f.Off))) + } + } + } + + // Expand root set to all reachable objects. + for len(q) > 0 { + x := q[len(q)-1] + q = q[:len(q)-1] + + // Scan object for pointers. + size := p.Size(x) + for i := int64(0); i < size; i += ptrSize { + a := core.Address(x).Add(i) + if p.isPtrFromHeap(a) { + add(p.proc.ReadPtr(a)) + } + } + } + + p.nObj = n + + // Initialize firstIdx fields in the heapInfo, for fast object index lookups. + for i := len(p.heapInfo) - 1; i >= 0; i-- { + h := &p.heapInfo[i] + if h.mark == 0 { // not really necessary, just leave -1 sentinel as a double check. + continue + } + n -= bits.OnesCount64(h.mark) + h.firstIdx = n + } + + // Update stats to include the live/garbage distinction. + alloc := p.Stats().Child("heap").Child("in use spans").Child("alloc") + alloc.Children = []*Stats{ + &Stats{"live", live, nil}, + &Stats{"garbage", alloc.Size - live, nil}, + } +} + +// isPtrFromHeap reports whether the inferior at address a contains a pointer. +// a must be somewhere in the heap. +func (p *Process) isPtrFromHeap(a core.Address) bool { + // Convert arena offset in words to bitmap offset in bits. 
+ off := a.Sub(p.arenaStart) + off >>= p.proc.LogPtrSize() + + // Find bit in bitmap. It goes backwards from the end. + // Each byte contains pointer/nonpointer bits for 4 words in its low nybble. + return p.proc.ReadUint8(p.bitmapEnd.Add(-(off>>2)-1))>>uint(off&3)&1 != 0 +} + +// IsPtr reports whether the inferior at address a contains a pointer. +func (p *Process) IsPtr(a core.Address) bool { + if a >= p.arenaStart && a < p.arenaUsed { + return p.isPtrFromHeap(a) + } + for _, m := range p.modules { + for _, s := range [2]string{"data", "bss"} { + min := core.Address(m.r.Field(s).Uintptr()) + max := core.Address(m.r.Field("e" + s).Uintptr()) + if a < min || a >= max { + continue + } + gc := m.r.Field("gc" + s + "mask").Field("bytedata").Address() + i := a.Sub(min) + return p.proc.ReadUint8(gc.Add(i/8))>>uint(i%8) != 0 + } + } + // Everywhere else can't be a pointer. At least, not a pointer into the Go heap. + // TODO: stacks? + // TODO: finalizers? + return false +} + +// FindObject finds the object containing a. Returns that object and the offset within +// that object to which a points. +// Returns 0,0 if a doesn't point to a live heap object. +func (p *Process) FindObject(a core.Address) (Object, int64) { + // Round down to the start of an object. + h := p.findHeapInfo(a) + if h == nil || h.size == 0 { + // Not in Go heap, or in a span + // that doesn't hold Go objects (freed, stacks, ...) + return 0, 0 + } + x := h.base.Add(a.Sub(h.base) / h.size * h.size) + // Check if object is marked. + h = p.findHeapInfo(x) + if h.mark>>(uint64(x)%heapInfoSize/8)&1 == 0 { + return 0, 0 + } + return Object(x), a.Sub(x) +} + +func (p *Process) findObjectIndex(a core.Address) (int, int64) { + x, off := p.FindObject(a) + if x == 0 { + return -1, 0 + } + h := p.findHeapInfo(core.Address(x)) + return h.firstIdx + bits.OnesCount64(h.mark&(uint64(1)<<(uint64(x)%heapInfoSize/8)-1)), off +} + +// ForEachObject calls fn with each object in the Go heap. 
// If fn returns false, ForEachObject returns immediately.
func (p *Process) ForEachObject(fn func(x Object) bool) {
	for i := 0; i < len(p.heapInfo); i++ {
		m := p.heapInfo[i].mark
		// Visit each set mark bit; each bit represents 8 bytes of heap.
		for m != 0 {
			j := bits.TrailingZeros64(m)
			m &= m - 1 // clear lowest set bit
			if !fn(Object(p.arenaStart.Add(int64(i)*heapInfoSize + int64(j)*8))) {
				return
			}
		}
	}
}

// ForEachRoot calls fn with each garbage collection root.
// If fn returns false, ForEachRoot returns immediately.
func (p *Process) ForEachRoot(fn func(r *Root) bool) {
	for _, r := range p.globals {
		if !fn(r) {
			return
		}
	}
	for _, g := range p.goroutines {
		for _, f := range g.frames {
			for _, r := range f.roots {
				if !fn(r) {
					return
				}
			}
		}
	}
}

// Addr returns the starting address of x.
func (p *Process) Addr(x Object) core.Address {
	return core.Address(x)
}

// Size returns the size of x in bytes.
func (p *Process) Size(x Object) int64 {
	return p.findHeapInfo(core.Address(x)).size
}

// Type returns the type and repeat count for the object x.
// x contains at least repeat copies of the returned type.
// FlagTypes must have been passed to Core when p was constructed.
func (p *Process) Type(x Object) (*Type, int64) {
	i, _ := p.findObjectIndex(core.Address(x))
	return p.types[i].t, p.types[i].r
}

// ForEachPtr calls fn for all heap pointers it finds in x.
// It calls fn with:
//   the offset of the pointer slot in x
//   the pointed-to object y
//   the offset in y where the pointer points.
// If fn returns false, ForEachPtr returns immediately.
// For an edge from an object to its finalizer, the first argument
// passed to fn will be -1. (TODO: implement)
func (p *Process) ForEachPtr(x Object, fn func(int64, Object, int64) bool) {
	size := p.Size(x)
	for i := int64(0); i < size; i += p.proc.PtrSize() {
		a := core.Address(x).Add(i)
		if !p.isPtrFromHeap(a) {
			continue
		}
		ptr := p.proc.ReadPtr(a)
		y, off := p.FindObject(ptr)
		if y != 0 {
			if !fn(i, y, off) {
				return
			}
		}
	}
}

// ForEachRootPtr behaves like ForEachPtr but it starts with a Root instead of an Object.
func (p *Process) ForEachRootPtr(r *Root, fn func(int64, Object, int64) bool) {
	edges1(p, r, 0, r.Type, fn)
}

// edges1 calls fn for the edges found in an object of type t living at offset off in the root r.
// If fn returns false, return immediately with false.
func edges1(p *Process, r *Root, off int64, t *Type, fn func(int64, Object, int64) bool) bool {
	switch t.Kind {
	case KindBool, KindInt, KindUint, KindFloat, KindComplex:
		// no edges here
	case KindIface, KindEface:
		// The first word is a type or itab.
		// Itabs are never in the heap.
		// Types might be, though.
		a := r.Addr.Add(off)
		// Only follow stack slots that are live at the frame's current PC.
		if r.Frame == nil || r.Frame.Live[a] {
			dst, off2 := p.FindObject(p.proc.ReadPtr(a))
			if dst != 0 {
				if !fn(off, dst, off2) {
					return false
				}
			}
		}
		// Treat second word like a pointer.
		off += p.proc.PtrSize()
		fallthrough
	case KindPtr, KindString, KindSlice, KindFunc:
		a := r.Addr.Add(off)
		if r.Frame == nil || r.Frame.Live[a] {
			dst, off2 := p.FindObject(p.proc.ReadPtr(a))
			if dst != 0 {
				if !fn(off, dst, off2) {
					return false
				}
			}
		}
	case KindArray:
		s := t.Elem.Size
		for i := int64(0); i < t.Count; i++ {
			if !edges1(p, r, off+i*s, t.Elem, fn) {
				return false
			}
		}
	case KindStruct:
		for _, f := range t.Fields {
			if !edges1(p, r, off+f.Off, f.Type, fn) {
				return false
			}
		}
	}
	return true
}

// heapInfoSize is the granularity, in bytes, at which heap metadata is kept.
const heapInfoSize = 512

// Information for heapInfoSize bytes of heap.
type heapInfo struct {
	base     core.Address // start of the span containing this heap region
	size     int64        // size of objects in the span
	mark     uint64       // 64 mark bits, one for every 8 bytes
	firstIdx int          // the index of the first object that starts in this region (-1 if none)
}

// findHeapInfo finds the heapInfo structure for a.
// Returns nil if a is not a heap address.
func (p *Process) findHeapInfo(a core.Address) *heapInfo {
	if a < p.arenaStart || a >= p.arenaUsed {
		return nil
	}
	i := a.Sub(p.arenaStart) / heapInfoSize
	return &p.heapInfo[i]
}
diff --git a/gocore/process.go b/gocore/process.go new file mode 100644 index 0000000..2e061c8 --- /dev/null +++ b/gocore/process.go
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gocore

import (
	"debug/dwarf"
	"fmt"
	"strings"

	"golang.org/x/debug/core"
)

// A Process represents the state of a Go process that core dumped.
type Process struct {
	proc *core.Process // underlying core dump reader

	// Heap arena boundaries and the end of the pointer/scalar bitmap,
	// read from the runtime's mheap_ global.
	arenaStart core.Address
	arenaUsed  core.Address
	bitmapEnd  core.Address

	// data structure for fast object finding
	heapInfo []heapInfo

	// number of live objects
	nObj int

	goroutines []*Goroutine

	// runtime info
	rtGlobals   map[string]region
	rtConstants map[string]int64

	// A module is a loadable unit. Most Go programs have 1, programs
	// which load plugins will have more.
	modules []*module

	// address -> function mapping
	funcTab funcTab

	// map from dwarf type to *Type
	dwarfMap map[dwarf.Type]*Type

	// map from address of runtime._type to *Type
	runtimeMap map[core.Address]*Type

	// map from runtime type name to the set of *Type with that name
	// Used to find candidates to put in the runtimeMap map.
	runtimeNameMap map[string][]*Type

	// memory usage by category
	stats *Stats

	buildVersion string

	globals []*Root

	// Types of each object, indexed by object index.
	// Only initialized if FlagTypes is passed to Core.
	types []typeInfo

	// Reverse edges.
	// The reverse edges for object #i are redge[ridx[i]:ridx[i+1]].
	// A "reverse edge" for object #i is a location in memory where a pointer
	// to object #i lives.
	// Only initialized if FlagReverse is passed to Core.
	redge []core.Address
	ridx  []int64
	// Sorted list of all roots.
	// Only initialized if FlagReverse is passed to Core.
	rootIdx []*Root
}

// Process returns the core.Process used to construct this Process.
func (p *Process) Process() *core.Process {
	return p.proc
}

// Goroutines returns all goroutines that were found in the core dump.
func (p *Process) Goroutines() []*Goroutine {
	return p.goroutines
}

// Stats returns a breakdown of the program's memory use by category.
func (p *Process) Stats() *Stats {
	return p.stats
}

// BuildVersion returns the Go version that was used to build the inferior binary.
func (p *Process) BuildVersion() string {
	return p.buildVersion
}

// Globals returns the global roots, including synthesized ones such as
// finalizer records (see readSpans).
func (p *Process) Globals() []*Root {
	return p.globals
}

// FindFunc returns the function which contains the code at address pc, if any.
func (p *Process) FindFunc(pc core.Address) *Func {
	return p.funcTab.find(pc)
}

// findType returns some type with the given runtime name.
// Panics if no type with that name is known.
func (p *Process) findType(name string) *Type {
	s := p.runtimeNameMap[name]
	if len(s) == 0 {
		panic("can't find type " + name)
	}
	return s[0]
}

// A Flags indicates optional analyses for Core to compute.
type Flags uint8

const (
	// FlagTypes requests that Core compute type information for all Go objects,
	// required to use the Type function.
	// Setting this flag will require more initialization time and use more memory.
	FlagTypes Flags = 1 << iota
	// FlagReverse requests that Core compute reverse edge information,
	// required to use ForEachReversePtr.
	// Setting this flag will require more initialization time and use more memory.
	FlagReverse
)

// Core takes a loaded core file and extracts Go information from it.
// flags is a bitmask of data that should be extracted from the core.
func Core(proc *core.Process, flags Flags) (p *Process, err error) {
	// Make sure we have DWARF info.
	if _, err := proc.DWARF(); err != nil {
		return nil, err
	}

	// Guard against failures of proc.Read* routines.
	/*
		defer func() {
			e := recover()
			if e == nil {
				return
			}
			p = nil
			if x, ok := e.(error); ok {
				err = x
				return
			}
			panic(e) // Not an error, re-panic it.
		}()
	*/

	p = &Process{
		proc:       proc,
		runtimeMap: map[core.Address]*Type{},
		dwarfMap:   map[dwarf.Type]*Type{},
	}

	// Initialize everything that just depends on DWARF.
	p.readDWARFTypes()
	p.readRuntimeConstants()
	p.readGlobals()

	// Find runtime globals we care about. Initialize regions for them.
	p.rtGlobals = map[string]region{}
	for _, g := range p.globals {
		if strings.HasPrefix(g.Name, "runtime.") {
			// Strip the "runtime." prefix (8 characters).
			p.rtGlobals[g.Name[8:]] = region{p: p, a: g.Addr, typ: g.Type}
		}
	}

	// Read all the data that depend on runtime globals.
	p.buildVersion = p.rtGlobals["buildVersion"].String()
	p.readModules()
	p.readSpans()
	p.readGs()
	p.readStackVars() // needs to be after readGs.
	p.markObjects()   // needs to be after readGlobals, readStackVars.
	if flags&FlagTypes != 0 {
		p.typeHeap() // needs to be after markObjects.
	}
	if flags&FlagReverse != 0 {
		p.reverseEdges() // needs to be after markObjects.
	}

	return p, nil
}

// readSpans parses the runtime's span table (mheap_.allspans), initializes
// p.heapInfo, records finalizer roots, and builds the memory-usage Stats tree.
func (p *Process) readSpans() {
	mheap := p.rtGlobals["mheap_"]

	spanTableStart := mheap.Field("spans").SlicePtr().Address()
	spanTableEnd := spanTableStart.Add(mheap.Field("spans").SliceCap() * p.proc.PtrSize())
	arenaStart := core.Address(mheap.Field("arena_start").Uintptr())
	arenaUsed := core.Address(mheap.Field("arena_used").Uintptr())
	arenaEnd := core.Address(mheap.Field("arena_end").Uintptr())
	bitmapEnd := core.Address(mheap.Field("bitmap").Uintptr())
	bitmapStart := bitmapEnd.Add(-int64(mheap.Field("bitmap_mapped").Uintptr()))

	p.arenaStart = arenaStart
	p.arenaUsed = arenaUsed
	p.bitmapEnd = bitmapEnd

	// Classify every mapping in the core by what it holds.
	var all int64
	var text int64
	var readOnly int64
	var heap int64
	var spanTable int64
	var bitmap int64
	var data int64
	var bss int64 // also includes mmap'd regions
	for _, m := range p.proc.Mappings() {
		size := m.Size()
		all += size
		switch m.Perm() {
		case core.Read:
			readOnly += size
		case core.Read | core.Exec:
			text += size
		case core.Read | core.Write:
			if m.CopyOnWrite() {
				// Check if m.file == text's file? That could distinguish
				// data segment from mmapped file.
				data += size
				break
			}
			// attribute assigns to *p the portion of this mapping that
			// overlaps [x,y), subtracting it from the leftover size.
			attribute := func(x, y core.Address, p *int64) {
				a := x.Max(m.Min())
				b := y.Min(m.Max())
				if a < b {
					*p += b.Sub(a)
					size -= b.Sub(a)
				}
			}
			attribute(spanTableStart, spanTableEnd, &spanTable)
			attribute(arenaStart, arenaEnd, &heap)
			attribute(bitmapStart, bitmapEnd, &bitmap)
			// Any other anonymous mapping is bss.
			// TODO: how to distinguish original bss from anonymous mmap?
			bss += size
		default:
			panic("weird mapping " + m.Perm().String())
		}
	}
	pageSize := p.rtConstants["_PageSize"]

	// Span types
	spanInUse := uint8(p.rtConstants["_MSpanInUse"])
	spanManual := uint8(p.rtConstants["_MSpanManual"])
	spanDead := uint8(p.rtConstants["_MSpanDead"])
	spanFree := uint8(p.rtConstants["_MSpanFree"])

	// Process spans.
	if pageSize%heapInfoSize != 0 {
		panic(fmt.Sprintf("page size not a multiple of %d", heapInfoSize))
	}
	p.heapInfo = make([]heapInfo, (p.arenaUsed-p.arenaStart)/heapInfoSize)
	allspans := mheap.Field("allspans")
	var allSpanSize int64
	var freeSpanSize int64
	var manualSpanSize int64
	var inUseSpanSize int64
	var allocSize int64
	var freeSize int64
	var spanRoundSize int64
	var manualAllocSize int64
	var manualFreeSize int64
	n := allspans.SliceLen()
	for i := int64(0); i < n; i++ {
		s := allspans.SliceIndex(i).Deref()
		min := core.Address(s.Field("startAddr").Uintptr())
		elemSize := int64(s.Field("elemsize").Uintptr())
		nPages := int64(s.Field("npages").Uintptr())
		spanSize := nPages * pageSize
		max := min.Add(spanSize)
		allSpanSize += spanSize
		switch s.Field("state").Uint8() {
		case spanInUse:
			inUseSpanSize += spanSize
			n := int64(s.Field("nelems").Uintptr())
			// An object is allocated if it is marked as
			// allocated or it is below freeindex.
			x := s.Field("allocBits").Address()
			alloc := make([]bool, n)
			for i := int64(0); i < n; i++ {
				alloc[i] = p.proc.ReadUint8(x.Add(i/8))>>uint(i%8)&1 != 0
			}
			k := int64(s.Field("freeindex").Uintptr())
			for i := int64(0); i < k; i++ {
				alloc[i] = true
			}
			for i := int64(0); i < n; i++ {
				if alloc[i] {
					allocSize += elemSize
				} else {
					freeSize += elemSize
				}
			}
			spanRoundSize += spanSize - n*elemSize
			// Record span info for every heapInfo-sized chunk of the span.
			for a := min; a < max; a += heapInfoSize {
				p.heapInfo[(a.Sub(p.arenaStart))/heapInfoSize] = heapInfo{base: min, size: elemSize, firstIdx: -1}
			}

			// Process special records.
			for sp := s.Field("specials"); sp.Address() != 0; sp = sp.Field("next") {
				sp = sp.Deref() // *special to special
				if sp.Field("kind").Uint8() != uint8(p.rtConstants["_KindSpecialFinalizer"]) {
					// All other specials (just profile records) can't point into the heap.
					continue
				}
				obj := min.Add(int64(sp.Field("offset").Uint16()))
				p.globals = append(p.globals,
					&Root{
						Name:  fmt.Sprintf("finalizer for %x", obj),
						Addr:  sp.a,
						Type:  p.findType("runtime.specialfinalizer"),
						Frame: nil,
					})
				// TODO: these aren't really "globals", as they
				// are kept alive by the object they reference being alive.
				// But we have no way of adding edges from an object to
				// the corresponding finalizer data, so we punt on that thorny
				// issue for now.
			}
		case spanFree:
			freeSpanSize += spanSize
		case spanDead:
			// These are just deallocated span descriptors. They use no heap.
		case spanManual:
			manualSpanSize += spanSize
			manualAllocSize += spanSize
			// Walk the manual free list, moving its entries from alloc to free.
			for x := core.Address(s.Field("manualFreeList").Cast("uintptr").Uintptr()); x != 0; x = p.proc.ReadPtr(x) {
				manualAllocSize -= elemSize
				manualFreeSize += elemSize
			}
		}
	}

	p.stats = &Stats{"all", all, []*Stats{
		&Stats{"text", text, nil},
		&Stats{"readonly", readOnly, nil},
		&Stats{"data", data, nil},
		&Stats{"bss", bss, nil},
		&Stats{"heap", heap, []*Stats{
			&Stats{"in use spans", inUseSpanSize, []*Stats{
				&Stats{"alloc", allocSize, nil},
				&Stats{"free", freeSize, nil},
				&Stats{"round", spanRoundSize, nil},
			}},
			&Stats{"manual spans", manualSpanSize, []*Stats{
				&Stats{"alloc", manualAllocSize, nil},
				&Stats{"free", manualFreeSize, nil},
			}},
			&Stats{"free spans", freeSpanSize, nil},
		}},
		&Stats{"ptr bitmap", bitmap, nil},
		&Stats{"span table", spanTable, nil},
	}}

	// Sanity check: every interior node's size equals the sum of its children.
	var check func(*Stats)
	check = func(s *Stats) {
		if len(s.Children) == 0 {
			return
		}
		var sum int64
		for _, c := range s.Children {
			sum += c.Size
		}
		if sum != s.Size {
			panic(fmt.Sprintf("check failed for %s: %d vs %d", s.Name, s.Size, sum))
		}
		for _, c := range s.Children {
			check(c)
		}
	}
	check(p.stats)
}

// readGs reads all goroutines from the runtime's allgs list.
func (p *Process) readGs() {
	// TODO: figure out how to "flush" running Gs.
	allgs := p.rtGlobals["allgs"]
	n := allgs.SliceLen()
	for i := int64(0); i < n; i++ {
		r := allgs.SliceIndex(i).Deref()
		g := p.readG(r)
		if g == nil {
			continue
		}
		p.goroutines = append(p.goroutines, g)
	}
}

// readG reads a single runtime.g (in region r) and walks its stack frames.
// Returns nil for dead goroutines.
func (p *Process) readG(r region) *Goroutine {
	g := &Goroutine{r: r}
	stk := r.Field("stack")
	g.stackSize = int64(stk.Field("hi").Uintptr() - stk.Field("lo").Uintptr())

	var osT *core.Thread // os thread working on behalf of this G (if any).
	mp := r.Field("m")
	if mp.Address() != 0 {
		m := mp.Deref()
		pid := m.Field("procid").Uint64()
		// TODO check that m.curg points to g?
		for _, t := range p.proc.Threads() {
			if t.Pid() == pid {
				osT = t
			}
		}
	}
	status := r.Field("atomicstatus").Uint32()
	status &^= uint32(p.rtConstants["_Gscan"])
	var sp, pc core.Address
	switch status {
	case uint32(p.rtConstants["_Gidle"]):
		return g
	case uint32(p.rtConstants["_Grunnable"]), uint32(p.rtConstants["_Gwaiting"]):
		sched := r.Field("sched")
		sp = core.Address(sched.Field("sp").Uintptr())
		pc = core.Address(sched.Field("pc").Uintptr())
	case uint32(p.rtConstants["_Grunning"]):
		// NOTE(review): osT may be nil here if no OS thread with a matching
		// procid was found, which would panic — TODO confirm whether that
		// can happen for a running G in a core dump.
		sp = osT.SP()
		pc = osT.PC()
		// TODO: back up to the calling frame?
	case uint32(p.rtConstants["_Gsyscall"]):
		sp = core.Address(r.Field("syscallsp").Uintptr())
		pc = core.Address(r.Field("syscallpc").Uintptr())
		// TODO: or should we use the osT registers?
	case uint32(p.rtConstants["_Gdead"]):
		return nil
		// TODO: copystack, others?
	default:
		// Unknown state. We can't read the frames, so just bail now.
		// TODO: make this switch complete and then panic here.
		// TODO: or just return nil?
		return g
	}
	// Walk frames from the innermost outward until runtime.goexit.
	for {
		f := p.readFrame(sp, pc)
		if f.f.name == "runtime.goexit" {
			break
		}
		if len(g.frames) > 0 {
			g.frames[len(g.frames)-1].parent = f
		}
		g.frames = append(g.frames, f)

		if f.f.name == "runtime.sigtrampgo" {
			// Continue traceback at location where the signal
			// interrupted normal execution.
			ctxt := p.proc.ReadPtr(sp.Add(16)) // 3rd arg
			// ctxt is a *ucontext
			mctxt := ctxt.Add(5 * 8)
			// mctxt is a *mcontext
			sp = p.proc.ReadPtr(mctxt.Add(15 * 8))
			pc = p.proc.ReadPtr(mctxt.Add(16 * 8))
			// TODO: totally arch-dependent!
		} else {
			sp = f.max
			pc = core.Address(p.proc.ReadUintptr(sp - 8)) // TODO:amd64 only
		}
		if pc == 0 {
			// TODO: when would this happen?
			break
		}
		if f.f.name == "runtime.systemstack" {
			// switch over to goroutine stack
			sched := r.Field("sched")
			sp = core.Address(sched.Field("sp").Uintptr())
			pc = core.Address(sched.Field("pc").Uintptr())
		}
	}
	return g
}

// readFrame builds a Frame for the frame at [sp, sp+framesize) executing at pc,
// computing the set of live pointer slots from the function's stack maps.
func (p *Process) readFrame(sp, pc core.Address) *Frame {
	f := p.funcTab.find(pc)
	if f == nil {
		panic(fmt.Errorf(" pc not found %x\n", pc))
	}
	off := pc.Sub(f.entry)
	size := f.frameSize.find(off)
	size += p.proc.PtrSize() // TODO: on amd64, the pushed return address

	frame := &Frame{f: f, pc: pc, min: sp, max: sp.Add(size)}

	// Find live ptrs in locals
	live := map[core.Address]bool{}
	if x := int(p.rtConstants["_FUNCDATA_LocalsPointerMaps"]); x < len(f.funcdata) {
		locals := region{p: p, a: f.funcdata[x], typ: p.findType("runtime.stackmap")}
		n := locals.Field("n").Int32()       // # of bitmaps
		nbit := locals.Field("nbit").Int32() // # of bits per bitmap
		idx := f.stackMap.find(off)
		if idx < 0 {
			idx = 0
		}
		if idx < int64(n) {
			bits := locals.Field("bytedata").a.Add(int64(nbit+7) / 8 * idx)
			base := frame.max.Add(-16).Add(-int64(nbit) * p.proc.PtrSize())
			// TODO: -16 for amd64. Return address and parent's frame pointer
			for i := int64(0); i < int64(nbit); i++ {
				if p.proc.ReadUint8(bits.Add(i/8))>>uint(i&7)&1 != 0 {
					live[base.Add(i*p.proc.PtrSize())] = true
				}
			}
		}
	}
	// Same for args
	if x := int(p.rtConstants["_FUNCDATA_ArgsPointerMaps"]); x < len(f.funcdata) {
		args := region{p: p, a: f.funcdata[x], typ: p.findType("runtime.stackmap")}
		n := args.Field("n").Int32()       // # of bitmaps
		nbit := args.Field("nbit").Int32() // # of bits per bitmap
		idx := f.stackMap.find(off)
		if idx < 0 {
			idx = 0
		}
		if idx < int64(n) {
			bits := args.Field("bytedata").a.Add(int64(nbit+7) / 8 * idx)
			base := frame.max
			// TODO: add to base for LR archs.
			for i := int64(0); i < int64(nbit); i++ {
				if p.proc.ReadUint8(bits.Add(i/8))>>uint(i&7)&1 != 0 {
					live[base.Add(i*p.proc.PtrSize())] = true
				}
			}
		}
	}
	frame.Live = live

	return frame
}

// A Stats struct is the node of a tree representing the entire memory
// usage of the Go program. Children of a node break its usage down
// by category.
// We maintain the invariant that, if there are children,
// Size == sum(c.Size for c in Children).
type Stats struct {
	Name     string
	Size     int64
	Children []*Stats
}

// Child returns the child of s with the given name, or nil if none exists.
func (s *Stats) Child(name string) *Stats {
	for _, c := range s.Children {
		if c.Name == name {
			return c
		}
	}
	return nil
}
diff --git a/gocore/region.go b/gocore/region.go new file mode 100644 index 0000000..b0fdc79 --- /dev/null +++ b/gocore/region.go
@@ -0,0 +1,159 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import "golang.org/x/debug/core" + +// A region is a piece of the virtual address space of the inferior. +// It has an address and a type. +// Note that it is the type of the thing in the region, +// not the type of the reference to the region. +type region struct { + p *Process + a core.Address + typ *Type +} + +// Address returns the address that a region of pointer type points to. +func (r region) Address() core.Address { + if r.typ.Kind != KindPtr { + panic("can't ask for the Address of a non-pointer " + r.typ.Name) + } + return r.p.proc.ReadPtr(r.a) +} + +// Int returns the int value stored in r. +func (r region) Int() int64 { + if r.typ.Kind != KindInt || r.typ.Size != r.p.proc.PtrSize() { + panic("not an int: " + r.typ.Name) + } + return r.p.proc.ReadInt(r.a) +} + +// Uintptr returns the uintptr value stored in r. +func (r region) Uintptr() uint64 { + if r.typ.Kind != KindUint || r.typ.Size != r.p.proc.PtrSize() { + panic("not a uintptr: " + r.typ.Name) + } + return r.p.proc.ReadUintptr(r.a) +} + +// Cast the region to the given type. +func (r region) Cast(typ string) region { + return region{p: r.p, a: r.a, typ: r.p.findType(typ)} +} + +// Deref loads from a pointer. r must contain a pointer. +func (r region) Deref() region { + if r.typ.Kind != KindPtr { + panic("can't deref on non-pointer: " + r.typ.Name) + } + if r.typ.Elem == nil { + panic("can't deref unsafe.Pointer") + } + p := r.p.proc.ReadPtr(r.a) + return region{p: r.p, a: p, typ: r.typ.Elem} +} + +// Uint64 returns the uint64 value stored in r. +// r must have type uint64. +func (r region) Uint64() uint64 { + if r.typ.Kind != KindUint || r.typ.Size != 8 { + panic("bad uint64 type " + r.typ.Name) + } + return r.p.proc.ReadUint64(r.a) +} + +// Uint32 returns the uint32 value stored in r. 
+// r must have type uint32. +func (r region) Uint32() uint32 { + if r.typ.Kind != KindUint || r.typ.Size != 4 { + panic("bad uint32 type " + r.typ.Name) + } + return r.p.proc.ReadUint32(r.a) +} + +// Int32 returns the int32 value stored in r. +// r must have type int32. +func (r region) Int32() int32 { + if r.typ.Kind != KindInt || r.typ.Size != 4 { + panic("bad int32 type " + r.typ.Name) + } + return r.p.proc.ReadInt32(r.a) +} + +// Uint16 returns the uint16 value stored in r. +// r must have type uint16. +func (r region) Uint16() uint16 { + if r.typ.Kind != KindUint || r.typ.Size != 2 { + panic("bad uint16 type " + r.typ.Name) + } + return r.p.proc.ReadUint16(r.a) +} + +// Uint8 returns the uint8 value stored in r. +// r must have type uint8. +func (r region) Uint8() uint8 { + if r.typ.Kind != KindUint || r.typ.Size != 1 { + panic("bad uint8 type " + r.typ.Name) + } + return r.p.proc.ReadUint8(r.a) +} + +// String returns the value of the string stored in r. +func (r region) String() string { + if r.typ.Kind != KindString { + panic("bad string type " + r.typ.Name) + } + p := r.p.proc.ReadPtr(r.a) + n := r.p.proc.ReadUintptr(r.a.Add(r.p.proc.PtrSize())) + b := make([]byte, n) + r.p.proc.ReadAt(b, p) + return string(b) +} + +// SliceIndex indexes a slice (a[n]). r must contain a slice. +// n must be in bounds for the slice. +func (r region) SliceIndex(n int64) region { + if r.typ.Kind != KindSlice { + panic("can't index a non-slice") + } + p := r.p.proc.ReadPtr(r.a) + return region{p: r.p, a: p.Add(n * r.typ.Elem.Size), typ: r.typ.Elem} +} + +// SlicePtr returns the pointer inside a slice. r must contain a slice. +func (r region) SlicePtr() region { + if r.typ.Kind != KindSlice { + panic("can't Ptr a non-slice") + } + return region{p: r.p, a: r.a, typ: &Type{Name: "*" + r.typ.Name[2:], Size: r.p.proc.PtrSize(), Kind: KindPtr, Elem: r.typ.Elem}} +} + +// SliceLen returns the length of a slice. r must contain a slice. 
+func (r region) SliceLen() int64 { + if r.typ.Kind != KindSlice { + panic("can't len a non-slice") + } + return r.p.proc.ReadInt(r.a.Add(r.p.proc.PtrSize())) +} + +// SliceCap returns the capacity of a slice. r must contain a slice. +func (r region) SliceCap() int64 { + if r.typ.Kind != KindSlice { + panic("can't cap a non-slice") + } + return r.p.proc.ReadInt(r.a.Add(2 * r.p.proc.PtrSize())) +} + +// Field returns the part of r which contains the field f. +// r must contain a struct, and f must be one of its fields. +func (r region) Field(f string) region { + finfo := r.typ.field(f) + if finfo == nil { + panic("can't find field " + r.typ.Name + "." + f) + } + return region{p: r.p, a: r.a.Add(finfo.Off), typ: finfo.Type} +}
diff --git a/gocore/reverse.go b/gocore/reverse.go new file mode 100644 index 0000000..6cda39d --- /dev/null +++ b/gocore/reverse.go
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package gocore

import (
	"sort"

	"golang.org/x/debug/core"
)

// reverseEdges computes p.redge/p.ridx (the packed reverse-edge lists)
// and p.rootIdx (roots sorted by address). Two passes are made over all
// edges: one to count edges per target object, one to fill the lists.
func (p *Process) reverseEdges() {
	// First, count the number of edges into each object.
	// This allows for efficient packing of the reverse edge storage.
	cnt := make([]int64, p.nObj+1)
	p.ForEachObject(func(x Object) bool {
		p.ForEachPtr(x, func(_ int64, y Object, _ int64) bool {
			idx, _ := p.findObjectIndex(p.Addr(y))
			cnt[idx]++
			return true
		})
		return true
	})
	p.ForEachRoot(func(r *Root) bool {
		p.ForEachRootPtr(r, func(_ int64, y Object, _ int64) bool {
			idx, _ := p.findObjectIndex(p.Addr(y))
			cnt[idx]++
			return true
		})
		return true
	})

	// Compute cumulative count of all incoming edges up to and including each object.
	var n int64
	for idx, c := range cnt {
		n += c
		cnt[idx] = n
	}

	// Allocate all the storage for the reverse edges.
	p.redge = make([]core.Address, n)

	// Add edges to the lists.
	// Each insertion decrements the cumulative counter for its target, so
	// when done, cnt[idx] is the start of object idx's edge list.
	p.ForEachObject(func(x Object) bool {
		p.ForEachPtr(x, func(i int64, y Object, _ int64) bool {
			idx, _ := p.findObjectIndex(p.Addr(y))
			e := cnt[idx]
			e--
			cnt[idx] = e
			p.redge[e] = p.Addr(x).Add(i)
			return true
		})
		return true
	})
	p.ForEachRoot(func(r *Root) bool {
		p.ForEachRootPtr(r, func(i int64, y Object, _ int64) bool {
			idx, _ := p.findObjectIndex(p.Addr(y))
			e := cnt[idx]
			e--
			cnt[idx] = e
			p.redge[e] = r.Addr.Add(i)
			return true
		})
		return true
	})
	// At this point, cnt contains the cumulative count of all edges up to
	// but *not* including each object.
	p.ridx = cnt

	// Make root index.
	p.ForEachRoot(func(r *Root) bool {
		p.rootIdx = append(p.rootIdx, r)
		return true
	})
	sort.Slice(p.rootIdx, func(i, j int) bool { return p.rootIdx[i].Addr < p.rootIdx[j].Addr })
}

// ForEachReversePtr calls fn for all pointers it finds pointing to y.
// It calls fn with:
//   the object or root which points to y (exactly one will be non-nil)
//   the offset i in that object or root where the pointer appears.
//   the offset j in y where the pointer points.
// If fn returns false, ForEachReversePtr returns immediately.
// FlagReverse must have been passed to Core when p was constructed.
func (p *Process) ForEachReversePtr(y Object, fn func(x Object, r *Root, i, j int64) bool) {
	idx, _ := p.findObjectIndex(p.Addr(y))
	for _, a := range p.redge[p.ridx[idx]:p.ridx[idx+1]] {
		// Read pointer, compute offset in y.
		ptr := p.proc.ReadPtr(a)
		j := ptr.Sub(p.Addr(y))

		// Find source of pointer.
		x, i := p.FindObject(a)
		if x != 0 {
			// Source is an object.
			if !fn(x, nil, i, j) {
				return
			}
			continue
		}
		// Source is a root: find the first root whose extent ends after a.
		k := sort.Search(len(p.rootIdx), func(k int) bool {
			r := p.rootIdx[k]
			return a < r.Addr.Add(r.Type.Size)
		})
		r := p.rootIdx[k]
		if !fn(0, r, a.Sub(r.Addr), j) {
			return
		}
	}
}
diff --git a/gocore/root.go b/gocore/root.go new file mode 100644 index 0000000..89a1ce8 --- /dev/null +++ b/gocore/root.go
@@ -0,0 +1,22 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "golang.org/x/debug/core" +) + +// A Root is an area of memory that might have pointers into the Go heap. +type Root struct { + Name string + Addr core.Address + Type *Type // always non-nil + // Frame, if non-nil, points to the frame in which this root lives. + // Roots with non-nil Frame fields refer to local variables on a stack. + // A stack root might be a large type, with some of its fields live and + // others dead. Consult Frame.Live to find out which pointers in a stack + // root are live. + Frame *Frame +}
diff --git a/gocore/testdata/README b/gocore/testdata/README new file mode 100644 index 0000000..11d9761 --- /dev/null +++ b/gocore/testdata/README
@@ -0,0 +1,14 @@ +This directory contains a simple core file for use in testing. + +It was generated by running the following program with go1.9.0. + +package main + +func main() { + _ = *(*int)(nil) +} + +The core file includes the executable by reference using an absolute +path. The executable was at /tmp/test, so that path is reproduced +here so the core dump reader can find the executable using this +testdata directory as the base directory.
diff --git a/gocore/testdata/core b/gocore/testdata/core new file mode 100644 index 0000000..55318de --- /dev/null +++ b/gocore/testdata/core Binary files differ
diff --git a/gocore/testdata/tmp/test b/gocore/testdata/tmp/test new file mode 100755 index 0000000..f3353b2 --- /dev/null +++ b/gocore/testdata/tmp/test Binary files differ
diff --git a/gocore/type.go b/gocore/type.go new file mode 100644 index 0000000..17c9c57 --- /dev/null +++ b/gocore/type.go
@@ -0,0 +1,609 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gocore + +import ( + "fmt" + "strings" + + "golang.org/x/debug/core" +) + +// A Type is the representation of the type of a Go object. +// Types are not necessarily canonical. +// Names are opaque; do not depend on the format of the returned name. +type Type struct { + Name string + Size int64 + Kind Kind + + // Fields only valid for a subset of kinds. + Count int64 // for kind == KindArray + Elem *Type // for kind == Kind{Ptr,Array,Slice,String}. nil for unsafe.Pointer. Always uint8 for KindString. + Fields []Field // for kind == KindStruct +} + +type Kind uint8 + +const ( + KindNone Kind = iota + KindBool + KindInt + KindUint + KindFloat + KindComplex + KindArray + KindPtr // includes chan, map, unsafe.Pointer + KindIface + KindEface + KindSlice + KindString + KindStruct + KindFunc +) + +func (k Kind) String() string { + return [...]string{ + "KindNone", + "KindBool", + "KindInt", + "KindUint", + "KindFloat", + "KindComplex", + "KindArray", + "KindPtr", + "KindIface", + "KindEface", + "KindSlice", + "KindString", + "KindStruct", + "KindFunc", + }[k] +} + +// A Field represents a single field of a struct type. +type Field struct { + Name string + Off int64 + Type *Type +} + +func (t *Type) String() string { + return t.Name +} + +func (t *Type) field(name string) *Field { + if t.Kind != KindStruct { + panic("asking for field of non-struct") + } + for i := range t.Fields { + f := &t.Fields[i] + if f.Name == name { + return f + } + } + return nil +} + +// Convert the address of a runtime._type to a *Type. +// Guaranteed to return a non-nil *Type. 
+func (p *Process) runtimeType2Type(a core.Address) *Type { + if t := p.runtimeMap[a]; t != nil { + return t + } + + // Read runtime._type.size + r := region{p: p, a: a, typ: p.findType("runtime._type")} + size := int64(r.Field("size").Uintptr()) + + // Find module this type is in. + var m *module + for _, x := range p.modules { + if x.types <= a && a < x.etypes { + m = x + break + } + } + + // Read information out of the runtime._type. + var name string + if m != nil { + x := m.types.Add(int64(r.Field("str").Int32())) + n := uint16(p.proc.ReadUint8(x.Add(1)))<<8 + uint16(p.proc.ReadUint8(x.Add(2))) + b := make([]byte, n) + p.proc.ReadAt(b, x.Add(3)) + name = string(b) + if r.Field("tflag").Uint8()&uint8(p.rtConstants["tflagExtraStar"]) != 0 { + name = name[1:] + } + } else { + // A reflect-generated type. + // TODO: The actual name is in the runtime.reflectOffs map. + // Too hard to look things up in maps here, just allocate a placeholder for now. + name = fmt.Sprintf("reflect.generated%x", a) + } + + // Read ptr/nonptr bits + ptrSize := p.proc.PtrSize() + nptrs := int64(r.Field("ptrdata").Uintptr()) / ptrSize + var ptrs []int64 + if r.Field("kind").Uint8()&uint8(p.rtConstants["kindGCProg"]) == 0 { + gcdata := r.Field("gcdata").Address() + for i := int64(0); i < nptrs; i++ { + if p.proc.ReadUint8(gcdata.Add(i/8))>>uint(i%8)&1 != 0 { + ptrs = append(ptrs, i*ptrSize) + } + } + } else { + // TODO: run GC program to get ptr indexes + } + + // Find a Type that matches this type. + // (The matched type will be one constructed from DWARF info.) + // It must match name, size, and pointer bits. + var candidates []*Type + for _, t := range p.runtimeNameMap[name] { + if size == t.Size && equal(ptrs, t.ptrs()) { + candidates = append(candidates, t) + } + } + var t *Type + if len(candidates) > 0 { + // If a runtime type matches more than one DWARF type, + // pick one arbitrarily. + // This looks mostly harmless. DWARF has some redundant entries. 
+ // For example, [32]uint8 appears twice. + // TODO: investigate the reason for this duplication. + t = candidates[0] + } else { + // There's no corresponding DWARF type. Make our own. + t = &Type{Name: name, Size: size, Kind: KindStruct} + n := t.Size / ptrSize + + // Types to use for ptr/nonptr fields of runtime types which + // have no corresponding DWARF type. + ptr := p.findType("unsafe.Pointer") + nonptr := p.findType("uintptr") + if ptr == nil || nonptr == nil { + panic("ptr / nonptr standins missing") + } + + for i := int64(0); i < n; i++ { + typ := nonptr + if len(ptrs) > 0 && ptrs[0] == i*ptrSize { + typ = ptr + ptrs = ptrs[1:] + } + t.Fields = append(t.Fields, Field{ + Name: fmt.Sprintf("f%d", i), + Off: i * ptrSize, + Type: typ, + }) + + } + if t.Size%ptrSize != 0 { + // TODO: tail of <ptrSize data. + } + } + // Memoize. + p.runtimeMap[a] = t + + return t +} + +// ptrs returns a sorted list of pointer offsets in t. +func (t *Type) ptrs() []int64 { + return t.ptrs1(nil, 0) +} +func (t *Type) ptrs1(s []int64, off int64) []int64 { + switch t.Kind { + case KindPtr, KindFunc, KindSlice, KindString: + s = append(s, off) + case KindIface, KindEface: + s = append(s, off, off+t.Size/2) + case KindArray: + if t.Count > 10000 { + // Be careful about really large types like [1e9]*byte. + // To process such a type we'd make a huge ptrs list. + // The ptrs list here is only used for matching + // a runtime type with a dwarf type, and for making + // fields for types with no dwarf type. + // Both uses can fail with no terrible repercussions. + // We still will scan the whole object during markObjects, for example. + // TODO: make this more robust somehow. 
+ break + } + for i := int64(0); i < t.Count; i++ { + s = t.Elem.ptrs1(s, off) + off += t.Elem.Size + } + case KindStruct: + for _, f := range t.Fields { + s = f.Type.ptrs1(s, off+f.Off) + } + default: + // no pointers + } + return s +} + +func equal(a, b []int64) bool { + if len(a) != len(b) { + return false + } + for i, x := range a { + if x != b[i] { + return false + } + } + return true +} + +// A typeInfo contains information about the type of an object. +// A slice of these hold the results of typing the heap. +type typeInfo struct { + // This object has an effective type of [r]t. + // Parts of the object beyond the first r*t.Size bytes have unknown type. + // If t == nil, the type is unknown. (TODO: provide access to ptr/nonptr bits in this case.) + t *Type + r int64 +} + +// A typeChunk records type information for a portion of an object. +// Similar to a typeInfo, but it has an offset so it can be used for interior typings. +type typeChunk struct { + off int64 + t *Type + r int64 +} + +func (c typeChunk) min() int64 { + return c.off +} +func (c typeChunk) max() int64 { + return c.off + c.r*c.t.Size +} +func (c typeChunk) size() int64 { + return c.r * c.t.Size +} +func (c typeChunk) matchingAlignment(d typeChunk) bool { + if c.t != d.t { + panic("can't check alignment of differently typed chunks") + } + return (c.off-d.off)%c.t.Size == 0 +} + +func (c typeChunk) merge(d typeChunk) typeChunk { + t := c.t + if t != d.t { + panic("can't merge chunks with different types") + } + size := t.Size + if (c.off-d.off)%size != 0 { + panic("can't merge poorly aligned chunks") + } + min := c.min() + max := c.max() + if max < d.min() || min > d.max() { + panic("can't merge chunks which don't overlap or abut") + } + if x := d.min(); x < min { + min = x + } + if x := d.max(); x > max { + max = x + } + return typeChunk{off: min, t: t, r: (max - min) / size} +} +func (c typeChunk) String() string { + return fmt.Sprintf("%x[%d]%s", c.off, c.r, c.t) +} + +// typeHeap tries to 
label all the heap objects with types. +func (p *Process) typeHeap() { + // Type info for the start of each object. a.k.a. "0 offset" typings. + p.types = make([]typeInfo, p.nObj) + + // Type info for the interior of objects, a.k.a. ">0 offset" typings. + // Type information is arranged in chunks. Chunks are stored in an + // arbitrary order, and are guaranteed to not overlap. If types are + // equal, chunks are also guaranteed not to abut. + // Interior typings are kept separate because they hopefully are rare. + // TODO: They aren't really that rare. On some large heaps I tried + // ~50% of objects have an interior pointer into them. + // Keyed by object index. + interior := map[int][]typeChunk{} + + // Typings we know about but haven't scanned yet. + type workRecord struct { + a core.Address + t *Type + r int64 + } + var work []workRecord + + // add records the fact that we know the object at address a has + // r copies of type t. + add := func(a core.Address, t *Type, r int64) { + if a == 0 { // nil pointer + return + } + i, off := p.findObjectIndex(a) + if i < 0 { // pointer doesn't point to an object in the Go heap + return + } + if off == 0 { + // We have a 0-offset typing. Replace existing 0-offset typing + // if the new one is larger. + ot := p.types[i].t + or := p.types[i].r + if ot == nil || r*t.Size > or*ot.Size { + if t == ot { + // Scan just the new section. + work = append(work, workRecord{ + a: a.Add(or * ot.Size), + t: t, + r: r - or, + }) + } else { + // Rescan the whole typing using the updated type. + work = append(work, workRecord{ + a: a, + t: t, + r: r, + }) + } + p.types[i].t = t + p.types[i].r = r + } + return + } + + // Add an interior typing to object #i. + c := typeChunk{off: off, t: t, r: r} + + // Merge the given typing into the chunks we already know. + // TODO: this could be O(n) per insert if there are lots of internal pointers. 
+ chunks := interior[i] + newchunks := chunks[:0] + addWork := true + for _, d := range chunks { + if c.max() <= d.min() || c.min() >= d.max() { + // c does not overlap with d. + if c.t == d.t && (c.max() == d.min() || c.min() == d.max()) { + // c and d abut and share the same base type. Merge them. + c = c.merge(d) + continue + } + // Keep existing chunk d. + // Overwrites chunks slice, but we're only merging chunks so it + // can't overwrite to-be-processed elements. + newchunks = append(newchunks, d) + continue + } + // There is some overlap. There are a few possibilities: + // 1) One is completely contained in the other. + // 2) Both are slices of a larger underlying array. + // 3) Some unsafe trickery has happened. Non-containing overlap + // can only happen in safe Go via case 2. + if c.min() >= d.min() && c.max() <= d.max() { + // 1a: c is contained within the existing chunk d. + // Note that there can be a type mismatch between c and d, + // but we don't care. We use the larger chunk regardless. + c = d + addWork = false // We've already scanned all of c. + continue + } + if d.min() >= c.min() && d.max() <= c.max() { + // 1b: existing chunk d is completely covered by c. + continue + } + if c.t == d.t && c.matchingAlignment(d) { + // Union two regions of the same base type. Case 2 above. + c = c.merge(d) + continue + } + if c.size() < d.size() { + // Keep the larger of the two chunks. + c = d + addWork = false + } + } + // Add new chunk to list of chunks for object. + newchunks = append(newchunks, c) + interior[i] = newchunks + // Also arrange to scan the new chunk. Note that if we merged + // with an existing chunk (or chunks), those will get rescanned. + // Duplicate work, but that's ok. TODO: but could be expensive. + if addWork { + work = append(work, workRecord{ + a: a.Add(c.off - off), + t: c.t, + r: c.r, + }) + } + } + + // Get typings starting at roots. 
+ fr := &frameReader{p: p} + p.ForEachRoot(func(r *Root) bool { + if r.Frame != nil { + fr.live = r.Frame.Live + p.typeObject(r.Addr, r.Type, fr, add) + } else { + p.typeObject(r.Addr, r.Type, p.proc, add) + } + return true + }) + + // Propagate typings through the heap. + for len(work) > 0 { + c := work[len(work)-1] + work = work[:len(work)-1] + for i := int64(0); i < c.r; i++ { + p.typeObject(c.a.Add(i*c.t.Size), c.t, p.proc, add) + } + } + + // Merge any interior typings with the 0-offset typing. + for i, chunks := range interior { + t := p.types[i].t + r := p.types[i].r + if t == nil { + continue // We have no type info at offset 0. + } + for _, c := range chunks { + if c.max() <= r*t.Size { + // c is completely contained in the 0-offset typing. Ignore it. + continue + } + if c.min() <= r*t.Size { + // Typings overlap or abut. Extend if we can. + if c.t == t && c.min()%t.Size == 0 { + r = c.max() / t.Size + p.types[i].r = r + } + continue + } + // Note: at this point we throw away any interior typings that weren't + // merged with the 0-offset typing. TODO: make more use of this info. + } + } +} + +type reader interface { + ReadPtr(core.Address) core.Address + ReadInt(core.Address) int64 +} + +// A frameReader reads data out of a stack frame. +// Any pointer slots marked as dead will read as nil instead of their real value. +type frameReader struct { + p *Process + live map[core.Address]bool +} + +func (fr *frameReader) ReadPtr(a core.Address) core.Address { + if !fr.live[a] { + return 0 + } + return fr.p.proc.ReadPtr(a) +} +func (fr *frameReader) ReadInt(a core.Address) int64 { + return fr.p.proc.ReadInt(a) +} + +// typeObject takes an address and a type for the data at that address. +// For each pointer it finds in the memory at that address, it calls add with the pointer +// and the type + repeat count of the thing that it points to. 
+func (p *Process) typeObject(a core.Address, t *Type, r reader, add func(core.Address, *Type, int64)) { + ptrSize := p.proc.PtrSize() + + switch t.Kind { + case KindBool, KindInt, KindUint, KindFloat, KindComplex: + // Nothing to do + case KindEface, KindIface: + // interface. Use the type word to determine the type + // of the pointed-to object. + typ := r.ReadPtr(a) + if typ == 0 { // nil interface + return + } + ptr := r.ReadPtr(a.Add(ptrSize)) + if t.Kind == KindIface { + typ = p.proc.ReadPtr(typ.Add(p.findType("runtime.itab").field("_type").Off)) + } + // TODO: for KindEface, type the typ pointer. It might point to the heap + // if the type was allocated with reflect. + dt := p.runtimeType2Type(typ) + typr := region{p: p, a: typ, typ: p.findType("runtime._type")} + if typr.Field("kind").Uint8()&uint8(p.rtConstants["kindDirectIface"]) != 0 { + // Find the base type of the pointer held in the interface. + findptr: + if dt.Kind == KindArray { + dt = dt.Elem + goto findptr + } + if dt.Kind == KindStruct { + for _, f := range dt.Fields { + if f.Type.Size != 0 { + dt = f.Type + goto findptr + } + } + } + if dt.Kind != KindPtr { + panic(fmt.Sprintf("direct type isn't a pointer %s", dt.Kind)) + } + dt = dt.Elem + } + add(ptr, dt, 1) + case KindString: + ptr := r.ReadPtr(a) + len := r.ReadInt(a.Add(ptrSize)) + add(ptr, t.Elem, len) + case KindSlice: + ptr := r.ReadPtr(a) + cap := r.ReadInt(a.Add(2 * ptrSize)) + add(ptr, t.Elem, cap) + case KindPtr: + if t.Elem != nil { // unsafe.Pointer has a nil Elem field. + add(r.ReadPtr(a), t.Elem, 1) + } + case KindFunc: + // The referent is a closure. We don't know much about the + // type of the referent. Its first entry is a code pointer. + // The runtime._type we want exists in the binary (for all + // heap-allocated closures, anyway) but it would be hard to find + // just given the pc. 
+ closure := r.ReadPtr(a) + if closure == 0 { + break + } + pc := p.proc.ReadPtr(closure) + f := p.funcTab.find(pc) + if f == nil { + panic(fmt.Sprintf("can't find func for closure pc %x", pc)) + } + ft := f.closure + if ft == nil { + ft = &Type{Name: "closure for " + f.name, Size: ptrSize, Kind: KindPtr} + // For now, treat a closure like an unsafe.Pointer. + // TODO: better value for size? + f.closure = ft + } + p.typeObject(closure, ft, r, add) + case KindArray: + n := t.Elem.Size + for i := int64(0); i < t.Count; i++ { + p.typeObject(a.Add(i*n), t.Elem, r, add) + } + case KindStruct: + if strings.HasPrefix(t.Name, "hash<") { + // Special case - maps have a pointer to the first bucket + // but it really types all the buckets (like a slice would). + var bPtr core.Address + var bTyp *Type + var n int64 + for _, f := range t.Fields { + if f.Name == "buckets" { + bPtr = p.proc.ReadPtr(a.Add(f.Off)) + bTyp = f.Type.Elem + } + if f.Name == "B" { + n = int64(1) << p.proc.ReadUint8(a.Add(f.Off)) + } + } + add(bPtr, bTyp, n) + // TODO: also oldbuckets + } + // TODO: also special case for channels? + for _, f := range t.Fields { + p.typeObject(a.Add(f.Off), f.Type, r, add) + } + default: + panic(fmt.Sprintf("unknown type kind %s\n", t.Kind)) + } +}