runtime: simplify CPU profiling code
This makes Go's CPU profiling code somewhat more idiomatic: e.g., it
uses := instead of forward-declaring variables, "int" instead of
"uintptr" for element counts, and slices instead of C-style
pointer+length pairs. This makes the code easier to read and
eliminates a lot of type-conversion clutter.
Additionally, sigprof now collects at most maxCPUProfStack stack
frames, since cpuprof.add never records more than that anyway.
Change-Id: I0235b5ae552191bcbb453b14add6d8c01381bd06
Reviewed-on: https://go-review.googlesource.com/6072
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go
index d8e0ab7..055b2af 100644
--- a/src/runtime/cpuprof.go
+++ b/src/runtime/cpuprof.go
@@ -30,8 +30,8 @@
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
-// a new piece by calling notesleep(&p->wait). If the signal handler
-// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait)
+// a new piece by calling notesleep(&p.wait). If the signal handler
+// changes handoff from 0 to non-zero, it must call notewakeup(&p.wait)
// to wake the goroutine. The value indicates the number of entries in the
// log half being handed off. The goroutine leaves the non-zero value in
// place until it has finished processing the log half and then flips the number
@@ -61,7 +61,7 @@
type cpuprofEntry struct {
count uintptr
- depth uintptr
+ depth int
stack [maxCPUProfStack]uintptr
}
@@ -81,7 +81,7 @@
// Signal handler has filled log[toggle][:nlog].
// Goroutine is writing log[1-toggle][:handoff].
log [2][logSize / 2]uintptr
- nlog uintptr
+ nlog int
toggle int32
handoff uint32
@@ -167,7 +167,7 @@
cpuprof.on = false
// Now add is not running anymore, and getprofile owns the entire log.
- // Set the high bit in prof->handoff to tell getprofile.
+ // Set the high bit in cpuprof.handoff to tell getprofile.
for {
n := cpuprof.handoff
if n&0x80000000 != 0 {
@@ -185,20 +185,16 @@
unlock(&cpuprofLock)
}
-func cpuproftick(pc *uintptr, n int32) {
- if n > maxCPUProfStack {
- n = maxCPUProfStack
- }
- s := (*[maxCPUProfStack]uintptr)(unsafe.Pointer(pc))[:n]
- cpuprof.add(s)
-}
-
// add adds the stack trace to the profile.
// It is called from signal handlers and other limited environments
// and cannot allocate memory or acquire locks that might be
// held at the time of the signal, nor can it use substantial amounts
// of stack. It is allowed to call evict.
func (p *cpuProfile) add(pc []uintptr) {
+ if len(pc) > maxCPUProfStack {
+ pc = pc[:maxCPUProfStack]
+ }
+
// Compute hash.
h := uintptr(0)
for _, x := range pc {
@@ -212,7 +208,7 @@
Assoc:
for i := range b.entry {
e := &b.entry[i]
- if e.depth != uintptr(len(pc)) {
+ if e.depth != len(pc) {
continue
}
for j := range pc {
@@ -241,7 +237,7 @@
}
// Reuse the newly evicted entry.
- e.depth = uintptr(len(pc))
+ e.depth = len(pc)
e.count = 1
copy(e.stack[:], pc)
}
@@ -256,7 +252,7 @@
d := e.depth
nslot := d + 2
log := &p.log[p.toggle]
- if p.nlog+nslot > uintptr(len(p.log[0])) {
+ if p.nlog+nslot > len(log) {
if !p.flushlog() {
return false
}
@@ -266,7 +262,7 @@
q := p.nlog
log[q] = e.count
q++
- log[q] = d
+ log[q] = uintptr(d)
q++
copy(log[q:], e.stack[:d])
q += d
@@ -287,7 +283,7 @@
p.toggle = 1 - p.toggle
log := &p.log[p.toggle]
- q := uintptr(0)
+ q := 0
if p.lost > 0 {
lostPC := funcPC(lostProfileData)
log[0] = p.lost
@@ -360,7 +356,7 @@
// In flush mode.
// Add is no longer being called. We own the log.
- // Also, p->handoff is non-zero, so flushlog will return false.
+ // Also, p.handoff is non-zero, so flushlog will return false.
// Evict the hash table into the log and return it.
Flush:
for i := range p.hash {
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index cd90390..540d7b5 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -114,7 +114,7 @@
// and what it called, so that we can see if it
// "called" sigpanic.
var rpc [2]uintptr
- if callers(1+skip-1, &rpc[0], 2) < 2 {
+ if callers(1+skip-1, rpc[:]) < 2 {
return
}
f := findfunc(rpc[1])
@@ -161,7 +161,7 @@
if len(pc) == 0 {
return 0
}
- return callers(skip, &pc[0], len(pc))
+ return callers(skip, pc)
}
// GOROOT returns the root of the Go tree.
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index df7093a..4544344 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -232,7 +232,7 @@
// Called by malloc to record a profiled block.
func mProf_Malloc(p unsafe.Pointer, size uintptr) {
var stk [maxStack]uintptr
- nstk := callers(4, &stk[0], len(stk))
+ nstk := callers(4, stk[:])
lock(&proflock)
b := stkbucket(memProfile, size, stk[:nstk], true)
mp := b.mp()
@@ -300,9 +300,9 @@
var nstk int
var stk [maxStack]uintptr
if gp.m.curg == nil || gp.m.curg == gp {
- nstk = callers(skip, &stk[0], len(stk))
+ nstk = callers(skip, stk[:])
} else {
- nstk = gcallers(gp.m.curg, skip, &stk[0], len(stk))
+ nstk = gcallers(gp.m.curg, skip, stk[:])
}
lock(&proflock)
b := stkbucket(blockProfile, 0, stk[:nstk], true)
diff --git a/src/runtime/os1_windows.go b/src/runtime/os1_windows.go
index 2de6b09..744b1a9 100644
--- a/src/runtime/os1_windows.go
+++ b/src/runtime/os1_windows.go
@@ -527,7 +527,7 @@
r = (*context)(unsafe.Pointer((uintptr(unsafe.Pointer(&rbuf[15]))) &^ 15))
r.contextflags = _CONTEXT_CONTROL
stdcall2(_GetThreadContext, mp.thread, uintptr(unsafe.Pointer(r)))
- sigprof((*byte)(unsafe.Pointer(r.ip())), (*byte)(unsafe.Pointer(r.sp())), nil, gp, mp)
+ sigprof(r.ip(), r.sp(), 0, gp, mp)
}
func profileloop1() {
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
index 1eef1b8..7b0d7fd 100644
--- a/src/runtime/proc1.go
+++ b/src/runtime/proc1.go
@@ -100,7 +100,7 @@
// g0 stack won't make sense for user (and is not necessary unwindable).
if _g_ != _g_.m.g0 {
- callers(1, &mp.createstack[0], len(mp.createstack))
+ callers(1, mp.createstack[:])
}
mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
@@ -2286,11 +2286,7 @@
var etext struct{}
// Called if we receive a SIGPROF signal.
-func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
- var n int32
- var traceback bool
- var stk [100]uintptr
-
+func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
if prof.hz == 0 {
return
}
@@ -2370,18 +2366,18 @@
// To recap, there are no constraints on the assembly being used for the
// transition. We simply require that g and SP match and that the PC is not
// in gogo.
- traceback = true
- usp := uintptr(unsafe.Pointer(sp))
+ traceback := true
gogo := funcPC(gogo)
if gp == nil || gp != mp.curg ||
- usp < gp.stack.lo || gp.stack.hi < usp ||
- (gogo <= uintptr(unsafe.Pointer(pc)) && uintptr(unsafe.Pointer(pc)) < gogo+_RuntimeGogoBytes) {
+ sp < gp.stack.lo || gp.stack.hi < sp ||
+ (gogo <= pc && pc < gogo+_RuntimeGogoBytes) {
traceback = false
}
- n = 0
+ var stk [maxCPUProfStack]uintptr
+ n := 0
if traceback {
- n = int32(gentraceback(uintptr(unsafe.Pointer(pc)), uintptr(unsafe.Pointer(sp)), uintptr(unsafe.Pointer(lr)), gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap))
+ n = gentraceback(pc, sp, lr, gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap)
}
if !traceback || n <= 0 {
// Normal traceback is impossible or has failed.
@@ -2391,21 +2387,21 @@
// Cgo, we can't unwind and symbolize arbitrary C code,
// so instead collect Go stack that leads to the cgo call.
// This is especially important on windows, since all syscalls are cgo calls.
- n = int32(gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0))
+ n = gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0)
}
if GOOS == "windows" && n == 0 && mp.libcallg != nil && mp.libcallpc != 0 && mp.libcallsp != 0 {
// Libcall, i.e. runtime syscall on windows.
// Collect Go stack that leads to the call.
- n = int32(gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0))
+ n = gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0)
}
if n == 0 {
// If all of the above has failed, account it against abstract "System" or "GC".
n = 2
// "ExternalCode" is better than "etext".
- if uintptr(unsafe.Pointer(pc)) > uintptr(unsafe.Pointer(&etext)) {
- pc = (*uint8)(unsafe.Pointer(uintptr(funcPC(_ExternalCode) + _PCQuantum)))
+ if pc > uintptr(unsafe.Pointer(&etext)) {
+ pc = funcPC(_ExternalCode) + _PCQuantum
}
- stk[0] = uintptr(unsafe.Pointer(pc))
+ stk[0] = pc
if mp.preemptoff != "" || mp.helpgc != 0 {
stk[1] = funcPC(_GC) + _PCQuantum
} else {
@@ -2420,7 +2416,7 @@
osyield()
}
if prof.hz != 0 {
- cpuproftick(&stk[0], n)
+ cpuprof.add(stk[:n])
}
atomicstore(&prof.lock, 0)
}
diff --git a/src/runtime/signal_386.go b/src/runtime/signal_386.go
index 5336a43..0ca593d 100644
--- a/src/runtime/signal_386.go
+++ b/src/runtime/signal_386.go
@@ -29,7 +29,7 @@
c := &sigctxt{info, ctxt}
if sig == _SIGPROF {
- sigprof((*byte)(unsafe.Pointer(uintptr(c.eip()))), (*byte)(unsafe.Pointer(uintptr(c.esp()))), nil, gp, _g_.m)
+ sigprof(uintptr(c.eip()), uintptr(c.esp()), 0, gp, _g_.m)
return
}
diff --git a/src/runtime/signal_amd64x.go b/src/runtime/signal_amd64x.go
index 50ecfbe..cd87d76 100644
--- a/src/runtime/signal_amd64x.go
+++ b/src/runtime/signal_amd64x.go
@@ -42,7 +42,7 @@
c := &sigctxt{info, ctxt}
if sig == _SIGPROF {
- sigprof((*byte)(unsafe.Pointer(uintptr(c.rip()))), (*byte)(unsafe.Pointer(uintptr(c.rsp()))), nil, gp, _g_.m)
+ sigprof(uintptr(c.rip()), uintptr(c.rsp()), 0, gp, _g_.m)
return
}
diff --git a/src/runtime/signal_arm.go b/src/runtime/signal_arm.go
index d224ce6..c07b45e 100644
--- a/src/runtime/signal_arm.go
+++ b/src/runtime/signal_arm.go
@@ -37,7 +37,7 @@
c := &sigctxt{info, ctxt}
if sig == _SIGPROF {
- sigprof((*byte)(unsafe.Pointer(uintptr(c.pc()))), (*byte)(unsafe.Pointer(uintptr(c.sp()))), (*byte)(unsafe.Pointer(uintptr(c.lr()))), gp, _g_.m)
+ sigprof(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.lr()), gp, _g_.m)
return
}
diff --git a/src/runtime/signal_ppc64x.go b/src/runtime/signal_ppc64x.go
index ab32300..94b4708 100644
--- a/src/runtime/signal_ppc64x.go
+++ b/src/runtime/signal_ppc64x.go
@@ -55,7 +55,7 @@
c := &sigctxt{info, ctxt}
if sig == _SIGPROF {
- sigprof((*byte)(unsafe.Pointer(uintptr(c.pc()))), (*byte)(unsafe.Pointer(uintptr(c.sp()))), (*byte)(unsafe.Pointer(uintptr(c.link()))), gp, _g_.m)
+ sigprof(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.link()), gp, _g_.m)
return
}
flags := int32(_SigThrow)
diff --git a/src/runtime/trace.go b/src/runtime/trace.go
index 9804092..3a4421b 100644
--- a/src/runtime/trace.go
+++ b/src/runtime/trace.go
@@ -468,9 +468,9 @@
}
var nstk int
if gp == _g_ {
- nstk = callers(1, &buf.stk[0], len(buf.stk))
+ nstk = callers(1, buf.stk[:])
} else if gp != nil {
- nstk = gcallers(mp.curg, 1, &buf.stk[0], len(buf.stk))
+ nstk = gcallers(mp.curg, 1, buf.stk[:])
}
id := trace.stackTab.put(buf.stk[:nstk])
data = traceAppend(data, uint64(id))
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 8c31c5a..92dddfd 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -104,7 +104,7 @@
// the runtime.Callers function (pcbuf != nil), as well as the garbage
// collector (callback != nil). A little clunky to merge these, but avoids
// duplicating the code and all its subtlety.
-func gentraceback(pc0 uintptr, sp0 uintptr, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, flags uint) int {
+func gentraceback(pc0, sp0, lr0 uintptr, gp *g, skip int, pcbuf *uintptr, max int, callback func(*stkframe, unsafe.Pointer) bool, v unsafe.Pointer, flags uint) int {
if goexitPC == 0 {
throw("gentraceback before goexitPC initialization")
}
@@ -367,7 +367,7 @@
}
}
- if pcbuf == nil && callback == nil {
+ if printing {
n = nprint
}
@@ -474,7 +474,7 @@
}
}
-func traceback(pc uintptr, sp uintptr, lr uintptr, gp *g) {
+func traceback(pc, sp, lr uintptr, gp *g) {
traceback1(pc, sp, lr, gp, 0)
}
@@ -484,11 +484,11 @@
// the initial PC must not be rewound to the previous instruction.
// (All the saved pairs record a PC that is a return address, so we
// rewind it into the CALL instruction.)
-func tracebacktrap(pc uintptr, sp uintptr, lr uintptr, gp *g) {
+func tracebacktrap(pc, sp, lr uintptr, gp *g) {
traceback1(pc, sp, lr, gp, _TraceTrap)
}
-func traceback1(pc uintptr, sp uintptr, lr uintptr, gp *g, flags uint) {
+func traceback1(pc, sp, lr uintptr, gp *g, flags uint) {
var n int
if readgstatus(gp)&^_Gscan == _Gsyscall {
// Override registers if blocked in system call.
@@ -508,18 +508,18 @@
printcreatedby(gp)
}
-func callers(skip int, pcbuf *uintptr, m int) int {
+func callers(skip int, pcbuf []uintptr) int {
sp := getcallersp(unsafe.Pointer(&skip))
pc := uintptr(getcallerpc(unsafe.Pointer(&skip)))
var n int
systemstack(func() {
- n = gentraceback(pc, sp, 0, getg(), skip, pcbuf, m, nil, nil, 0)
+ n = gentraceback(pc, sp, 0, getg(), skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
})
return n
}
-func gcallers(gp *g, skip int, pcbuf *uintptr, m int) int {
- return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, pcbuf, m, nil, nil, 0)
+func gcallers(gp *g, skip int, pcbuf []uintptr) int {
+ return gentraceback(^uintptr(0), ^uintptr(0), 0, gp, skip, &pcbuf[0], len(pcbuf), nil, nil, 0)
}
func showframe(f *_func, gp *g) bool {