runtime: fix deadlock when gctrace
Calling ReadMemStats which does stoptheworld on m0 holding locks
was not a good idea.
Stoptheworld holding locks is a recipe for deadlocks (added check for this).
Stoptheworld on g0 may or may not work (added check for this as well).
As far as I understand scavenger will print incorrect numbers now,
as stack usage is not subtracted from heap. But it's better than deadlocking.
LGTM=khr
R=golang-codereviews, rsc, khr
CC=golang-codereviews, rlh
https://golang.org/cl/124670043
diff --git a/src/pkg/runtime/heapdump.c b/src/pkg/runtime/heapdump.c
index 63d80b8..a2d12ad 100644
--- a/src/pkg/runtime/heapdump.c
+++ b/src/pkg/runtime/heapdump.c
@@ -748,7 +748,6 @@
// Stop the world.
runtime·semacquire(&runtime·worldsema, false);
g->m->gcing = 1;
- g->m->locks++;
runtime·stoptheworld();
// Update stats so we can dump them.
@@ -774,6 +773,7 @@
// Start up the world again.
g->m->gcing = 0;
+ g->m->locks++;
runtime·semrelease(&runtime·worldsema);
runtime·starttheworld();
g->m->locks--;
diff --git a/src/pkg/runtime/malloc.go b/src/pkg/runtime/malloc.go
index 8ee4607..578fbd1 100644
--- a/src/pkg/runtime/malloc.go
+++ b/src/pkg/runtime/malloc.go
@@ -413,6 +413,7 @@
return
}
releasem(mp)
+ mp = nil
if panicking != 0 {
return
@@ -441,7 +442,11 @@
startTime := gonanotime()
mp = acquirem()
mp.gcing = 1
+ releasem(mp)
stoptheworld()
+ if mp != acquirem() {
+ gothrow("gogc: rescheduled")
+ }
clearpools()
@@ -474,6 +479,7 @@
semrelease(&worldsema)
starttheworld()
releasem(mp)
+ mp = nil
// now that gc is done, kick off finalizer thread if needed
if !concurrentSweep {
diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c
index 5998724..8e6190c 100644
--- a/src/pkg/runtime/mheap.c
+++ b/src/pkg/runtime/mheap.c
@@ -608,7 +608,6 @@
{
uint32 i;
uintptr sumreleased;
- MStats stats;
MHeap *h;
h = &runtime·mheap;
@@ -618,12 +617,13 @@
sumreleased += scavengelist(&h->freelarge, now, limit);
if(runtime·debug.gctrace > 0) {
- runtime·ReadMemStats(&stats);
if(sumreleased > 0)
runtime·printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20);
+ // TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
+ // But we can't call ReadMemStats on g0 holding locks.
runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
- k, stats.heap_inuse>>20, stats.heap_idle>>20, stats.heap_sys>>20,
- stats.heap_released>>20, (stats.heap_sys - stats.heap_released)>>20);
+ k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
+ mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
}
}
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 2510a42..8584cb6 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -498,6 +498,14 @@
P *p;
bool wait;
+ // If we hold a lock, then we won't be able to stop another M
+ // that is blocked trying to acquire the lock.
+ if(g->m->locks > 0)
+ runtime·throw("stoptheworld: holding locks");
+ // There is no evidence that stoptheworld on g0 does not work,
+ // we just don't do it today.
+ if(g == g->m->g0)
+ runtime·throw("stoptheworld: on g0");
runtime·lock(&runtime·sched.lock);
runtime·sched.stopwait = runtime·gomaxprocs;
runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);