runtime: fix deadlock when gctrace is enabled. Calling ReadMemStats, which does stoptheworld, on m0 while holding locks was not a good idea. Calling stoptheworld while holding locks is a recipe for deadlocks (added a check for this). Stoptheworld on g0 may or may not work (added a check for this as well). As far as I understand, the scavenger will now print incorrect numbers, as stack usage is not subtracted from heap. But that is better than deadlocking. LGTM=khr R=golang-codereviews, rsc, khr CC=golang-codereviews, rlh https://golang.org/cl/124670043
diff --git a/src/pkg/runtime/heapdump.c b/src/pkg/runtime/heapdump.c index 63d80b8..a2d12ad 100644 --- a/src/pkg/runtime/heapdump.c +++ b/src/pkg/runtime/heapdump.c
@@ -748,7 +748,6 @@ // Stop the world. runtime·semacquire(&runtime·worldsema, false); g->m->gcing = 1; - g->m->locks++; runtime·stoptheworld(); // Update stats so we can dump them. @@ -774,6 +773,7 @@ // Start up the world again. g->m->gcing = 0; + g->m->locks++; runtime·semrelease(&runtime·worldsema); runtime·starttheworld(); g->m->locks--;
diff --git a/src/pkg/runtime/malloc.go b/src/pkg/runtime/malloc.go index 8ee4607..578fbd1 100644 --- a/src/pkg/runtime/malloc.go +++ b/src/pkg/runtime/malloc.go
@@ -413,6 +413,7 @@ return } releasem(mp) + mp = nil if panicking != 0 { return @@ -441,7 +442,11 @@ startTime := gonanotime() mp = acquirem() mp.gcing = 1 + releasem(mp) stoptheworld() + if mp != acquirem() { + gothrow("gogc: rescheduled") + } clearpools() @@ -474,6 +479,7 @@ semrelease(&worldsema) starttheworld() releasem(mp) + mp = nil // now that gc is done, kick off finalizer thread if needed if !concurrentSweep {
diff --git a/src/pkg/runtime/mheap.c b/src/pkg/runtime/mheap.c index 5998724..8e6190c 100644 --- a/src/pkg/runtime/mheap.c +++ b/src/pkg/runtime/mheap.c
@@ -608,7 +608,6 @@ { uint32 i; uintptr sumreleased; - MStats stats; MHeap *h; h = &runtime·mheap; @@ -618,12 +617,13 @@ sumreleased += scavengelist(&h->freelarge, now, limit); if(runtime·debug.gctrace > 0) { - runtime·ReadMemStats(&stats); if(sumreleased > 0) runtime·printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20); + // TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap. + // But we can't call ReadMemStats on g0 holding locks. runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n", - k, stats.heap_inuse>>20, stats.heap_idle>>20, stats.heap_sys>>20, - stats.heap_released>>20, (stats.heap_sys - stats.heap_released)>>20); + k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20, + mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20); } }
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 2510a42..8584cb6 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c
@@ -498,6 +498,14 @@ P *p; bool wait; + // If we hold a lock, then we won't be able to stop another M + // that is blocked trying to acquire the lock. + if(g->m->locks > 0) + runtime·throw("stoptheworld: holding locks"); + // There is no evidence that stoptheworld on g0 does not work, + // we just don't do it today. + if(g == g->m->g0) + runtime·throw("stoptheworld: on g0"); runtime·lock(&runtime·sched.lock); runtime·sched.stopwait = runtime·gomaxprocs; runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);