runtime: fix deadlock in parallel for test
The deadlock occurs when another goroutine requests a GC
during the test. With wait=true the test expects physical parallelism,
that is, that all P goroutines are active at the same time.
If a GC is requested, some of the goroutines are not scheduled,
so the other goroutines deadlock waiting for them.
With wait=false, goroutines finish the parallel for without waiting
for all the other goroutines.
Fixes #3954.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/6820098
diff --git a/src/pkg/runtime/parfor_test.go b/src/pkg/runtime/parfor_test.go
index 7644354..b382b76 100644
--- a/src/pkg/runtime/parfor_test.go
+++ b/src/pkg/runtime/parfor_test.go
@@ -109,14 +109,21 @@
 		data[i] = i
 	}
 	P := GOMAXPROCS(-1)
+	c := make(chan bool, P)
 	desc := NewParFor(uint32(P))
-	ParForSetup(desc, uint32(P), uint32(N), nil, true, func(desc *ParFor, i uint32) {
+	ParForSetup(desc, uint32(P), uint32(N), nil, false, func(desc *ParFor, i uint32) {
 		data[i] = data[i]*data[i] + 1
 	})
 	for p := 1; p < P; p++ {
-		go ParForDo(desc)
+		go func() {
+			ParForDo(desc)
+			c <- true
+		}()
 	}
 	ParForDo(desc)
+	for p := 1; p < P; p++ {
+		<-c
+	}
 	for i := uint64(0); i < N; i++ {
 		if data[i] != i*i+1 {
 			t.Fatalf("Wrong element %d: %d", i, data[i])