runtime: always run stackalloc on scheduler stack

Avoids deadlocks like the one below, in which a stack split happened
in order to call lock(&stacks), but then the stack unsplit cannot run
because stacks is now locked.

The only code calling stackalloc that wasn't on a scheduler
stack already was malg, which creates a new goroutine.

runtime.futex+0x23 /home/rsc/g/go/src/pkg/runtime/linux/amd64/sys.s:139
       runtime.futex()
futexsleep+0x50 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:51
       futexsleep(0x5b0188, 0x300000003, 0x100020000, 0x4159e2)
futexlock+0x85 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:119
       futexlock(0x5b0188, 0x5b0188)
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x7f0d27b4a000)
runtime.stackfree+0x4d /home/rsc/g/go/src/pkg/runtime/malloc.goc:336
       runtime.stackfree(0x7f0d27b4a000, 0x1000, 0x8, 0x7fff37e1e218)
runtime.oldstack+0xa6 /home/rsc/g/go/src/pkg/runtime/proc.c:705
       runtime.oldstack()
runtime.lessstack+0x22 /home/rsc/g/go/src/pkg/runtime/amd64/asm.s:224
       runtime.lessstack()
----- lessstack called from goroutine 2 -----
runtime.lock+0x56 /home/rsc/g/go/src/pkg/runtime/linux/thread.c:158
       runtime.lock(0x5b0188, 0x40a5e2)
runtime.stackalloc+0x55 /home/rsc/g/go/src/pkg/runtime/malloc.c:316
       runtime.stackalloc(0x1000, 0x4055b0)
runtime.malg+0x3d /home/rsc/g/go/src/pkg/runtime/proc.c:803
       runtime.malg(0x1000, 0x40add9)
runtime.newproc1+0x12b /home/rsc/g/go/src/pkg/runtime/proc.c:854
       runtime.newproc1(0xf840027440, 0x7f0d27b49230, 0x0, 0x49f238, 0x40, ...)
runtime.newproc+0x2f /home/rsc/g/go/src/pkg/runtime/proc.c:831
       runtime.newproc(0x0, 0xf840027440, 0xf800000010, 0x44b059)
...

R=r, r2
CC=golang-dev
https://golang.org/cl/4216045
diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc
index abbf63b..4106068 100644
--- a/src/pkg/runtime/malloc.goc
+++ b/src/pkg/runtime/malloc.goc
@@ -394,6 +394,12 @@
 {
 	void *v;
 
+	// Stackalloc must be called on scheduler stack, so that we
+	// never try to grow the stack during the code that stackalloc runs.
+	// Doing so would cause a deadlock (issue 1547).
+	if(g != m->g0)
+		runtime·throw("stackalloc not on scheduler stack");
+
 	if(m->mallocing || m->gcing || n == FixedStack) {
 		runtime·lock(&stacks);
 		if(stacks.size == 0)
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index c471fff..1d38258 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -380,6 +380,7 @@
 			break;
 		case Grunning:
 		case Grecovery:
+		case Gstackalloc:
 			if(gp != g)
 				runtime·throw("mark - world not stopped");
 			scanstack(gp);
diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c
index 1bbca63..455a39e 100644
--- a/src/pkg/runtime/proc.c
+++ b/src/pkg/runtime/proc.c
@@ -253,8 +253,10 @@
 	}
 
 	// Mark runnable.
-	if(g->status == Grunnable || g->status == Grunning || g->status == Grecovery)
+	if(g->status == Grunnable || g->status == Grunning || g->status == Grecovery || g->status == Gstackalloc) {
+		runtime·printf("goroutine %d has status %d\n", g->goid, g->status);
 		runtime·throw("bad g->status in ready");
+	}
 	g->status = Grunnable;
 
 	gput(g);
@@ -492,6 +494,13 @@
 			runtime·free(d);
 			runtime·gogo(&gp->sched, 1);
 		}
+		
+		if(gp->status == Gstackalloc) {
+			// switched to scheduler stack to call stackalloc.
+			gp->param = runtime·stackalloc((uintptr)gp->param);
+			gp->status = Grunning;
+			runtime·gogo(&gp->sched, 1);
+		}
 
 		// Jumped here via runtime·gosave/gogo, so didn't
 		// execute lock(&runtime·sched) above.
@@ -509,6 +518,8 @@
 		switch(gp->status){
 		case Grunnable:
 		case Gdead:
+		case Grecovery:
+		case Gstackalloc:
 			// Shouldn't have been running!
 			runtime·throw("bad gp->status in sched");
 		case Grunning:
@@ -795,18 +806,35 @@
 G*
 runtime·malg(int32 stacksize)
 {
-	G *g;
+	G *newg;
 	byte *stk;
+	int32 oldstatus;
 
-	g = runtime·malloc(sizeof(G));
+	newg = runtime·malloc(sizeof(G));
 	if(stacksize >= 0) {
-		stk = runtime·stackalloc(StackSystem + stacksize);
-		g->stack0 = stk;
-		g->stackguard = stk + StackSystem + StackGuard;
-		g->stackbase = stk + StackSystem + stacksize - sizeof(Stktop);
-		runtime·memclr(g->stackbase, sizeof(Stktop));
+		if(g == m->g0) {
+			// running on scheduler stack already.
+			stk = runtime·stackalloc(StackSystem + stacksize);
+		} else {
+			// have to call stackalloc on scheduler stack.
+			oldstatus = g->status;
+			g->param = (void*)(StackSystem + stacksize);
+			g->status = Gstackalloc;
+			// next two lines are runtime·gosched without the check
+			// of m->locks.  we're almost certainly holding a lock,
+			// but this is not a real rescheduling so it's okay.
+			if(runtime·gosave(&g->sched) == 0)
+				runtime·gogo(&m->sched, 1);
+			stk = g->param;
+			g->param = nil;
+			g->status = oldstatus;
+		}
+		newg->stack0 = stk;
+		newg->stackguard = stk + StackSystem + StackGuard;
+		newg->stackbase = stk + StackSystem + stacksize - sizeof(Stktop);
+		runtime·memclr(newg->stackbase, sizeof(Stktop));
 	}
-	return g;
+	return newg;
 }
 
 /*
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index ac992a2..4456e9b 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -104,6 +104,7 @@
 	Gmoribund,
 	Gdead,
 	Grecovery,
+	Gstackalloc,
 };
 enum
 {