runtime: allow cgo callbacks on non-Go threads

Fixes #4435.

R=golang-dev, iant, alex.brainman, minux.ma, dvyukov
CC=golang-dev
https://golang.org/cl/7304104
diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s
index 901d9f3..f09ddd0 100644
--- a/src/pkg/runtime/asm_386.s
+++ b/src/pkg/runtime/asm_386.s
@@ -476,21 +476,33 @@
 // cgocallback(void (*fn)(void*), void *frame, uintptr framesize)
 // See cgocall.c for more details.
 TEXT runtime·cgocallback(SB),7,$12
-	MOVL	fn+0(FP), AX
-	MOVL	frame+4(FP), BX
-	MOVL	framesize+8(FP), DX
-
-	// Save current m->g0->sched.sp on stack and then set it to SP.
+	// If m is nil, Go did not create the current thread.
+	// Call needm to obtain one for temporary use.
+	// In this case, we're running on the thread stack, so there's
+	// lots of space, but the linker doesn't know. Hide the call from
+	// the linker analysis by using an indirect call through AX.
+	get_tls(CX)
+#ifdef GOOS_windows
+	CMPL	CX, $0
+	JNE	3(PC)
+	PUSHL	$0
+	JMP needm
+#endif
+	MOVL	m(CX), BP
+	PUSHL	BP
+	CMPL	BP, $0
+	JNE	havem
+needm:
+	MOVL	$runtime·needm(SB), AX
+	CALL	AX
 	get_tls(CX)
 	MOVL	m(CX), BP
 
-	// If m is nil, it is almost certainly because we have been called
-	// on a thread that Go did not create.  We're going to crash as
-	// soon as we try to use m; instead, try to print a nice error and exit.
-	CMPL	BP, $0
-	JNE 2(PC)
-	CALL	runtime·badcallback(SB)
-
+havem:
+	// Now there's a valid m, and we're running on its m->g0.
+	// Save current m->g0->sched.sp on stack and then set it to SP.
+	// Save current sp in m->g0->sched.sp in preparation for
+	// switch back to m->curg stack.
 	MOVL	m_g0(BP), SI
 	PUSHL	(g_sched+gobuf_sp)(SI)
 	MOVL	SP, (g_sched+gobuf_sp)(SI)
@@ -509,6 +521,10 @@
 	// a frame size of 12, the same amount that we use below),
 	// so that the traceback will seamlessly trace back into
 	// the earlier calls.
+	MOVL	fn+0(FP), AX
+	MOVL	frame+4(FP), BX
+	MOVL	framesize+8(FP), DX
+
 	MOVL	m_curg(BP), SI
 	MOVL	SI, g(CX)
 	MOVL	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
@@ -546,10 +562,38 @@
 	MOVL	SI, g(CX)
 	MOVL	(g_sched+gobuf_sp)(SI), SP
 	POPL	(g_sched+gobuf_sp)(SI)
+	
+	// If the m on entry was nil, we called needm above to borrow an m
+	// for the duration of the call. Since the call is over, return it with dropm.
+	POPL	BP
+	CMPL	BP, $0
+	JNE 3(PC)
+	MOVL	$runtime·dropm(SB), AX
+	CALL	AX
 
 	// Done!
 	RET
 
+// void setmg(M*, G*); set m and g. for use by needm.
+TEXT runtime·setmg(SB), 7, $0
+#ifdef GOOS_windows
+	MOVL	mm+0(FP), AX
+	CMPL	AX, $0
+	JNE	settls
+	MOVL	$0, 0x14(FS)
+	RET
+settls:
+	LEAL	m_tls(AX), AX
+	MOVL	AX, 0x14(FS)
+#endif
+	MOVL	mm+0(FP), AX
+	get_tls(CX)
+	MOVL	mm+0(FP), AX
+	MOVL	AX, m(CX)
+	MOVL	gg+4(FP), BX
+	MOVL	BX, g(CX)
+	RET
+
 // check that SP is in range [g->stackbase, g->stackguard)
 TEXT runtime·stackcheck(SB), 7, $0
 	get_tls(CX)