runtime: pass setmg function to cgo_init

This keeps the logic about how to set the thread-local variables
m and g in code compiled and linked by the gc toolchain,
an important property for upcoming cgo changes.

It's also just a nice cleanup: one less place to update when
these details change.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/7560048
diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s
index 6bcacf4..805405a 100644
--- a/src/pkg/runtime/asm_386.s
+++ b/src/pkg/runtime/asm_386.s
@@ -37,6 +37,8 @@
 	MOVL	_cgo_init(SB), AX
 	TESTL	AX, AX
 	JZ	needtls
+	MOVL	$setmg_gcc<>(SB), BX
+	MOVL	BX, 4(SP)
 	MOVL	BP, 0(SP)
 	CALL	AX
 	// skip runtime·ldt0setup(SB) and tls test after _cgo_init for non-windows
@@ -643,6 +645,15 @@
 	MOVL	BX, g(CX)
 	RET
 
+// void setmg_gcc(M*, G*); store m and g into thread-local storage. Called from gcc-compiled code.
+TEXT setmg_gcc<>(SB), 7, $0
+	get_tls(AX)
+	MOVL	mm+0(FP), DX	// first argument: m
+	MOVL	DX, m(AX)
+	MOVL	gg+4(FP), DX	// second argument: g
+	MOVL	DX, g(AX)
+	RET
+
 // check that SP is in range [g->stackbase, g->stackguard)
 TEXT runtime·stackcheck(SB), 7, $0
 	get_tls(CX)
diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s
index f4cfa57..af2064ff 100644
--- a/src/pkg/runtime/asm_amd64.s
+++ b/src/pkg/runtime/asm_amd64.s
@@ -37,6 +37,7 @@
 	JZ	needtls
 	// g0 already in DI
 	MOVQ	DI, CX	// Win64 uses CX for first parameter
+	MOVQ	$setmg_gcc<>(SB), SI
 	CALL	AX
 	CMPL	runtime·iswindows(SB), $0
 	JEQ ok
@@ -682,6 +683,13 @@
 	MOVQ	BX, g(CX)
 	RET
 
+// void setmg_gcc(M*, G*); store m and g into thread-local storage. Called from gcc-compiled code.
+TEXT setmg_gcc<>(SB),7,$0
+	get_tls(AX)
+	MOVQ	DI, m(AX)	// first argument (m) is read from DI
+	MOVQ	SI, g(AX)	// second argument (g) is read from SI
+	RET
+
 // check that SP is in range [g->stackbase, g->stackguard)
 TEXT runtime·stackcheck(SB), 7, $0
 	get_tls(CX)
diff --git a/src/pkg/runtime/cgo/gcc_freebsd_386.c b/src/pkg/runtime/cgo/gcc_freebsd_386.c
index 7c62a1b..6797824 100644
--- a/src/pkg/runtime/cgo/gcc_freebsd_386.c
+++ b/src/pkg/runtime/cgo/gcc_freebsd_386.c
@@ -10,13 +10,15 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -66,15 +68,9 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On FreeBSD/ELF, the thread local storage
-	 * is just before %gs:0.  Our dynamic 8.out's reserve 8 bytes
-	 * for the two words g and m at %gs:-8 and %gs:-4.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movl %0, %%gs:-8\n"	// MOVL g, -8(GS)
-		"movl %1, %%gs:-4\n"	// MOVL m, -4(GS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
 
 	crosscall_386(ts.fn);
 	return nil;
diff --git a/src/pkg/runtime/cgo/gcc_freebsd_amd64.c b/src/pkg/runtime/cgo/gcc_freebsd_amd64.c
index 6be8bd2..eb342a2 100644
--- a/src/pkg/runtime/cgo/gcc_freebsd_amd64.c
+++ b/src/pkg/runtime/cgo/gcc_freebsd_amd64.c
@@ -10,20 +10,21 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
 	pthread_attr_destroy(&attr);
 }
 
-
 void
 _cgo_sys_thread_start(ThreadStart *ts)
 {
@@ -67,15 +68,10 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On FreeBSD/ELF, the thread local storage
-	 * is just before %fs:0.  Our dynamic 6.out's reserve 16 bytes
-	 * for the two words g and m at %fs:-16 and %fs:-8.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movq %0, %%fs:-16\n"	// MOVL g, -16(FS)
-		"movq %1, %%fs:-8\n"	// MOVL m, -8(FS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
+
 	crosscall_amd64(ts.fn);
 	return nil;
 }
diff --git a/src/pkg/runtime/cgo/gcc_linux_386.c b/src/pkg/runtime/cgo/gcc_linux_386.c
index 9357a63..c25c7b7 100644
--- a/src/pkg/runtime/cgo/gcc_linux_386.c
+++ b/src/pkg/runtime/cgo/gcc_linux_386.c
@@ -8,13 +8,15 @@
 #include "libcgo.h"
 
 static void *threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -69,18 +71,9 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On Linux/ELF, the thread local storage
-	 * is just before %gs:0.  Our dynamic 8.out's reserve 8 bytes
-	 * for the two words g and m at %gs:-8 and %gs:-4.
-	 * Xen requires us to access those words indirect from %gs:0
-	 * which points at itself.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movl %%gs:0, %%eax\n"		// MOVL 0(GS), tmp
-		"movl %0, -8(%%eax)\n"	// MOVL g, -8(GS)
-		"movl %1, -4(%%eax)\n"	// MOVL m, -4(GS)
-		:: "r"(ts.g), "r"(ts.m) : "%eax"
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
 
 	crosscall_386(ts.fn);
 	return nil;
diff --git a/src/pkg/runtime/cgo/gcc_linux_amd64.c b/src/pkg/runtime/cgo/gcc_linux_amd64.c
index bc76117..bd7c88d 100644
--- a/src/pkg/runtime/cgo/gcc_linux_amd64.c
+++ b/src/pkg/runtime/cgo/gcc_linux_amd64.c
@@ -8,13 +8,15 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 void
-x_cgo_init(G* g)
+x_cgo_init(G* g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -64,15 +66,10 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On Linux/ELF, the thread local storage
-	 * is just before %fs:0.  Our dynamic 6.out's reserve 16 bytes
-	 * for the two words g and m at %fs:-16 and %fs:-8.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movq %0, %%fs:-16\n"	// MOVL g, -16(FS)
-		"movq %1, %%fs:-8\n"	// MOVL m, -8(FS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
+
 	crosscall_amd64(ts.fn);
 	return nil;
 }
diff --git a/src/pkg/runtime/cgo/gcc_netbsd_386.c b/src/pkg/runtime/cgo/gcc_netbsd_386.c
index 09b271d..b399e16 100644
--- a/src/pkg/runtime/cgo/gcc_netbsd_386.c
+++ b/src/pkg/runtime/cgo/gcc_netbsd_386.c
@@ -9,13 +9,15 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -65,15 +67,9 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On NetBSD/ELF, the thread local storage
-	 * is just before %gs:0.  Our dynamic 8.out's reserve 8 bytes
-	 * for the two words g and m at %gs:-8 and %gs:-4.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movl %0, %%gs:-8\n"	// MOVL g, -8(GS)
-		"movl %1, %%gs:-4\n"	// MOVL m, -4(GS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
 
 	crosscall_386(ts.fn);
 	return nil;
diff --git a/src/pkg/runtime/cgo/gcc_netbsd_amd64.c b/src/pkg/runtime/cgo/gcc_netbsd_amd64.c
index 080c59b..f27e142 100644
--- a/src/pkg/runtime/cgo/gcc_netbsd_amd64.c
+++ b/src/pkg/runtime/cgo/gcc_netbsd_amd64.c
@@ -9,13 +9,15 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -66,15 +68,10 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On NetBSD/ELF, the thread local storage
-	 * is just before %fs:0.  Our dynamic 6.out's reserve 16 bytes
-	 * for the two words g and m at %fs:-16 and %fs:-8.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movq %0, %%fs:-16\n"	// MOVL g, -16(FS)
-		"movq %1, %%fs:-8\n"	// MOVL m, -8(FS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
+
 	crosscall_amd64(ts.fn);
 	return nil;
 }
diff --git a/src/pkg/runtime/cgo/gcc_openbsd_386.c b/src/pkg/runtime/cgo/gcc_openbsd_386.c
index 80be31b..6422d1b 100644
--- a/src/pkg/runtime/cgo/gcc_openbsd_386.c
+++ b/src/pkg/runtime/cgo/gcc_openbsd_386.c
@@ -11,6 +11,7 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 // TCB_SIZE is sizeof(struct thread_control_block),
 // as defined in /usr/src/lib/librthread/tcb.h
@@ -82,12 +83,13 @@
 }
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 	void *handle;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -154,15 +156,9 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On OpenBSD/ELF, the thread local storage
-	 * is just before %gs:0.  Our dynamic 8.out's reserve 8 bytes
-	 * for the two words g and m at %gs:-8 and %gs:-4.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movl %0, %%gs:-8\n"	// MOVL g, -8(GS)
-		"movl %1, %%gs:-4\n"	// MOVL m, -4(GS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
 
 	crosscall_386(ts.fn);
 	return nil;
diff --git a/src/pkg/runtime/cgo/gcc_openbsd_amd64.c b/src/pkg/runtime/cgo/gcc_openbsd_amd64.c
index e9cc818..5a5a171 100644
--- a/src/pkg/runtime/cgo/gcc_openbsd_amd64.c
+++ b/src/pkg/runtime/cgo/gcc_openbsd_amd64.c
@@ -11,6 +11,7 @@
 #include "libcgo.h"
 
 static void* threadentry(void*);
+static void (*setmg_gcc)(void*, void*);
 
 // TCB_SIZE is sizeof(struct thread_control_block),
 // as defined in /usr/src/lib/librthread/tcb.h
@@ -82,12 +83,13 @@
 }
 
 void
-x_cgo_init(G *g)
+x_cgo_init(G *g, void (*setmg)(void*, void*))
 {
 	pthread_attr_t attr;
 	size_t size;
 	void *handle;
 
+	setmg_gcc = setmg;
 	pthread_attr_init(&attr);
 	pthread_attr_getstacksize(&attr, &size);
 	g->stackguard = (uintptr)&attr - size + 4096;
@@ -155,15 +157,10 @@
 	ts.g->stackguard = (uintptr)&ts - ts.g->stackguard + 4096;
 
 	/*
-	 * Set specific keys.  On OpenBSD/ELF, the thread local storage
-	 * is just before %fs:0.  Our dynamic 6.out's reserve 16 bytes
-	 * for the two words g and m at %fs:-16 and %fs:-8.
+	 * Set m and g for this thread using the setmg function passed to x_cgo_init.
 	 */
-	asm volatile (
-		"movq %0, %%fs:-16\n"	// MOVL g, -16(FS)
-		"movq %1, %%fs:-8\n"	// MOVL m, -8(FS)
-		:: "r"(ts.g), "r"(ts.m)
-	);
+	setmg_gcc((void*)ts.m, (void*)ts.g);
+
 	crosscall_amd64(ts.fn);
 	return nil;
 }