amd64: use segment memory for thread-local storage
This returns R14 and R15 to the available register pool
and plays more nicely with ELF ABI C code.
In particular, our signal handlers will no longer crash
when a signal arrives during execution of a cgo C call.

Fixes #720.

R=ken2, r
CC=golang-dev
https://golang.org/cl/1847051
diff --git a/src/cmd/6l/obj.c b/src/cmd/6l/obj.c
index 724f112..3b981a6 100644
--- a/src/cmd/6l/obj.c
+++ b/src/cmd/6l/obj.c
@@ -165,6 +165,11 @@
 			INITRND = 4096;
 		break;
 	case 6:	/* apple MACH */
+		/*
+		 * OS X system constant - offset from 0(GS) to our TLS.
+		 * Explained in ../../libcgo/darwin_amd64.c.
+		 */
+		tlsoffset = 0x8a0;
 		machoinit();
 		HEADR = MACHORESERVE;
 		if(INITRND == -1)
@@ -176,6 +181,13 @@
 		break;
 	case 7:	/* elf64 executable */
 	case 9: /* freebsd */
+		/*
+		 * ELF uses TLS offset negative from FS.
+		 * Translate 0(FS) and 8(FS) into -16(FS) and -8(FS).
+		 * Also known to ../../pkg/runtime/linux/amd64/sys.s
+		 * and ../../libcgo/linux_amd64.s.
+		 */
+		tlsoffset = -16;
 		elfinit();
 		HEADR = ELFRESERVE;
 		if(INITTEXT == -1)
@@ -434,6 +446,8 @@
 		adrgotype = zsym(pn, f, h);
 	s = a->sym;
 	t = a->type;
+	if(t == D_INDIR+D_GS)
+		a->offset += tlsoffset;
 	if(t != D_AUTO && t != D_PARAM) {
 		if(s && adrgotype)
 			s->gotype = adrgotype;