amd64: use segment memory for thread-local storage
Returns R14 and R15 to the available register pool.
Plays more nicely with ELF ABI C code.
In particular, our signal handlers will no longer crash
when a signal arrives during execution of a cgo C call.
Fixes #720.
R=ken2, r
CC=golang-dev
https://golang.org/cl/1847051
diff --git a/src/cmd/6a/a.y b/src/cmd/6a/a.y
index 804f638..6341ba7 100644
--- a/src/cmd/6a/a.y
+++ b/src/cmd/6a/a.y
@@ -453,6 +453,12 @@
$$.type = D_INDIR+D_SP;
$$.offset = $1;
}
+| con '(' LSREG ')'
+ {
+ $$ = nullgen;
+ $$.type = D_INDIR+$3;
+ $$.offset = $1;
+ }
| con '(' LLREG '*' con ')'
{
$$ = nullgen;
diff --git a/src/cmd/6c/cgen.c b/src/cmd/6c/cgen.c
index 39452c9..dd8573c 100644
--- a/src/cmd/6c/cgen.c
+++ b/src/cmd/6c/cgen.c
@@ -57,6 +57,12 @@
l = n->left;
r = n->right;
o = n->op;
+
+ if(n->op == OEXREG || (nn != Z && nn->op == OEXREG)) {
+ gmove(n, nn);
+ return;
+ }
+
if(n->addable >= INDEXED) {
if(nn == Z) {
switch(o) {
diff --git a/src/cmd/6c/peep.c b/src/cmd/6c/peep.c
index 01793bf..13fd25e 100644
--- a/src/cmd/6c/peep.c
+++ b/src/cmd/6c/peep.c
@@ -797,8 +797,6 @@
return 3;
case ACALL: /* funny */
- if(REGEXT && v->type <= REGEXT && v->type > exregoffset)
- return 2;
if(REGARG >= 0 && v->type == REGARG)
return 2;
diff --git a/src/cmd/6c/sgen.c b/src/cmd/6c/sgen.c
index b8247a1..42045f8 100644
--- a/src/cmd/6c/sgen.c
+++ b/src/cmd/6c/sgen.c
@@ -131,6 +131,10 @@
n->addable = 11;
break;
+ case OEXREG:
+ n->addable = 0;
+ break;
+
case OREGISTER:
n->addable = 12;
break;
diff --git a/src/cmd/6c/txt.c b/src/cmd/6c/txt.c
index f96c40f..9a94ca2 100644
--- a/src/cmd/6c/txt.c
+++ b/src/cmd/6c/txt.c
@@ -38,8 +38,6 @@
thechar = '6';
thestring = "amd64";
- exregoffset = REGEXT;
- exfregoffset = FREGEXT;
listinit();
nstring = 0;
mnstring = 0;
@@ -491,6 +489,10 @@
a->sym = S;
break;
+ case OEXREG:
+ a->type = D_INDIR + D_GS;
+ a->offset = n->reg - 1;
+ break;
case OIND:
naddr(n->left, a);
@@ -1502,11 +1504,11 @@
int32 o;
if(typechlpv[t->etype]) {
- if(exregoffset <= REGEXT-4)
+ if(exregoffset >= 64)
return 0;
o = exregoffset;
- exregoffset--;
- return o;
+ exregoffset += 8;
+ return o+1; // +1 to avoid 0 == failure; naddr's case OEXREG will subtract 1.
}
return 0;
}
diff --git a/src/cmd/6l/asm.c b/src/cmd/6l/asm.c
index b45557e..fa419b6 100644
--- a/src/cmd/6l/asm.c
+++ b/src/cmd/6l/asm.c
@@ -821,6 +821,17 @@
ph->type = PT_DYNAMIC;
ph->flags = PF_R + PF_W;
phsh(ph, sh);
+
+ /*
+ * Thread-local storage segment (really just size).
+ */
+ if(tlsoffset != 0) {
+ ph = newElfPhdr();
+ ph->type = PT_TLS;
+ ph->flags = PF_R;
+ ph->memsz = -tlsoffset;
+ ph->align = 8;
+ }
}
ph = newElfPhdr();
diff --git a/src/cmd/6l/l.h b/src/cmd/6l/l.h
index eb796e2..23ca223 100644
--- a/src/cmd/6l/l.h
+++ b/src/cmd/6l/l.h
@@ -340,6 +340,7 @@
EXTERN int32 symsize;
EXTERN Prog* textp;
EXTERN vlong textsize;
+EXTERN int tlsoffset;
EXTERN int version;
EXTERN Prog zprg;
EXTERN int dtype;
diff --git a/src/cmd/6l/obj.c b/src/cmd/6l/obj.c
index 724f112..3b981a6 100644
--- a/src/cmd/6l/obj.c
+++ b/src/cmd/6l/obj.c
@@ -165,6 +165,11 @@
INITRND = 4096;
break;
case 6: /* apple MACH */
+ /*
+ * OS X system constant - offset from 0(GS) to our TLS.
+ * Explained in ../../libcgo/darwin_amd64.c.
+ */
+ tlsoffset = 0x8a0;
machoinit();
HEADR = MACHORESERVE;
if(INITRND == -1)
@@ -176,6 +181,13 @@
break;
case 7: /* elf64 executable */
case 9: /* freebsd */
+ /*
+ * ELF uses TLS offset negative from FS.
+ * Translate 0(FS) and 8(FS) into -16(FS) and -8(FS).
+ * Also known to ../../pkg/runtime/linux/amd64/sys.s
+ * and ../../libcgo/linux_amd64.s.
+ */
+ tlsoffset = -16;
elfinit();
HEADR = ELFRESERVE;
if(INITTEXT == -1)
@@ -434,6 +446,8 @@
adrgotype = zsym(pn, f, h);
s = a->sym;
t = a->type;
+ if(t == D_INDIR+D_GS)
+ a->offset += tlsoffset;
if(t != D_AUTO && t != D_PARAM) {
if(s && adrgotype)
s->gotype = adrgotype;
diff --git a/src/cmd/6l/pass.c b/src/cmd/6l/pass.c
index 8eced50..5fedee2 100644
--- a/src/cmd/6l/pass.c
+++ b/src/cmd/6l/pass.c
@@ -421,6 +421,13 @@
s = lookup("exit", 0);
vexit = s->value;
for(p = firstp; p != P; p = p->link) {
+ if(HEADTYPE == 7 || HEADTYPE == 9) {
+ // ELF uses FS instead of GS.
+ if(p->from.type == D_INDIR+D_GS)
+ p->from.type = D_INDIR+D_FS;
+ if(p->to.type == D_INDIR+D_GS)
+ p->to.type = D_INDIR+D_FS;
+ }
if(p->as == ATEXT)
curtext = p;
if(p->as == ACALL || (p->as == AJMP && p->to.type != D_BRANCH)) {
@@ -663,6 +670,15 @@
diag("nosplit func likely to overflow stack");
if(!(p->from.scale & NOSPLIT)) {
+ p = appendp(p); // load g into CX
+ p->as = AMOVQ;
+ if(HEADTYPE == 7 || HEADTYPE == 9) // ELF uses FS
+ p->from.type = D_INDIR+D_FS;
+ else
+ p->from.type = D_INDIR+D_GS;
+ p->from.offset = tlsoffset+0;
+ p->to.type = D_CX;
+
if(debug['K']) {
// 6l -K means check not only for stack
// overflow but stack underflow.
@@ -672,7 +688,7 @@
p = appendp(p);
p->as = ACMPQ;
- p->from.type = D_INDIR+D_R15;
+ p->from.type = D_INDIR+D_CX;
p->from.offset = 8;
p->to.type = D_SP;
@@ -694,7 +710,7 @@
p = appendp(p);
p->as = ACMPQ;
p->from.type = D_SP;
- p->to.type = D_INDIR+D_R15;
+ p->to.type = D_INDIR+D_CX;
if(q1) {
q1->pcond = p;
q1 = P;
@@ -714,7 +730,7 @@
p = appendp(p);
p->as = ACMPQ;
p->from.type = D_AX;
- p->to.type = D_INDIR+D_R15;
+ p->to.type = D_INDIR+D_CX;
}
// common
@@ -824,7 +840,7 @@
// function is marked as nosplit.
p = appendp(p);
p->as = AMOVQ;
- p->from.type = D_INDIR+D_R15;
+ p->from.type = D_INDIR+D_CX;
p->from.offset = 0;
p->to.type = D_BX;
diff --git a/src/cmd/6l/span.c b/src/cmd/6l/span.c
index 15f931b..7e0086e 100644
--- a/src/cmd/6l/span.c
+++ b/src/cmd/6l/span.c
@@ -445,6 +445,24 @@
}
int
+prefixof(Adr *a)	/* returns the x86 segment-override prefix byte for a segment-relative operand, or 0 if a->type is not D_INDIR+segment */
+{
+ switch(a->type) {
+ case D_INDIR+D_CS:
+ return 0x2e;	/* CS segment override */
+ case D_INDIR+D_DS:
+ return 0x3e;	/* DS segment override */
+ case D_INDIR+D_ES:
+ return 0x26;	/* ES segment override */
+ case D_INDIR+D_FS:
+ return 0x64;	/* FS segment override */
+ case D_INDIR+D_GS:
+ return 0x65;	/* GS segment override */
+ }
+ return 0;	/* no prefix needed; callers test the result for non-zero before emitting */
+}
+
+int
oclass(Adr *a)
{
vlong v;
@@ -879,7 +897,7 @@
if(t >= D_INDIR) {
t -= D_INDIR;
rexflag |= (regrex[t] & Rxb) | rex;
- if(t == D_NONE) {
+ if(t == D_NONE || (D_CS <= t && t <= D_GS)) {
if(asmode != 64){
*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
put4(v);
@@ -1173,7 +1191,7 @@
Prog *q, pp;
uchar *t;
Movtab *mo;
- int z, op, ft, tt, xo, l;
+ int z, op, ft, tt, xo, l, pre;
vlong v;
o = opindex[p->as];
@@ -1181,6 +1199,13 @@
diag("asmins: missing op %P", p);
return;
}
+
+ pre = prefixof(&p->from);
+ if(pre)
+ *andptr++ = pre;
+ pre = prefixof(&p->to);
+ if(pre)
+ *andptr++ = pre;
if(p->ft == 0)
p->ft = oclass(&p->from);
@@ -1748,7 +1773,7 @@
n = andptr - and;
for(np = 0; np < n; np++) {
c = and[np];
- if(c != 0x66 && c != 0xf2 && c != 0xf3 && c != 0x67)
+ if(c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26)
break;
}
memmove(and+np+1, and+np, n-np);
diff --git a/src/cmd/8c/txt.c b/src/cmd/8c/txt.c
index 194599c..4cfd7bc 100644
--- a/src/cmd/8c/txt.c
+++ b/src/cmd/8c/txt.c
@@ -1403,7 +1403,6 @@
return o+1; // +1 to avoid 0 == failure; naddr case OEXREG will -1.
}
- USED(t);
return 0;
}
diff --git a/src/cmd/8l/obj.c b/src/cmd/8l/obj.c
index f3584bf..9067e94 100644
--- a/src/cmd/8l/obj.c
+++ b/src/cmd/8l/obj.c
@@ -227,7 +227,7 @@
case 7: /* elf32 executable */
case 9:
/*
- * Linux ELF uses TLS offsets negative from %gs.
+ * ELF uses TLS offsets negative from %gs.
* Translate 0(GS) and 4(GS) into -8(GS) and -4(GS).
* Also known to ../../pkg/runtime/linux/386/sys.s
* and ../../libcgo/linux_386.c.
diff --git a/src/cmd/cc/com.c b/src/cmd/cc/com.c
index 5cbe8b7..b1a8a47 100644
--- a/src/cmd/cc/com.c
+++ b/src/cmd/cc/com.c
@@ -638,10 +638,10 @@
n->addable = 1;
if(n->class == CEXREG) {
n->op = OREGISTER;
- // on 386, "extern register" generates
+ // on 386 or amd64, "extern register" generates
// memory references relative to the
- // fs segment.
- if(thechar == '8') // [sic]
+ // gs or fs segment.
+ if(thechar == '8' || thechar == '6') // [sic]
n->op = OEXREG;
n->reg = n->sym->offset;
n->xoffset = 0;