more work on divide by constant.
no real change -- disabled because of bugs
R=rsc
OCL=32923
CL=32923
diff --git a/src/cmd/6g/gg.h b/src/cmd/6g/gg.h
index f9f50cc..6ba975b 100644
--- a/src/cmd/6g/gg.h
+++ b/src/cmd/6g/gg.h
@@ -42,6 +42,23 @@
void* reg; // pointer to containing Reg struct
};
+typedef struct Magic Magic;
+struct Magic
+{
+ int w; // input for both - width
+ int s; // output for both - shift
+ int bad; // output for both - unexpected failure
+
+ // magic multiplier for signed literal divisors
+ int64 sd; // input - literal divisor
+ int64 sm; // output - multiplier
+
+ // magic multiplier for unsigned literal divisors
+ uint64 ud; // input - literal divisor
+ uint64 um; // output - multiplier
+ int ua; // output - adder
+};
+
EXTERN Biobuf* bout;
EXTERN int32 dynloc;
EXTERN uchar reg[D_NONE];
@@ -127,6 +144,8 @@
void datagostring(Strlit*, Addr*);
int powtwo(Node*);
Type* tounsigned(Type*);
+void smagic(Magic*);
+void umagic(Magic*);
/*
* obj.c
diff --git a/src/cmd/6g/ggen.c b/src/cmd/6g/ggen.c
index 4e71f75..b0fa9c7 100644
--- a/src/cmd/6g/ggen.c
+++ b/src/cmd/6g/ggen.c
@@ -525,6 +525,36 @@
gmove(dx, res);
}
+static void
+savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
+{
+ int r;
+
+ r = reg[dr];
+
+ nodreg(x, types[TINT64], dr);
+ regalloc(x, t, x);
+
+ // save current ax and dx if they are live
+ // and not the destination
+ memset(oldx, 0, sizeof *oldx);
+ if(r > 0 && !samereg(x, res)) {
+ regalloc(oldx, t, N);
+ gmove(x, oldx);
+ }
+}
+
+static void
+restx(Node *x, Node *oldx)
+{
+ regfree(x);
+
+ if(oldx->op != 0) {
+ gmove(oldx, x);
+ regfree(oldx);
+ }
+}
+
/*
* generate division according to op, one of:
* res = nl / nr
@@ -533,18 +563,20 @@
void
cgen_div(int op, Node *nl, Node *nr, Node *res)
{
- Node ax, dx, oldax, olddx, n1, n2, n3;
- int rax, rdx, n, w;
+ Node ax, dx, cx, oldax, olddx, oldcx;
+ Node n1, n2, n3, savl, savr;
+ int n, w, s;
+ Magic m;
if(nl->ullman >= UINF) {
- tempname(&n1, nl->type);
- cgen(nl, &n1);
- nl = &n1;
+ tempname(&savl, nl->type);
+ cgen(nl, &savl);
+ nl = &savl;
}
if(nr->ullman >= UINF) {
- tempname(&n2, nr->type);
- cgen(nr, &n2);
- nr = &n2;
+ tempname(&savr, nr->type);
+ cgen(nr, &savr);
+ nr = &savr;
}
if(nr->op != OLITERAL)
@@ -552,8 +584,14 @@
// special cases of mod/div
// by a constant
- n = powtwo(nr);
w = nl->type->width*8;
+ s = 0;
+ n = powtwo(nr);
+ if(n >= 1000) {
+ // negative power of 2
+ s = 1;
+ n -= 1000;
+ }
if(n+1 >= w) {
// just sign bit
@@ -571,97 +609,132 @@
switch(n) {
case 0:
// divide by 1
- cgen(nl, res);
+ regalloc(&n1, nl->type, res);
+ cgen(nl, &n1);
+ if(s)
+ gins(optoas(OMINUS, nl->type), N, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
return;
case 1:
// divide by 2
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
- if(issigned[nl->type->etype]) {
- // develop -1 iff nl is negative
- regalloc(&n2, nl->type, N);
- gmove(&n1, &n2);
- nodconst(&n3, nl->type, w-1);
- gins(optoas(ORSH, nl->type), &n3, &n2);
- gins(optoas(OSUB, nl->type), &n2, &n1);
- regfree(&n2);
- }
- nodconst(&n2, nl->type, n);
- gins(optoas(ORSH, nl->type), &n2, &n1);
- gmove(&n1, res);
- regfree(&n1);
- return;
+ if(!issigned[nl->type->etype])
+ break;
+
+ // develop -1 iff nl is negative
+ regalloc(&n2, nl->type, N);
+ gmove(&n1, &n2);
+ nodconst(&n3, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n3, &n2);
+ gins(optoas(OSUB, nl->type), &n2, &n1);
+ regfree(&n2);
+ break;
default:
regalloc(&n1, nl->type, res);
cgen(nl, &n1);
- if(issigned[nl->type->etype]) {
- // develop (2^k)-1 iff nl is negative
- regalloc(&n2, nl->type, N);
- gmove(&n1, &n2);
- nodconst(&n3, nl->type, w-1);
- gins(optoas(ORSH, nl->type), &n3, &n2);
- nodconst(&n3, nl->type, w-n);
- gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
- gins(optoas(OADD, nl->type), &n2, &n1);
- regfree(&n2);
- }
- nodconst(&n2, nl->type, n);
- gins(optoas(ORSH, nl->type), &n2, &n1);
- gmove(&n1, res);
- regfree(&n1);
+ if(!issigned[nl->type->etype])
+ break;
+
+ // develop (2^k)-1 iff nl is negative
+ regalloc(&n2, nl->type, N);
+ gmove(&n1, &n2);
+ nodconst(&n3, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n3, &n2);
+ nodconst(&n3, nl->type, w-n);
+ gins(optoas(ORSH, tounsigned(nl->type)), &n3, &n2);
+ gins(optoas(OADD, nl->type), &n2, &n1);
+ regfree(&n2);
+ break;
}
+ nodconst(&n2, nl->type, n);
+ gins(optoas(ORSH, nl->type), &n2, &n1);
+ if(s)
+ gins(optoas(OMINUS, nl->type), N, &n1);
+ gmove(&n1, res);
+ regfree(&n1);
return;
divbymul:
+goto longdiv;
switch(simtype[nl->type->etype]) {
default:
goto longdiv;
- case TINT32:
+ case TUINT16:
case TUINT32:
- case TINT64:
case TUINT64:
+ m.w = w;
+ m.ud = mpgetfix(nr->val.u.xval);
+ umagic(&m);
+ if(m.bad)
+ break;
+ if(op == OMOD) {
+ // todo
+ break;
+ }
+ if(m.ua != 0) {
+ // todo fixup
+ break;
+ }
break;
+
+ case TINT16:
+ case TINT32:
+ case TINT64:
+ m.w = w;
+ m.sd = mpgetfix(nr->val.u.xval);
+ smagic(&m);
+ if(m.bad)
+ break;
+ if(op == OMOD) {
+ // todo
+ break;
+ }
+ if(m.sm < 0) {
+ // todo fixup
+ break;
+ }
+
+ savex(D_AX, &ax, &oldax, res, nl->type);
+ savex(D_DX, &dx, &olddx, res, nl->type);
+ savex(D_CX, &cx, &oldcx, res, nl->type);
+
+ regalloc(&n1, nl->type, N);
+ cgen(nl, &n1); // num -> reg(n1)
+
+ nodconst(&n2, nl->type, m.sm);
+ gmove(&n2, &ax); // const->ax
+
+ gins(optoas(OMUL, nl->type), &n1, N); // imul reg
+
+ nodconst(&n2, nl->type, m.s);
+ gins(optoas(ORSH, nl->type), &n2, &dx); // shift dx
+
+ nodconst(&n2, nl->type, w-1);
+ gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg
+ gins(optoas(OSUB, nl->type), &n1, &dx); // added
+
+ if(m.sd < 0)
+ gins(optoas(OMINUS, nl->type), N, &dx);
+
+ regfree(&n1);
+ gmove(&dx, res);
+
+ restx(&ax, &oldax);
+ restx(&dx, &olddx);
+ restx(&cx, &oldcx);
+ return;
}
- // todo
goto longdiv;
longdiv:
- rax = reg[D_AX];
- rdx = reg[D_DX];
-
- nodreg(&ax, types[TINT64], D_AX);
- nodreg(&dx, types[TINT64], D_DX);
- regalloc(&ax, nl->type, &ax);
- regalloc(&dx, nl->type, &dx);
-
- // save current ax and dx if they are live
- // and not the destination
- memset(&oldax, 0, sizeof oldax);
- memset(&olddx, 0, sizeof olddx);
- if(rax > 0 && !samereg(&ax, res)) {
- regalloc(&oldax, nl->type, N);
- gmove(&ax, &oldax);
- }
- if(rdx > 0 && !samereg(&dx, res)) {
- regalloc(&olddx, nl->type, N);
- gmove(&dx, &olddx);
- }
-
+ savex(D_AX, &ax, &oldax, res, nl->type);
+ savex(D_DX, &dx, &olddx, res, nl->type);
dodiv(op, nl, nr, res, &ax, &dx);
-
- regfree(&ax);
- regfree(&dx);
-
- if(oldax.op != 0) {
- gmove(&oldax, &ax);
- regfree(&oldax);
- }
- if(olddx.op != 0) {
- gmove(&olddx, &dx);
- regfree(&olddx);
- }
-
+ restx(&ax, &oldax);
+ restx(&dx, &olddx);
}
/*
diff --git a/src/cmd/6g/gsubr.c b/src/cmd/6g/gsubr.c
index 136a8d5..c9d7980 100644
--- a/src/cmd/6g/gsubr.c
+++ b/src/cmd/6g/gsubr.c
@@ -1881,6 +1881,17 @@
b = b<<1;
}
+ if(!issigned[n->type->etype])
+ goto no;
+
+ v = -v;
+ b = 1ULL;
+ for(i=0; i<64; i++) {
+ if(b == v)
+ return i+1000;
+ b = b<<1;
+ }
+
no:
return -1;
}
@@ -1894,6 +1905,7 @@
switch(t->etype) {
default:
print("tounsigned: unknown type %T\n", t);
+ t = T;
break;
case TINT:
t = types[TUINT];
@@ -1913,3 +1925,186 @@
}
return t;
}
+
+void
+smagic(Magic *m)
+{
+ int p;
+ uint64 ad, anc, delta, q1, r1, q2, r2, t, two31;
+ uint64 mask;
+
+ m->bad = 0;
+ switch(m->w) {
+ default:
+ m->bad = 1;
+ return;
+ case 8:
+ mask = 0xffLL;
+ break;
+ case 16:
+ mask = 0xffffLL;
+ break;
+ case 32:
+ mask = 0xffffffffLL;
+ break;
+ case 64:
+ mask = 0xffffffffffffffffLL;
+ break;
+ }
+ two31 = mask ^ (mask>>1);
+
+ p = m->w-1;
+ ad = m->sd;
+ if(m->sd < 0)
+ ad = -m->sd;
+
+ // bad denominators
+ if(ad == 0 || ad == 1 || ad == two31) {
+ m->bad = 1;
+ return;
+ }
+
+ t = two31;
+ ad &= mask;
+
+ anc = t - 1 - t%ad;
+ anc &= mask;
+
+ q1 = two31/anc;
+ r1 = two31 - q1*anc;
+ q1 &= mask;
+ r1 &= mask;
+
+ q2 = two31/ad;
+ r2 = two31 - q2*ad;
+ q2 &= mask;
+ r2 &= mask;
+
+ for(;;) {
+ p++;
+ q1 <<= 1;
+ r1 <<= 1;
+ q1 &= mask;
+ r1 &= mask;
+ if(r1 >= anc) {
+ q1++;
+ r1 -= anc;
+ q1 &= mask;
+ r1 &= mask;
+ }
+
+ q2 <<= 1;
+ r2 <<= 1;
+ q2 &= mask;
+ r2 &= mask;
+ if(r2 >= ad) {
+ q2++;
+ r2 -= ad;
+ q2 &= mask;
+ r2 &= mask;
+ }
+
+ delta = ad - r2;
+ delta &= mask;
+ if(q1 < delta || (q1 == delta && r1 == 0)) {
+ continue;
+ }
+ break;
+ }
+
+ m->sm = q2+1;
+ m->s = p-m->w;
+}
+
+void
+umagic(Magic *m)
+{
+ int p;
+ uint64 nc, delta, q1, r1, q2, r2, two31;
+ uint64 mask;
+
+ m->bad = 0;
+ m->ua = 0;
+
+ switch(m->w) {
+ default:
+ m->bad = 1;
+ return;
+ case 8:
+ mask = 0xffLL;
+ break;
+ case 16:
+ mask = 0xffffLL;
+ break;
+ case 32:
+ mask = 0xffffffffLL;
+ break;
+ case 64:
+ mask = 0xffffffffffffffffLL;
+ break;
+ }
+ two31 = mask ^ (mask>>1);
+
+ m->ud &= mask;
+ if(m->ud == 0 || m->ud == two31) {
+ m->bad = 1;
+ return;
+ }
+ nc = mask - (-m->ud&mask)%m->ud;
+ p = m->w-1;
+
+ q1 = two31/nc;
+ r1 = two31 - q1*nc;
+ q1 &= mask;
+ r1 &= mask;
+
+ q2 = (two31-1) / m->ud;
+ r2 = (two31-1) - q2*m->ud;
+ q2 &= mask;
+ r2 &= mask;
+
+ for(;;) {
+ p++;
+ if(r1 >= nc-r1) {
+ q1 <<= 1;
+ q1++;
+ r1 <<= 1;
+ r1 -= nc;
+ } else {
+ q1 <<= 1;
+ r1 <<= 1;
+ }
+ q1 &= mask;
+ r1 &= mask;
+ if(r2+1 >= m->ud-r2) {
+ if(q2 >= two31-1) {
+ m->ua = 1;
+ }
+ q2 <<= 1;
+ q2++;
+ r2 <<= 1;
+ r2++;
+ r2 -= m->ud;
+ } else {
+ if(q2 >= two31) {
+ m->ua = 1;
+ }
+ q2 <<= 1;
+ r2 <<= 1;
+ r2++;
+ }
+ q2 &= mask;
+ r2 &= mask;
+
+ delta = m->ud - 1 - r2;
+ delta &= mask;
+
+ if(p < m->w+m->w)
+ if(q1 < delta || (q1 == delta && r1 == 0)) {
+ continue;
+ }
+ break;
+ }
+ m->um = q2+1;
+ m->s = p-m->w;
+}