blob: ef11e2adb94d231a86d6d4c50251a713e667732e [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include <u.h>
#include <libc.h>
#include "gg.h"
/*
* generate code for the 64-bit operation n,
* storing the result in res:
* res = n
* returns nothing. calls fatal if res is neither OINDREG
* nor ONAME, or if n->op is not one of the ops handled below.
*/
void
cgen64(Node *n, Node *res)
{
Node t1, t2, *l, *r;
Node lo1, lo2, hi1, hi2;
Node al, ah, bl, bh, cl, ch, s, n1, creg;
Prog *p1, *p2, *p3, *p4, *p5, *p6;
uint64 v;
if(res->op != OINDREG && res->op != ONAME) {
dump("n", n);
dump("res", res);
fatal("cgen64 %O of %O", n->op, res->op);
}
// evaluate a non-addressable left operand into a temporary first.
l = n->left;
if(!l->addable) {
tempname(&t1, l->type);
cgen(l, &t1);
l = &t1;
}
// lo1/hi1 name the two 32-bit halves of the left operand.
split64(l, &lo1, &hi1);
switch(n->op) {
default:
fatal("cgen64 %O", n->op);
case OMINUS:
// res = 0 - l, computed as SUB (setting flags) on the low
// words followed by SBC on the high words.
// t1 is reused here as a scratch register.
split64(res, &lo2, &hi2);
regalloc(&t1, lo1.type, N);
regalloc(&al, lo1.type, N);
regalloc(&ah, hi1.type, N);
gins(AMOVW, &lo1, &al);
gins(AMOVW, &hi1, &ah);
gmove(ncon(0), &t1);
p1 = gins(ASUB, &al, &t1);
p1->scond |= C_SBIT;
gins(AMOVW, &t1, &lo2);
gmove(ncon(0), &t1);
gins(ASBC, &ah, &t1);
gins(AMOVW, &t1, &hi2);
regfree(&t1);
regfree(&al);
regfree(&ah);
// one splitclean per split64 above (res and l).
splitclean();
splitclean();
return;
case OCOM:
// res = ^l, computed by EOR-ing each half with -1.
regalloc(&t1, lo1.type, N);
gmove(ncon(-1), &t1);
split64(res, &lo2, &hi2);
regalloc(&n1, lo1.type, N);
gins(AMOVW, &lo1, &n1);
gins(AEOR, &t1, &n1);
gins(AMOVW, &n1, &lo2);
gins(AMOVW, &hi1, &n1);
gins(AEOR, &t1, &n1);
gins(AMOVW, &n1, &hi2);
regfree(&t1);
regfree(&n1);
// one splitclean per split64 above (res and l).
splitclean();
splitclean();
return;
case OADD:
case OSUB:
case OMUL:
case OLSH:
case ORSH:
case OAND:
case OOR:
case OXOR:
case OLROT:
// binary operators.
// common setup below.
break;
}
// setup for binary operators
r = n->right;
if(r != N && !r->addable) {
tempname(&t2, r->type);
cgen(r, &t2);
r = &t2;
}
// the right operand is only split when it is itself 64-bit
// (shift counts may be narrower); the matching splitclean
// at the end of the function is guarded the same way.
if(is64(r->type))
split64(r, &lo2, &hi2);
regalloc(&al, lo1.type, N);
regalloc(&ah, hi1.type, N);
// Do op. Leave result in ah:al.
switch(n->op) {
default:
fatal("cgen64: not implemented: %N\n", n);
case OADD:
// TODO: Constants
regalloc(&bl, types[TPTR32], N);
regalloc(&bh, types[TPTR32], N);
gins(AMOVW, &hi1, &ah);
gins(AMOVW, &lo1, &al);
gins(AMOVW, &hi2, &bh);
gins(AMOVW, &lo2, &bl);
// low-word ADD is marked with C_SBIT; the high words are
// then combined with ADC (add with carry).
p1 = gins(AADD, &bl, &al);
p1->scond |= C_SBIT;
gins(AADC, &bh, &ah);
regfree(&bl);
regfree(&bh);
break;
case OSUB:
// TODO: Constants.
regalloc(&bl, types[TPTR32], N);
regalloc(&bh, types[TPTR32], N);
gins(AMOVW, &lo1, &al);
gins(AMOVW, &hi1, &ah);
gins(AMOVW, &lo2, &bl);
gins(AMOVW, &hi2, &bh);
// low-word SUB is marked with C_SBIT; the high words are
// then combined with SBC (subtract with carry).
p1 = gins(ASUB, &bl, &al);
p1->scond |= C_SBIT;
gins(ASBC, &bh, &ah);
regfree(&bl);
regfree(&bh);
break;
case OMUL:
// TODO(kaib): this can be done with 4 regs and does not need 6
regalloc(&bl, types[TPTR32], N);
regalloc(&bh, types[TPTR32], N);
regalloc(&cl, types[TPTR32], N);
regalloc(&ch, types[TPTR32], N);
// load args into bh:bl and ch:cl.
gins(AMOVW, &hi1, &bh);
gins(AMOVW, &lo1, &bl);
gins(AMOVW, &hi2, &ch);
gins(AMOVW, &lo2, &cl);
// the operands of the multiply instructions are filled in
// by hand on the Prog returned by gins(op, N, N).
// bl * cl -> ah al
p1 = gins(AMULLU, N, N);
p1->from.type = D_REG;
p1->from.reg = bl.val.u.reg;
p1->reg = cl.val.u.reg;
p1->to.type = D_REGREG;
p1->to.reg = ah.val.u.reg;
p1->to.offset = al.val.u.reg;
//print("%P\n", p1);
// bl * ch + ah -> ah
p1 = gins(AMULA, N, N);
p1->from.type = D_REG;
p1->from.reg = bl.val.u.reg;
p1->reg = ch.val.u.reg;
p1->to.type = D_REGREG2;
p1->to.reg = ah.val.u.reg;
p1->to.offset = ah.val.u.reg;
//print("%P\n", p1);
// bh * cl + ah -> ah
p1 = gins(AMULA, N, N);
p1->from.type = D_REG;
p1->from.reg = bh.val.u.reg;
p1->reg = cl.val.u.reg;
p1->to.type = D_REGREG2;
p1->to.reg = ah.val.u.reg;
p1->to.offset = ah.val.u.reg;
//print("%P\n", p1);
regfree(&bh);
regfree(&bl);
regfree(&ch);
regfree(&cl);
break;
case OLROT:
// We only rotate by a constant c in [0,64).
// if c >= 32:
// swap lo and hi while loading (a rotate by 32)
// c -= 32
// if c == 0:
// plain copy into ah:al
// else:
// combine the shifted halves with MOVW/ORR (see below)
v = mpgetfix(r->val.u.xval);
regalloc(&bl, lo1.type, N);
regalloc(&bh, hi1.type, N);
if(v >= 32) {
// reverse during load to do the first 32 bits of rotate
v -= 32;
gins(AMOVW, &hi1, &bl);
gins(AMOVW, &lo1, &bh);
} else {
gins(AMOVW, &hi1, &bh);
gins(AMOVW, &lo1, &bl);
}
if(v == 0) {
gins(AMOVW, &bh, &ah);
gins(AMOVW, &bl, &al);
} else {
// rotate by 1 <= v <= 31
// MOVW bl<<v, al
// MOVW bh<<v, ah
// OR bl>>(32-v), ah
// OR bh>>(32-v), al
gshift(AMOVW, &bl, SHIFT_LL, v, &al);
gshift(AMOVW, &bh, SHIFT_LL, v, &ah);
gshift(AORR, &bl, SHIFT_LR, 32-v, &ah);
gshift(AORR, &bh, SHIFT_LR, 32-v, &al);
}
regfree(&bl);
regfree(&bh);
break;
case OLSH:
regalloc(&bl, lo1.type, N);
regalloc(&bh, hi1.type, N);
gins(AMOVW, &hi1, &bh);
gins(AMOVW, &lo1, &bl);
// constant shift count: pick the instruction sequence
// at compile time.
if(r->op == OLITERAL) {
v = mpgetfix(r->val.u.xval);
if(v >= 64) {
// TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al)
// here and below (verify it optimizes to EOR)
gins(AEOR, &al, &al);
gins(AEOR, &ah, &ah);
} else
if(v > 32) {
gins(AEOR, &al, &al);
// MOVW bl<<(v-32), ah
gshift(AMOVW, &bl, SHIFT_LL, (v-32), &ah);
} else
if(v == 32) {
gins(AEOR, &al, &al);
gins(AMOVW, &bl, &ah);
} else
if(v > 0) {
// MOVW bl<<v, al
gshift(AMOVW, &bl, SHIFT_LL, v, &al);
// MOVW bh<<v, ah
gshift(AMOVW, &bh, SHIFT_LL, v, &ah);
// OR bl>>(32-v), ah
gshift(AORR, &bl, SHIFT_LR, 32-v, &ah);
} else {
gins(AMOVW, &bl, &al);
gins(AMOVW, &bh, &ah);
}
goto olsh_break;
}
// variable shift count: s holds the (low word of the) count,
// creg is a scratch register for the constants 32 and 64.
regalloc(&s, types[TUINT32], N);
regalloc(&creg, types[TUINT32], N);
if (is64(r->type)) {
// a nonzero high word means the shift is >= 1<<32:
// branch (p6) straight to the ">= 64" code below.
split64(r, &cl, &ch);
gmove(&ch, &s);
gins(ATST, &s, N);
p6 = gbranch(ABNE, T, 0);
gmove(&cl, &s);
splitclean();
} else {
gmove(r, &s);
p6 = P;
}
gins(ATST, &s, N);
// shift == 0
p1 = gins(AMOVW, &bl, &al);
p1->scond = C_SCOND_EQ;
p1 = gins(AMOVW, &bh, &ah);
p1->scond = C_SCOND_EQ;
p2 = gbranch(ABEQ, T, 0);
// shift is < 32
nodconst(&n1, types[TUINT32], 32);
gmove(&n1, &creg);
gcmp(ACMP, &s, &creg);
// MOVW.LO bl<<s, al
p1 = gregshift(AMOVW, &bl, SHIFT_LL, &s, &al);
p1->scond = C_SCOND_LO;
// MOVW.LO bh<<s, ah
p1 = gregshift(AMOVW, &bh, SHIFT_LL, &s, &ah);
p1->scond = C_SCOND_LO;
// SUB.LO s, creg (creg = 32-s)
p1 = gins(ASUB, &s, &creg);
p1->scond = C_SCOND_LO;
// OR.LO bl>>creg, ah
p1 = gregshift(AORR, &bl, SHIFT_LR, &creg, &ah);
p1->scond = C_SCOND_LO;
// BLO end
p3 = gbranch(ABLO, T, 0);
// shift == 32
p1 = gins(AEOR, &al, &al);
p1->scond = C_SCOND_EQ;
p1 = gins(AMOVW, &bl, &ah);
p1->scond = C_SCOND_EQ;
p4 = gbranch(ABEQ, T, 0);
// shift is < 64
nodconst(&n1, types[TUINT32], 64);
gmove(&n1, &creg);
gcmp(ACMP, &s, &creg);
// EOR.LO al, al
p1 = gins(AEOR, &al, &al);
p1->scond = C_SCOND_LO;
// MOVW.LO creg>>1, creg (creg = 32)
p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg);
p1->scond = C_SCOND_LO;
// SUB.LO creg, s (s = s-32)
p1 = gins(ASUB, &creg, &s);
p1->scond = C_SCOND_LO;
// MOVW.LO bl<<s, ah
p1 = gregshift(AMOVW, &bl, SHIFT_LL, &s, &ah);
p1->scond = C_SCOND_LO;
p5 = gbranch(ABLO, T, 0);
// shift >= 64
if (p6 != P) patch(p6, pc);
gins(AEOR, &al, &al);
gins(AEOR, &ah, &ah);
// all the early-exit branches land here.
patch(p2, pc);
patch(p3, pc);
patch(p4, pc);
patch(p5, pc);
regfree(&s);
regfree(&creg);
olsh_break:
regfree(&bl);
regfree(&bh);
break;
case ORSH:
// throughout this case, bh.type->etype == TINT32 selects
// SHIFT_AR and fills vacated bits with bh->31; any other
// type uses SHIFT_LR and fills with zero.
regalloc(&bl, lo1.type, N);
regalloc(&bh, hi1.type, N);
gins(AMOVW, &hi1, &bh);
gins(AMOVW, &lo1, &bl);
// constant shift count: pick the instruction sequence
// at compile time.
if(r->op == OLITERAL) {
v = mpgetfix(r->val.u.xval);
if(v >= 64) {
if(bh.type->etype == TINT32) {
// MOVW bh->31, al
gshift(AMOVW, &bh, SHIFT_AR, 31, &al);
// MOVW bh->31, ah
gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
} else {
gins(AEOR, &al, &al);
gins(AEOR, &ah, &ah);
}
} else
if(v > 32) {
if(bh.type->etype == TINT32) {
// MOVW bh->(v-32), al
gshift(AMOVW, &bh, SHIFT_AR, v-32, &al);
// MOVW bh->31, ah
gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
} else {
// MOVW bh>>(v-32), al
gshift(AMOVW, &bh, SHIFT_LR, v-32, &al);
gins(AEOR, &ah, &ah);
}
} else
if(v == 32) {
gins(AMOVW, &bh, &al);
if(bh.type->etype == TINT32) {
// MOVW bh->31, ah
gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
} else {
gins(AEOR, &ah, &ah);
}
} else
if( v > 0) {
// MOVW bl>>v, al
gshift(AMOVW, &bl, SHIFT_LR, v, &al);
// OR bh<<(32-v), al
gshift(AORR, &bh, SHIFT_LL, 32-v, &al);
if(bh.type->etype == TINT32) {
// MOVW bh->v, ah
gshift(AMOVW, &bh, SHIFT_AR, v, &ah);
} else {
// MOVW bh>>v, ah
gshift(AMOVW, &bh, SHIFT_LR, v, &ah);
}
} else {
gins(AMOVW, &bl, &al);
gins(AMOVW, &bh, &ah);
}
goto orsh_break;
}
// variable shift count: s holds the (low word of the) count,
// creg is a scratch register for the constants 32 and 64.
regalloc(&s, types[TUINT32], N);
regalloc(&creg, types[TUINT32], N);
if(is64(r->type)) {
// a nonzero high word means the shift is >= 1<<32:
// set ah under NE here, then branch (p6) to the ">= 64"
// code below, which only writes al.
split64(r, &cl, &ch);
gmove(&ch, &s);
gins(ATST, &s, N);
if(bh.type->etype == TINT32)
p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
else
p1 = gins(AEOR, &ah, &ah);
p1->scond = C_SCOND_NE;
p6 = gbranch(ABNE, T, 0);
gmove(&cl, &s);
splitclean();
} else {
gmove(r, &s);
p6 = P;
}
gins(ATST, &s, N);
// shift == 0
p1 = gins(AMOVW, &bl, &al);
p1->scond = C_SCOND_EQ;
p1 = gins(AMOVW, &bh, &ah);
p1->scond = C_SCOND_EQ;
p2 = gbranch(ABEQ, T, 0);
// check if shift is < 32
nodconst(&n1, types[TUINT32], 32);
gmove(&n1, &creg);
gcmp(ACMP, &s, &creg);
// MOVW.LO bl>>s, al
p1 = gregshift(AMOVW, &bl, SHIFT_LR, &s, &al);
p1->scond = C_SCOND_LO;
// SUB.LO s,creg (creg = 32-s)
p1 = gins(ASUB, &s, &creg);
p1->scond = C_SCOND_LO;
// OR.LO bh<<(32-s), al
p1 = gregshift(AORR, &bh, SHIFT_LL, &creg, &al);
p1->scond = C_SCOND_LO;
if(bh.type->etype == TINT32) {
// MOVW bh->s, ah
p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &ah);
} else {
// MOVW bh>>s, ah
p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &ah);
}
p1->scond = C_SCOND_LO;
// BLO end
p3 = gbranch(ABLO, T, 0);
// shift == 32
p1 = gins(AMOVW, &bh, &al);
p1->scond = C_SCOND_EQ;
// the write to ah below is deliberately not predicated:
// control only gets here for shift >= 32, and the "< 64"
// and ">= 64" paths that follow write al only, so this is
// the ah value for all of them.
// NOTE(review): relies on these instructions leaving the
// condition flags from the ACMP above intact for the BEQ.
if(bh.type->etype == TINT32)
gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
else
gins(AEOR, &ah, &ah);
p4 = gbranch(ABEQ, T, 0);
// check if shift is < 64
nodconst(&n1, types[TUINT32], 64);
gmove(&n1, &creg);
gcmp(ACMP, &s, &creg);
// MOVW.LO creg>>1, creg (creg = 32)
p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg);
p1->scond = C_SCOND_LO;
// SUB.LO creg, s (s = s-32)
p1 = gins(ASUB, &creg, &s);
p1->scond = C_SCOND_LO;
if(bh.type->etype == TINT32) {
// MOVW bh->(s-32), al
p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &al);
p1->scond = C_SCOND_LO;
} else {
// MOVW bh>>(s-32), al
p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &al);
p1->scond = C_SCOND_LO;
}
// BLO end
p5 = gbranch(ABLO, T, 0);
// s >= 64
if(p6 != P)
patch(p6, pc);
if(bh.type->etype == TINT32) {
// MOVW bh->31, al
gshift(AMOVW, &bh, SHIFT_AR, 31, &al);
} else {
gins(AEOR, &al, &al);
}
// all the early-exit branches land here.
patch(p2, pc);
patch(p3, pc);
patch(p4, pc);
patch(p5, pc);
regfree(&s);
regfree(&creg);
orsh_break:
regfree(&bl);
regfree(&bh);
break;
case OXOR:
case OAND:
case OOR:
// TODO(kaib): literal optimizations
// NOTE(review): the disabled code below uses opcode names
// (AMOVL, AXORL, ANOTL, ...) that are not used anywhere in
// the live code of this function; it looks like a reference
// sketch carried over from another backend - confirm before
// enabling.
// make constant the right side (it usually is anyway).
// if(lo1.op == OLITERAL) {
// nswap(&lo1, &lo2);
// nswap(&hi1, &hi2);
// }
// if(lo2.op == OLITERAL) {
// // special cases for constants.
// lv = mpgetfix(lo2.val.u.xval);
// hv = mpgetfix(hi2.val.u.xval);
// splitclean(); // right side
// split64(res, &lo2, &hi2);
// switch(n->op) {
// case OXOR:
// gmove(&lo1, &lo2);
// gmove(&hi1, &hi2);
// switch(lv) {
// case 0:
// break;
// case 0xffffffffu:
// gins(ANOTL, N, &lo2);
// break;
// default:
// gins(AXORL, ncon(lv), &lo2);
// break;
// }
// switch(hv) {
// case 0:
// break;
// case 0xffffffffu:
// gins(ANOTL, N, &hi2);
// break;
// default:
// gins(AXORL, ncon(hv), &hi2);
// break;
// }
// break;
// case OAND:
// switch(lv) {
// case 0:
// gins(AMOVL, ncon(0), &lo2);
// break;
// default:
// gmove(&lo1, &lo2);
// if(lv != 0xffffffffu)
// gins(AANDL, ncon(lv), &lo2);
// break;
// }
// switch(hv) {
// case 0:
// gins(AMOVL, ncon(0), &hi2);
// break;
// default:
// gmove(&hi1, &hi2);
// if(hv != 0xffffffffu)
// gins(AANDL, ncon(hv), &hi2);
// break;
// }
// break;
// case OOR:
// switch(lv) {
// case 0:
// gmove(&lo1, &lo2);
// break;
// case 0xffffffffu:
// gins(AMOVL, ncon(0xffffffffu), &lo2);
// break;
// default:
// gmove(&lo1, &lo2);
// gins(AORL, ncon(lv), &lo2);
// break;
// }
// switch(hv) {
// case 0:
// gmove(&hi1, &hi2);
// break;
// case 0xffffffffu:
// gins(AMOVL, ncon(0xffffffffu), &hi2);
// break;
// default:
// gmove(&hi1, &hi2);
// gins(AORL, ncon(hv), &hi2);
// break;
// }
// break;
// }
// splitclean();
// splitclean();
// goto out;
// }
// general case: apply the op word by word, using n1 as a
// scratch register for each half of the right operand.
regalloc(&n1, lo1.type, N);
gins(AMOVW, &lo1, &al);
gins(AMOVW, &hi1, &ah);
gins(AMOVW, &lo2, &n1);
gins(optoas(n->op, lo1.type), &n1, &al);
gins(AMOVW, &hi2, &n1);
gins(optoas(n->op, lo1.type), &n1, &ah);
regfree(&n1);
break;
}
// undo the split of r (only done when r is 64-bit), then of l.
if(is64(r->type))
splitclean();
splitclean();
// store the result ah:al into res; lo1/hi1 are reused to
// name the halves of res.
split64(res, &lo1, &hi1);
gins(AMOVW, &al, &lo1);
gins(AMOVW, &ah, &hi1);
splitclean();
//out:
regfree(&al);
regfree(&ah);
}
/*
 * load the 32-bit words lhs and rhs into two freshly
 * allocated registers, emit a CMP of them, and release
 * the registers again.
 */
static void
cmp64words(Node *lhs, Node *rhs)
{
	Node ra, rb;

	regalloc(&ra, types[TINT32], N);
	regalloc(&rb, types[TINT32], N);
	gins(AMOVW, lhs, &ra);
	gins(AMOVW, rhs, &rb);
	gcmp(ACMP, &ra, &rb);
	regfree(&ra);
	regfree(&rb);
}

/*
 * generate comparison of nl, nr, both 64-bit,
 * branching to `to` when "nl op nr" holds.
 * nl is memory; nr is constant or memory.
 *
 * the high words are compared first; the low words are
 * only compared when the high words do not already
 * decide the result.  likely is passed through to
 * gbranch (negated on the branches that skip the
 * low-word comparison).
 */
void
cmp64(Node *nl, Node *nr, int op, int likely, Prog *to)
{
	Node llo, lhi, rlo, rhi;
	Prog *skip, *jmp;
	Type *wt;

	split64(nl, &llo, &lhi);
	split64(nr, &rlo, &rhi);

	// most significant words first; the branch opcodes for
	// this step are chosen from the high word's type.
	wt = lhi.type;
	cmp64words(&lhi, &rhi);

	// skip, when set, is a branch over the low-word compare
	// that gets patched to the end of the sequence.
	skip = P;
	switch(op) {
	default:
		fatal("cmp64 %O %T", op, wt);
	case OEQ:
		//	cmp hi
		//	bne skip
		//	cmp lo
		//	beq to
		// skip:
		skip = gbranch(ABNE, T, -likely);
		break;

	case ONE:
		//	cmp hi
		//	bne to
		//	cmp lo
		//	bne to
		jmp = gbranch(ABNE, T, likely);
		patch(jmp, to);
		break;

	case OGE:
	case OGT:
		//	cmp hi
		//	bgt to
		//	blt skip
		//	cmp lo
		//	bge to (or bgt to)
		// skip:
		jmp = gbranch(optoas(OGT, wt), T, likely);
		patch(jmp, to);
		skip = gbranch(optoas(OLT, wt), T, -likely);
		break;

	case OLE:
	case OLT:
		//	cmp hi
		//	blt to
		//	bgt skip
		//	cmp lo
		//	ble to (or blt to)
		// skip:
		jmp = gbranch(optoas(OLT, wt), T, likely);
		patch(jmp, to);
		skip = gbranch(optoas(OGT, wt), T, -likely);
		break;
	}

	// least significant words; the final branch opcode is
	// chosen from the low word's type.
	wt = llo.type;
	cmp64words(&llo, &rlo);
	jmp = gbranch(optoas(op, wt), T, likely);
	patch(jmp, to);

	// land the skipping branch, if any, right after the sequence.
	if(skip != P)
		patch(skip, pc);

	// one splitclean per split64 above.
	splitclean();
	splitclean();
}