cmd/gc: faster code, mainly for rotate
* Eliminate bounds check on known small shifts.
* Rewrite x<<s | x>>(32-s) as a rotate (constant s).
* More aggressive (but still minimal) range analysis.
R=ken, dave, iant
CC=golang-dev
https://golang.org/cl/6209077
diff --git a/src/cmd/8g/ggen.c b/src/cmd/8g/ggen.c
index 6a45701..4ccf6a0 100644
--- a/src/cmd/8g/ggen.c
+++ b/src/cmd/8g/ggen.c
@@ -630,7 +630,7 @@
* res = nl >> nr
*/
void
-cgen_shift(int op, Node *nl, Node *nr, Node *res)
+cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
{
Node n1, n2, nt, cx, oldcx, hi, lo;
int a, w;
@@ -651,7 +651,7 @@
gmove(&n2, &n1);
sc = mpgetfix(nr->val.u.xval);
if(sc >= nl->type->width*8) {
- // large shift gets 2 shifts by width
+ // large shift gets 2 shifts by width-1
gins(a, ncon(w-1), &n1);
gins(a, ncon(w-1), &n1);
} else
@@ -689,27 +689,37 @@
}
// test and fix up large shifts
- if(nr->type->width > 4) {
- // delayed reg alloc
- nodreg(&n1, types[TUINT32], D_CX);
- regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
- split64(&nt, &lo, &hi);
- gmove(&lo, &n1);
- gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
- p2 = gbranch(optoas(ONE, types[TUINT32]), T);
- gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
- p1 = gbranch(optoas(OLT, types[TUINT32]), T);
- patch(p2, pc);
+ if(bounded) {
+ if(nr->type->width > 4) {
+ // delayed reg alloc
+ nodreg(&n1, types[TUINT32], D_CX);
+ regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
+ split64(&nt, &lo, &hi);
+ gmove(&lo, &n1);
+ }
} else {
- gins(optoas(OCMP, nr->type), &n1, ncon(w));
- p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ if(nr->type->width > 4) {
+ // delayed reg alloc
+ nodreg(&n1, types[TUINT32], D_CX);
+ regalloc(&n1, types[TUINT32], &n1); // to hold the shift type in CX
+ split64(&nt, &lo, &hi);
+ gmove(&lo, &n1);
+ gins(optoas(OCMP, types[TUINT32]), &hi, ncon(0));
+ p2 = gbranch(optoas(ONE, types[TUINT32]), T);
+ gins(optoas(OCMP, types[TUINT32]), &n1, ncon(w));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ patch(p2, pc);
+ } else {
+ gins(optoas(OCMP, nr->type), &n1, ncon(w));
+ p1 = gbranch(optoas(OLT, types[TUINT32]), T);
+ }
+ if(op == ORSH && issigned[nl->type->etype]) {
+ gins(a, ncon(w-1), &n2);
+ } else {
+ gmove(ncon(0), &n2);
+ }
+ patch(p1, pc);
}
- if(op == ORSH && issigned[nl->type->etype]) {
- gins(a, ncon(w-1), &n2);
- } else {
- gmove(ncon(0), &n2);
- }
- patch(p1, pc);
gins(a, &n1, &n2);
if(oldcx.op != 0)