[dev.ssa] cmd/compile/internal/ssa: implement lots of small (<8byte) ops.
Lots and lots of ops!
Also XOR for good measure.
Add a pass to the compiler generator to check that all of the
architecture-specific opcodes are handled by genValue. We will
catch any missing ones if we come across them during compilation,
but probably better to catch them statically.
Change-Id: Ic4adfbec55c8257f88117bc732fa664486262868
Reviewed-on: https://go-review.googlesource.com/12813
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 4334dc7..7344d22 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -1628,10 +1628,12 @@
p.From.Index = regnum(v.Args[1])
p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v)
+ // 2-address opcode arithmetic, symmetric
case ssa.OpAMD64ADDB,
ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL, ssa.OpAMD64ANDW, ssa.OpAMD64ANDB,
- ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64MULW,
- ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64ORW, ssa.OpAMD64ORB:
+ ssa.OpAMD64ORQ, ssa.OpAMD64ORL, ssa.OpAMD64ORW, ssa.OpAMD64ORB,
+ ssa.OpAMD64XORQ, ssa.OpAMD64XORL, ssa.OpAMD64XORW, ssa.OpAMD64XORB,
+ ssa.OpAMD64MULQ, ssa.OpAMD64MULL, ssa.OpAMD64MULW:
r := regnum(v)
x := regnum(v.Args[0])
y := regnum(v.Args[1])
@@ -1652,59 +1654,47 @@
} else {
p.From.Reg = x
}
- case ssa.OpAMD64ADDQconst:
- // TODO: use addq instead of leaq if target is in the right register.
- p := Prog(x86.ALEAQ)
- p.From.Type = obj.TYPE_MEM
- p.From.Reg = regnum(v.Args[0])
- p.From.Offset = v.AuxInt
- p.To.Type = obj.TYPE_REG
- p.To.Reg = regnum(v)
- case ssa.OpAMD64MULQconst:
+ // 2-address opcode arithmetic, not symmetric
+ case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL, ssa.OpAMD64SUBW, ssa.OpAMD64SUBB:
r := regnum(v)
x := regnum(v.Args[0])
- if r != x {
- p := Prog(x86.AMOVQ)
- p.From.Type = obj.TYPE_REG
- p.From.Reg = x
- p.To.Type = obj.TYPE_REG
- p.To.Reg = r
+ y := regnum(v.Args[1])
+ var neg bool
+ if y == r {
+ // compute -(y-x) instead
+ x, y = y, x
+ neg = true
}
- p := Prog(x86.AIMULQ)
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = v.AuxInt
- p.To.Type = obj.TYPE_REG
- p.To.Reg = r
- // TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
- // instead of using the MOVQ above.
- //p.From3 = new(obj.Addr)
- //p.From3.Type = obj.TYPE_REG
- //p.From3.Reg = regnum(v.Args[0])
- case ssa.OpAMD64SUBQconst:
- // This code compensates for the fact that the register allocator
- // doesn't understand 2-address instructions yet. TODO: fix that.
- x := regnum(v.Args[0])
- r := regnum(v)
if x != r {
- p := Prog(x86.AMOVQ)
+ p := Prog(regMoveAMD64(v.Type.Size()))
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
p.To.Reg = r
}
- p := Prog(x86.ASUBQ)
- p.From.Type = obj.TYPE_CONST
- p.From.Offset = v.AuxInt
+
+ p := Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_REG
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpAMD64SHLQ, ssa.OpAMD64SHRQ, ssa.OpAMD64SARQ:
+ p.From.Reg = y
+ if neg {
+ p := Prog(x86.ANEGQ) // TODO: use correct size? This is mostly a hack until regalloc does 2-address correctly
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = r
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+ }
+ case ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL, ssa.OpAMD64SHLW, ssa.OpAMD64SHLB,
+ ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
+ ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB:
x := regnum(v.Args[0])
r := regnum(v)
if x != r {
if r == x86.REG_CX {
v.Fatalf("can't implement %s, target and shift both in CX", v.LongString())
}
- p := Prog(x86.AMOVQ)
+ p := Prog(regMoveAMD64(v.Type.Size()))
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
@@ -1715,11 +1705,57 @@
p.From.Reg = regnum(v.Args[1]) // should be CX
p.To.Type = obj.TYPE_REG
p.To.Reg = r
- case ssa.OpAMD64ANDQconst, ssa.OpAMD64SHLQconst, ssa.OpAMD64SHRQconst, ssa.OpAMD64SARQconst, ssa.OpAMD64XORQconst:
+ case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst, ssa.OpAMD64ADDWconst:
+ // TODO: use addq instead of leaq if target is in the right register.
+ var asm int
+ switch v.Op {
+ case ssa.OpAMD64ADDQconst:
+ asm = x86.ALEAQ
+ case ssa.OpAMD64ADDLconst:
+ asm = x86.ALEAL
+ case ssa.OpAMD64ADDWconst:
+ asm = x86.ALEAW
+ }
+ p := Prog(asm)
+ p.From.Type = obj.TYPE_MEM
+ p.From.Reg = regnum(v.Args[0])
+ p.From.Offset = v.AuxInt
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = regnum(v)
+ case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst:
+ r := regnum(v)
+ x := regnum(v.Args[0])
+ if r != x {
+ p := Prog(regMoveAMD64(v.Type.Size()))
+ p.From.Type = obj.TYPE_REG
+ p.From.Reg = x
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+ }
+ p := Prog(v.Op.Asm())
+ p.From.Type = obj.TYPE_CONST
+ p.From.Offset = v.AuxInt
+ p.To.Type = obj.TYPE_REG
+ p.To.Reg = r
+ // TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
+ // instead of using the MOVQ above.
+ //p.From3 = new(obj.Addr)
+ //p.From3.Type = obj.TYPE_REG
+ //p.From3.Reg = regnum(v.Args[0])
+ case ssa.OpAMD64ADDBconst,
+ ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst, ssa.OpAMD64ANDWconst, ssa.OpAMD64ANDBconst,
+ ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst, ssa.OpAMD64ORWconst, ssa.OpAMD64ORBconst,
+ ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst, ssa.OpAMD64XORWconst, ssa.OpAMD64XORBconst,
+ ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst, ssa.OpAMD64SUBWconst, ssa.OpAMD64SUBBconst,
+ ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst, ssa.OpAMD64SHLWconst, ssa.OpAMD64SHLBconst,
+ ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
+ ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst:
+ // This code compensates for the fact that the register allocator
+ // doesn't understand 2-address instructions yet. TODO: fix that.
x := regnum(v.Args[0])
r := regnum(v)
if x != r {
- p := Prog(x86.AMOVQ)
+ p := Prog(regMoveAMD64(v.Type.Size()))
p.From.Type = obj.TYPE_REG
p.From.Reg = x
p.To.Type = obj.TYPE_REG
@@ -1732,7 +1768,7 @@
p.To.Reg = r
case ssa.OpAMD64SBBQcarrymask:
r := regnum(v)
- p := Prog(x86.ASBBQ)
+ p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r
p.To.Type = obj.TYPE_REG
@@ -1785,14 +1821,16 @@
addAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v)
- case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB, ssa.OpAMD64TESTB, ssa.OpAMD64TESTQ:
+ case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
+ ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB:
p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v.Args[1])
- case ssa.OpAMD64CMPQconst:
- p := Prog(x86.ACMPQ)
+ case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst,
+ ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
+ p := Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = regnum(v.Args[0])
p.To.Type = obj.TYPE_CONST
@@ -1943,6 +1981,16 @@
p := Prog(v.Op.Asm())
p.To.Type = obj.TYPE_REG
p.To.Reg = regnum(v)
+ case ssa.OpAMD64InvertFlags:
+ v.Fatalf("InvertFlags should never make it to codegen %v", v)
+ case ssa.OpAMD64REPSTOSQ:
+ Prog(x86.AREP)
+ Prog(x86.ASTOSQ)
+ v.Unimplementedf("REPSTOSQ clobbers not implemented: %s", v.LongString())
+ case ssa.OpAMD64REPMOVSB:
+ Prog(x86.AREP)
+ Prog(x86.AMOVSB)
+ v.Unimplementedf("REPMOVSB clobbers not implemented: %s", v.LongString())
default:
v.Unimplementedf("genValue not implemented: %s", v.LongString())
}