cmd/compile: fold LEAQ2/4/8 with constant index into LEAQ
Discovered this after rebasing CL 196679 (use poset bounds in prove).
Some codegen tests fail with that CL applied:
codegen/smallintiface.go:11: linux/amd64/: opcode not found: "^LEAQ\\truntime.staticuint64s\\+8\\(SB\\)"
codegen/smallintiface.go:16: linux/amd64/: opcode not found: "^LEAQ\\truntime.staticuint64s\\+2024\\(SB\\)"
codegen/smallintiface.go:21: linux/amd64/: opcode not found: "^LEAQ\\truntime.staticuint64s\\+24\\(SB\\)"
The only difference in the SSA dumps after prove is that a single
Lsh64x64 op with a constant shift amount (<< 3) is marked as bounded.
That triggers a different rule-matching sequence during lowering, which
ends up generating worse code for the test cases above.
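For context, the affected checks come from codegen tests that convert small
constant values to interface{}; the compiler addresses such values through the
runtime.staticuint64s table, so a value v is expected to compile to a LEAQ of
staticuint64s with displacement 8*v (8 = 1*8 for true, 2024 = 253*8, 24 = 3*8).
A rough sketch of that pattern (function names and exact values here are
illustrative, not the actual test file):

package codegen

// Converting a small constant to an interface should materialize the
// data pointer as a single LEAQ into runtime.staticuint64s.
func boolIface() interface{} {
	// expect: LEAQ runtime.staticuint64s+8(SB), ... (true is entry 1, 1*8 = 8)
	return true
}

func smallUint8Iface() interface{} {
	// expect: LEAQ runtime.staticuint64s+2024(SB), ... (253*8 = 2024)
	return uint8(253)
}

func smallIntIface() interface{} {
	// expect: LEAQ runtime.staticuint64s+24(SB), ... (3*8 = 24)
	return 3
}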
This CL fixes the above tests once CL 196679 is applied. Right now,
these rules never trigger (this CL passes toolstash -cmp), so a test
cannot be written yet.
Change-Id: I353f1c79c1875cac1da82cd8afa1e05e42684f1c
Reviewed-on: https://go-review.googlesource.com/c/go/+/224877
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index c6fad48..306847d 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1202,6 +1202,14 @@
(LEAQ8 [off1+4*off2] {sym1} x y)
// TODO: more?
+// Fold LEAQ2/4/8 into LEAQ when the index is a constant
+(LEAQ2 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(off+scale*2) ->
+ (LEAQ [off+scale*2] {sym} x)
+(LEAQ4 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(off+scale*4) ->
+ (LEAQ [off+scale*4] {sym} x)
+(LEAQ8 [off] {sym} x (MOV(Q|L)const [scale])) && is32Bit(off+scale*8) ->
+ (LEAQ [off+scale*8] {sym} x)
+
// Absorb InvertFlags into branches.
(LT (InvertFlags cmp) yes no) -> (GT cmp yes no)
(GT (InvertFlags cmp) yes no) -> (LT cmp yes no)
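The rules above rely on simple displacement arithmetic: LEAQ{2,4,8} computes
base + scale*index + off, so a constant index c can be absorbed into the
displacement as off + scale*c, leaving a plain LEAQ, provided the result still
fits in a signed 32-bit immediate. A standalone sketch of that arithmetic (not
compiler code; is32Bit mirrors the helper of the same name in the ssa package):

package main

import "fmt"

// is32Bit reports whether n fits in a signed 32-bit immediate, the same
// guard the rules use before folding the constant into the displacement.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// foldLEAQ mimics the rewrite: given LEAQ<scale> [off] x (MOVQconst [c]),
// return the displacement of the equivalent plain LEAQ [off+scale*c] x.
func foldLEAQ(off, c, scale int64) (newOff int64, ok bool) {
	newOff = off + c*scale
	return newOff, is32Bit(newOff)
}

func main() {
	// (LEAQ8 [16] x (MOVQconst [3])) folds to (LEAQ [40] x): 16 + 3*8 = 40.
	fmt.Println(foldLEAQ(16, 3, 8))
}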
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index e178c12..b9a401c 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -9486,6 +9486,46 @@
v.AddArg2(x, y)
return true
}
+ // match: (LEAQ2 [off] {sym} x (MOVQconst [scale]))
+ // cond: is32Bit(off+scale*2)
+ // result: (LEAQ [off+scale*2] {sym} x)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ scale := v_1.AuxInt
+ if !(is32Bit(off + scale*2)) {
+ break
+ }
+ v.reset(OpAMD64LEAQ)
+ v.AuxInt = off + scale*2
+ v.Aux = sym
+ v.AddArg(x)
+ return true
+ }
+ // match: (LEAQ2 [off] {sym} x (MOVLconst [scale]))
+ // cond: is32Bit(off+scale*2)
+ // result: (LEAQ [off+scale*2] {sym} x)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ x := v_0
+ if v_1.Op != OpAMD64MOVLconst {
+ break
+ }
+ scale := v_1.AuxInt
+ if !(is32Bit(off + scale*2)) {
+ break
+ }
+ v.reset(OpAMD64LEAQ)
+ v.AuxInt = off + scale*2
+ v.Aux = sym
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64LEAQ4(v *Value) bool {
@@ -9593,6 +9633,46 @@
v.AddArg2(x, y)
return true
}
+ // match: (LEAQ4 [off] {sym} x (MOVQconst [scale]))
+ // cond: is32Bit(off+scale*4)
+ // result: (LEAQ [off+scale*4] {sym} x)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ scale := v_1.AuxInt
+ if !(is32Bit(off + scale*4)) {
+ break
+ }
+ v.reset(OpAMD64LEAQ)
+ v.AuxInt = off + scale*4
+ v.Aux = sym
+ v.AddArg(x)
+ return true
+ }
+ // match: (LEAQ4 [off] {sym} x (MOVLconst [scale]))
+ // cond: is32Bit(off+scale*4)
+ // result: (LEAQ [off+scale*4] {sym} x)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ x := v_0
+ if v_1.Op != OpAMD64MOVLconst {
+ break
+ }
+ scale := v_1.AuxInt
+ if !(is32Bit(off + scale*4)) {
+ break
+ }
+ v.reset(OpAMD64LEAQ)
+ v.AuxInt = off + scale*4
+ v.Aux = sym
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64LEAQ8(v *Value) bool {
@@ -9662,6 +9742,46 @@
v.AddArg2(x, y)
return true
}
+ // match: (LEAQ8 [off] {sym} x (MOVQconst [scale]))
+ // cond: is32Bit(off+scale*8)
+ // result: (LEAQ [off+scale*8] {sym} x)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ x := v_0
+ if v_1.Op != OpAMD64MOVQconst {
+ break
+ }
+ scale := v_1.AuxInt
+ if !(is32Bit(off + scale*8)) {
+ break
+ }
+ v.reset(OpAMD64LEAQ)
+ v.AuxInt = off + scale*8
+ v.Aux = sym
+ v.AddArg(x)
+ return true
+ }
+ // match: (LEAQ8 [off] {sym} x (MOVLconst [scale]))
+ // cond: is32Bit(off+scale*8)
+ // result: (LEAQ [off+scale*8] {sym} x)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ x := v_0
+ if v_1.Op != OpAMD64MOVLconst {
+ break
+ }
+ scale := v_1.AuxInt
+ if !(is32Bit(off + scale*8)) {
+ break
+ }
+ v.reset(OpAMD64LEAQ)
+ v.AuxInt = off + scale*8
+ v.Aux = sym
+ v.AddArg(x)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64MOVBQSX(v *Value) bool {