cmd/compile: improve bit test code
Some bit test instruction generation stopped triggering after
the change to addressing modes. I suspect this was just because
ANDQload was being generated before the rewrite rules could discover
the BTQ. Fix that by decomposing the ANDQload when it is surrounded
by a TESTQ (thus re-enabling the BTQ rules).
Fixes #44228
Change-Id: I489b4a5a7eb01c65fc8db0753f8cec54097cadb2
Reviewed-on: https://go-review.googlesource.com/c/go/+/291749
Trust: Keith Randall <khr@golang.org>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index acd2170..01a8a16 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -2191,6 +2191,11 @@
&& clobber(l) =>
@l.Block (CMP(Q|L|W|B)constload {sym} [makeValAndOff64(0, int64(off))] ptr mem)
+// Convert ANDload to MOVload when we can do the AND in a containing TEST op.
+// Only do when it's within the same block, so we don't have flags live across basic block boundaries.
+// See issue 44228.
+(TEST(Q|L) a:(AND(Q|L)load [off] {sym} x ptr mem) a) && a.Uses == 2 && a.Block == v.Block && clobber(a) => (TEST(Q|L) (MOV(Q|L)load <a.Type> [off] {sym} ptr mem) x)
+
(MOVBload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read8(sym, int64(off)))])
(MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVLconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
(MOVLload [off] {sym} (SB) _) && symIsRO(sym) => (MOVQconst [int64(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 4074d37..5fb6c30 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -27141,6 +27141,33 @@
}
break
}
+ // match: (TESTL a:(ANDLload [off] {sym} x ptr mem) a)
+ // cond: a.Uses == 2 && a.Block == v.Block && clobber(a)
+ // result: (TESTL (MOVLload <a.Type> [off] {sym} ptr mem) x)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ a := v_0
+ if a.Op != OpAMD64ANDLload {
+ continue
+ }
+ off := auxIntToInt32(a.AuxInt)
+ sym := auxToSym(a.Aux)
+ mem := a.Args[2]
+ x := a.Args[0]
+ ptr := a.Args[1]
+ if a != v_1 || !(a.Uses == 2 && a.Block == v.Block && clobber(a)) {
+ continue
+ }
+ v.reset(OpAMD64TESTL)
+ v0 := b.NewValue0(a.Pos, OpAMD64MOVLload, a.Type)
+ v0.AuxInt = int32ToAuxInt(off)
+ v0.Aux = symToAux(sym)
+ v0.AddArg2(ptr, mem)
+ v.AddArg2(v0, x)
+ return true
+ }
+ break
+ }
return false
}
func rewriteValueAMD64_OpAMD64TESTLconst(v *Value) bool {
@@ -27246,6 +27273,33 @@
}
break
}
+ // match: (TESTQ a:(ANDQload [off] {sym} x ptr mem) a)
+ // cond: a.Uses == 2 && a.Block == v.Block && clobber(a)
+ // result: (TESTQ (MOVQload <a.Type> [off] {sym} ptr mem) x)
+ for {
+ for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+ a := v_0
+ if a.Op != OpAMD64ANDQload {
+ continue
+ }
+ off := auxIntToInt32(a.AuxInt)
+ sym := auxToSym(a.Aux)
+ mem := a.Args[2]
+ x := a.Args[0]
+ ptr := a.Args[1]
+ if a != v_1 || !(a.Uses == 2 && a.Block == v.Block && clobber(a)) {
+ continue
+ }
+ v.reset(OpAMD64TESTQ)
+ v0 := b.NewValue0(a.Pos, OpAMD64MOVQload, a.Type)
+ v0.AuxInt = int32ToAuxInt(off)
+ v0.Aux = symToAux(sym)
+ v0.AddArg2(ptr, mem)
+ v.AddArg2(v0, x)
+ return true
+ }
+ break
+ }
return false
}
func rewriteValueAMD64_OpAMD64TESTQconst(v *Value) bool {
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index 4508eba..806dad1 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -352,3 +352,12 @@
// s390x:"RISBGZ\t[$]48, [$]15, [$]0,"
return x & 0xffff00000000ffff
}
+
+func issue44228a(a []int64, i int) bool {
+ // amd64: "BTQ", -"SHL"
+ return a[i>>6]&(1<<(i&63)) != 0
+}
+func issue44228b(a []int32, i int) bool {
+ // amd64: "BTL", -"SHL"
+ return a[i>>5]&(1<<(i&31)) != 0
+}