cmd/compile: add rules to eliminate unnecessary signed shifts
This change adds PPC64 rules that remove unnecessary sign extensions
following signed (arithmetic) right shifts, as seen in the math/rand
functions. Existing rules did not cover some of the signed cases.
A small improvement is seen in the math/rand benchmarks because one of
the two instructions generated for Int31n, which is inlined frequently,
is removed.
Intn1000 46.9ns ± 0% 45.5ns ± 0% -2.99% (p=1.000 n=1+1)
Int63n1000 33.5ns ± 0% 32.8ns ± 0% -2.09% (p=1.000 n=1+1)
Int31n1000 32.7ns ± 0% 32.6ns ± 0% -0.31% (p=1.000 n=1+1)
Float32 32.7ns ± 0% 30.3ns ± 0% -7.34% (p=1.000 n=1+1)
Float64 21.7ns ± 0% 20.9ns ± 0% -3.69% (p=1.000 n=1+1)
Perm3 205ns ± 0% 202ns ± 0% -1.46% (p=1.000 n=1+1)
Perm30 1.71µs ± 0% 1.68µs ± 0% -1.35% (p=1.000 n=1+1)
Perm30ViaShuffle 1.65µs ± 0% 1.65µs ± 0% -0.30% (p=1.000 n=1+1)
ShuffleOverhead 2.83µs ± 0% 2.83µs ± 0% -0.07% (p=1.000 n=1+1)
Read3 18.7ns ± 0% 16.1ns ± 0% -13.90% (p=1.000 n=1+1)
Read64 126ns ± 0% 124ns ± 0% -1.59% (p=1.000 n=1+1)
Read1000 1.75µs ± 0% 1.63µs ± 0% -7.08% (p=1.000 n=1+1)
Change-Id: I11502dfca7d65aafc76749a8d713e9e50c24a858
Reviewed-on: https://go-review.googlesource.com/c/go/+/225917
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 740f9fb..be7a985 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -704,19 +704,24 @@
(MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x)
(MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x)
(MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x)
+(MOVBreg (SRADconst [c] x)) && c>=56 -> (SRADconst [c] x)
(MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x)
(MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x)
(MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x)
+(MOVBreg (SRAWconst [c] x)) && c>=24 -> (SRAWconst [c] x)
(MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x)
(MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x)
(MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x)
+(MOVHreg (SRADconst [c] x)) && c>=48 -> (SRADconst [c] x)
(MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x)
(MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x)
+(MOVHreg (SRAWconst [c] x)) && c>=16 -> (SRAWconst [c] x)
(MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x)
(MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x)
(MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x)
+(MOVWreg (SRADconst [c] x)) && c>=32 -> (SRADconst [c] x)
(MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x)
// Various redundant zero/sign extension combinations.
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 695445a..d5568b6 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -6427,6 +6427,23 @@
v.AddArg(x)
return true
}
+ // match: (MOVBreg (SRADconst [c] x))
+ // cond: c>=56
+ // result: (SRADconst [c] x)
+ for {
+ if v_0.Op != OpPPC64SRADconst {
+ break
+ }
+ c := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(c >= 56) {
+ break
+ }
+ v.reset(OpPPC64SRADconst)
+ v.AuxInt = c
+ v.AddArg(x)
+ return true
+ }
// match: (MOVBreg (SRWconst [c] x))
// cond: c>24
// result: (SRWconst [c] x)
@@ -6461,6 +6478,23 @@
v.AddArg(x)
return true
}
+ // match: (MOVBreg (SRAWconst [c] x))
+ // cond: c>=24
+ // result: (SRAWconst [c] x)
+ for {
+ if v_0.Op != OpPPC64SRAWconst {
+ break
+ }
+ c := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(c >= 24) {
+ break
+ }
+ v.reset(OpPPC64SRAWconst)
+ v.AuxInt = c
+ v.AddArg(x)
+ return true
+ }
// match: (MOVBreg y:(MOVBreg _))
// result: y
for {
@@ -8487,6 +8521,23 @@
v.AddArg(x)
return true
}
+ // match: (MOVHreg (SRADconst [c] x))
+ // cond: c>=48
+ // result: (SRADconst [c] x)
+ for {
+ if v_0.Op != OpPPC64SRADconst {
+ break
+ }
+ c := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(c >= 48) {
+ break
+ }
+ v.reset(OpPPC64SRADconst)
+ v.AuxInt = c
+ v.AddArg(x)
+ return true
+ }
// match: (MOVHreg (SRWconst [c] x))
// cond: c>16
// result: (SRWconst [c] x)
@@ -8504,6 +8555,23 @@
v.AddArg(x)
return true
}
+ // match: (MOVHreg (SRAWconst [c] x))
+ // cond: c>=16
+ // result: (SRAWconst [c] x)
+ for {
+ if v_0.Op != OpPPC64SRAWconst {
+ break
+ }
+ c := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(c >= 16) {
+ break
+ }
+ v.reset(OpPPC64SRAWconst)
+ v.AuxInt = c
+ v.AddArg(x)
+ return true
+ }
// match: (MOVHreg (SRWconst [c] x))
// cond: c==16
// result: (SRAWconst [c] x)
@@ -9648,6 +9716,23 @@
v.AddArg(x)
return true
}
+ // match: (MOVWreg (SRADconst [c] x))
+ // cond: c>=32
+ // result: (SRADconst [c] x)
+ for {
+ if v_0.Op != OpPPC64SRADconst {
+ break
+ }
+ c := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(c >= 32) {
+ break
+ }
+ v.reset(OpPPC64SRADconst)
+ v.AuxInt = c
+ v.AddArg(x)
+ return true
+ }
// match: (MOVWreg (SRDconst [c] x))
// cond: c==32
// result: (SRADconst [c] x)
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index f287ca6..305c39a 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -125,3 +125,26 @@
}
panic("shift too large")
}
+
+func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
+ // Narrows right-shifted signed values to int32/int16/int8 and accumulates them into f, g and h. Each asm check applies to the statement after it, and a leading minus marks a pattern that must not appear (per the codegen test conventions). The parameter u is not used in the body.
+ // ppc64le:-".*MOVW"
+ f := int32(v>>32) // count 32 meets the c>=32 bound of the MOVWreg-over-SRADconst rule, so no word sign extension is expected (assumes the shift lowers to SRADconst)
+ // ppc64le:".*MOVW"
+ f += int32(v>>31) // count 31 is below the c>=32 bound, so the word sign extension must remain
+ // ppc64le:-".*MOVH"
+ g := int16(v>>48) // count 48 meets the c>=48 bound of the MOVHreg-over-SRADconst rule
+ // ppc64le:".*MOVH"
+ g += int16(v>>30) // count 30 is below the c>=48 bound, so the halfword sign extension must remain
+ // ppc64le:-".*MOVH"
+ g += int16(f>>16) // f is int32; count 16 meets the c>=16 bound of the MOVHreg-over-SRAWconst rule
+ // ppc64le:-".*MOVB"
+ h := int8(v>>56) // count 56 meets the c>=56 bound of the MOVBreg-over-SRADconst rule
+ // ppc64le:".*MOVB"
+ h += int8(v>>28) // count 28 is below the c>=56 bound, so the byte sign extension must remain
+ // ppc64le:-".*MOVB"
+ h += int8(f>>24) // f is int32; count 24 meets the c>=24 bound of the MOVBreg-over-SRAWconst rule
+ // ppc64le:".*MOVB"
+ h += int8(f>>16) // count 16 is below the c>=24 bound, so the byte sign extension must remain
+ return int64(h),uint64(g)
+}