cmd/compile: add rules to eliminate unnecessary signed shifts

This change adds rules that remove unnecessary sign-extension
instructions following arithmetic (signed) right shifts, a pattern
that appears in the math/rand functions. The existing rules handled
extensions after logical shifts (SRDconst, SRWconst) but not after
the corresponding signed shifts (SRADconst, SRAWconst).
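
To see why the extension is redundant: after an arithmetic right
shift of an int64 by 56, every bit above bit 7 is already a copy of
the sign bit, so a following 8-bit sign extension cannot change the
value. A minimal standalone sketch (the values here are chosen only
for illustration):

	package main

	import "fmt"

	func main() {
		v := int64(-1) << 60 // sign bit set: 0xF000000000000000
		s := v >> 56         // arithmetic shift sign-extends: s == -16
		// The int8 conversion below is a no-op at runtime; it is the
		// MOVBreg that the new SRADconst rule rewrites away.
		fmt.Println(s, int64(int8(s))) // prints: -16 -16
	}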

A small improvement is seen in math/rand from removing one of the
two instructions generated for Int31n, which is frequently inlined.
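
The pattern comes from Int31, which Int31n calls and which is
essentially int32(r.Int63() >> 32). A standalone sketch of that
shape (the function name here is illustrative):

	// The signed shift compiles to SRADconst [32], which already
	// sign-extends its result, so the int32 conversion (MOVWreg) is
	// redundant and is removed by the new MOVWreg (SRADconst) rule.
	func int31(v int64) int32 {
		return int32(v >> 32)
	}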

Intn1000                 46.9ns ± 0%  45.5ns ± 0%   -2.99%  (p=1.000 n=1+1)
Int63n1000               33.5ns ± 0%  32.8ns ± 0%   -2.09%  (p=1.000 n=1+1)
Int31n1000               32.7ns ± 0%  32.6ns ± 0%   -0.31%  (p=1.000 n=1+1)
Float32                  32.7ns ± 0%  30.3ns ± 0%   -7.34%  (p=1.000 n=1+1)
Float64                  21.7ns ± 0%  20.9ns ± 0%   -3.69%  (p=1.000 n=1+1)
Perm3                     205ns ± 0%   202ns ± 0%   -1.46%  (p=1.000 n=1+1)
Perm30                   1.71µs ± 0%  1.68µs ± 0%   -1.35%  (p=1.000 n=1+1)
Perm30ViaShuffle         1.65µs ± 0%  1.65µs ± 0%   -0.30%  (p=1.000 n=1+1)
ShuffleOverhead          2.83µs ± 0%  2.83µs ± 0%   -0.07%  (p=1.000 n=1+1)
Read3                    18.7ns ± 0%  16.1ns ± 0%  -13.90%  (p=1.000 n=1+1)
Read64                    126ns ± 0%   124ns ± 0%   -1.59%  (p=1.000 n=1+1)
Read1000                 1.75µs ± 0%  1.63µs ± 0%   -7.08%  (p=1.000 n=1+1)

Change-Id: I11502dfca7d65aafc76749a8d713e9e50c24a858
Reviewed-on: https://go-review.googlesource.com/c/go/+/225917
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 740f9fb..be7a985 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -704,19 +704,24 @@
 (MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x)
 (MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x)
 (MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x)
+(MOVBreg (SRADconst [c] x)) && c>=56 -> (SRADconst [c] x)
 (MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x)
 (MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x)
 (MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x)
+(MOVBreg (SRAWconst [c] x)) && c>=24 -> (SRAWconst [c] x)
 
 (MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x)
 (MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x)
 (MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x)
+(MOVHreg (SRADconst [c] x)) && c>=48 -> (SRADconst [c] x)
 (MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x)
 (MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x)
+(MOVHreg (SRAWconst [c] x)) && c>=16 -> (SRAWconst [c] x)
 (MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x)
 
 (MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x)
 (MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x)
+(MOVWreg (SRADconst [c] x)) && c>=32 -> (SRADconst [c] x)
 (MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x)
 
 // Various redundant zero/sign extension combinations.
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 695445a..d5568b6 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -6427,6 +6427,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBreg (SRADconst [c] x))
+	// cond: c>=56
+	// result: (SRADconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRADconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 56) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBreg (SRWconst [c] x))
 	// cond: c>24
 	// result: (SRWconst [c] x)
@@ -6461,6 +6478,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBreg (SRAWconst [c] x))
+	// cond: c>=24
+	// result: (SRAWconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 24) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBreg y:(MOVBreg _))
 	// result: y
 	for {
@@ -8487,6 +8521,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHreg (SRADconst [c] x))
+	// cond: c>=48
+	// result: (SRADconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRADconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 48) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg (SRWconst [c] x))
 	// cond: c>16
 	// result: (SRWconst [c] x)
@@ -8504,6 +8555,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHreg (SRAWconst [c] x))
+	// cond: c>=16
+	// result: (SRAWconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 16) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg (SRWconst [c] x))
 	// cond: c==16
 	// result: (SRAWconst [c] x)
@@ -9648,6 +9716,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWreg (SRADconst [c] x))
+	// cond: c>=32
+	// result: (SRADconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRADconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 32) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVWreg (SRDconst [c] x))
 	// cond: c==32
 	// result: (SRADconst [c] x)
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index f287ca6..305c39a 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -125,3 +125,26 @@
 	}
 	panic("shift too large")
 }
+
+func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
+
+	// ppc64le:-".*MOVW"
+	f := int32(v >> 32)
+	// ppc64le:".*MOVW"
+	f += int32(v >> 31)
+	// ppc64le:-".*MOVH"
+	g := int16(v >> 48)
+	// ppc64le:".*MOVH"
+	g += int16(v >> 30)
+	// ppc64le:-".*MOVH"
+	g += int16(f >> 16)
+	// ppc64le:-".*MOVB"
+	h := int8(v >> 56)
+	// ppc64le:".*MOVB"
+	h += int8(v >> 28)
+	// ppc64le:-".*MOVB"
+	h += int8(f >> 24)
+	// ppc64le:".*MOVB"
+	h += int8(f >> 16)
+	return int64(h), uint64(g)
+}