cmd/compile: add rules to eliminate unnecessary signed shifts

This change adds rules that remove unnecessary sign-extension
instructions following arithmetic (signed) right shifts, a pattern
that appears in the math/rand functions. The existing rules handled
extensions after logical shifts (SRDconst, SRWconst) but not after
the corresponding signed shifts (SRADconst, SRAWconst).
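
To see why the extension is redundant: after an arithmetic right
shift of an int64 by 56, every bit above bit 7 is already a copy of
the sign bit, so a following 8-bit sign extension cannot change the
value. A minimal standalone sketch (the values here are chosen only
for illustration):

	package main

	import "fmt"

	func main() {
		v := int64(-1) << 60 // sign bit set: 0xF000000000000000
		s := v >> 56         // arithmetic shift sign-extends: s == -16
		// The int8 conversion below is a no-op at runtime; it is the
		// MOVBreg that the new SRADconst rule rewrites away.
		fmt.Println(s, int64(int8(s))) // prints: -16 -16
	}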

A small improvement is seen in math/rand from removing one of the
two instructions generated for Int31n, which is frequently inlined.
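
The pattern comes from Int31, which Int31n calls and which is
essentially int32(r.Int63() >> 32). A standalone sketch of that
shape (the function name here is illustrative):

	// The signed shift compiles to SRADconst [32], which already
	// sign-extends its result, so the int32 conversion (MOVWreg) is
	// redundant and is removed by the new MOVWreg (SRADconst) rule.
	func int31(v int64) int32 {
		return int32(v >> 32)
	}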

Intn1000                 46.9ns ± 0%  45.5ns ± 0%   -2.99%  (p=1.000 n=1+1)
Int63n1000               33.5ns ± 0%  32.8ns ± 0%   -2.09%  (p=1.000 n=1+1)
Int31n1000               32.7ns ± 0%  32.6ns ± 0%   -0.31%  (p=1.000 n=1+1)
Float32                  32.7ns ± 0%  30.3ns ± 0%   -7.34%  (p=1.000 n=1+1)
Float64                  21.7ns ± 0%  20.9ns ± 0%   -3.69%  (p=1.000 n=1+1)
Perm3                     205ns ± 0%   202ns ± 0%   -1.46%  (p=1.000 n=1+1)
Perm30                   1.71µs ± 0%  1.68µs ± 0%   -1.35%  (p=1.000 n=1+1)
Perm30ViaShuffle         1.65µs ± 0%  1.65µs ± 0%   -0.30%  (p=1.000 n=1+1)
ShuffleOverhead          2.83µs ± 0%  2.83µs ± 0%   -0.07%  (p=1.000 n=1+1)
Read3                    18.7ns ± 0%  16.1ns ± 0%  -13.90%  (p=1.000 n=1+1)
Read64                    126ns ± 0%   124ns ± 0%   -1.59%  (p=1.000 n=1+1)
Read1000                 1.75µs ± 0%  1.63µs ± 0%   -7.08%  (p=1.000 n=1+1)

Change-Id: I11502dfca7d65aafc76749a8d713e9e50c24a858
Reviewed-on: https://go-review.googlesource.com/c/go/+/225917
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 740f9fb..be7a985 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -704,19 +704,24 @@
 (MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x)
 (MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x)
 (MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x)
+(MOVBreg (SRADconst [c] x)) && c>=56 -> (SRADconst [c] x)
 (MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x)
 (MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x)
 (MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x)
+(MOVBreg (SRAWconst [c] x)) && c>=24 -> (SRAWconst [c] x)
 
 (MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x)
 (MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x)
 (MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x)
+(MOVHreg (SRADconst [c] x)) && c>=48 -> (SRADconst [c] x)
 (MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x)
 (MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x)
+(MOVHreg (SRAWconst [c] x)) && c>=16 -> (SRAWconst [c] x)
 (MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x)
 
 (MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x)
 (MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x)
+(MOVWreg (SRADconst [c] x)) && c>=32 -> (SRADconst [c] x)
 (MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x)
 
 // Various redundant zero/sign extension combinations.
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 695445a..d5568b6 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -6427,6 +6427,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBreg (SRADconst [c] x))
+	// cond: c>=56
+	// result: (SRADconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRADconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 56) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBreg (SRWconst [c] x))
 	// cond: c>24
 	// result: (SRWconst [c] x)
@@ -6461,6 +6478,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBreg (SRAWconst [c] x))
+	// cond: c>=24
+	// result: (SRAWconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 24) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBreg y:(MOVBreg _))
 	// result: y
 	for {
@@ -8487,6 +8521,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHreg (SRADconst [c] x))
+	// cond: c>=48
+	// result: (SRADconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRADconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 48) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg (SRWconst [c] x))
 	// cond: c>16
 	// result: (SRWconst [c] x)
@@ -8504,6 +8555,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHreg (SRAWconst [c] x))
+	// cond: c>=16
+	// result: (SRAWconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 16) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg (SRWconst [c] x))
 	// cond: c==16
 	// result: (SRAWconst [c] x)
@@ -9648,6 +9716,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWreg (SRADconst [c] x))
+	// cond: c>=32
+	// result: (SRADconst [c] x)
+	for {
+		if v_0.Op != OpPPC64SRADconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 32) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVWreg (SRDconst [c] x))
 	// cond: c==32
 	// result: (SRADconst [c] x)
diff --git a/test/codegen/shift.go b/test/codegen/shift.go
index f287ca6..305c39a 100644
--- a/test/codegen/shift.go
+++ b/test/codegen/shift.go
@@ -125,3 +125,26 @@
 	}
 	panic("shift too large")
 }
+
+func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
+
+	// ppc64le:-".*MOVW"
+	f := int32(v >> 32)
+	// ppc64le:".*MOVW"
+	f += int32(v >> 31)
+	// ppc64le:-".*MOVH"
+	g := int16(v >> 48)
+	// ppc64le:".*MOVH"
+	g += int16(v >> 30)
+	// ppc64le:-".*MOVH"
+	g += int16(f >> 16)
+	// ppc64le:-".*MOVB"
+	h := int8(v >> 56)
+	// ppc64le:".*MOVB"
+	h += int8(v >> 28)
+	// ppc64le:-".*MOVB"
+	h += int8(f >> 24)
+	// ppc64le:".*MOVB"
+	h += int8(f >> 16)
+	return int64(h), uint64(g)
+}