cmd/compile: improve rules for PPC64.rules

This adds some improvements to the rules for PPC64 to eliminate
unnecessary zero or sign extends, and fixes some rules for truncates
that were not always using the instruction with the correct signedness.
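
For example (an illustrative sketch, not code from this change; the
function name is hypothetical), the truncate fix matters for code like
the following, where the signed source should lower to a sign extend
(MOVH) and the unsigned source to a zero extend (MOVHZ):

	// trunc is an illustrative helper showing both truncate flavors.
	func trunc(x int64, u uint64) (int16, uint16) {
		// int16(x): signed source -> MOVH (sign extend)
		// uint16(u): unsigned source -> MOVHZ (zero extend)
		return int16(x), uint16(u)
	}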

This reduces the size of many functions by 1 or 2 instructions, and
can improve performance in cases where the removed instructions sit
in small loops that account for a significant share of the total
execution time.

Included is a codegen testcase to verify that the unnecessary
sign/zero extend instructions are omitted.
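
As a sketch of what the testcase checks (mirroring the new
test/codegen/noextend.go; the function name is hypothetical), the byte
load and the right shift below already leave the upper bits clear, so
no separate zero extend is emitted:

	var val16 [8]uint16

	// noext is an illustrative helper; the asserted effect is that no
	// MOVBZ/MOVHZ register-to-register extend appears in the assembly.
	func noext(u8 uint8, u64 uint64) {
		val16[0] = uint16(u8)        // MOVBZ load already zero extends
		val16[1] = uint16(u64 >> 48) // SRD by 48 leaves only 16 bits
	}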

An example of the improvement (strings):
IndexAnyASCII/256:1-16     392ns ± 0%   369ns ± 0%  -5.79%  (p=0.000 n=1+10)
IndexAnyASCII/256:2-16     397ns ± 0%   376ns ± 0%  -5.23%  (p=0.000 n=1+9)
IndexAnyASCII/256:4-16     405ns ± 0%   384ns ± 0%  -5.19%  (p=1.714 n=1+6)
IndexAnyASCII/256:8-16     427ns ± 0%   403ns ± 0%  -5.57%  (p=0.000 n=1+10)
IndexAnyASCII/256:16-16    441ns ± 0%   418ns ± 1%  -5.33%  (p=0.000 n=1+10)
IndexAnyASCII/4096:1-16   5.62µs ± 0%  5.27µs ± 1%  -6.31%  (p=0.000 n=1+10)
IndexAnyASCII/4096:2-16   5.67µs ± 0%  5.29µs ± 0%  -6.67%  (p=0.222 n=1+8)
IndexAnyASCII/4096:4-16   5.66µs ± 0%  5.28µs ± 1%  -6.66%  (p=0.000 n=1+10)
IndexAnyASCII/4096:8-16   5.66µs ± 0%  5.31µs ± 1%  -6.10%  (p=0.000 n=1+10)
IndexAnyASCII/4096:16-16  5.70µs ± 0%  5.33µs ± 1%  -6.43%  (p=0.182 n=1+10)
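
The accompanying changes in rewritePPC64.go are generated code: after a
change to PPC64.rules, the rewrite file is regenerated (conventionally
by running "go run *.go" in the ssa/gen directory).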

Change-Id: I739a6132b505936d39001aada5a978ff2a5f0500
Reviewed-on: https://go-review.googlesource.com/129875
Reviewed-by: David Chase <drchase@google.com>
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 6ef8c7b..2e06fcd 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -660,14 +660,51 @@
 (MOVWreg y:(AND (MOVDconst [c]) _)) && uint64(c) <= 0x7FFFFFFF -> y
 
 // small and of zero-extend -> either zero-extend or small and
-  // degenerate-and
 (ANDconst [c] y:(MOVBZreg _)) && c&0xFF == 0xFF -> y
+(ANDconst [0xFF] y:(MOVBreg _)) -> y
 (ANDconst [c] y:(MOVHZreg _))  && c&0xFFFF == 0xFFFF -> y
-(ANDconst [c] y:(MOVWZreg _))  && c&0xFFFFFFFF == 0xFFFFFFFF -> y
-  // normal case
-(ANDconst [c] (MOVBZreg x)) -> (ANDconst [c&0xFF] x)
-(ANDconst [c] (MOVHZreg x)) -> (ANDconst [c&0xFFFF] x)
-(ANDconst [c] (MOVWZreg x)) -> (ANDconst [c&0xFFFFFFFF] x)
+(ANDconst [0xFFFF] y:(MOVHreg _)) -> y
+
+(AND (MOVDconst [c]) y:(MOVWZreg _))  && c&0xFFFFFFFF == 0xFFFFFFFF -> y
+(AND (MOVDconst [0xFFFFFFFF]) y:(MOVWreg x)) -> (MOVWZreg x)
+// normal case
+(ANDconst [c] (MOV(B|BZ)reg x)) -> (ANDconst [c&0xFF] x)
+(ANDconst [c] (MOV(H|HZ)reg x)) -> (ANDconst [c&0xFFFF] x)
+(ANDconst [c] (MOV(W|WZ)reg x)) -> (ANDconst [c&0xFFFFFFFF] x)
+
+// Eliminate unnecessary sign/zero extend following right shift
+(MOV(B|H|W)Zreg (SRWconst [c] (MOVBZreg x))) -> (SRWconst [c] (MOVBZreg x))
+(MOV(H|W)Zreg (SRWconst [c] (MOVHZreg x))) -> (SRWconst [c] (MOVHZreg x))
+(MOVWZreg (SRWconst [c] (MOVWZreg x))) -> (SRWconst [c] (MOVWZreg x))
+(MOV(B|H|W)reg (SRAWconst [c] (MOVBreg x))) -> (SRAWconst [c] (MOVBreg x))
+(MOV(H|W)reg (SRAWconst [c] (MOVHreg x))) -> (SRAWconst [c] (MOVHreg x))
+(MOVWreg (SRAWconst [c] (MOVWreg x))) -> (SRAWconst [c] (MOVWreg x))
+
+(MOVWZreg (SRWconst [c] x)) && sizeof(x.Type) <= 32 -> (SRWconst [c] x)
+(MOVHZreg (SRWconst [c] x)) && sizeof(x.Type) <= 16 -> (SRWconst [c] x)
+(MOVBZreg (SRWconst [c] x)) && sizeof(x.Type) == 8 -> (SRWconst [c] x)
+(MOVWreg (SRAWconst [c] x)) && sizeof(x.Type) <= 32 -> (SRAWconst [c] x)
+(MOVHreg (SRAWconst [c] x)) && sizeof(x.Type) <= 16 -> (SRAWconst [c] x)
+(MOVBreg (SRAWconst [c] x)) && sizeof(x.Type) == 8 -> (SRAWconst [c] x)
+
+// initial right shift will handle sign/zero extend
+(MOVBZreg (SRDconst [c] x)) && c>=56 -> (SRDconst [c] x)
+(MOVBreg (SRDconst [c] x)) && c>56 -> (SRDconst [c] x)
+(MOVBreg (SRDconst [c] x)) && c==56 -> (SRADconst [c] x)
+(MOVBZreg (SRWconst [c] x)) && c>=24 -> (SRWconst [c] x)
+(MOVBreg (SRWconst [c] x)) && c>24 -> (SRWconst [c] x)
+(MOVBreg (SRWconst [c] x)) && c==24 -> (SRAWconst [c] x)
+
+(MOVHZreg (SRDconst [c] x)) && c>=48 -> (SRDconst [c] x)
+(MOVHreg (SRDconst [c] x)) && c>48 -> (SRDconst [c] x)
+(MOVHreg (SRDconst [c] x)) && c==48 -> (SRADconst [c] x)
+(MOVHZreg (SRWconst [c] x)) && c>=16 -> (SRWconst [c] x)
+(MOVHreg (SRWconst [c] x)) && c>16 -> (SRWconst [c] x)
+(MOVHreg (SRWconst [c] x)) && c==16 -> (SRAWconst [c] x)
+
+(MOVWZreg (SRDconst [c] x)) && c>=32 -> (SRDconst [c] x)
+(MOVWreg (SRDconst [c] x)) && c>32 -> (SRDconst [c] x)
+(MOVWreg (SRDconst [c] x)) && c==32 -> (SRADconst [c] x)
 
 // Various redundant zero/sign extension combinations.
 (MOVBZreg y:(MOVBZreg _)) -> y  // repeat
@@ -851,22 +888,38 @@
 (ZeroExt16to(32|64) x) -> (MOVHZreg x)
 (ZeroExt32to64 x) -> (MOVWZreg x)
 
-(Trunc(16|32|64)to8  x) -> (MOVBreg x)
-(Trunc(32|64)to16 x) -> (MOVHreg x)
-(Trunc64to32 x) -> (MOVWreg x)
+(Trunc(16|32|64)to8  x) && isSigned(x.Type) -> (MOVBreg x)
+(Trunc(16|32|64)to8  x) -> (MOVBZreg x)
+(Trunc(32|64)to16 x) && isSigned(x.Type) -> (MOVHreg x)
+(Trunc(32|64)to16 x) -> (MOVHZreg x)
+(Trunc64to32 x) && isSigned(x.Type) -> (MOVWreg x)
+(Trunc64to32 x) -> (MOVWZreg x)
 
 (Slicemask <t> x) -> (SRADconst (NEG <t> x) [63])
 
 // Note that MOV??reg returns a 64-bit int, x is not necessarily that wide
 // This may interact with other patterns in the future. (Compare with arm64)
-(MOVBZreg x:(MOVBZload _ _))  -> x
-(MOVHZreg x:(MOVHZload _ _))  -> x
-(MOVHreg x:(MOVHload _ _))  -> x
+(MOV(B|H|W)Zreg x:(MOVBZload _ _)) -> x
+(MOV(H|W)Zreg x:(MOVHZload _ _)) -> x
+(MOV(H|W)reg x:(MOVHload _ _)) -> x
+(MOVWZreg x:(MOVWZload _ _)) -> x
+(MOVWreg x:(MOVWload _ _)) -> x
+
+// don't extend if argument is already extended
+(MOVBreg x:(Arg <t>)) && is8BitInt(t) && isSigned(t) -> x
+(MOVBZreg x:(Arg <t>)) && is8BitInt(t) && !isSigned(t) -> x
+(MOVHreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && isSigned(t) -> x
+(MOVHZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t)) && !isSigned(t) -> x
+(MOVWreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t) -> x
+(MOVWZreg x:(Arg <t>)) && (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t) -> x
 
 (MOVBZreg (MOVDconst [c]))  -> (MOVDconst [int64(uint8(c))])
 (MOVBreg (MOVDconst [c]))  -> (MOVDconst [int64(int8(c))])
 (MOVHZreg (MOVDconst [c]))  -> (MOVDconst [int64(uint16(c))])
 (MOVHreg (MOVDconst [c]))  -> (MOVDconst [int64(int16(c))])
+(MOVWreg (MOVDconst [c])) -> (MOVDconst [int64(int32(c))])
+(MOVWZreg (MOVDconst [c])) -> (MOVDconst [int64(uint32(c))])
+
 
 // Lose widening ops fed to stores
 (MOVBstore [off] {sym} ptr (MOV(B|BZ|H|HZ|W|WZ)reg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index ba6a862..57738c9 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -388,9 +388,9 @@
 	case OpPPC64ADDconst:
 		return rewriteValuePPC64_OpPPC64ADDconst_0(v)
 	case OpPPC64AND:
-		return rewriteValuePPC64_OpPPC64AND_0(v)
+		return rewriteValuePPC64_OpPPC64AND_0(v) || rewriteValuePPC64_OpPPC64AND_10(v)
 	case OpPPC64ANDconst:
-		return rewriteValuePPC64_OpPPC64ANDconst_0(v)
+		return rewriteValuePPC64_OpPPC64ANDconst_0(v) || rewriteValuePPC64_OpPPC64ANDconst_10(v)
 	case OpPPC64CMP:
 		return rewriteValuePPC64_OpPPC64CMP_0(v)
 	case OpPPC64CMPU:
@@ -452,7 +452,7 @@
 	case OpPPC64MOVBZreg:
 		return rewriteValuePPC64_OpPPC64MOVBZreg_0(v)
 	case OpPPC64MOVBreg:
-		return rewriteValuePPC64_OpPPC64MOVBreg_0(v)
+		return rewriteValuePPC64_OpPPC64MOVBreg_0(v) || rewriteValuePPC64_OpPPC64MOVBreg_10(v)
 	case OpPPC64MOVBstore:
 		return rewriteValuePPC64_OpPPC64MOVBstore_0(v) || rewriteValuePPC64_OpPPC64MOVBstore_10(v) || rewriteValuePPC64_OpPPC64MOVBstore_20(v)
 	case OpPPC64MOVBstorezero:
@@ -468,11 +468,11 @@
 	case OpPPC64MOVHZload:
 		return rewriteValuePPC64_OpPPC64MOVHZload_0(v)
 	case OpPPC64MOVHZreg:
-		return rewriteValuePPC64_OpPPC64MOVHZreg_0(v)
+		return rewriteValuePPC64_OpPPC64MOVHZreg_0(v) || rewriteValuePPC64_OpPPC64MOVHZreg_10(v)
 	case OpPPC64MOVHload:
 		return rewriteValuePPC64_OpPPC64MOVHload_0(v)
 	case OpPPC64MOVHreg:
-		return rewriteValuePPC64_OpPPC64MOVHreg_0(v)
+		return rewriteValuePPC64_OpPPC64MOVHreg_0(v) || rewriteValuePPC64_OpPPC64MOVHreg_10(v)
 	case OpPPC64MOVHstore:
 		return rewriteValuePPC64_OpPPC64MOVHstore_0(v)
 	case OpPPC64MOVHstorezero:
@@ -482,11 +482,11 @@
 	case OpPPC64MOVWZload:
 		return rewriteValuePPC64_OpPPC64MOVWZload_0(v)
 	case OpPPC64MOVWZreg:
-		return rewriteValuePPC64_OpPPC64MOVWZreg_0(v)
+		return rewriteValuePPC64_OpPPC64MOVWZreg_0(v) || rewriteValuePPC64_OpPPC64MOVWZreg_10(v)
 	case OpPPC64MOVWload:
 		return rewriteValuePPC64_OpPPC64MOVWload_0(v)
 	case OpPPC64MOVWreg:
-		return rewriteValuePPC64_OpPPC64MOVWreg_0(v)
+		return rewriteValuePPC64_OpPPC64MOVWreg_0(v) || rewriteValuePPC64_OpPPC64MOVWreg_10(v)
 	case OpPPC64MOVWstore:
 		return rewriteValuePPC64_OpPPC64MOVWstore_0(v)
 	case OpPPC64MOVWstorezero:
@@ -5533,6 +5533,95 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (AND (MOVDconst [c]) y:(MOVWZreg _))
+	// cond: c&0xFFFFFFFF == 0xFFFFFFFF
+	// result: y
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		y := v.Args[1]
+		if y.Op != OpPPC64MOVWZreg {
+			break
+		}
+		if !(c&0xFFFFFFFF == 0xFFFFFFFF) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND y:(MOVWZreg _) (MOVDconst [c]))
+	// cond: c&0xFFFFFFFF == 0xFFFFFFFF
+	// result: y
+	for {
+		_ = v.Args[1]
+		y := v.Args[0]
+		if y.Op != OpPPC64MOVWZreg {
+			break
+		}
+		v_1 := v.Args[1]
+		if v_1.Op != OpPPC64MOVDconst {
+			break
+		}
+		c := v_1.AuxInt
+		if !(c&0xFFFFFFFF == 0xFFFFFFFF) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
+	// match: (AND (MOVDconst [0xFFFFFFFF]) y:(MOVWreg x))
+	// cond:
+	// result: (MOVWZreg x)
+	for {
+		_ = v.Args[1]
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVDconst {
+			break
+		}
+		if v_0.AuxInt != 0xFFFFFFFF {
+			break
+		}
+		y := v.Args[1]
+		if y.Op != OpPPC64MOVWreg {
+			break
+		}
+		x := y.Args[0]
+		v.reset(OpPPC64MOVWZreg)
+		v.AddArg(x)
+		return true
+	}
+	// match: (AND y:(MOVWreg x) (MOVDconst [0xFFFFFFFF]))
+	// cond:
+	// result: (MOVWZreg x)
+	for {
+		_ = v.Args[1]
+		y := v.Args[0]
+		if y.Op != OpPPC64MOVWreg {
+			break
+		}
+		x := y.Args[0]
+		v_1 := v.Args[1]
+		if v_1.Op != OpPPC64MOVDconst {
+			break
+		}
+		if v_1.AuxInt != 0xFFFFFFFF {
+			break
+		}
+		v.reset(OpPPC64MOVWZreg)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64AND_10(v *Value) bool {
 	// match: (AND (MOVDconst [c]) x:(MOVBZload _ _))
 	// cond:
 	// result: (ANDconst [c&0xFF] x)
@@ -5673,6 +5762,22 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (ANDconst [0xFF] y:(MOVBreg _))
+	// cond:
+	// result: y
+	for {
+		if v.AuxInt != 0xFF {
+			break
+		}
+		y := v.Args[0]
+		if y.Op != OpPPC64MOVBreg {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = y.Type
+		v.AddArg(y)
+		return true
+	}
 	// match: (ANDconst [c] y:(MOVHZreg _))
 	// cond: c&0xFFFF == 0xFFFF
 	// result: y
@@ -5690,16 +5795,15 @@
 		v.AddArg(y)
 		return true
 	}
-	// match: (ANDconst [c] y:(MOVWZreg _))
-	// cond: c&0xFFFFFFFF == 0xFFFFFFFF
+	// match: (ANDconst [0xFFFF] y:(MOVHreg _))
+	// cond:
 	// result: y
 	for {
-		c := v.AuxInt
-		y := v.Args[0]
-		if y.Op != OpPPC64MOVWZreg {
+		if v.AuxInt != 0xFFFF {
 			break
 		}
-		if !(c&0xFFFFFFFF == 0xFFFFFFFF) {
+		y := v.Args[0]
+		if y.Op != OpPPC64MOVHreg {
 			break
 		}
 		v.reset(OpCopy)
@@ -5707,6 +5811,21 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (ANDconst [c] (MOVBreg x))
+	// cond:
+	// result: (ANDconst [c&0xFF] x)
+	for {
+		c := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVBreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpPPC64ANDconst)
+		v.AuxInt = c & 0xFF
+		v.AddArg(x)
+		return true
+	}
 	// match: (ANDconst [c] (MOVBZreg x))
 	// cond:
 	// result: (ANDconst [c&0xFF] x)
@@ -5722,6 +5841,24 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (ANDconst [c] (MOVHreg x))
+	// cond:
+	// result: (ANDconst [c&0xFFFF] x)
+	for {
+		c := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVHreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpPPC64ANDconst)
+		v.AuxInt = c & 0xFFFF
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64ANDconst_10(v *Value) bool {
 	// match: (ANDconst [c] (MOVHZreg x))
 	// cond:
 	// result: (ANDconst [c&0xFFFF] x)
@@ -5737,6 +5874,21 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (ANDconst [c] (MOVWreg x))
+	// cond:
+	// result: (ANDconst [c&0xFFFFFFFF] x)
+	for {
+		c := v.AuxInt
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVWreg {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpPPC64ANDconst)
+		v.AuxInt = c & 0xFFFFFFFF
+		v.AddArg(x)
+		return true
+	}
 	// match: (ANDconst [c] (MOVWZreg x))
 	// cond:
 	// result: (ANDconst [c&0xFFFFFFFF] x)
@@ -7061,6 +7213,10 @@
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVBZreg_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (MOVBZreg y:(ANDconst [c] _))
 	// cond: uint64(c) <= 0xFF
 	// result: y
@@ -7078,6 +7234,81 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (MOVBZreg (SRWconst [c] (MOVBZreg x)))
+	// cond:
+	// result: (SRWconst [c] (MOVBZreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVBZreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVBZreg (SRWconst [c] x))
+	// cond: sizeof(x.Type) == 8
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(sizeof(x.Type) == 8) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVBZreg (SRDconst [c] x))
+	// cond: c>=56
+	// result: (SRDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 56) {
+			break
+		}
+		v.reset(OpPPC64SRDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVBZreg (SRWconst [c] x))
+	// cond: c>=24
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 24) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBZreg y:(MOVBZreg _))
 	// cond:
 	// result: y
@@ -7118,6 +7349,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBZreg x:(Arg <t>))
+	// cond: is8BitInt(t) && !isSigned(t)
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpArg {
+			break
+		}
+		t := x.Type
+		if !(is8BitInt(t) && !isSigned(t)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBZreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(uint8(c))])
@@ -7134,6 +7382,10 @@
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVBreg_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (MOVBreg y:(ANDconst [c] _))
 	// cond: uint64(c) <= 0x7F
 	// result: y
@@ -7151,6 +7403,117 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (MOVBreg (SRAWconst [c] (MOVBreg x)))
+	// cond:
+	// result: (SRAWconst [c] (MOVBreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVBreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVBreg (SRAWconst [c] x))
+	// cond: sizeof(x.Type) == 8
+	// result: (SRAWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(sizeof(x.Type) == 8) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVBreg (SRDconst [c] x))
+	// cond: c>56
+	// result: (SRDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c > 56) {
+			break
+		}
+		v.reset(OpPPC64SRDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVBreg (SRDconst [c] x))
+	// cond: c==56
+	// result: (SRADconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c == 56) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVBreg (SRWconst [c] x))
+	// cond: c>24
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c > 24) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVBreg (SRWconst [c] x))
+	// cond: c==24
+	// result: (SRAWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c == 24) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVBreg y:(MOVBreg _))
 	// cond:
 	// result: y
@@ -7177,6 +7540,26 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVBreg x:(Arg <t>))
+	// cond: is8BitInt(t) && isSigned(t)
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpArg {
+			break
+		}
+		t := x.Type
+		if !(is8BitInt(t) && isSigned(t)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64MOVBreg_10(v *Value) bool {
 	// match: (MOVBreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(int8(c))])
@@ -8591,6 +8974,10 @@
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVHZreg_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (MOVHZreg y:(ANDconst [c] _))
 	// cond: uint64(c) <= 0xFFFF
 	// result: y
@@ -8608,6 +8995,102 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (MOVHZreg (SRWconst [c] (MOVBZreg x)))
+	// cond:
+	// result: (SRWconst [c] (MOVBZreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVBZreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVHZreg (SRWconst [c] (MOVHZreg x)))
+	// cond:
+	// result: (SRWconst [c] (MOVHZreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVHZreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVHZreg (SRWconst [c] x))
+	// cond: sizeof(x.Type) <= 16
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(sizeof(x.Type) <= 16) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVHZreg (SRDconst [c] x))
+	// cond: c>=48
+	// result: (SRDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 48) {
+			break
+		}
+		v.reset(OpPPC64SRDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVHZreg (SRWconst [c] x))
+	// cond: c>=16
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 16) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHZreg y:(MOVHZreg _))
 	// cond:
 	// result: y
@@ -8661,6 +9144,23 @@
 		v.AddArg(x)
 		return true
 	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64MOVHZreg_10(v *Value) bool {
+	// match: (MOVHZreg x:(MOVBZload _ _))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpPPC64MOVBZload {
+			break
+		}
+		_ = x.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHZreg x:(MOVHZload _ _))
 	// cond:
 	// result: x
@@ -8675,6 +9175,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHZreg x:(Arg <t>))
+	// cond: (is8BitInt(t) || is16BitInt(t)) && !isSigned(t)
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpArg {
+			break
+		}
+		t := x.Type
+		if !((is8BitInt(t) || is16BitInt(t)) && !isSigned(t)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHZreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(uint16(c))])
@@ -8743,6 +9260,10 @@
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVHreg_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (MOVHreg y:(ANDconst [c] _))
 	// cond: uint64(c) <= 0x7FFF
 	// result: y
@@ -8760,6 +9281,138 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (MOVHreg (SRAWconst [c] (MOVBreg x)))
+	// cond:
+	// result: (SRAWconst [c] (MOVBreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVBreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVHreg (SRAWconst [c] (MOVHreg x)))
+	// cond:
+	// result: (SRAWconst [c] (MOVHreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVHreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVHreg (SRAWconst [c] x))
+	// cond: sizeof(x.Type) <= 16
+	// result: (SRAWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(sizeof(x.Type) <= 16) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVHreg (SRDconst [c] x))
+	// cond: c>48
+	// result: (SRDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c > 48) {
+			break
+		}
+		v.reset(OpPPC64SRDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVHreg (SRDconst [c] x))
+	// cond: c==48
+	// result: (SRADconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c == 48) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVHreg (SRWconst [c] x))
+	// cond: c>16
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c > 16) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVHreg (SRWconst [c] x))
+	// cond: c==16
+	// result: (SRAWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c == 16) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg y:(MOVHreg _))
 	// cond:
 	// result: y
@@ -8786,6 +9439,9 @@
 		v.AddArg(y)
 		return true
 	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64MOVHreg_10(v *Value) bool {
 	// match: (MOVHreg y:(MOVHZreg x))
 	// cond:
 	// result: (MOVHreg x)
@@ -8813,6 +9469,23 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVHreg x:(Arg <t>))
+	// cond: (is8BitInt(t) || is16BitInt(t)) && isSigned(t)
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpArg {
+			break
+		}
+		t := x.Type
+		if !((is8BitInt(t) || is16BitInt(t)) && isSigned(t)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVHreg (MOVDconst [c]))
 	// cond:
 	// result: (MOVDconst [int64(int16(c))])
@@ -9234,6 +9907,10 @@
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVWZreg_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (MOVWZreg y:(ANDconst [c] _))
 	// cond: uint64(c) <= 0xFFFFFFFF
 	// result: y
@@ -9295,6 +9972,105 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (MOVWZreg (SRWconst [c] (MOVBZreg x)))
+	// cond:
+	// result: (SRWconst [c] (MOVBZreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVBZreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBZreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVWZreg (SRWconst [c] (MOVHZreg x)))
+	// cond:
+	// result: (SRWconst [c] (MOVHZreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVHZreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVHZreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVWZreg (SRWconst [c] (MOVWZreg x)))
+	// cond:
+	// result: (SRWconst [c] (MOVWZreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVWZreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVWZreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVWZreg (SRWconst [c] x))
+	// cond: sizeof(x.Type) <= 32
+	// result: (SRWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(sizeof(x.Type) <= 32) {
+			break
+		}
+		v.reset(OpPPC64SRWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWZreg (SRDconst [c] x))
+	// cond: c>=32
+	// result: (SRDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c >= 32) {
+			break
+		}
+		v.reset(OpPPC64SRDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVWZreg y:(MOVWZreg _))
 	// cond:
 	// result: y
@@ -9321,6 +10097,9 @@
 		v.AddArg(y)
 		return true
 	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64MOVWZreg_10(v *Value) bool {
 	// match: (MOVWZreg y:(MOVBZreg _))
 	// cond:
 	// result: y
@@ -9375,6 +10154,78 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWZreg x:(MOVBZload _ _))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpPPC64MOVBZload {
+			break
+		}
+		_ = x.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWZreg x:(MOVHZload _ _))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpPPC64MOVHZload {
+			break
+		}
+		_ = x.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWZreg x:(MOVWZload _ _))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpPPC64MOVWZload {
+			break
+		}
+		_ = x.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWZreg x:(Arg <t>))
+	// cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpArg {
+			break
+		}
+		t := x.Type
+		if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && !isSigned(t)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWZreg (MOVDconst [c]))
+	// cond:
+	// result: (MOVDconst [int64(uint32(c))])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		v.reset(OpPPC64MOVDconst)
+		v.AuxInt = int64(uint32(c))
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVWload_0(v *Value) bool {
@@ -9430,6 +10281,10 @@
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVWreg_0(v *Value) bool {
+	b := v.Block
+	_ = b
+	typ := &b.Func.Config.Types
+	_ = typ
 	// match: (MOVWreg y:(ANDconst [c] _))
 	// cond: uint64(c) <= 0xFFFF
 	// result: y
@@ -9491,6 +10346,123 @@
 		v.AddArg(y)
 		return true
 	}
+	// match: (MOVWreg (SRAWconst [c] (MOVBreg x)))
+	// cond:
+	// result: (SRAWconst [c] (MOVBreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVBreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVBreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVWreg (SRAWconst [c] (MOVHreg x)))
+	// cond:
+	// result: (SRAWconst [c] (MOVHreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVHreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVHreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVWreg (SRAWconst [c] (MOVWreg x)))
+	// cond:
+	// result: (SRAWconst [c] (MOVWreg x))
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		v_0_0 := v_0.Args[0]
+		if v_0_0.Op != OpPPC64MOVWreg {
+			break
+		}
+		x := v_0_0.Args[0]
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v0 := b.NewValue0(v.Pos, OpPPC64MOVWreg, typ.Int64)
+		v0.AddArg(x)
+		v.AddArg(v0)
+		return true
+	}
+	// match: (MOVWreg (SRAWconst [c] x))
+	// cond: sizeof(x.Type) <= 32
+	// result: (SRAWconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRAWconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(sizeof(x.Type) <= 32) {
+			break
+		}
+		v.reset(OpPPC64SRAWconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWreg (SRDconst [c] x))
+	// cond: c>32
+	// result: (SRDconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c > 32) {
+			break
+		}
+		v.reset(OpPPC64SRDconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWreg (SRDconst [c] x))
+	// cond: c==32
+	// result: (SRADconst [c] x)
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64SRDconst {
+			break
+		}
+		c := v_0.AuxInt
+		x := v_0.Args[0]
+		if !(c == 32) {
+			break
+		}
+		v.reset(OpPPC64SRADconst)
+		v.AuxInt = c
+		v.AddArg(x)
+		return true
+	}
 	// match: (MOVWreg y:(MOVWreg _))
 	// cond:
 	// result: y
@@ -9504,6 +10476,9 @@
 		v.AddArg(y)
 		return true
 	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64MOVWreg_10(v *Value) bool {
 	// match: (MOVWreg y:(MOVHreg _))
 	// cond:
 	// result: y
@@ -9543,6 +10518,64 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (MOVWreg x:(MOVHload _ _))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpPPC64MOVHload {
+			break
+		}
+		_ = x.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWreg x:(MOVWload _ _))
+	// cond:
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpPPC64MOVWload {
+			break
+		}
+		_ = x.Args[1]
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWreg x:(Arg <t>))
+	// cond: (is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)
+	// result: x
+	for {
+		x := v.Args[0]
+		if x.Op != OpArg {
+			break
+		}
+		t := x.Type
+		if !((is8BitInt(t) || is16BitInt(t) || is32BitInt(t)) && isSigned(t)) {
+			break
+		}
+		v.reset(OpCopy)
+		v.Type = x.Type
+		v.AddArg(x)
+		return true
+	}
+	// match: (MOVWreg (MOVDconst [c]))
+	// cond:
+	// result: (MOVDconst [int64(int32(c))])
+	for {
+		v_0 := v.Args[0]
+		if v_0.Op != OpPPC64MOVDconst {
+			break
+		}
+		c := v_0.AuxInt
+		v.reset(OpPPC64MOVDconst)
+		v.AuxInt = int64(int32(c))
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64MOVWstore_0(v *Value) bool {
@@ -27043,69 +28076,141 @@
 }
 func rewriteValuePPC64_OpTrunc16to8_0(v *Value) bool {
 	// match: (Trunc16to8 x)
-	// cond:
+	// cond: isSigned(x.Type)
 	// result: (MOVBreg x)
 	for {
 		x := v.Args[0]
+		if !(isSigned(x.Type)) {
+			break
+		}
 		v.reset(OpPPC64MOVBreg)
 		v.AddArg(x)
 		return true
 	}
+	// match: (Trunc16to8 x)
+	// cond:
+	// result: (MOVBZreg x)
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64MOVBZreg)
+		v.AddArg(x)
+		return true
+	}
 }
 func rewriteValuePPC64_OpTrunc32to16_0(v *Value) bool {
 	// match: (Trunc32to16 x)
-	// cond:
+	// cond: isSigned(x.Type)
 	// result: (MOVHreg x)
 	for {
 		x := v.Args[0]
+		if !(isSigned(x.Type)) {
+			break
+		}
 		v.reset(OpPPC64MOVHreg)
 		v.AddArg(x)
 		return true
 	}
+	// match: (Trunc32to16 x)
+	// cond:
+	// result: (MOVHZreg x)
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64MOVHZreg)
+		v.AddArg(x)
+		return true
+	}
 }
 func rewriteValuePPC64_OpTrunc32to8_0(v *Value) bool {
 	// match: (Trunc32to8 x)
-	// cond:
+	// cond: isSigned(x.Type)
 	// result: (MOVBreg x)
 	for {
 		x := v.Args[0]
+		if !(isSigned(x.Type)) {
+			break
+		}
 		v.reset(OpPPC64MOVBreg)
 		v.AddArg(x)
 		return true
 	}
+	// match: (Trunc32to8 x)
+	// cond:
+	// result: (MOVBZreg x)
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64MOVBZreg)
+		v.AddArg(x)
+		return true
+	}
 }
 func rewriteValuePPC64_OpTrunc64to16_0(v *Value) bool {
 	// match: (Trunc64to16 x)
-	// cond:
+	// cond: isSigned(x.Type)
 	// result: (MOVHreg x)
 	for {
 		x := v.Args[0]
+		if !(isSigned(x.Type)) {
+			break
+		}
 		v.reset(OpPPC64MOVHreg)
 		v.AddArg(x)
 		return true
 	}
+	// match: (Trunc64to16 x)
+	// cond:
+	// result: (MOVHZreg x)
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64MOVHZreg)
+		v.AddArg(x)
+		return true
+	}
 }
 func rewriteValuePPC64_OpTrunc64to32_0(v *Value) bool {
 	// match: (Trunc64to32 x)
-	// cond:
+	// cond: isSigned(x.Type)
 	// result: (MOVWreg x)
 	for {
 		x := v.Args[0]
+		if !(isSigned(x.Type)) {
+			break
+		}
 		v.reset(OpPPC64MOVWreg)
 		v.AddArg(x)
 		return true
 	}
+	// match: (Trunc64to32 x)
+	// cond:
+	// result: (MOVWZreg x)
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64MOVWZreg)
+		v.AddArg(x)
+		return true
+	}
 }
 func rewriteValuePPC64_OpTrunc64to8_0(v *Value) bool {
 	// match: (Trunc64to8 x)
-	// cond:
+	// cond: isSigned(x.Type)
 	// result: (MOVBreg x)
 	for {
 		x := v.Args[0]
+		if !(isSigned(x.Type)) {
+			break
+		}
 		v.reset(OpPPC64MOVBreg)
 		v.AddArg(x)
 		return true
 	}
+	// match: (Trunc64to8 x)
+	// cond:
+	// result: (MOVBZreg x)
+	for {
+		x := v.Args[0]
+		v.reset(OpPPC64MOVBZreg)
+		v.AddArg(x)
+		return true
+	}
 }
 func rewriteValuePPC64_OpWB_0(v *Value) bool {
 	// match: (WB {fn} destptr srcptr mem)
diff --git a/test/codegen/noextend.go b/test/codegen/noextend.go
new file mode 100644
index 0000000..ee49002
--- /dev/null
+++ b/test/codegen/noextend.go
@@ -0,0 +1,210 @@
+// asmcheck
+
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+var sval64 [8]int64
+var sval32 [8]int32
+var sval16 [8]int16
+var sval8 [8]int8
+var val64 [8]uint64
+var val32 [8]uint32
+var val16 [8]uint16
+var val8 [8]uint8
+
+// ----------------------------- //
+//    avoid zero/sign extensions //
+// ----------------------------- //
+
+func set16(x8 int8, u8 uint8, y8 int8, z8 uint8) {
+	// Truncate not needed, load does sign/zero extend
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval16[0] = int16(x8)
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(u8)
+
+	// AND not needed due to size
+	// ppc64le:-"ANDCC"
+	sval16[1] = 255 & int16(x8+y8)
+
+	// ppc64le:-"ANDCC"
+	val16[1] = 255 & uint16(u8+z8)
+}
+
+func shiftidx(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) {
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval16[0] = int16(val16[x8>>1])
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(sval16[u8>>2])
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	sval16[1] = int16(val16[x16>>1])
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val16[1] = uint16(sval16[u16>>2])
+}
+
+func setnox(x8 int8, u8 uint8, y8 int8, z8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) {
+	// Truncate not needed due to sign/zero extension on load
+
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval16[0] = int16(x8)
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val16[0] = uint16(u8)
+
+	// AND not needed due to size
+	// ppc64le:-"ANDCC"
+	sval16[1] = 255 & int16(x8+y8)
+
+	// ppc64le:-"ANDCC"
+	val16[1] = 255 & uint16(u8+z8)
+
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval32[0] = int32(x8)
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	sval32[1] = int32(x16)
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val32[0] = uint32(u8)
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val32[1] = uint32(u16)
+
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	sval64[0] = int64(x8)
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	sval64[1] = int64(x16)
+
+	// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
+	sval64[2] = int64(x32)
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	val64[0] = uint64(u8)
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	val64[1] = uint64(u16)
+
+	// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
+	val64[2] = uint64(u32)
+}
+
+func cmp16(x8 int8, u8 uint8, x32 int32, u32 uint32, x64 int64, u64 uint64) bool {
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	if int16(x8) == sval16[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint16(u8) == val16[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint16(u32>>16) == val16[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint16(u64>>48) == val16[0] {
+		return true
+	}
+
+	// Verify the truncates are using the correct sign.
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if int16(x32) == sval16[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if uint16(u32) == val16[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if int16(x64) == sval16[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if uint16(u64) == val16[0] {
+		return true
+	}
+
+	return false
+}
+
+func cmp32(x8 int8, u8 uint8, x16 int16, u16 uint16, x64 int64, u64 uint64) bool {
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	if int32(x8) == sval32[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint32(u8) == val32[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if int32(x16) == sval32[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint32(u16) == val32[0] {
+		return true
+	}
+
+	// Verify the truncates are using the correct sign.
+	// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
+	if int32(x64) == sval32[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
+	if uint32(u64) == val32[0] {
+		return true
+	}
+
+	return false
+}
+
+func cmp64(x8 int8, u8 uint8, x16 int16, u16 uint16, x32 int32, u32 uint32) bool {
+	// ppc64le:-"MOVB\tR\\d+,\\sR\\d+"
+	if int64(x8) == sval64[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVBZ\tR\\d+,\\sR\\d+"
+	if uint64(u8) == val64[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVH\tR\\d+,\\sR\\d+"
+	if int64(x16) == sval64[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVHZ\tR\\d+,\\sR\\d+"
+	if uint64(u16) == val64[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVW\tR\\d+,\\sR\\d+"
+	if int64(x32) == sval64[0] {
+		return true
+	}
+
+	// ppc64le:-"MOVWZ\tR\\d+,\\sR\\d+"
+	if uint64(u32) == val64[0] {
+		return true
+	}
+	return false
+}