cmd/compile: generate subfic on ppc64

This merges an lis + subf into subfic, and for 32-bit constants
an lwa + subf into oris + ori + subf.
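
For example, 40 - a was previously a constant load followed by a
subtract and now becomes a single subfic (an illustrative sketch with
arbitrary register choices; see the new SubFromConst codegen test
below):

	// before: MOVD $40, R4     (li/lis)
	//         SUB  R3, R4, R3  (subf)
	// after:  SUBC R3, $40, R3 (subfic)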

The carry bit is no longer used in code generation; therefore, I
think we can clobber it as needed.  Note that lowered borrow/carry
arithmetic is self-contained and thus is not affected.
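
For instance, math/bits.Add64 is selected as one lowered op
(LoweredAdd64Carry), so its carry-setting and carry-consuming
instructions are emitted adjacently and a subfic can never be
scheduled between them.  A minimal sketch (addCarryChain is a
hypothetical example function, not part of this CL):

	package p

	import "math/bits"

	// The carry produced by the low-word add is consumed
	// immediately; it is never live across unrelated code.
	func addCarryChain(x, y, ci uint64) (sum, carryOut uint64) {
		return bits.Add64(x, y, ci)
	}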

A few extra rules are added to ensure early transformations to
SUBFCconst don't trip up the existing rules, to fold constant
operations, and to otherwise simplify lowering.  Likewise, tests
are added to ensure all rules are hit.  Generic constant folding
catches the trivial cases; however, some lowering rules insert
arithmetic which can introduce new opportunities (e.g. BitLen or
Slicemask).
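
As a sketch of one such opportunity, 64 - bits.Len64(n) (the new
SubFromLen64 test) lowers to nested subtract-from-constants, which
the new folding rules collapse until only the count-leading-zeros
remains:

	(SUBFCconst [64] (SUBFCconst [64] (CNTLZD n)))
	=> (ADDconst [0] (CNTLZD n))  // (SUBFCconst [c] (SUBFCconst [d] x)) => (ADDconst [c-d] x)
	=> (CNTLZD n)                 // (ADDconst [0] x) => x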

I couldn't find a specific benchmark that demonstrates a noteworthy
improvement, but this change generates subfic in many of the default
bent test binaries, so we at least save a little code space.
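
(To spot-check locally, one can dump the generated code for a small
file with something like GOARCH=ppc64le go tool compile -S x.go and
grep for SUBC; the invocation is illustrative, not part of this CL.)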

Change-Id: Iad7c6e5767eaa9dc24dc1c989bd1c8cfe1982012
Reviewed-on: https://go-review.googlesource.com/c/go/+/249461
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
diff --git a/src/cmd/compile/internal/ppc64/ssa.go b/src/cmd/compile/internal/ppc64/ssa.go
index 9c4c01e..f8d9ac2 100644
--- a/src/cmd/compile/internal/ppc64/ssa.go
+++ b/src/cmd/compile/internal/ppc64/ssa.go
@@ -649,6 +649,14 @@
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = v.Reg()
 
+	case ssa.OpPPC64SUBFCconst:
+		p := s.Prog(v.Op.Asm())
+		p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt})
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+
 	case ssa.OpPPC64ANDCCconst:
 		p := s.Prog(v.Op.Asm())
 		p.Reg = v.Args[0].Reg()
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 509cfe1..e5fb1e9 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -110,13 +110,21 @@
 // Rotate generation with non-const shift
 // these match patterns from math/bits/RotateLeft[32|64], but there could be others
 (ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+(ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
 ( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
 (XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
+(XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))) => (ROTL x y)
 
+
+(ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 (ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 ( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
+(XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 (XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))) => (ROTLW x y)
 
+
 // Lowering rotates
 (RotateLeft32 x y) => (ROTLW x y)
 (RotateLeft64 x y) => (ROTL x y)
@@ -192,11 +200,15 @@
 (Rsh64Ux64 x (AND y (MOVDconst [63]))) => (SRD x (ANDconst <typ.Int64> [63] y))
 (Rsh64Ux64 x (ANDconst <typ.UInt> [63] y)) => (SRD x (ANDconst <typ.UInt> [63] y))
 (Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 (Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 (Rsh64x64 x (AND y (MOVDconst [63]))) => (SRAD x (ANDconst <typ.Int64> [63] y))
 (Rsh64x64 x (ANDconst <typ.UInt> [63] y)) => (SRAD x (ANDconst <typ.UInt> [63] y))
 (Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 (Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
+(Rsh64x64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63])))) => (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
 
 (Lsh64x64 x y)  => (SLD  x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
 (Rsh64x64 x y) => (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
@@ -208,12 +220,16 @@
 (Rsh32Ux64 x (AND y (MOVDconst [31]))) => (SRW x (ANDconst <typ.Int32> [31] y))
 (Rsh32Ux64 x (ANDconst <typ.UInt> [31] y)) => (SRW x (ANDconst <typ.UInt> [31] y))
 (Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 (Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 
 (Rsh32x64 x (AND y (MOVDconst [31]))) => (SRAW x (ANDconst <typ.Int32> [31] y))
 (Rsh32x64 x (ANDconst <typ.UInt> [31] y)) => (SRAW x (ANDconst <typ.UInt> [31] y))
 (Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 (Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
+(Rsh32x64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31])))) => (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
 
 (Rsh32x64 x y)  => (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
 (Rsh32Ux64 x y) => (SRW  x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
@@ -299,8 +315,8 @@
 (Ctz16 x) => (POPCNTW (MOVHZreg (ANDN <typ.Int16> (ADDconst <typ.Int16> [-1] x) x)))
 (Ctz8 x)  => (POPCNTB (MOVBZreg (ANDN <typ.UInt8> (ADDconst <typ.UInt8> [-1] x) x)))
 
-(BitLen64 x) => (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
-(BitLen32 x) => (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
+(BitLen64 x) => (SUBFCconst [64] (CNTLZD <typ.Int> x))
+(BitLen32 x) => (SUBFCconst [32] (CNTLZW <typ.Int> x))
 
 (PopCount64 ...) => (POPCNTD ...)
 (PopCount32 x) => (POPCNTW (MOVWZreg x))
@@ -770,10 +786,19 @@
 (ADDconst [c] (ADDconst [d] x)) && is32Bit(c+d) => (ADDconst [c+d] x)
 (ADDconst [0] x) => x
 (SUB x (MOVDconst [c])) && is32Bit(-c) => (ADDconst [-c] x)
-// TODO deal with subtract-from-const
 
 (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
 
+// Subtract from constant (the carry is generated but ignored).
+// Note that these rules clobber the carry bit.
+(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
+(SUBFCconst [c] (NEG x)) => (ADDconst [c] x)
+(SUBFCconst [c] (SUBFCconst [d] x)) && is32Bit(c-d) => (ADDconst [c-d] x)
+(SUBFCconst [0] x) => (NEG x)
+(ADDconst [c] (SUBFCconst [d] x)) && is32Bit(c+d) => (SUBFCconst [c+d] x)
+(NEG (ADDconst [c] x)) && is32Bit(-c) => (SUBFCconst [-c] x)
+(NEG (SUBFCconst [c] x)) && is32Bit(-c) => (ADDconst [-c] x)
+
 // Use register moves instead of stores and loads to move int<=>float values
 // Common with math Float64bits, Float64frombits
 (MOVDload [off] {sym} ptr (FMOVDstore [off] {sym} ptr x _)) => (MFVSRD x)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index f912224..44f6a74 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -175,6 +175,7 @@
 		{name: "FADD", argLength: 2, reg: fp21, asm: "FADD", commutative: true},   // arg0+arg1
 		{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true}, // arg0+arg1
 		{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"},                        // arg0-arg1
+		{name: "SUBFCconst", argLength: 1, reg: gp11, asm: "SUBC", aux: "Int64"},  // auxInt - arg0 (with carry)
 		{name: "FSUB", argLength: 2, reg: fp21, asm: "FSUB"},                      // arg0-arg1
 		{name: "FSUBS", argLength: 2, reg: fp21, asm: "FSUBS"},                    // arg0-arg1
 
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index e181174..4540189 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1828,6 +1828,7 @@
 	OpPPC64FADD
 	OpPPC64FADDS
 	OpPPC64SUB
+	OpPPC64SUBFCconst
 	OpPPC64FSUB
 	OpPPC64FSUBS
 	OpPPC64MULLD
@@ -24314,6 +24315,20 @@
 		},
 	},
 	{
+		name:    "SUBFCconst",
+		auxType: auxInt64,
+		argLen:  1,
+		asm:     ppc64.ASUBC,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+			},
+			outputs: []outputInfo{
+				{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
+			},
+		},
+	},
+	{
 		name:   "FSUB",
 		argLen: 2,
 		asm:    ppc64.AFSUB,
diff --git a/src/cmd/compile/internal/ssa/rewritePPC64.go b/src/cmd/compile/internal/ssa/rewritePPC64.go
index 1a0b03e..152cdfd 100644
--- a/src/cmd/compile/internal/ssa/rewritePPC64.go
+++ b/src/cmd/compile/internal/ssa/rewritePPC64.go
@@ -568,6 +568,8 @@
 		return rewriteValuePPC64_OpPPC64MOVWstorezero(v)
 	case OpPPC64MTVSRD:
 		return rewriteValuePPC64_OpPPC64MTVSRD(v)
+	case OpPPC64NEG:
+		return rewriteValuePPC64_OpPPC64NEG(v)
 	case OpPPC64NOR:
 		return rewriteValuePPC64_OpPPC64NOR(v)
 	case OpPPC64NotEqual:
@@ -596,6 +598,8 @@
 		return rewriteValuePPC64_OpPPC64SRW(v)
 	case OpPPC64SUB:
 		return rewriteValuePPC64_OpPPC64SUB(v)
+	case OpPPC64SUBFCconst:
+		return rewriteValuePPC64_OpPPC64SUBFCconst(v)
 	case OpPPC64XOR:
 		return rewriteValuePPC64_OpPPC64XOR(v)
 	case OpPPC64XORconst:
@@ -1021,15 +1025,14 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (BitLen32 x)
-	// result: (SUB (MOVDconst [32]) (CNTLZW <typ.Int> x))
+	// result: (SUBFCconst [32] (CNTLZW <typ.Int> x))
 	for {
 		x := v_0
-		v.reset(OpPPC64SUB)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v0.AuxInt = int64ToAuxInt(32)
-		v1 := b.NewValue0(v.Pos, OpPPC64CNTLZW, typ.Int)
-		v1.AddArg(x)
-		v.AddArg2(v0, v1)
+		v.reset(OpPPC64SUBFCconst)
+		v.AuxInt = int64ToAuxInt(32)
+		v0 := b.NewValue0(v.Pos, OpPPC64CNTLZW, typ.Int)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
 }
@@ -1038,15 +1041,14 @@
 	b := v.Block
 	typ := &b.Func.Config.Types
 	// match: (BitLen64 x)
-	// result: (SUB (MOVDconst [64]) (CNTLZD <typ.Int> x))
+	// result: (SUBFCconst [64] (CNTLZD <typ.Int> x))
 	for {
 		x := v_0
-		v.reset(OpPPC64SUB)
-		v0 := b.NewValue0(v.Pos, OpPPC64MOVDconst, typ.Int64)
-		v0.AuxInt = int64ToAuxInt(64)
-		v1 := b.NewValue0(v.Pos, OpPPC64CNTLZD, typ.Int)
-		v1.AddArg(x)
-		v.AddArg2(v0, v1)
+		v.reset(OpPPC64SUBFCconst)
+		v.AuxInt = int64ToAuxInt(64)
+		v0 := b.NewValue0(v.Pos, OpPPC64CNTLZD, typ.Int)
+		v0.AddArg(x)
+		v.AddArg(v0)
 		return true
 	}
 }
@@ -3957,6 +3959,76 @@
 		}
 		break
 	}
+	// match: (ADD (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))))
+	// result: (ROTL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpPPC64SLD {
+				continue
+			}
+			_ = v_0.Args[1]
+			x := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int64 || auxIntToInt64(v_0_1.AuxInt) != 63 {
+				continue
+			}
+			y := v_0_1.Args[0]
+			if v_1.Op != OpPPC64SRD {
+				continue
+			}
+			_ = v_1.Args[1]
+			if x != v_1.Args[0] {
+				continue
+			}
+			v_1_1 := v_1.Args[1]
+			if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 64 {
+				continue
+			}
+			v_1_1_0 := v_1_1.Args[0]
+			if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 63 || y != v_1_1_0.Args[0] {
+				continue
+			}
+			v.reset(OpPPC64ROTL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))))
+	// result: (ROTLW x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpPPC64SLW {
+				continue
+			}
+			_ = v_0.Args[1]
+			x := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int32 || auxIntToInt64(v_0_1.AuxInt) != 31 {
+				continue
+			}
+			y := v_0_1.Args[0]
+			if v_1.Op != OpPPC64SRW {
+				continue
+			}
+			_ = v_1.Args[1]
+			if x != v_1.Args[0] {
+				continue
+			}
+			v_1_1 := v_1.Args[1]
+			if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 32 {
+				continue
+			}
+			v_1_1_0 := v_1_1.Args[0]
+			if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 31 || y != v_1_1_0.Args[0] {
+				continue
+			}
+			v.reset(OpPPC64ROTLW)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: (ADD (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))))
 	// result: (ROTLW x y)
 	for {
@@ -4069,6 +4141,24 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (ADDconst [c] (SUBFCconst [d] x))
+	// cond: is32Bit(c+d)
+	// result: (SUBFCconst [c+d] x)
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpPPC64SUBFCconst {
+			break
+		}
+		d := auxIntToInt64(v_0.AuxInt)
+		x := v_0.Args[0]
+		if !(is32Bit(c + d)) {
+			break
+		}
+		v.reset(OpPPC64SUBFCconst)
+		v.AuxInt = int64ToAuxInt(c + d)
+		v.AddArg(x)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
@@ -10336,6 +10426,44 @@
 	}
 	return false
 }
+func rewriteValuePPC64_OpPPC64NEG(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (NEG (ADDconst [c] x))
+	// cond: is32Bit(-c)
+	// result: (SUBFCconst [-c] x)
+	for {
+		if v_0.Op != OpPPC64ADDconst {
+			break
+		}
+		c := auxIntToInt64(v_0.AuxInt)
+		x := v_0.Args[0]
+		if !(is32Bit(-c)) {
+			break
+		}
+		v.reset(OpPPC64SUBFCconst)
+		v.AuxInt = int64ToAuxInt(-c)
+		v.AddArg(x)
+		return true
+	}
+	// match: (NEG (SUBFCconst [c] x))
+	// cond: is32Bit(-c)
+	// result: (ADDconst [-c] x)
+	for {
+		if v_0.Op != OpPPC64SUBFCconst {
+			break
+		}
+		c := auxIntToInt64(v_0.AuxInt)
+		x := v_0.Args[0]
+		if !(is32Bit(-c)) {
+			break
+		}
+		v.reset(OpPPC64ADDconst)
+		v.AuxInt = int64ToAuxInt(-c)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
 func rewriteValuePPC64_OpPPC64NOR(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -10510,6 +10638,76 @@
 		}
 		break
 	}
+	// match: ( OR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))))
+	// result: (ROTL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpPPC64SLD {
+				continue
+			}
+			_ = v_0.Args[1]
+			x := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int64 || auxIntToInt64(v_0_1.AuxInt) != 63 {
+				continue
+			}
+			y := v_0_1.Args[0]
+			if v_1.Op != OpPPC64SRD {
+				continue
+			}
+			_ = v_1.Args[1]
+			if x != v_1.Args[0] {
+				continue
+			}
+			v_1_1 := v_1.Args[1]
+			if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 64 {
+				continue
+			}
+			v_1_1_0 := v_1_1.Args[0]
+			if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 63 || y != v_1_1_0.Args[0] {
+				continue
+			}
+			v.reset(OpPPC64ROTL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: ( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))))
+	// result: (ROTLW x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpPPC64SLW {
+				continue
+			}
+			_ = v_0.Args[1]
+			x := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int32 || auxIntToInt64(v_0_1.AuxInt) != 31 {
+				continue
+			}
+			y := v_0_1.Args[0]
+			if v_1.Op != OpPPC64SRW {
+				continue
+			}
+			_ = v_1.Args[1]
+			if x != v_1.Args[0] {
+				continue
+			}
+			v_1_1 := v_1.Args[1]
+			if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 32 {
+				continue
+			}
+			v_1_1_0 := v_1_1.Args[0]
+			if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 31 || y != v_1_1_0.Args[0] {
+				continue
+			}
+			v.reset(OpPPC64ROTLW)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: ( OR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))))
 	// result: (ROTLW x y)
 	for {
@@ -12109,6 +12307,69 @@
 		v.AddArg(x)
 		return true
 	}
+	// match: (SUB (MOVDconst [c]) x)
+	// cond: is32Bit(c)
+	// result: (SUBFCconst [c] x)
+	for {
+		if v_0.Op != OpPPC64MOVDconst {
+			break
+		}
+		c := auxIntToInt64(v_0.AuxInt)
+		x := v_1
+		if !(is32Bit(c)) {
+			break
+		}
+		v.reset(OpPPC64SUBFCconst)
+		v.AuxInt = int64ToAuxInt(c)
+		v.AddArg(x)
+		return true
+	}
+	return false
+}
+func rewriteValuePPC64_OpPPC64SUBFCconst(v *Value) bool {
+	v_0 := v.Args[0]
+	// match: (SUBFCconst [c] (NEG x))
+	// result: (ADDconst [c] x)
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpPPC64NEG {
+			break
+		}
+		x := v_0.Args[0]
+		v.reset(OpPPC64ADDconst)
+		v.AuxInt = int64ToAuxInt(c)
+		v.AddArg(x)
+		return true
+	}
+	// match: (SUBFCconst [c] (SUBFCconst [d] x))
+	// cond: is32Bit(c-d)
+	// result: (ADDconst [c-d] x)
+	for {
+		c := auxIntToInt64(v.AuxInt)
+		if v_0.Op != OpPPC64SUBFCconst {
+			break
+		}
+		d := auxIntToInt64(v_0.AuxInt)
+		x := v_0.Args[0]
+		if !(is32Bit(c - d)) {
+			break
+		}
+		v.reset(OpPPC64ADDconst)
+		v.AuxInt = int64ToAuxInt(c - d)
+		v.AddArg(x)
+		return true
+	}
+	// match: (SUBFCconst [0] x)
+	// result: (NEG x)
+	for {
+		if auxIntToInt64(v.AuxInt) != 0 {
+			break
+		}
+		x := v_0
+		v.reset(OpPPC64NEG)
+		v.AddArg(x)
+		return true
+	}
 	return false
 }
 func rewriteValuePPC64_OpPPC64XOR(v *Value) bool {
@@ -12204,6 +12465,76 @@
 		}
 		break
 	}
+	// match: (XOR (SLD x (ANDconst <typ.Int64> [63] y)) (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y))))
+	// result: (ROTL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpPPC64SLD {
+				continue
+			}
+			_ = v_0.Args[1]
+			x := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int64 || auxIntToInt64(v_0_1.AuxInt) != 63 {
+				continue
+			}
+			y := v_0_1.Args[0]
+			if v_1.Op != OpPPC64SRD {
+				continue
+			}
+			_ = v_1.Args[1]
+			if x != v_1.Args[0] {
+				continue
+			}
+			v_1_1 := v_1.Args[1]
+			if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 64 {
+				continue
+			}
+			v_1_1_0 := v_1_1.Args[0]
+			if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 63 || y != v_1_1_0.Args[0] {
+				continue
+			}
+			v.reset(OpPPC64ROTL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y))))
+	// result: (ROTLW x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			if v_0.Op != OpPPC64SLW {
+				continue
+			}
+			_ = v_0.Args[1]
+			x := v_0.Args[0]
+			v_0_1 := v_0.Args[1]
+			if v_0_1.Op != OpPPC64ANDconst || v_0_1.Type != typ.Int32 || auxIntToInt64(v_0_1.AuxInt) != 31 {
+				continue
+			}
+			y := v_0_1.Args[0]
+			if v_1.Op != OpPPC64SRW {
+				continue
+			}
+			_ = v_1.Args[1]
+			if x != v_1.Args[0] {
+				continue
+			}
+			v_1_1 := v_1.Args[1]
+			if v_1_1.Op != OpPPC64SUBFCconst || v_1_1.Type != typ.UInt || auxIntToInt64(v_1_1.AuxInt) != 32 {
+				continue
+			}
+			v_1_1_0 := v_1_1.Args[0]
+			if v_1_1_0.Op != OpPPC64ANDconst || v_1_1_0.Type != typ.UInt || auxIntToInt64(v_1_1_0.AuxInt) != 31 || y != v_1_1_0.Args[0] {
+				continue
+			}
+			v.reset(OpPPC64ROTLW)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
 	// match: (XOR (SLW x (ANDconst <typ.Int32> [31] y)) (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y))))
 	// result: (ROTLW x y)
 	for {
@@ -13175,6 +13506,28 @@
 		v.AddArg2(x, v0)
 		return true
 	}
+	// match: (Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
+	// result: (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 31 {
+			break
+		}
+		y := v_1_0.Args[0]
+		v.reset(OpPPC64SRW)
+		v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+		v0.AuxInt = int64ToAuxInt(32)
+		v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+		v1.AuxInt = int64ToAuxInt(31)
+		v1.AddArg(y)
+		v0.AddArg(v1)
+		v.AddArg2(x, v0)
+		return true
+	}
 	// match: (Rsh32Ux64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31]))))
 	// result: (SRW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
 	for {
@@ -13212,6 +13565,37 @@
 		}
 		break
 	}
+	// match: (Rsh32Ux64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31]))))
+	// result: (SRW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt {
+			break
+		}
+		_ = v_1_0.Args[1]
+		v_1_0_0 := v_1_0.Args[0]
+		v_1_0_1 := v_1_0.Args[1]
+		for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 {
+			y := v_1_0_0
+			if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 31 {
+				continue
+			}
+			v.reset(OpPPC64SRW)
+			v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+			v0.AuxInt = int64ToAuxInt(32)
+			v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+			v1.AuxInt = int64ToAuxInt(31)
+			v1.AddArg(y)
+			v0.AddArg(v1)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
 	// match: (Rsh32Ux64 x y)
 	// result: (SRW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
 	for {
@@ -13482,6 +13866,28 @@
 		v.AddArg2(x, v0)
 		return true
 	}
+	// match: (Rsh32x64 x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
+	// result: (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 31 {
+			break
+		}
+		y := v_1_0.Args[0]
+		v.reset(OpPPC64SRAW)
+		v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+		v0.AuxInt = int64ToAuxInt(32)
+		v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+		v1.AuxInt = int64ToAuxInt(31)
+		v1.AddArg(y)
+		v0.AddArg(v1)
+		v.AddArg2(x, v0)
+		return true
+	}
 	// match: (Rsh32x64 x (SUB <typ.UInt> (MOVDconst [32]) (AND <typ.UInt> y (MOVDconst [31]))))
 	// result: (SRAW x (SUB <typ.UInt> (MOVDconst [32]) (ANDconst <typ.UInt> [31] y)))
 	for {
@@ -13519,6 +13925,37 @@
 		}
 		break
 	}
+	// match: (Rsh32x64 x (SUBFCconst <typ.UInt> [32] (AND <typ.UInt> y (MOVDconst [31]))))
+	// result: (SRAW x (SUBFCconst <typ.UInt> [32] (ANDconst <typ.UInt> [31] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 32 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt {
+			break
+		}
+		_ = v_1_0.Args[1]
+		v_1_0_0 := v_1_0.Args[0]
+		v_1_0_1 := v_1_0.Args[1]
+		for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 {
+			y := v_1_0_0
+			if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 31 {
+				continue
+			}
+			v.reset(OpPPC64SRAW)
+			v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+			v0.AuxInt = int64ToAuxInt(32)
+			v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+			v1.AuxInt = int64ToAuxInt(31)
+			v1.AddArg(y)
+			v0.AddArg(v1)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
 	// match: (Rsh32x64 x y)
 	// result: (SRAW x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [32]))))
 	for {
@@ -13787,6 +14224,28 @@
 		v.AddArg2(x, v0)
 		return true
 	}
+	// match: (Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
+	// result: (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 63 {
+			break
+		}
+		y := v_1_0.Args[0]
+		v.reset(OpPPC64SRD)
+		v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+		v0.AuxInt = int64ToAuxInt(64)
+		v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+		v1.AuxInt = int64ToAuxInt(63)
+		v1.AddArg(y)
+		v0.AddArg(v1)
+		v.AddArg2(x, v0)
+		return true
+	}
 	// match: (Rsh64Ux64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63]))))
 	// result: (SRD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
 	for {
@@ -13824,6 +14283,37 @@
 		}
 		break
 	}
+	// match: (Rsh64Ux64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63]))))
+	// result: (SRD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt {
+			break
+		}
+		_ = v_1_0.Args[1]
+		v_1_0_0 := v_1_0.Args[0]
+		v_1_0_1 := v_1_0.Args[1]
+		for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 {
+			y := v_1_0_0
+			if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 63 {
+				continue
+			}
+			v.reset(OpPPC64SRD)
+			v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+			v0.AuxInt = int64ToAuxInt(64)
+			v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+			v1.AuxInt = int64ToAuxInt(63)
+			v1.AddArg(y)
+			v0.AddArg(v1)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
 	// match: (Rsh64Ux64 x y)
 	// result: (SRD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
 	for {
@@ -14094,6 +14584,28 @@
 		v.AddArg2(x, v0)
 		return true
 	}
+	// match: (Rsh64x64 x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
+	// result: (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64ANDconst || v_1_0.Type != typ.UInt || auxIntToInt64(v_1_0.AuxInt) != 63 {
+			break
+		}
+		y := v_1_0.Args[0]
+		v.reset(OpPPC64SRAD)
+		v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+		v0.AuxInt = int64ToAuxInt(64)
+		v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+		v1.AuxInt = int64ToAuxInt(63)
+		v1.AddArg(y)
+		v0.AddArg(v1)
+		v.AddArg2(x, v0)
+		return true
+	}
 	// match: (Rsh64x64 x (SUB <typ.UInt> (MOVDconst [64]) (AND <typ.UInt> y (MOVDconst [63]))))
 	// result: (SRAD x (SUB <typ.UInt> (MOVDconst [64]) (ANDconst <typ.UInt> [63] y)))
 	for {
@@ -14131,6 +14643,37 @@
 		}
 		break
 	}
+	// match: (Rsh64x64 x (SUBFCconst <typ.UInt> [64] (AND <typ.UInt> y (MOVDconst [63]))))
+	// result: (SRAD x (SUBFCconst <typ.UInt> [64] (ANDconst <typ.UInt> [63] y)))
+	for {
+		x := v_0
+		if v_1.Op != OpPPC64SUBFCconst || v_1.Type != typ.UInt || auxIntToInt64(v_1.AuxInt) != 64 {
+			break
+		}
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpPPC64AND || v_1_0.Type != typ.UInt {
+			break
+		}
+		_ = v_1_0.Args[1]
+		v_1_0_0 := v_1_0.Args[0]
+		v_1_0_1 := v_1_0.Args[1]
+		for _i0 := 0; _i0 <= 1; _i0, v_1_0_0, v_1_0_1 = _i0+1, v_1_0_1, v_1_0_0 {
+			y := v_1_0_0
+			if v_1_0_1.Op != OpPPC64MOVDconst || auxIntToInt64(v_1_0_1.AuxInt) != 63 {
+				continue
+			}
+			v.reset(OpPPC64SRAD)
+			v0 := b.NewValue0(v.Pos, OpPPC64SUBFCconst, typ.UInt)
+			v0.AuxInt = int64ToAuxInt(64)
+			v1 := b.NewValue0(v.Pos, OpPPC64ANDconst, typ.UInt)
+			v1.AuxInt = int64ToAuxInt(63)
+			v1.AddArg(y)
+			v0.AddArg(v1)
+			v.AddArg2(x, v0)
+			return true
+		}
+		break
+	}
 	// match: (Rsh64x64 x y)
 	// result: (SRAD x (ISEL [0] y (MOVDconst [-1]) (CMPU y (MOVDconst [64]))))
 	for {
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index afd4d66..0bdb66a 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -42,6 +42,48 @@
 	return arr[0] - arr[1]
 }
 
+func SubFromConst(a int) int {
+	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	b := 40 - a
+	return b
+}
+
+func SubFromConstNeg(a int) int {
+	// ppc64le: `ADD\t[$]40,\sR[0-9]+,\sR`
+	// ppc64: `ADD\t[$]40,\sR[0-9]+,\sR`
+	c := 40 - (-a)
+	return c
+}
+
+func SubSubFromConst(a int) int {
+	// ppc64le: `ADD\t[$]20,\sR[0-9]+,\sR`
+	// ppc64: `ADD\t[$]20,\sR[0-9]+,\sR`
+	c := 40 - (20 - a)
+	return c
+}
+
+func AddSubFromConst(a int) int {
+	// ppc64le: `SUBC\tR[0-9]+,\s[$]60,\sR`
+	// ppc64: `SUBC\tR[0-9]+,\s[$]60,\sR`
+	c := 40 + (20 - a)
+	return c
+}
+
+func NegSubFromConst(a int) int {
+	// ppc64le: `ADD\t[$]-20,\sR[0-9]+,\sR`
+	// ppc64: `ADD\t[$]-20,\sR[0-9]+,\sR`
+	c := -(20 - a)
+	return c
+}
+
+func NegAddFromConstNeg(a int) int {
+	// ppc64le: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	// ppc64: `SUBC\tR[0-9]+,\s[$]40,\sR`
+	c := -(-40 + a)
+	return c
+}
+
 // -------------------- //
 //    Multiplication    //
 // -------------------- //
diff --git a/test/codegen/mathbits.go b/test/codegen/mathbits.go
index 942605d..4c35f26 100644
--- a/test/codegen/mathbits.go
+++ b/test/codegen/mathbits.go
@@ -76,9 +76,17 @@
 	// arm:"CLZ" arm64:"CLZ"
 	// mips:"CLZ"
 	// wasm:"I64Clz"
+	// ppc64le:"SUBC","CNTLZD"
+	// ppc64:"SUBC","CNTLZD"
 	return bits.Len64(n)
 }
 
+func SubFromLen64(n uint64) int {
+	// ppc64le:"CNTLZD",-"SUBC"
+	// ppc64:"CNTLZD",-"SUBC"
+	return 64 - bits.Len64(n)
+}
+
 func Len32(n uint32) int {
 	// amd64:"BSRQ","LEAQ",-"CMOVQEQ"
 	// s390x:"FLOGR"
@@ -291,6 +299,12 @@
 	return bits.TrailingZeros64(n)
 }
 
+func TrailingZeros64Subtract(n uint64) int {
+	// ppc64le/power8:"NEG","SUBC","ANDN","POPCNTD"
+	// ppc64le/power9:"SUBC","CNTTZD"
+	return bits.TrailingZeros64(1 - n)
+}
+
 func TrailingZeros32(n uint32) int {
 	// amd64:"BTSQ\\t\\$32","BSFQ"
 	// arm:"CLZ"
diff --git a/test/codegen/slices.go b/test/codegen/slices.go
index 40e857f..38e8a62 100644
--- a/test/codegen/slices.go
+++ b/test/codegen/slices.go
@@ -347,3 +347,24 @@
 		42,
 	}
 }
+
+// --------------------------------------- //
+//   Test PPC64 SUBFCconst folding rules   //
+//   triggered by slice operations.        //
+// --------------------------------------- //
+
+func SliceWithConstCompare(a []int, b int) []int {
+	var c []int = []int{1, 2, 3, 4, 5}
+	if b+len(a) < len(c) {
+		// ppc64le:-"NEG"
+		// ppc64:-"NEG"
+		return c[b:]
+	}
+	return a
+}
+
+func SliceWithSubtractBound(a []int, b int) []int {
+	// ppc64le:"SUBC",-"NEG"
+	// ppc64:"SUBC",-"NEG"
+	return a[(3 - b):]
+}