cmd/compile: optimize ARM64's code with MADD/MSUB
MADD does MUL-ADD in a single instruction, and MSUB does the
similar simplification for MUL-SUB.
The CL implements the optimization with MADD/MSUB.
1. The total size of pkg/android_arm64/ decreases by about 20KB,
excluding cmd/compile/.
2. The go1 benchmark shows a little improvement for RegexpMatchHard_32-4
and Template-4, excluding noise.
name old time/op new time/op delta
BinaryTree17-4 16.3s ± 1% 16.5s ± 1% +1.41% (p=0.000 n=26+28)
Fannkuch11-4 8.79s ± 1% 8.76s ± 0% -0.36% (p=0.000 n=26+28)
FmtFprintfEmpty-4 172ns ± 0% 172ns ± 0% ~ (all equal)
FmtFprintfString-4 362ns ± 1% 364ns ± 0% +0.55% (p=0.000 n=30+30)
FmtFprintfInt-4 416ns ± 0% 416ns ± 0% ~ (p=0.099 n=22+30)
FmtFprintfIntInt-4 655ns ± 1% 660ns ± 1% +0.76% (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4 810ns ± 0% 809ns ± 0% -0.08% (p=0.009 n=29+29)
FmtFprintfFloat-4 1.08µs ± 0% 1.09µs ± 0% +0.61% (p=0.000 n=30+29)
FmtManyArgs-4 2.70µs ± 0% 2.69µs ± 0% -0.23% (p=0.000 n=29+28)
GobDecode-4 32.2ms ± 1% 32.1ms ± 1% -0.39% (p=0.000 n=27+26)
GobEncode-4 27.4ms ± 2% 27.4ms ± 1% ~ (p=0.864 n=28+28)
Gzip-4 1.53s ± 1% 1.52s ± 1% -0.30% (p=0.031 n=29+29)
Gunzip-4 146ms ± 0% 146ms ± 0% -0.14% (p=0.001 n=25+30)
HTTPClientServer-4 1.00ms ± 4% 0.98ms ± 6% -1.65% (p=0.001 n=29+30)
JSONEncode-4 67.3ms ± 1% 67.2ms ± 1% ~ (p=0.520 n=28+28)
JSONDecode-4 329ms ± 5% 330ms ± 4% ~ (p=0.142 n=30+30)
Mandelbrot200-4 17.3ms ± 0% 17.3ms ± 0% ~ (p=0.055 n=26+29)
GoParse-4 16.9ms ± 1% 17.0ms ± 1% +0.82% (p=0.000 n=30+30)
RegexpMatchEasy0_32-4 382ns ± 0% 382ns ± 0% ~ (all equal)
RegexpMatchEasy0_1K-4 1.33µs ± 0% 1.33µs ± 0% -0.25% (p=0.000 n=30+27)
RegexpMatchEasy1_32-4 361ns ± 0% 361ns ± 0% -0.08% (p=0.002 n=30+28)
RegexpMatchEasy1_1K-4 2.11µs ± 0% 2.09µs ± 0% -0.54% (p=0.000 n=30+29)
RegexpMatchMedium_32-4 594ns ± 0% 592ns ± 0% -0.32% (p=0.000 n=30+30)
RegexpMatchMedium_1K-4 173µs ± 0% 172µs ± 0% -0.77% (p=0.000 n=29+27)
RegexpMatchHard_32-4 10.4µs ± 0% 10.1µs ± 0% -3.63% (p=0.000 n=28+27)
RegexpMatchHard_1K-4 306µs ± 0% 301µs ± 0% -1.64% (p=0.000 n=29+30)
Revcomp-4 2.51s ± 1% 2.52s ± 0% +0.18% (p=0.017 n=26+27)
Template-4 394ms ± 3% 382ms ± 3% -3.22% (p=0.000 n=28+28)
TimeParse-4 1.67µs ± 0% 1.67µs ± 0% +0.05% (p=0.030 n=27+30)
TimeFormat-4 1.72µs ± 0% 1.70µs ± 0% -0.79% (p=0.000 n=28+26)
[Geo mean] 259µs 259µs -0.33%
name old speed new speed delta
GobDecode-4 23.8MB/s ± 1% 23.9MB/s ± 1% +0.40% (p=0.001 n=27+26)
GobEncode-4 28.0MB/s ± 2% 28.0MB/s ± 1% ~ (p=0.863 n=28+28)
Gzip-4 12.7MB/s ± 1% 12.7MB/s ± 1% +0.32% (p=0.026 n=29+29)
Gunzip-4 133MB/s ± 0% 133MB/s ± 0% +0.15% (p=0.001 n=24+30)
JSONEncode-4 28.8MB/s ± 1% 28.9MB/s ± 1% ~ (p=0.475 n=28+28)
JSONDecode-4 5.89MB/s ± 4% 5.87MB/s ± 5% ~ (p=0.174 n=29+30)
GoParse-4 3.43MB/s ± 0% 3.40MB/s ± 1% -0.83% (p=0.000 n=28+30)
RegexpMatchEasy0_32-4 83.6MB/s ± 0% 83.6MB/s ± 0% ~ (p=0.848 n=28+29)
RegexpMatchEasy0_1K-4 768MB/s ± 0% 770MB/s ± 0% +0.25% (p=0.000 n=30+27)
RegexpMatchEasy1_32-4 88.5MB/s ± 0% 88.5MB/s ± 0% ~ (p=0.086 n=29+29)
RegexpMatchEasy1_1K-4 486MB/s ± 0% 489MB/s ± 0% +0.54% (p=0.000 n=30+29)
RegexpMatchMedium_32-4 1.68MB/s ± 0% 1.69MB/s ± 0% +0.60% (p=0.000 n=30+23)
RegexpMatchMedium_1K-4 5.90MB/s ± 0% 5.95MB/s ± 0% +0.85% (p=0.000 n=18+20)
RegexpMatchHard_32-4 3.07MB/s ± 0% 3.18MB/s ± 0% +3.72% (p=0.000 n=29+26)
RegexpMatchHard_1K-4 3.35MB/s ± 0% 3.40MB/s ± 0% +1.69% (p=0.000 n=30+30)
Revcomp-4 101MB/s ± 0% 101MB/s ± 0% -0.18% (p=0.018 n=26+27)
Template-4 4.92MB/s ± 4% 5.09MB/s ± 3% +3.31% (p=0.000 n=28+28)
[Geo mean] 22.4MB/s 22.6MB/s +0.62%
Change-Id: I8f304b272785739f57b3c8f736316f658f8c1b2a
Reviewed-on: https://go-review.googlesource.com/129119
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 3712a73..db7064c 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -212,7 +212,11 @@
ssa.OpARM64FMSUBS,
ssa.OpARM64FMSUBD,
ssa.OpARM64FNMSUBS,
- ssa.OpARM64FNMSUBD:
+ ssa.OpARM64FNMSUBD,
+ ssa.OpARM64MADD,
+ ssa.OpARM64MADDW,
+ ssa.OpARM64MSUB,
+ ssa.OpARM64MSUBW:
rt := v.Reg()
ra := v.Args[0].Reg()
rm := v.Args[1].Reg()
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index d207806..374ece2 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -594,6 +594,34 @@
(EQ (CMPWconst [0] x) yes no) -> (ZW x yes no)
(NE (CMPWconst [0] x) yes no) -> (NZW x yes no)
+(EQ (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (EQ (CMN a (MUL <x.Type> x y)) yes no)
+(NE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (NE (CMN a (MUL <x.Type> x y)) yes no)
+(LT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (LT (CMN a (MUL <x.Type> x y)) yes no)
+(LE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (LE (CMN a (MUL <x.Type> x y)) yes no)
+(GT (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (GT (CMN a (MUL <x.Type> x y)) yes no)
+(GE (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 -> (GE (CMN a (MUL <x.Type> x y)) yes no)
+
+(EQ (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (EQ (CMP a (MUL <x.Type> x y)) yes no)
+(NE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (NE (CMP a (MUL <x.Type> x y)) yes no)
+(LE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (LE (CMP a (MUL <x.Type> x y)) yes no)
+(LT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (LT (CMP a (MUL <x.Type> x y)) yes no)
+(GE (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (GE (CMP a (MUL <x.Type> x y)) yes no)
+(GT (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 -> (GT (CMP a (MUL <x.Type> x y)) yes no)
+
+(EQ (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (EQ (CMNW a (MULW <x.Type> x y)) yes no)
+(NE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (NE (CMNW a (MULW <x.Type> x y)) yes no)
+(LE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (LE (CMNW a (MULW <x.Type> x y)) yes no)
+(LT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (LT (CMNW a (MULW <x.Type> x y)) yes no)
+(GE (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (GE (CMNW a (MULW <x.Type> x y)) yes no)
+(GT (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 -> (GT (CMNW a (MULW <x.Type> x y)) yes no)
+
+(EQ (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (EQ (CMPW a (MULW <x.Type> x y)) yes no)
+(NE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (NE (CMPW a (MULW <x.Type> x y)) yes no)
+(LE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (LE (CMPW a (MULW <x.Type> x y)) yes no)
+(LT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (LT (CMPW a (MULW <x.Type> x y)) yes no)
+(GE (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (GE (CMPW a (MULW <x.Type> x y)) yes no)
+(GT (CMPWconst [0] z:(MSUBW a x y)) yes no) && z.Uses==1 -> (GT (CMPW a (MULW <x.Type> x y)) yes no)
+
// Absorb bit-tests into block
(Z (ANDconst [c] x) yes no) && oneBit(c) -> (TBZ {ntz(c)} x yes no)
(NZ (ANDconst [c] x) yes no) && oneBit(c) -> (TBNZ {ntz(c)} x yes no)
@@ -1058,6 +1086,17 @@
(MUL (NEG x) y) -> (MNEG x y)
(MULW (NEG x) y) -> (MNEGW x y)
+// madd/msub
+(ADD a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
+(SUB a l:(MUL x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
+(ADD a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUB a x y)
+(SUB a l:(MNEG x y)) && l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADD a x y)
+
+(ADD a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
+(SUB a l:(MULW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
+(ADD a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MSUBW a x y)
+(SUB a l:(MNEGW x y)) && l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l) -> (MADDW a x y)
+
// mul by constant
(MUL x (MOVDconst [-1])) -> (NEG x)
(MUL _ (MOVDconst [0])) -> (MOVDconst [0])
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 96f2ac3..2c434f4 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -139,6 +139,7 @@
gp1flags = regInfo{inputs: []regMask{gpg}}
gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
+ gp31 = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}}
gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
@@ -235,6 +236,10 @@
{name: "FMSUBD", argLength: 3, reg: fp31, asm: "FMSUBD"}, // +arg0 - (arg1 * arg2)
{name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS"}, // -arg0 + (arg1 * arg2)
{name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2)
+ {name: "MADD", argLength: 3, reg: gp31, asm: "MADD"}, // +arg0 + (arg1 * arg2)
+ {name: "MADDW", argLength: 3, reg: gp31, asm: "MADDW"}, // +arg0 + (arg1 * arg2), 32-bit
+ {name: "MSUB", argLength: 3, reg: gp31, asm: "MSUB"}, // +arg0 - (arg1 * arg2)
+ {name: "MSUBW", argLength: 3, reg: gp31, asm: "MSUBW"}, // +arg0 - (arg1 * arg2), 32-bit
// shifts
{name: "SLL", argLength: 2, reg: gp21, asm: "LSL"}, // arg0 << arg1, shift amount is mod 64
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 1c9d263..6243bfc 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1129,6 +1129,10 @@
OpARM64FMSUBD
OpARM64FNMSUBS
OpARM64FNMSUBD
+ OpARM64MADD
+ OpARM64MADDW
+ OpARM64MSUB
+ OpARM64MSUBW
OpARM64SLL
OpARM64SLLconst
OpARM64SRL
@@ -14955,6 +14959,66 @@
},
},
{
+ name: "MADD",
+ argLen: 3,
+ asm: arm64.AMADD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "MADDW",
+ argLen: 3,
+ asm: arm64.AMADDW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "MSUB",
+ argLen: 3,
+ asm: arm64.AMSUB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
+ name: "MSUBW",
+ argLen: 3,
+ asm: arm64.AMSUBW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+ },
+ outputs: []outputInfo{
+ {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+ },
+ },
+ },
+ {
name: "SLL",
argLen: 2,
asm: arm64.ALSL,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index fc93273..a84d1af 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -16,7 +16,7 @@
func rewriteValueARM64(v *Value) bool {
switch v.Op {
case OpARM64ADD:
- return rewriteValueARM64_OpARM64ADD_0(v)
+ return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v)
case OpARM64ADDconst:
return rewriteValueARM64_OpARM64ADDconst_0(v)
case OpARM64ADDshiftLL:
@@ -284,7 +284,7 @@
case OpARM64STP:
return rewriteValueARM64_OpARM64STP_0(v)
case OpARM64SUB:
- return rewriteValueARM64_OpARM64SUB_0(v)
+ return rewriteValueARM64_OpARM64SUB_0(v) || rewriteValueARM64_OpARM64SUB_10(v)
case OpARM64SUBconst:
return rewriteValueARM64_OpARM64SUBconst_0(v)
case OpARM64SUBshiftLL:
@@ -905,6 +905,185 @@
v.AddArg(x)
return true
}
+ // match: (ADD a l:(MUL x y))
+ // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MADD a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MUL {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MADD)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD l:(MUL x y) a)
+ // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MADD a x y)
+ for {
+ _ = v.Args[1]
+ l := v.Args[0]
+ if l.Op != OpARM64MUL {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ a := v.Args[1]
+ if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MADD)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD a l:(MNEG x y))
+ // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MSUB a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MNEG {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MSUB)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD l:(MNEG x y) a)
+ // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MSUB a x y)
+ for {
+ _ = v.Args[1]
+ l := v.Args[0]
+ if l.Op != OpARM64MNEG {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ a := v.Args[1]
+ if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MSUB)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD a l:(MULW x y))
+ // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MADDW a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MULW {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MADDW)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD l:(MULW x y) a)
+ // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MADDW a x y)
+ for {
+ _ = v.Args[1]
+ l := v.Args[0]
+ if l.Op != OpARM64MULW {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ a := v.Args[1]
+ if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MADDW)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD a l:(MNEGW x y))
+ // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MSUBW a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MNEGW {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MSUBW)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (ADD l:(MNEGW x y) a)
+ // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MSUBW a x y)
+ for {
+ _ = v.Args[1]
+ l := v.Args[0]
+ if l.Op != OpARM64MNEGW {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ a := v.Args[1]
+ if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MSUBW)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ return false
+}
+func rewriteValueARM64_OpARM64ADD_10(v *Value) bool {
// match: (ADD x (NEG y))
// cond:
// result: (SUB x y)
@@ -24624,6 +24803,94 @@
v.AddArg(x)
return true
}
+ // match: (SUB a l:(MUL x y))
+ // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MSUB a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MUL {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MSUB)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (SUB a l:(MNEG x y))
+ // cond: l.Uses==1 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MADD a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MNEG {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MADD)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (SUB a l:(MULW x y))
+ // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MSUBW a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MULW {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MSUBW)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ // match: (SUB a l:(MNEGW x y))
+ // cond: l.Uses==1 && a.Type.Size() != 8 && x.Op!=OpARM64MOVDconst && y.Op!=OpARM64MOVDconst && a.Op!=OpARM64MOVDconst && clobber(l)
+ // result: (MADDW a x y)
+ for {
+ _ = v.Args[1]
+ a := v.Args[0]
+ l := v.Args[1]
+ if l.Op != OpARM64MNEGW {
+ break
+ }
+ _ = l.Args[1]
+ x := l.Args[0]
+ y := l.Args[1]
+ if !(l.Uses == 1 && a.Type.Size() != 8 && x.Op != OpARM64MOVDconst && y.Op != OpARM64MOVDconst && a.Op != OpARM64MOVDconst && clobber(l)) {
+ break
+ }
+ v.reset(OpARM64MADDW)
+ v.AddArg(a)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
// match: (SUB x x)
// cond:
// result: (MOVDconst [0])
@@ -24721,6 +24988,9 @@
v.AddArg(y)
return true
}
+ return false
+}
+func rewriteValueARM64_OpARM64SUB_10(v *Value) bool {
// match: (SUB x0 x1:(SRAconst [c] y))
// cond: clobberIfDead(x1)
// result: (SUBshiftRA x0 y [c])
@@ -32608,6 +32878,138 @@
b.Aux = nil
return true
}
+ // match: (EQ (CMPconst [0] z:(MADD a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (EQ (CMN a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADD {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64EQ
+ v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (EQ (CMPconst [0] z:(MSUB a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (EQ (CMP a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUB {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64EQ
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (EQ (CMPWconst [0] z:(MADDW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (EQ (CMNW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADDW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64EQ
+ v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (EQ (CMPWconst [0] z:(MSUBW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (EQ (CMPW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUBW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64EQ
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
// match: (EQ (TSTconst [c] x) yes no)
// cond: oneBit(c)
// result: (TBZ {ntz(c)} x yes no)
@@ -32784,6 +33186,138 @@
b.Aux = nil
return true
}
+ // match: (GE (CMPconst [0] z:(MADD a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GE (CMN a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADD {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GE
+ v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (GE (CMPconst [0] z:(MSUB a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GE (CMP a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUB {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GE
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (GE (CMPWconst [0] z:(MADDW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GE (CMNW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADDW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GE
+ v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (GE (CMPWconst [0] z:(MSUBW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GE (CMPW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUBW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GE
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
// match: (GE (CMPWconst [0] x) yes no)
// cond:
// result: (TBZ {int64(31)} x yes no)
@@ -32956,6 +33490,138 @@
b.Aux = nil
return true
}
+ // match: (GT (CMPconst [0] z:(MADD a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GT (CMN a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADD {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GT
+ v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (GT (CMPconst [0] z:(MSUB a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GT (CMP a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUB {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GT
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (GT (CMPWconst [0] z:(MADDW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GT (CMNW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADDW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GT
+ v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (GT (CMPWconst [0] z:(MSUBW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (GT (CMPW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUBW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64GT
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
// match: (GT (FlagEQ) yes no)
// cond:
// result: (First nil no yes)
@@ -33248,6 +33914,138 @@
b.Aux = nil
return true
}
+ // match: (LE (CMPconst [0] z:(MADD a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LE (CMN a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADD {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LE
+ v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (LE (CMPconst [0] z:(MSUB a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LE (CMP a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUB {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LE
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (LE (CMPWconst [0] z:(MADDW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LE (CMNW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADDW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LE
+ v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (LE (CMPWconst [0] z:(MSUBW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LE (CMPW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUBW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LE
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
// match: (LE (FlagEQ) yes no)
// cond:
// result: (First nil yes no)
@@ -33386,6 +34184,138 @@
b.Aux = nil
return true
}
+ // match: (LT (CMPconst [0] z:(MADD a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LT (CMN a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADD {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LT
+ v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (LT (CMPconst [0] z:(MSUB a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LT (CMP a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUB {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LT
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (LT (CMPWconst [0] z:(MADDW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LT (CMNW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADDW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LT
+ v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (LT (CMPWconst [0] z:(MSUBW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (LT (CMPW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUBW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64LT
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
// match: (LT (CMPWconst [0] x) yes no)
// cond:
// result: (TBNZ {int64(31)} x yes no)
@@ -33706,6 +34636,138 @@
b.Aux = nil
return true
}
+ // match: (NE (CMPconst [0] z:(MADD a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (NE (CMN a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADD {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64NE
+ v0 := b.NewValue0(v.Pos, OpARM64CMN, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (NE (CMPconst [0] z:(MSUB a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (NE (CMP a (MUL <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUB {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64NE
+ v0 := b.NewValue0(v.Pos, OpARM64CMP, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MUL, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (NE (CMPWconst [0] z:(MADDW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (NE (CMNW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MADDW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64NE
+ v0 := b.NewValue0(v.Pos, OpARM64CMNW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
+ // match: (NE (CMPWconst [0] z:(MSUBW a x y)) yes no)
+ // cond: z.Uses==1
+ // result: (NE (CMPW a (MULW <x.Type> x y)) yes no)
+ for {
+ v := b.Control
+ if v.Op != OpARM64CMPWconst {
+ break
+ }
+ if v.AuxInt != 0 {
+ break
+ }
+ z := v.Args[0]
+ if z.Op != OpARM64MSUBW {
+ break
+ }
+ _ = z.Args[2]
+ a := z.Args[0]
+ x := z.Args[1]
+ y := z.Args[2]
+ if !(z.Uses == 1) {
+ break
+ }
+ b.Kind = BlockARM64NE
+ v0 := b.NewValue0(v.Pos, OpARM64CMPW, types.TypeFlags)
+ v0.AddArg(a)
+ v1 := b.NewValue0(v.Pos, OpARM64MULW, x.Type)
+ v1.AddArg(x)
+ v1.AddArg(y)
+ v0.AddArg(v1)
+ b.SetControl(v0)
+ b.Aux = nil
+ return true
+ }
// match: (NE (TSTconst [c] x) yes no)
// cond: oneBit(c)
// result: (TBNZ {ntz(c)} x yes no)
diff --git a/test/codegen/arithmetic.go b/test/codegen/arithmetic.go
index 09a2fa0..c053925 100644
--- a/test/codegen/arithmetic.go
+++ b/test/codegen/arithmetic.go
@@ -205,3 +205,9 @@
// amd64:"LEAQ\t1"
return 2*x + 1
}
+
+func MULA(a, b, c uint32) uint32 {
+	// arm:`MULA`
+	// arm64:`MADDW`
+	return a*b + c // expect a single fused multiply-add instruction, not MUL+ADD
+}