cmd/compile: optimize AMD64's bitwise operations
Currently "arr[idx] |= 0x80" is compiled to MOVLload->BTSL->MOVLstore.
This CL optimizes it to a single BTSLconstmodify. Other bitwise
operations with a direct memory operand are also implemented.
1. The size of the executable bin/go decreases about 4KB, and the total size
of pkg/linux_amd64 (excluding cmd/compile) decreases about 0.6KB.
2. There is a small improvement in the go1 benchmark test (excluding noise).
name old time/op new time/op delta
BinaryTree17-4 2.66s ± 4% 2.66s ± 3% ~ (p=0.596 n=49+49)
Fannkuch11-4 2.38s ± 2% 2.32s ± 2% -2.69% (p=0.000 n=50+50)
FmtFprintfEmpty-4 42.7ns ± 4% 43.2ns ± 7% +1.31% (p=0.009 n=50+50)
FmtFprintfString-4 71.0ns ± 5% 72.0ns ± 3% +1.33% (p=0.000 n=50+50)
FmtFprintfInt-4 80.7ns ± 4% 80.6ns ± 3% ~ (p=0.931 n=50+50)
FmtFprintfIntInt-4 125ns ± 3% 126ns ± 4% ~ (p=0.051 n=50+50)
FmtFprintfPrefixedInt-4 158ns ± 1% 142ns ± 3% -9.84% (p=0.000 n=36+50)
FmtFprintfFloat-4 215ns ± 4% 212ns ± 4% -1.23% (p=0.002 n=50+50)
FmtManyArgs-4 519ns ± 3% 510ns ± 3% -1.77% (p=0.000 n=50+50)
GobDecode-4 6.49ms ± 6% 6.52ms ± 5% ~ (p=0.866 n=50+50)
GobEncode-4 5.93ms ± 8% 6.01ms ± 7% ~ (p=0.076 n=50+50)
Gzip-4 222ms ± 4% 224ms ± 8% +0.80% (p=0.001 n=50+50)
Gunzip-4 36.6ms ± 5% 36.4ms ± 4% ~ (p=0.093 n=50+50)
HTTPClientServer-4 59.1µs ± 1% 58.9µs ± 2% -0.24% (p=0.039 n=49+48)
JSONEncode-4 9.23ms ± 4% 9.21ms ± 5% ~ (p=0.244 n=50+50)
JSONDecode-4 48.8ms ± 4% 48.7ms ± 4% ~ (p=0.653 n=50+50)
Mandelbrot200-4 3.81ms ± 4% 3.80ms ± 3% ~ (p=0.834 n=50+50)
GoParse-4 3.20ms ± 5% 3.19ms ± 5% ~ (p=0.494 n=50+50)
RegexpMatchEasy0_32-4 78.1ns ± 2% 77.4ns ± 3% -0.86% (p=0.005 n=50+50)
RegexpMatchEasy0_1K-4 233ns ± 3% 233ns ± 3% ~ (p=0.074 n=50+50)
RegexpMatchEasy1_32-4 74.2ns ± 3% 73.4ns ± 3% -1.06% (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4 369ns ± 2% 364ns ± 4% -1.41% (p=0.000 n=36+50)
RegexpMatchMedium_32-4 109ns ± 4% 107ns ± 3% -2.06% (p=0.001 n=50+50)
RegexpMatchMedium_1K-4 31.5µs ± 3% 30.8µs ± 3% -2.20% (p=0.000 n=50+50)
RegexpMatchHard_32-4 1.57µs ± 3% 1.56µs ± 2% -0.57% (p=0.016 n=50+50)
RegexpMatchHard_1K-4 47.4µs ± 4% 47.0µs ± 3% -0.82% (p=0.008 n=50+50)
Revcomp-4 414ms ± 7% 412ms ± 7% ~ (p=0.285 n=50+50)
Template-4 64.3ms ± 4% 62.7ms ± 3% -2.44% (p=0.000 n=50+50)
TimeParse-4 316ns ± 3% 313ns ± 3% ~ (p=0.122 n=50+50)
TimeFormat-4 291ns ± 3% 293ns ± 3% +0.80% (p=0.001 n=50+50)
[Geo mean] 46.5µs 46.2µs -0.81%
name old speed new speed delta
GobDecode-4 118MB/s ± 6% 118MB/s ± 5% ~ (p=0.863 n=50+50)
GobEncode-4 130MB/s ± 9% 128MB/s ± 8% ~ (p=0.076 n=50+50)
Gzip-4 87.4MB/s ± 4% 86.8MB/s ± 7% -0.78% (p=0.002 n=50+50)
Gunzip-4 531MB/s ± 5% 533MB/s ± 4% ~ (p=0.093 n=50+50)
JSONEncode-4 210MB/s ± 4% 211MB/s ± 5% ~ (p=0.247 n=50+50)
JSONDecode-4 39.8MB/s ± 4% 39.9MB/s ± 4% ~ (p=0.654 n=50+50)
GoParse-4 18.1MB/s ± 5% 18.2MB/s ± 5% ~ (p=0.493 n=50+50)
RegexpMatchEasy0_32-4 410MB/s ± 2% 413MB/s ± 3% +0.86% (p=0.004 n=50+50)
RegexpMatchEasy0_1K-4 4.39GB/s ± 3% 4.38GB/s ± 3% ~ (p=0.063 n=50+50)
RegexpMatchEasy1_32-4 432MB/s ± 3% 436MB/s ± 3% +1.07% (p=0.000 n=50+50)
RegexpMatchEasy1_1K-4 2.77GB/s ± 2% 2.81GB/s ± 4% +1.46% (p=0.000 n=36+50)
RegexpMatchMedium_32-4 9.16MB/s ± 3% 9.35MB/s ± 4% +2.09% (p=0.001 n=50+50)
RegexpMatchMedium_1K-4 32.5MB/s ± 3% 33.2MB/s ± 3% +2.25% (p=0.000 n=50+50)
RegexpMatchHard_32-4 20.4MB/s ± 3% 20.5MB/s ± 2% +0.56% (p=0.017 n=50+50)
RegexpMatchHard_1K-4 21.6MB/s ± 4% 21.8MB/s ± 3% +0.83% (p=0.008 n=50+50)
Revcomp-4 613MB/s ± 4% 618MB/s ± 7% ~ (p=0.152 n=48+50)
Template-4 30.2MB/s ± 4% 30.9MB/s ± 3% +2.49% (p=0.000 n=50+50)
[Geo mean] 127MB/s 128MB/s +0.64%
Change-Id: If405198283855d75697f66cf894b2bef458f620e
Reviewed-on: https://go-review.googlesource.com/135422
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 818bc35..b4c4b1f 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -695,6 +695,7 @@
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
+ ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
p := s.Prog(v.Op.Asm())
@@ -763,7 +764,8 @@
}
fallthrough
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
- ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
+ ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
+ ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
sc := v.AuxValAndOff()
off := sc.Off()
val := sc.Val()
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 803b889..76a4fc9 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -709,7 +709,17 @@
(ANDL x (MOVLconst [c])) -> (ANDLconst [c] x)
(AND(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [c & d] x)
+(BTR(L|Q)const [c] (AND(L|Q)const [d] x)) -> (AND(L|Q)const [d &^ (1<<uint32(c))] x)
+(AND(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [c &^ (1<<uint32(d))] x)
+(BTR(L|Q)const [c] (BTR(L|Q)const [d] x)) -> (AND(L|Q)const [^(1<<uint32(c) | 1<<uint32(d))] x)
(XOR(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ d] x)
+(BTC(L|Q)const [c] (XOR(L|Q)const [d] x)) -> (XOR(L|Q)const [d ^ 1<<uint32(c)] x)
+(XOR(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [c ^ 1<<uint32(d)] x)
+(BTC(L|Q)const [c] (BTC(L|Q)const [d] x)) -> (XOR(L|Q)const [1<<uint32(c) ^ 1<<uint32(d)] x)
+(OR(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [c | d] x)
+(OR(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [c | 1<<uint32(d)] x)
+(BTS(L|Q)const [c] (OR(L|Q)const [d] x)) -> (OR(L|Q)const [d | 1<<uint32(c)] x)
+(BTS(L|Q)const [c] (BTS(L|Q)const [d] x)) -> (OR(L|Q)const [1<<uint32(d) | 1<<uint32(c)] x)
(MULLconst [c] (MULLconst [d] x)) -> (MULLconst [int64(int32(c * d))] x)
(MULQconst [c] (MULQconst [d] x)) && is32Bit(c*d) -> (MULQconst [c * d] x)
@@ -1051,14 +1061,14 @@
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
-((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
- ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
-((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
- ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
- ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(off1+off2) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem)
// Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validValAndOff(c,off) ->
@@ -1106,18 +1116,18 @@
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
-((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) ->
- ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
-((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
-((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
- ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// generating indexed loads and stores
(MOV(B|W|L|Q|SS|SD)load [off1] {sym1} (LEAQ1 [off2] {sym2} ptr idx) mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
@@ -1424,6 +1434,12 @@
(XORLconst [c] (MOVLconst [d])) -> (MOVLconst [c^d])
(NOTQ (MOVQconst [c])) -> (MOVQconst [^c])
(NOTL (MOVLconst [c])) -> (MOVLconst [^c])
+(BTSQconst [c] (MOVQconst [d])) -> (MOVQconst [d|(1<<uint32(c))])
+(BTSLconst [c] (MOVLconst [d])) -> (MOVLconst [d|(1<<uint32(c))])
+(BTRQconst [c] (MOVQconst [d])) -> (MOVQconst [d&^(1<<uint32(c))])
+(BTRLconst [c] (MOVLconst [d])) -> (MOVLconst [d&^(1<<uint32(c))])
+(BTCQconst [c] (MOVQconst [d])) -> (MOVQconst [d^(1<<uint32(c))])
+(BTCLconst [c] (MOVLconst [d])) -> (MOVLconst [d^(1<<uint32(c))])
// generic simplifications
// TODO: more of this
@@ -2304,11 +2320,11 @@
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
- ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
-(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
- ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
// Merge ADDQconst and LEAQ into atomic loads.
(MOVQatomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(off1+off2) ->
@@ -2392,12 +2408,12 @@
(MOVWQZX (MOVBQZX x)) -> (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) -> (MOVBQZX x)
-(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
- && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
- ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
-(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
- && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) ->
- ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+ && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+ && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a) ->
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
// float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 68faceb..017c070 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -289,6 +289,20 @@
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
+ // direct bit operation on memory operand
+ {name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
+ {name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
+ {name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
+ {name: "BTSLmodify", argLength: 3, reg: gpstore, asm: "BTSL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
+ {name: "BTRQmodify", argLength: 3, reg: gpstore, asm: "BTRQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
+ {name: "BTRLmodify", argLength: 3, reg: gpstore, asm: "BTRL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
+ {name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+ {name: "BTCLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+ {name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+ {name: "BTSLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+ {name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+ {name: "BTRLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
+
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 77b9875..fe63633 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -550,6 +550,18 @@
OpAMD64BTRQconst
OpAMD64BTSLconst
OpAMD64BTSQconst
+ OpAMD64BTCQmodify
+ OpAMD64BTCLmodify
+ OpAMD64BTSQmodify
+ OpAMD64BTSLmodify
+ OpAMD64BTRQmodify
+ OpAMD64BTRLmodify
+ OpAMD64BTCQconstmodify
+ OpAMD64BTCLconstmodify
+ OpAMD64BTSQconstmodify
+ OpAMD64BTSLconstmodify
+ OpAMD64BTRQconstmodify
+ OpAMD64BTRLconstmodify
OpAMD64TESTQ
OpAMD64TESTL
OpAMD64TESTW
@@ -6902,6 +6914,180 @@
},
},
{
+ name: "BTCQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTCQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTCLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTCL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTSQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTSQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTSLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTSL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTRQmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTRQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTRLmodify",
+ auxType: auxSymOff,
+ argLen: 3,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTRL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTCQconstmodify",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTCQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTCLconstmodify",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTCL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTSQconstmodify",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTSQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTSLconstmodify",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTSL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTRQconstmodify",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTRQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
+ name: "BTRLconstmodify",
+ auxType: auxSymValAndOff,
+ argLen: 2,
+ clobberFlags: true,
+ faultOnNilArg0: true,
+ symEffect: SymRead | SymWrite,
+ asm: x86.ABTRL,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15 SB
+ },
+ },
+ },
+ {
name: "TESTQ",
argLen: 2,
commutative: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 98b36a9..cd82a56 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -65,18 +65,46 @@
return rewriteValueAMD64_OpAMD64ANDQmodify_0(v)
case OpAMD64BSFQ:
return rewriteValueAMD64_OpAMD64BSFQ_0(v)
+ case OpAMD64BTCLconst:
+ return rewriteValueAMD64_OpAMD64BTCLconst_0(v)
+ case OpAMD64BTCLconstmodify:
+ return rewriteValueAMD64_OpAMD64BTCLconstmodify_0(v)
+ case OpAMD64BTCLmodify:
+ return rewriteValueAMD64_OpAMD64BTCLmodify_0(v)
+ case OpAMD64BTCQconst:
+ return rewriteValueAMD64_OpAMD64BTCQconst_0(v)
+ case OpAMD64BTCQconstmodify:
+ return rewriteValueAMD64_OpAMD64BTCQconstmodify_0(v)
+ case OpAMD64BTCQmodify:
+ return rewriteValueAMD64_OpAMD64BTCQmodify_0(v)
case OpAMD64BTLconst:
return rewriteValueAMD64_OpAMD64BTLconst_0(v)
case OpAMD64BTQconst:
return rewriteValueAMD64_OpAMD64BTQconst_0(v)
case OpAMD64BTRLconst:
return rewriteValueAMD64_OpAMD64BTRLconst_0(v)
+ case OpAMD64BTRLconstmodify:
+ return rewriteValueAMD64_OpAMD64BTRLconstmodify_0(v)
+ case OpAMD64BTRLmodify:
+ return rewriteValueAMD64_OpAMD64BTRLmodify_0(v)
case OpAMD64BTRQconst:
return rewriteValueAMD64_OpAMD64BTRQconst_0(v)
+ case OpAMD64BTRQconstmodify:
+ return rewriteValueAMD64_OpAMD64BTRQconstmodify_0(v)
+ case OpAMD64BTRQmodify:
+ return rewriteValueAMD64_OpAMD64BTRQmodify_0(v)
case OpAMD64BTSLconst:
return rewriteValueAMD64_OpAMD64BTSLconst_0(v)
+ case OpAMD64BTSLconstmodify:
+ return rewriteValueAMD64_OpAMD64BTSLconstmodify_0(v)
+ case OpAMD64BTSLmodify:
+ return rewriteValueAMD64_OpAMD64BTSLmodify_0(v)
case OpAMD64BTSQconst:
return rewriteValueAMD64_OpAMD64BTSQconst_0(v)
+ case OpAMD64BTSQconstmodify:
+ return rewriteValueAMD64_OpAMD64BTSQconstmodify_0(v)
+ case OpAMD64BTSQmodify:
+ return rewriteValueAMD64_OpAMD64BTSQmodify_0(v)
case OpAMD64CMOVLCC:
return rewriteValueAMD64_OpAMD64CMOVLCC_0(v)
case OpAMD64CMOVLCS:
@@ -278,7 +306,7 @@
case OpAMD64MOVQloadidx8:
return rewriteValueAMD64_OpAMD64MOVQloadidx8_0(v)
case OpAMD64MOVQstore:
- return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v)
+ return rewriteValueAMD64_OpAMD64MOVQstore_0(v) || rewriteValueAMD64_OpAMD64MOVQstore_10(v) || rewriteValueAMD64_OpAMD64MOVQstore_20(v) || rewriteValueAMD64_OpAMD64MOVQstore_30(v)
case OpAMD64MOVQstoreconst:
return rewriteValueAMD64_OpAMD64MOVQstoreconst_0(v)
case OpAMD64MOVQstoreconstidx1:
@@ -3590,6 +3618,22 @@
v.AddArg(x)
return true
}
+ // match: (ANDLconst [c] (BTRLconst [d] x))
+ // cond:
+ // result: (ANDLconst [c &^ (1<<uint32(d))] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTRLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ANDLconst)
+ v.AuxInt = c &^ (1 << uint32(d))
+ v.AddArg(x)
+ return true
+ }
// match: (ANDLconst [ 0xFF] x)
// cond:
// result: (MOVBQZX x)
@@ -4101,6 +4145,22 @@
v.AddArg(x)
return true
}
+ // match: (ANDQconst [c] (BTRQconst [d] x))
+ // cond:
+ // result: (ANDQconst [c &^ (1<<uint32(d))] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTRQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ANDQconst)
+ v.AuxInt = c &^ (1 << uint32(d))
+ v.AddArg(x)
+ return true
+ }
// match: (ANDQconst [ 0xFF] x)
// cond:
// result: (MOVBQZX x)
@@ -4429,6 +4489,320 @@
}
return false
}
+func rewriteValueAMD64_OpAMD64BTCLconst_0(v *Value) bool {
+ // match: (BTCLconst [c] (XORLconst [d] x))
+ // cond:
+ // result: (XORLconst [d ^ 1<<uint32(c)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64XORLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64XORLconst)
+ v.AuxInt = d ^ 1<<uint32(c)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTCLconst [c] (BTCLconst [d] x))
+ // cond:
+ // result: (XORLconst [1<<uint32(c) ^ 1<<uint32(d)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTCLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64XORLconst)
+ v.AuxInt = 1<<uint32(c) ^ 1<<uint32(d)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTCLconst [c] (MOVLconst [d]))
+ // cond:
+ // result: (MOVLconst [d^(1<<uint32(c))])
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64MOVLconst {
+ break
+ }
+ d := v_0.AuxInt
+ v.reset(OpAMD64MOVLconst)
+ v.AuxInt = d ^ (1 << uint32(c))
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTCLconstmodify_0(v *Value) bool {
+ // match: (BTCLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2)
+ // result: (BTCLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2)) {
+ break
+ }
+ v.reset(OpAMD64BTCLconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTCLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+ // result: (BTCLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTCLconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTCLmodify_0(v *Value) bool {
+ // match: (BTCLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (BTCLmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64BTCLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTCLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (BTCLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTCLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTCQconst_0(v *Value) bool {
+ // match: (BTCQconst [c] (XORQconst [d] x))
+ // cond:
+ // result: (XORQconst [d ^ 1<<uint32(c)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64XORQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64XORQconst)
+ v.AuxInt = d ^ 1<<uint32(c)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTCQconst [c] (BTCQconst [d] x))
+ // cond:
+ // result: (XORQconst [1<<uint32(c) ^ 1<<uint32(d)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTCQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64XORQconst)
+ v.AuxInt = 1<<uint32(c) ^ 1<<uint32(d)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTCQconst [c] (MOVQconst [d]))
+ // cond:
+ // result: (MOVQconst [d^(1<<uint32(c))])
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64MOVQconst {
+ break
+ }
+ d := v_0.AuxInt
+ v.reset(OpAMD64MOVQconst)
+ v.AuxInt = d ^ (1 << uint32(c))
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTCQconstmodify_0(v *Value) bool {
+ // match: (BTCQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2)
+ // result: (BTCQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2)) {
+ break
+ }
+ v.reset(OpAMD64BTCQconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTCQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+ // result: (BTCQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTCQconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTCQmodify_0(v *Value) bool {
+ // match: (BTCQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (BTCQmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64BTCQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTCQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (BTCQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTCQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64BTLconst_0(v *Value) bool {
// match: (BTLconst [c] (SHRQconst [d] x))
// cond: (c+d)<64
@@ -4643,6 +5017,160 @@
v.AddArg(x)
return true
}
+ // match: (BTRLconst [c] (ANDLconst [d] x))
+ // cond:
+ // result: (ANDLconst [d &^ (1<<uint32(c))] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ANDLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ANDLconst)
+ v.AuxInt = d &^ (1 << uint32(c))
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTRLconst [c] (BTRLconst [d] x))
+ // cond:
+ // result: (ANDLconst [^(1<<uint32(c) | 1<<uint32(d))] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTRLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ANDLconst)
+ v.AuxInt = ^(1<<uint32(c) | 1<<uint32(d))
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTRLconst [c] (MOVLconst [d]))
+ // cond:
+ // result: (MOVLconst [d&^(1<<uint32(c))])
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64MOVLconst {
+ break
+ }
+ d := v_0.AuxInt
+ v.reset(OpAMD64MOVLconst)
+ v.AuxInt = d &^ (1 << uint32(c))
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTRLconstmodify_0(v *Value) bool {
+ // match: (BTRLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2)
+ // result: (BTRLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2)) {
+ break
+ }
+ v.reset(OpAMD64BTRLconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTRLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+ // result: (BTRLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTRLconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTRLmodify_0(v *Value) bool {
+ // match: (BTRLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (BTRLmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64BTRLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTRLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (BTRLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTRLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64BTRQconst_0(v *Value) bool {
@@ -4682,6 +5210,160 @@
v.AddArg(x)
return true
}
+ // match: (BTRQconst [c] (ANDQconst [d] x))
+ // cond:
+ // result: (ANDQconst [d &^ (1<<uint32(c))] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ANDQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ANDQconst)
+ v.AuxInt = d &^ (1 << uint32(c))
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTRQconst [c] (BTRQconst [d] x))
+ // cond:
+ // result: (ANDQconst [^(1<<uint32(c) | 1<<uint32(d))] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTRQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ANDQconst)
+ v.AuxInt = ^(1<<uint32(c) | 1<<uint32(d))
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTRQconst [c] (MOVQconst [d]))
+ // cond:
+ // result: (MOVQconst [d&^(1<<uint32(c))])
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64MOVQconst {
+ break
+ }
+ d := v_0.AuxInt
+ v.reset(OpAMD64MOVQconst)
+ v.AuxInt = d &^ (1 << uint32(c))
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTRQconstmodify_0(v *Value) bool {
+ // match: (BTRQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2)
+ // result: (BTRQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2)) {
+ break
+ }
+ v.reset(OpAMD64BTRQconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTRQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+ // result: (BTRQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTRQconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTRQmodify_0(v *Value) bool {
+ // match: (BTRQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (BTRQmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64BTRQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTRQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (BTRQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTRQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64BTSLconst_0(v *Value) bool {
@@ -4721,6 +5403,160 @@
v.AddArg(x)
return true
}
+ // match: (BTSLconst [c] (ORLconst [d] x))
+ // cond:
+ // result: (ORLconst [d | 1<<uint32(c)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ORLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORLconst)
+ v.AuxInt = d | 1<<uint32(c)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTSLconst [c] (BTSLconst [d] x))
+ // cond:
+ // result: (ORLconst [1<<uint32(d) | 1<<uint32(c)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTSLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORLconst)
+ v.AuxInt = 1<<uint32(d) | 1<<uint32(c)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTSLconst [c] (MOVLconst [d]))
+ // cond:
+ // result: (MOVLconst [d|(1<<uint32(c))])
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64MOVLconst {
+ break
+ }
+ d := v_0.AuxInt
+ v.reset(OpAMD64MOVLconst)
+ v.AuxInt = d | (1 << uint32(c))
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTSLconstmodify_0(v *Value) bool {
+ // match: (BTSLconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2)
+ // result: (BTSLconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2)) {
+ break
+ }
+ v.reset(OpAMD64BTSLconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTSLconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+ // result: (BTSLconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTSLconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTSLmodify_0(v *Value) bool {
+ // match: (BTSLmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (BTSLmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64BTSLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTSLmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (BTSLmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTSLmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64BTSQconst_0(v *Value) bool {
@@ -4760,6 +5596,160 @@
v.AddArg(x)
return true
}
+ // match: (BTSQconst [c] (ORQconst [d] x))
+ // cond:
+ // result: (ORQconst [d | 1<<uint32(c)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ORQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORQconst)
+ v.AuxInt = d | 1<<uint32(c)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTSQconst [c] (BTSQconst [d] x))
+ // cond:
+ // result: (ORQconst [1<<uint32(d) | 1<<uint32(c)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTSQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORQconst)
+ v.AuxInt = 1<<uint32(d) | 1<<uint32(c)
+ v.AddArg(x)
+ return true
+ }
+ // match: (BTSQconst [c] (MOVQconst [d]))
+ // cond:
+ // result: (MOVQconst [d|(1<<uint32(c))])
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64MOVQconst {
+ break
+ }
+ d := v_0.AuxInt
+ v.reset(OpAMD64MOVQconst)
+ v.AuxInt = d | (1 << uint32(c))
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTSQconstmodify_0(v *Value) bool {
+ // match: (BTSQconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2)
+ // result: (BTSQconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2)) {
+ break
+ }
+ v.reset(OpAMD64BTSQconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTSQconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
+ // cond: ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)
+ // result: (BTSQconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
+ for {
+ valoff1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[1]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ mem := v.Args[1]
+ if !(ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTSQconstmodify)
+ v.AuxInt = ValAndOff(valoff1).add(off2)
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64BTSQmodify_0(v *Value) bool {
+ // match: (BTSQmodify [off1] {sym} (ADDQconst [off2] base) val mem)
+ // cond: is32Bit(off1+off2)
+ // result: (BTSQmodify [off1+off2] {sym} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ADDQconst {
+ break
+ }
+ off2 := v_0.AuxInt
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1 + off2)) {
+ break
+ }
+ v.reset(OpAMD64BTSQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = sym
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (BTSQmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
+ // cond: is32Bit(off1+off2) && canMergeSym(sym1, sym2)
+ // result: (BTSQmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
+ for {
+ off1 := v.AuxInt
+ sym1 := v.Aux
+ _ = v.Args[2]
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64LEAQ {
+ break
+ }
+ off2 := v_0.AuxInt
+ sym2 := v_0.Aux
+ base := v_0.Args[0]
+ val := v.Args[1]
+ mem := v.Args[2]
+ if !(is32Bit(off1+off2) && canMergeSym(sym1, sym2)) {
+ break
+ }
+ v.reset(OpAMD64BTSQmodify)
+ v.AuxInt = off1 + off2
+ v.Aux = mergeSym(sym1, sym2)
+ v.AddArg(base)
+ v.AddArg(val)
+ v.AddArg(mem)
+ return true
+ }
return false
}
func rewriteValueAMD64_OpAMD64CMOVLCC_0(v *Value) bool {
@@ -15876,8 +16866,140 @@
v.AddArg(mem)
return true
}
+ // match: (MOVLstore {sym} [off] ptr y:(BTCL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (BTCLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64BTCL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64BTCLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(BTRL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (BTRLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64BTRL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64BTRLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore {sym} [off] ptr y:(BTSL l:(MOVLload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (BTSLmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64BTSL {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64BTSLmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool {
// match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (ADDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -15905,7 +17027,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64ADDLconstmodify)
@@ -15916,7 +17038,7 @@
return true
}
// match: (MOVLstore [off] {sym} ptr a:(ANDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (ANDLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -15944,7 +17066,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64ANDLconstmodify)
@@ -15955,7 +17077,7 @@
return true
}
// match: (MOVLstore [off] {sym} ptr a:(ORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (ORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -15983,7 +17105,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64ORLconstmodify)
@@ -15993,11 +17115,8 @@
v.AddArg(mem)
return true
}
- return false
-}
-func rewriteValueAMD64_OpAMD64MOVLstore_30(v *Value) bool {
// match: (MOVLstore [off] {sym} ptr a:(XORLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (XORLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -16025,7 +17144,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64XORLconstmodify)
@@ -16035,6 +17154,123 @@
v.AddArg(mem)
return true
}
+ // match: (MOVLstore [off] {sym} ptr a:(BTCLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
+ // result: (BTCLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ a := v.Args[1]
+ if a.Op != OpAMD64BTCLconst {
+ break
+ }
+ c := a.AuxInt
+ l := a.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ ptr2 := l.Args[0]
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
+ break
+ }
+ v.reset(OpAMD64BTCLconstmodify)
+ v.AuxInt = makeValAndOff(c, off)
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore [off] {sym} ptr a:(BTRLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
+ // result: (BTRLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ a := v.Args[1]
+ if a.Op != OpAMD64BTRLconst {
+ break
+ }
+ c := a.AuxInt
+ l := a.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ ptr2 := l.Args[0]
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
+ break
+ }
+ v.reset(OpAMD64BTRLconstmodify)
+ v.AuxInt = makeValAndOff(c, off)
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVLstore [off] {sym} ptr a:(BTSLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
+ // result: (BTSLconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ a := v.Args[1]
+ if a.Op != OpAMD64BTSLconst {
+ break
+ }
+ c := a.AuxInt
+ l := a.Args[0]
+ if l.Op != OpAMD64MOVLload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ ptr2 := l.Args[0]
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
+ break
+ }
+ v.reset(OpAMD64BTSLconstmodify)
+ v.AuxInt = makeValAndOff(c, off)
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
// match: (MOVLstore [off] {sym} ptr (MOVLf2i val) mem)
// cond:
// result: (MOVSSstore [off] {sym} ptr val mem)
@@ -18527,8 +19763,137 @@
v.AddArg(mem)
return true
}
+ // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (BTCQmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64BTCQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64BTCQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (BTRQmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64BTRQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64BTRQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem)
+ // cond: y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l)
+ // result: (BTSQmodify [off] {sym} ptr x mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ y := v.Args[1]
+ if y.Op != OpAMD64BTSQ {
+ break
+ }
+ _ = y.Args[1]
+ l := y.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ if ptr != l.Args[0] {
+ break
+ }
+ mem := l.Args[1]
+ x := y.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(y.Uses == 1 && l.Uses == 1 && clobber(y) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64BTSQmodify)
+ v.AuxInt = off
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(x)
+ v.AddArg(mem)
+ return true
+ }
// match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (ADDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -18556,7 +19921,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64ADDQconstmodify)
@@ -18567,7 +19932,7 @@
return true
}
// match: (MOVQstore [off] {sym} ptr a:(ANDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (ANDQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -18595,7 +19960,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64ANDQconstmodify)
@@ -18606,7 +19971,7 @@
return true
}
// match: (MOVQstore [off] {sym} ptr a:(ORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (ORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -18634,7 +19999,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64ORQconstmodify)
@@ -18645,7 +20010,7 @@
return true
}
// match: (MOVQstore [off] {sym} ptr a:(XORQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
- // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
// result: (XORQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
for {
off := v.AuxInt
@@ -18673,7 +20038,7 @@
if mem != v.Args[2] {
break
}
- if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off)) {
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
break
}
v.reset(OpAMD64XORQconstmodify)
@@ -18683,6 +20048,126 @@
v.AddArg(mem)
return true
}
+ // match: (MOVQstore [off] {sym} ptr a:(BTCQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
+ // result: (BTCQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ a := v.Args[1]
+ if a.Op != OpAMD64BTCQconst {
+ break
+ }
+ c := a.AuxInt
+ l := a.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ ptr2 := l.Args[0]
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
+ break
+ }
+ v.reset(OpAMD64BTCQconstmodify)
+ v.AuxInt = makeValAndOff(c, off)
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+ // match: (MOVQstore [off] {sym} ptr a:(BTRQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
+ // result: (BTRQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ a := v.Args[1]
+ if a.Op != OpAMD64BTRQconst {
+ break
+ }
+ c := a.AuxInt
+ l := a.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ ptr2 := l.Args[0]
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
+ break
+ }
+ v.reset(OpAMD64BTRQconstmodify)
+ v.AuxInt = makeValAndOff(c, off)
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpAMD64MOVQstore_30(v *Value) bool {
+ // match: (MOVQstore [off] {sym} ptr a:(BTSQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
+ // cond: isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c,off) && clobber(l) && clobber(a)
+ // result: (BTSQconstmodify {sym} [makeValAndOff(c,off)] ptr mem)
+ for {
+ off := v.AuxInt
+ sym := v.Aux
+ _ = v.Args[2]
+ ptr := v.Args[0]
+ a := v.Args[1]
+ if a.Op != OpAMD64BTSQconst {
+ break
+ }
+ c := a.AuxInt
+ l := a.Args[0]
+ if l.Op != OpAMD64MOVQload {
+ break
+ }
+ if l.AuxInt != off {
+ break
+ }
+ if l.Aux != sym {
+ break
+ }
+ _ = l.Args[1]
+ ptr2 := l.Args[0]
+ mem := l.Args[1]
+ if mem != v.Args[2] {
+ break
+ }
+ if !(isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && validValAndOff(c, off) && clobber(l) && clobber(a)) {
+ break
+ }
+ v.reset(OpAMD64BTSQconstmodify)
+ v.AuxInt = makeValAndOff(c, off)
+ v.Aux = sym
+ v.AddArg(ptr)
+ v.AddArg(mem)
+ return true
+ }
// match: (MOVQstore [off] {sym} ptr (MOVQf2i val) mem)
// cond:
// result: (MOVSDstore [off] {sym} ptr val mem)
@@ -33141,6 +34626,38 @@
v.AddArg(x)
return true
}
+ // match: (ORLconst [c] (ORLconst [d] x))
+ // cond:
+ // result: (ORLconst [c | d] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ORLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORLconst)
+ v.AuxInt = c | d
+ v.AddArg(x)
+ return true
+ }
+ // match: (ORLconst [c] (BTSLconst [d] x))
+ // cond:
+ // result: (ORLconst [c | 1<<uint32(d)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTSLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORLconst)
+ v.AuxInt = c | 1<<uint32(d)
+ v.AddArg(x)
+ return true
+ }
// match: (ORLconst [c] x)
// cond: int32(c)==0
// result: x
@@ -44160,6 +45677,41 @@
v.AddArg(x)
return true
}
+ // match: (ORQconst [c] (ORQconst [d] x))
+ // cond:
+ // result: (ORQconst [c | d] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64ORQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64ORQconst)
+ v.AuxInt = c | d
+ v.AddArg(x)
+ return true
+ }
+ // match: (ORQconst [c] (BTSQconst [d] x))
+ // cond: is32Bit(c | 1<<uint32(d))
+ // result: (ORQconst [c | 1<<uint32(d)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTSQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(is32Bit(c | 1<<uint32(d))) {
+ break
+ }
+ v.reset(OpAMD64ORQconst)
+ v.AuxInt = c | 1<<uint32(d)
+ v.AddArg(x)
+ return true
+ }
// match: (ORQconst [0] x)
// cond:
// result: x
@@ -54886,6 +56435,22 @@
v.AddArg(x)
return true
}
+ // match: (XORLconst [c] (BTCLconst [d] x))
+ // cond:
+ // result: (XORLconst [c ^ 1<<uint32(d)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTCLconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ v.reset(OpAMD64XORLconst)
+ v.AuxInt = c ^ 1<<uint32(d)
+ v.AddArg(x)
+ return true
+ }
// match: (XORLconst [c] x)
// cond: int32(c)==0
// result: x
@@ -55409,6 +56974,25 @@
v.AddArg(x)
return true
}
+ // match: (XORQconst [c] (BTCQconst [d] x))
+ // cond: is32Bit(c ^ 1<<uint32(d))
+ // result: (XORQconst [c ^ 1<<uint32(d)] x)
+ for {
+ c := v.AuxInt
+ v_0 := v.Args[0]
+ if v_0.Op != OpAMD64BTCQconst {
+ break
+ }
+ d := v_0.AuxInt
+ x := v_0.Args[0]
+ if !(is32Bit(c ^ 1<<uint32(d))) {
+ break
+ }
+ v.reset(OpAMD64XORQconst)
+ v.AuxInt = c ^ 1<<uint32(d)
+ v.AddArg(x)
+ return true
+ }
// match: (XORQconst [0] x)
// cond:
// result: x
diff --git a/test/codegen/bits.go b/test/codegen/bits.go
index e95e3f6..65d57c8 100644
--- a/test/codegen/bits.go
+++ b/test/codegen/bits.go
@@ -270,6 +270,12 @@
a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z]+\)`
a[2] ^= 240
+ // amd64:`BTRL\s[$]15,\s12\([A-Z]+\)`,-`ANDL`
+ a[3] &= 0xffff7fff
+ // amd64:`BTSL\s[$]14,\s16\([A-Z]+\)`,-`ORL`
+ a[4] |= 0x4000
+ // amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL`
+ a[5] ^= 0x2000
}
// Check AND masking on arm64 (Issue #19857)