[dev.ssa] cmd/compile: strength-reduce 64-bit constant divides
The frontend already does this for 32-bit and smaller divides, but SSA
needs to do it for 64-bit ones. The algorithms are all copied from
cgen.go:cgen_div.

Speeds up TimeFormat substantially: from ~40% slower to ~10% slower.
Change-Id: I023ea2eb6040df98ccd9105e15ca6ea695610a7a
Reviewed-on: https://go-review.googlesource.com/19302
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
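
(Illustration, not part of the patch.) The unsigned form of this strength
reduction replaces x/c with a high multiply and a shift. A minimal Go sketch,
assuming the well-known Hacker's Delight magic numbers for c=3
(M = ceil(2^65/3) = 0xAAAAAAAAAAAAAAAB, shift 1); these are textbook
constants, not values read out of the compiler:

	package main

	import (
		"fmt"
		"math/bits"
	)

	// div3u divides an unsigned 64-bit value by 3 using the multiply-and-shift
	// shape the new Div64u rules generate: q = Hmul64u(M, x) >> s.
	// M and s below are the standard magic numbers for this divisor
	// (hypothetical illustration, hard-coded here).
	func div3u(x uint64) uint64 {
		const m = 0xAAAAAAAAAAAAAAAB
		hi, _ := bits.Mul64(x, m) // high 64 bits of the 128-bit product
		return hi >> 1
	}

	func main() {
		for _, x := range []uint64{0, 1, 2, 3, 100, 1<<63 + 12345, ^uint64(0)} {
			fmt.Println(x, div3u(x), x/3, div3u(x) == x/3)
		}
	}

The Div64u rules added in generic.rules below generate exactly this shape,
with umagic64m and umagic64s supplying the constants for arbitrary divisors.
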
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 465d703..15457b8 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -45,6 +45,8 @@
(Div8 x y) -> (DIVW (SignExt8to16 x) (SignExt8to16 y))
(Div8u x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
+(Hmul64 x y) -> (HMULQ x y)
+(Hmul64u x y) -> (HMULQU x y)
(Hmul32 x y) -> (HMULL x y)
(Hmul32u x y) -> (HMULLU x y)
(Hmul16 x y) -> (HMULW x y)
@@ -52,6 +54,8 @@
(Hmul8 x y) -> (HMULB x y)
(Hmul8u x y) -> (HMULBU x y)
+(Avg64u x y) -> (AVGQU x y)
+
(Mod64 x y) -> (MODQ x y)
(Mod64u x y) -> (MODQU x y)
(Mod32 x y) -> (MODL x y)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 7fcf247..d139145 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -193,13 +193,17 @@
{name: "MULWconst", reg: gp11, asm: "IMULW", aux: "Int16"}, // arg0 * auxint
{name: "MULBconst", reg: gp11, asm: "IMULW", aux: "Int8"}, // arg0 * auxint
+ {name: "HMULQ", reg: gp11hmul, asm: "IMULQ"}, // (arg0 * arg1) >> width
{name: "HMULL", reg: gp11hmul, asm: "IMULL"}, // (arg0 * arg1) >> width
{name: "HMULW", reg: gp11hmul, asm: "IMULW"}, // (arg0 * arg1) >> width
{name: "HMULB", reg: gp11hmul, asm: "IMULB"}, // (arg0 * arg1) >> width
+ {name: "HMULQU", reg: gp11hmul, asm: "MULQ"}, // (arg0 * arg1) >> width
{name: "HMULLU", reg: gp11hmul, asm: "MULL"}, // (arg0 * arg1) >> width
{name: "HMULWU", reg: gp11hmul, asm: "MULW"}, // (arg0 * arg1) >> width
{name: "HMULBU", reg: gp11hmul, asm: "MULB"}, // (arg0 * arg1) >> width
+ {name: "AVGQU", reg: gp21}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
+
{name: "DIVQ", reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
{name: "DIVL", reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
{name: "DIVW", reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 28fe9ff..2b811cc 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -514,3 +514,100 @@
(Arg <t.FieldType(1)> {n} [off+t.FieldOff(1)])
(Arg <t.FieldType(2)> {n} [off+t.FieldOff(2)])
(Arg <t.FieldType(3)> {n} [off+t.FieldOff(3)]))
+
+// strength reduction of divide by a constant.
+// Note: frontend does <=32 bits. We only need to do 64 bits here.
+// TODO: Do them all here?
+
+// Div/mod by 1. Currently handled by frontend.
+//(Div64 n (Const64 [1])) -> n
+//(Div64u n (Const64 [1])) -> n
+//(Mod64 n (Const64 [1])) -> (Const64 [0])
+//(Mod64u n (Const64 [1])) -> (Const64 [0])
+
+// Unsigned divide by power of 2. Currently handled by frontend.
+//(Div64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (Rsh64Ux64 n (Const64 <t> [log2(c)]))
+//(Mod64u <t> n (Const64 [c])) && isPowerOfTwo(c) -> (And64 n (Const64 <t> [c-1]))
+
+// Signed divide by power of 2. Currently handled by frontend.
+// n / c = n >> log(c) if n >= 0
+// = (n+c-1) >> log(c) if n < 0
+// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
+//(Div64 <t> n (Const64 [c])) && isPowerOfTwo(c) ->
+// (Rsh64x64
+// (Add64 <t>
+// n
+// (Rsh64Ux64 <t>
+// (Rsh64x64 <t> n (Const64 <t> [63]))
+// (Const64 <t> [64-log2(c)])))
+// (Const64 <t> [log2(c)]))
+
+// Unsigned divide, not a power of 2. Strength reduce to a multiply.
+(Div64u <t> x (Const64 [c])) && umagic64ok(c) && !umagic64a(c) ->
+ (Rsh64Ux64
+ (Hmul64u <t>
+ (Const64 <t> [umagic64m(c)])
+ x)
+ (Const64 <t> [umagic64s(c)]))
+(Div64u <t> x (Const64 [c])) && umagic64ok(c) && umagic64a(c) ->
+ (Rsh64Ux64
+ (Avg64u <t>
+ (Hmul64u <t>
+ x
+ (Const64 <t> [umagic64m(c)]))
+ x)
+ (Const64 <t> [umagic64s(c)-1]))
+
+// Signed divide, not a power of 2. Strength reduce to a multiply.
+(Div64 <t> x (Const64 [c])) && c > 0 && smagic64ok(c) && smagic64m(c) > 0 ->
+ (Sub64 <t>
+ (Rsh64x64 <t>
+ (Hmul64 <t>
+ (Const64 <t> [smagic64m(c)])
+ x)
+ (Const64 <t> [smagic64s(c)]))
+ (Rsh64x64 <t>
+ x
+ (Const64 <t> [63])))
+(Div64 <t> x (Const64 [c])) && c > 0 && smagic64ok(c) && smagic64m(c) < 0 ->
+ (Sub64 <t>
+ (Rsh64x64 <t>
+ (Add64 <t>
+ (Hmul64 <t>
+ (Const64 <t> [smagic64m(c)])
+ x)
+ x)
+ (Const64 <t> [smagic64s(c)]))
+ (Rsh64x64 <t>
+ x
+ (Const64 <t> [63])))
+(Div64 <t> x (Const64 [c])) && c < 0 && smagic64ok(c) && smagic64m(c) > 0 ->
+ (Neg64 <t>
+ (Sub64 <t>
+ (Rsh64x64 <t>
+ (Hmul64 <t>
+ (Const64 <t> [smagic64m(c)])
+ x)
+ (Const64 <t> [smagic64s(c)]))
+ (Rsh64x64 <t>
+ x
+ (Const64 <t> [63]))))
+(Div64 <t> x (Const64 [c])) && c < 0 && smagic64ok(c) && smagic64m(c) < 0 ->
+ (Neg64 <t>
+ (Sub64 <t>
+ (Rsh64x64 <t>
+ (Add64 <t>
+ (Hmul64 <t>
+ (Const64 <t> [smagic64m(c)])
+ x)
+ x)
+ (Const64 <t> [smagic64s(c)]))
+ (Rsh64x64 <t>
+ x
+ (Const64 <t> [63]))))
+
+// A%B = A-(A/B*B).
+// This implements % with two * and a bunch of ancillary ops.
+// One of the * is free if the user's code also computes A/B.
+(Mod64 <t> x (Const64 [c])) && smagic64ok(c) -> (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
+(Mod64u <t> x (Const64 [c])) && umagic64ok(c) -> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
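
(Illustration, not part of the patch.) The signed rules above have the same
multiply-and-shift shape plus a sign fixup: for c > 0 with a positive magic
multiplier, q = (Hmul64(M, x) >> s) - (x >> 63). A Go sketch for c = 3, using
the standard 64-bit signed magic M = 0x5555555555555556 with s = 0 (assumed
textbook constants, not produced by this CL's code):

	package main

	import (
		"fmt"
		"math/bits"
	)

	// hmul64 returns the high 64 bits of the full 128-bit signed product a*b,
	// i.e. the generic Hmul64 op, built from the unsigned multiply:
	// hi_signed(a*b) = hi_unsigned(a*b) - (b if a < 0) - (a if b < 0).
	func hmul64(a, b int64) int64 {
		hi, _ := bits.Mul64(uint64(a), uint64(b))
		h := int64(hi)
		if a < 0 {
			h -= b
		}
		if b < 0 {
			h -= a
		}
		return h
	}

	// div3s computes x/3 truncated toward zero, in the shape of the
	// "c > 0 && smagic64m(c) > 0" rule above (hypothetical illustration).
	func div3s(x int64) int64 {
		const m = 0x5555555555555556
		return hmul64(m, x) - x>>63 // shift by s is a no-op since s == 0
	}

	func main() {
		for _, x := range []int64{-100, -7, -1, 0, 1, 7, 100, 1 << 62} {
			fmt.Println(x, div3s(x), x/3, div3s(x) == x/3)
		}
	}

Subtracting x>>63 adds 1 exactly when x is negative, turning the floor-style
rounding of the shift into the truncation toward zero that Go's / requires.
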
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 3c7aa84..ec74859 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -41,7 +41,11 @@
{name: "Hmul16u"},
{name: "Hmul32"},
{name: "Hmul32u"},
- // frontend currently doesn't generate a 64 bit hmul
+ {name: "Hmul64"},
+ {name: "Hmul64u"},
+
+ // Weird special instruction for strength reduction of divides.
+ {name: "Avg64u"}, // (uint64(arg0) + uint64(arg1)) / 2, correct to all 64 bits.
{name: "Div8"}, // arg0 / arg1
{name: "Div8u"},
diff --git a/src/cmd/compile/internal/ssa/magic.go b/src/cmd/compile/internal/ssa/magic.go
new file mode 100644
index 0000000..a8e84d5
--- /dev/null
+++ b/src/cmd/compile/internal/ssa/magic.go
@@ -0,0 +1,260 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ssa
+
+// A copy of the code in ../gc/subr.go.
+// We can't use it directly because it would generate
+// an import cycle. TODO: move to a common support package.
+
+// argument passing to/from
+// smagic and umagic
+type magic struct {
+ W int // input for both - width
+ S int // output for both - shift
+ Bad int // output for both - unexpected failure
+
+ // magic multiplier for signed literal divisors
+ Sd int64 // input - literal divisor
+ Sm int64 // output - multiplier
+
+ // magic multiplier for unsigned literal divisors
+ Ud uint64 // input - literal divisor
+ Um uint64 // output - multiplier
+ Ua int // output - adder
+}
+
+// magic number for signed division
+// see hacker's delight chapter 10
+func smagic(m *magic) {
+ var mask uint64
+
+ m.Bad = 0
+ switch m.W {
+ default:
+ m.Bad = 1
+ return
+
+ case 8:
+ mask = 0xff
+
+ case 16:
+ mask = 0xffff
+
+ case 32:
+ mask = 0xffffffff
+
+ case 64:
+ mask = 0xffffffffffffffff
+ }
+
+ two31 := mask ^ (mask >> 1)
+
+ p := m.W - 1
+ ad := uint64(m.Sd)
+ if m.Sd < 0 {
+ ad = -uint64(m.Sd)
+ }
+
+ // bad denominators
+ if ad == 0 || ad == 1 || ad == two31 {
+ m.Bad = 1
+ return
+ }
+
+ t := two31
+ ad &= mask
+
+ anc := t - 1 - t%ad
+ anc &= mask
+
+ q1 := two31 / anc
+ r1 := two31 - q1*anc
+ q1 &= mask
+ r1 &= mask
+
+ q2 := two31 / ad
+ r2 := two31 - q2*ad
+ q2 &= mask
+ r2 &= mask
+
+ var delta uint64
+ for {
+ p++
+ q1 <<= 1
+ r1 <<= 1
+ q1 &= mask
+ r1 &= mask
+ if r1 >= anc {
+ q1++
+ r1 -= anc
+ q1 &= mask
+ r1 &= mask
+ }
+
+ q2 <<= 1
+ r2 <<= 1
+ q2 &= mask
+ r2 &= mask
+ if r2 >= ad {
+ q2++
+ r2 -= ad
+ q2 &= mask
+ r2 &= mask
+ }
+
+ delta = ad - r2
+ delta &= mask
+ if q1 < delta || (q1 == delta && r1 == 0) {
+ continue
+ }
+
+ break
+ }
+
+ m.Sm = int64(q2 + 1)
+ if uint64(m.Sm)&two31 != 0 {
+ m.Sm |= ^int64(mask)
+ }
+ m.S = p - m.W
+}
+
+// magic number for unsigned division
+// see hacker's delight chapter 10
+func umagic(m *magic) {
+ var mask uint64
+
+ m.Bad = 0
+ m.Ua = 0
+
+ switch m.W {
+ default:
+ m.Bad = 1
+ return
+
+ case 8:
+ mask = 0xff
+
+ case 16:
+ mask = 0xffff
+
+ case 32:
+ mask = 0xffffffff
+
+ case 64:
+ mask = 0xffffffffffffffff
+ }
+
+ two31 := mask ^ (mask >> 1)
+
+ m.Ud &= mask
+ if m.Ud == 0 || m.Ud == two31 {
+ m.Bad = 1
+ return
+ }
+
+ nc := mask - (-m.Ud&mask)%m.Ud
+ p := m.W - 1
+
+ q1 := two31 / nc
+ r1 := two31 - q1*nc
+ q1 &= mask
+ r1 &= mask
+
+ q2 := (two31 - 1) / m.Ud
+ r2 := (two31 - 1) - q2*m.Ud
+ q2 &= mask
+ r2 &= mask
+
+ var delta uint64
+ for {
+ p++
+ if r1 >= nc-r1 {
+ q1 <<= 1
+ q1++
+ r1 <<= 1
+ r1 -= nc
+ } else {
+ q1 <<= 1
+ r1 <<= 1
+ }
+
+ q1 &= mask
+ r1 &= mask
+ if r2+1 >= m.Ud-r2 {
+ if q2 >= two31-1 {
+ m.Ua = 1
+ }
+
+ q2 <<= 1
+ q2++
+ r2 <<= 1
+ r2++
+ r2 -= m.Ud
+ } else {
+ if q2 >= two31 {
+ m.Ua = 1
+ }
+
+ q2 <<= 1
+ r2 <<= 1
+ r2++
+ }
+
+ q2 &= mask
+ r2 &= mask
+
+ delta = m.Ud - 1 - r2
+ delta &= mask
+
+ if p < m.W+m.W {
+ if q1 < delta || (q1 == delta && r1 == 0) {
+ continue
+ }
+ }
+
+ break
+ }
+
+ m.Um = q2 + 1
+ m.S = p - m.W
+}
+
+// adaptors for use by rewrite rules
+func smagic64ok(d int64) bool {
+ m := magic{W: 64, Sd: d}
+ smagic(&m)
+ return m.Bad == 0
+}
+func smagic64m(d int64) int64 {
+ m := magic{W: 64, Sd: d}
+ smagic(&m)
+ return m.Sm
+}
+func smagic64s(d int64) int64 {
+ m := magic{W: 64, Sd: d}
+ smagic(&m)
+ return int64(m.S)
+}
+
+func umagic64ok(d int64) bool {
+ m := magic{W: 64, Ud: uint64(d)}
+ umagic(&m)
+ return m.Bad == 0
+}
+func umagic64m(d int64) int64 {
+ m := magic{W: 64, Ud: uint64(d)}
+ umagic(&m)
+ return int64(m.Um)
+}
+func umagic64s(d int64) int64 {
+ m := magic{W: 64, Ud: uint64(d)}
+ umagic(&m)
+ return int64(m.S)
+}
+func umagic64a(d int64) bool {
+ m := magic{W: 64, Ud: uint64(d)}
+ umagic(&m)
+ return m.Ua != 0
+}
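
(Illustration, not part of the patch.) One way to sanity-check the adaptors is
to evaluate exactly the expression shapes the Div64u rules build and compare
them against the machine division for random inputs. A hypothetical test
sketch; the file name and its placement next to magic.go in package ssa are
assumptions:

	// magic_div_test.go (hypothetical sketch, not part of this CL)
	package ssa

	import (
		"math/bits"
		"math/rand"
		"testing"
	)

	// TestUmagic64Samples spot-checks the unsigned 64-bit magic numbers by
	// evaluating the same shapes the Div64u rules produce:
	//   q = Hmul64u(M, x) >> s                  when umagic64a(c) is false
	//   q = Avg64u(Hmul64u(M, x), x) >> (s-1)   when umagic64a(c) is true
	// and comparing against x/c for random x.
	func TestUmagic64Samples(t *testing.T) {
		for _, c := range []int64{3, 5, 6, 7, 9, 10, 11, 1000, 1<<32 + 1} {
			if !umagic64ok(c) {
				t.Fatalf("umagic64ok(%d) = false", c)
			}
			m := uint64(umagic64m(c))
			s := uint(umagic64s(c))
			for i := 0; i < 1000; i++ {
				x := rand.Uint64()
				hi, _ := bits.Mul64(x, m) // Hmul64u
				var q uint64
				if !umagic64a(c) {
					q = hi >> s
				} else {
					// Avg64u: 65-bit-exact (hi+x)/2, then shift by s-1.
					sum, carry := bits.Add64(hi, x, 0)
					q = (sum>>1 | carry<<63) >> (s - 1)
				}
				if want := x / uint64(c); q != want {
					t.Errorf("c=%d x=%d: got %d, want %d", c, x, q, want)
				}
			}
		}
	}
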
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 8ce9c82..dfd9df8 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -109,12 +109,15 @@
OpAMD64MULLconst
OpAMD64MULWconst
OpAMD64MULBconst
+ OpAMD64HMULQ
OpAMD64HMULL
OpAMD64HMULW
OpAMD64HMULB
+ OpAMD64HMULQU
OpAMD64HMULLU
OpAMD64HMULWU
OpAMD64HMULBU
+ OpAMD64AVGQU
OpAMD64DIVQ
OpAMD64DIVL
OpAMD64DIVW
@@ -331,6 +334,9 @@
OpHmul16u
OpHmul32
OpHmul32u
+ OpHmul64
+ OpHmul64u
+ OpAvg64u
OpDiv8
OpDiv8u
OpDiv16
@@ -1145,6 +1151,20 @@
},
},
{
+ name: "HMULQ",
+ asm: x86.AIMULQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1}, // .AX
+ {1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+ },
+ clobbers: 8589934593, // .AX .FLAGS
+ outputs: []regMask{
+ 4, // .DX
+ },
+ },
+ },
+ {
name: "HMULL",
asm: x86.AIMULL,
reg: regInfo{
@@ -1187,6 +1207,20 @@
},
},
{
+ name: "HMULQU",
+ asm: x86.AMULQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 1}, // .AX
+ {1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+ },
+ clobbers: 8589934593, // .AX .FLAGS
+ outputs: []regMask{
+ 4, // .DX
+ },
+ },
+ },
+ {
name: "HMULLU",
asm: x86.AMULL,
reg: regInfo{
@@ -1229,6 +1263,19 @@
},
},
{
+ name: "AVGQU",
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+ {1, 65535}, // .AX .CX .DX .BX .SP .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+ },
+ clobbers: 8589934592, // .FLAGS
+ outputs: []regMask{
+ 65519, // .AX .CX .DX .BX .BP .SI .DI .R8 .R9 .R10 .R11 .R12 .R13 .R14 .R15
+ },
+ },
+ },
+ {
name: "DIVQ",
asm: x86.AIDIVQ,
reg: regInfo{
@@ -3662,6 +3709,18 @@
generic: true,
},
{
+ name: "Hmul64",
+ generic: true,
+ },
+ {
+ name: "Hmul64u",
+ generic: true,
+ },
+ {
+ name: "Avg64u",
+ generic: true,
+ },
+ {
name: "Div8",
generic: true,
},
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index a559344..601e9b8 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -63,6 +63,8 @@
return rewriteValueAMD64_OpAnd64(v, config)
case OpAnd8:
return rewriteValueAMD64_OpAnd8(v, config)
+ case OpAvg64u:
+ return rewriteValueAMD64_OpAvg64u(v, config)
case OpAMD64CMPB:
return rewriteValueAMD64_OpAMD64CMPB(v, config)
case OpAMD64CMPBconst:
@@ -217,6 +219,10 @@
return rewriteValueAMD64_OpHmul32(v, config)
case OpHmul32u:
return rewriteValueAMD64_OpHmul32u(v, config)
+ case OpHmul64:
+ return rewriteValueAMD64_OpHmul64(v, config)
+ case OpHmul64u:
+ return rewriteValueAMD64_OpHmul64u(v, config)
case OpHmul8:
return rewriteValueAMD64_OpHmul8(v, config)
case OpHmul8u:
@@ -1972,6 +1978,22 @@
}
return false
}
+func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Avg64u x y)
+ // cond:
+ // result: (AVGQU x y)
+ for {
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpAMD64AVGQU)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -3755,6 +3777,38 @@
}
return false
}
+func rewriteValueAMD64_OpHmul64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Hmul64 x y)
+ // cond:
+ // result: (HMULQ x y)
+ for {
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpAMD64HMULQ)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ return false
+}
+func rewriteValueAMD64_OpHmul64u(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Hmul64u x y)
+ // cond:
+ // result: (HMULQU x y)
+ for {
+ x := v.Args[0]
+ y := v.Args[1]
+ v.reset(OpAMD64HMULQU)
+ v.AddArg(x)
+ v.AddArg(y)
+ return true
+ }
+ return false
+}
func rewriteValueAMD64_OpHmul8(v *Value, config *Config) bool {
b := v.Block
_ = b
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 0c091c7..a5d8a4d 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -47,6 +47,10 @@
return rewriteValuegeneric_OpConstString(v, config)
case OpConvert:
return rewriteValuegeneric_OpConvert(v, config)
+ case OpDiv64:
+ return rewriteValuegeneric_OpDiv64(v, config)
+ case OpDiv64u:
+ return rewriteValuegeneric_OpDiv64u(v, config)
case OpEq16:
return rewriteValuegeneric_OpEq16(v, config)
case OpEq32:
@@ -167,6 +171,10 @@
return rewriteValuegeneric_OpLsh8x64(v, config)
case OpLsh8x8:
return rewriteValuegeneric_OpLsh8x8(v, config)
+ case OpMod64:
+ return rewriteValuegeneric_OpMod64(v, config)
+ case OpMod64u:
+ return rewriteValuegeneric_OpMod64u(v, config)
case OpMul16:
return rewriteValuegeneric_OpMul16(v, config)
case OpMul32:
@@ -1053,6 +1061,215 @@
}
return false
}
+func rewriteValuegeneric_OpDiv64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c > 0 && smagic64ok(c) && smagic64m(c) > 0
+ // result: (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c > 0 && smagic64ok(c) && smagic64m(c) > 0) {
+ break
+ }
+ v.reset(OpSub64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v1 := b.NewValue0(v.Line, OpHmul64, t)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = smagic64m(c)
+ v1.AddArg(v2)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = smagic64s(c)
+ v0.AddArg(v3)
+ v.AddArg(v0)
+ v4 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v4.AddArg(x)
+ v5 := b.NewValue0(v.Line, OpConst64, t)
+ v5.AuxInt = 63
+ v4.AddArg(v5)
+ v.AddArg(v4)
+ return true
+ }
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c > 0 && smagic64ok(c) && smagic64m(c) < 0
+ // result: (Sub64 <t> (Rsh64x64 <t> (Add64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c > 0 && smagic64ok(c) && smagic64m(c) < 0) {
+ break
+ }
+ v.reset(OpSub64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v1 := b.NewValue0(v.Line, OpAdd64, t)
+ v2 := b.NewValue0(v.Line, OpHmul64, t)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = smagic64m(c)
+ v2.AddArg(v3)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v4 := b.NewValue0(v.Line, OpConst64, t)
+ v4.AuxInt = smagic64s(c)
+ v0.AddArg(v4)
+ v.AddArg(v0)
+ v5 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v5.AddArg(x)
+ v6 := b.NewValue0(v.Line, OpConst64, t)
+ v6.AuxInt = 63
+ v5.AddArg(v6)
+ v.AddArg(v5)
+ return true
+ }
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c < 0 && smagic64ok(c) && smagic64m(c) > 0
+ // result: (Neg64 <t> (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63]))))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c < 0 && smagic64ok(c) && smagic64m(c) > 0) {
+ break
+ }
+ v.reset(OpNeg64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpSub64, t)
+ v1 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v2 := b.NewValue0(v.Line, OpHmul64, t)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = smagic64m(c)
+ v2.AddArg(v3)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v4 := b.NewValue0(v.Line, OpConst64, t)
+ v4.AuxInt = smagic64s(c)
+ v1.AddArg(v4)
+ v0.AddArg(v1)
+ v5 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v5.AddArg(x)
+ v6 := b.NewValue0(v.Line, OpConst64, t)
+ v6.AuxInt = 63
+ v5.AddArg(v6)
+ v0.AddArg(v5)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c < 0 && smagic64ok(c) && smagic64m(c) < 0
+ // result: (Neg64 <t> (Sub64 <t> (Rsh64x64 <t> (Add64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63]))))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c < 0 && smagic64ok(c) && smagic64m(c) < 0) {
+ break
+ }
+ v.reset(OpNeg64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpSub64, t)
+ v1 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v2 := b.NewValue0(v.Line, OpAdd64, t)
+ v3 := b.NewValue0(v.Line, OpHmul64, t)
+ v4 := b.NewValue0(v.Line, OpConst64, t)
+ v4.AuxInt = smagic64m(c)
+ v3.AddArg(v4)
+ v3.AddArg(x)
+ v2.AddArg(v3)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v5 := b.NewValue0(v.Line, OpConst64, t)
+ v5.AuxInt = smagic64s(c)
+ v1.AddArg(v5)
+ v0.AddArg(v1)
+ v6 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v6.AddArg(x)
+ v7 := b.NewValue0(v.Line, OpConst64, t)
+ v7.AuxInt = 63
+ v6.AddArg(v7)
+ v0.AddArg(v6)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
+func rewriteValuegeneric_OpDiv64u(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Div64u <t> x (Const64 [c]))
+ // cond: umagic64ok(c) && !umagic64a(c)
+ // result: (Rsh64Ux64 (Hmul64u <t> (Const64 <t> [umagic64m(c)]) x) (Const64 <t> [umagic64s(c)]))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(umagic64ok(c) && !umagic64a(c)) {
+ break
+ }
+ v.reset(OpRsh64Ux64)
+ v0 := b.NewValue0(v.Line, OpHmul64u, t)
+ v1 := b.NewValue0(v.Line, OpConst64, t)
+ v1.AuxInt = umagic64m(c)
+ v0.AddArg(v1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = umagic64s(c)
+ v.AddArg(v2)
+ return true
+ }
+ // match: (Div64u <t> x (Const64 [c]))
+ // cond: umagic64ok(c) && umagic64a(c)
+ // result: (Rsh64Ux64 (Avg64u <t> (Hmul64u <t> x (Const64 <t> [umagic64m(c)])) x) (Const64 <t> [umagic64s(c)-1]))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(umagic64ok(c) && umagic64a(c)) {
+ break
+ }
+ v.reset(OpRsh64Ux64)
+ v0 := b.NewValue0(v.Line, OpAvg64u, t)
+ v1 := b.NewValue0(v.Line, OpHmul64u, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = umagic64m(c)
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = umagic64s(c) - 1
+ v.AddArg(v3)
+ return true
+ }
+ return false
+}
func rewriteValuegeneric_OpEq16(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -3061,6 +3278,72 @@
}
return false
}
+func rewriteValuegeneric_OpMod64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Mod64 <t> x (Const64 [c]))
+ // cond: smagic64ok(c)
+ // result: (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(smagic64ok(c)) {
+ break
+ }
+ v.reset(OpSub64)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpMul64, t)
+ v1 := b.NewValue0(v.Line, OpDiv64, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = c
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = c
+ v0.AddArg(v3)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
+func rewriteValuegeneric_OpMod64u(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Mod64u <t> x (Const64 [c]))
+ // cond: umagic64ok(c)
+ // result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(umagic64ok(c)) {
+ break
+ }
+ v.reset(OpSub64)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpMul64, t)
+ v1 := b.NewValue0(v.Line, OpDiv64u, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = c
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = c
+ v0.AddArg(v3)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
func rewriteValuegeneric_OpMul16(v *Value, config *Config) bool {
b := v.Block
_ = b