[dev.ssa] cmd/compile: strength-reduce 64-bit constant divides
The frontend does this for 32 bits and below, but SSA needs
to do it for 64 bits. The algorithms are all copied from
cgen.go:cgen_div.
Speeds up TimeFormat substantially: from ~40% slower to ~10% slower.
Change-Id: I023ea2eb6040df98ccd9105e15ca6ea695610a7a
Reviewed-on: https://go-review.googlesource.com/19302
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Todd Neal <todd@tneal.org>
diff --git a/src/cmd/compile/internal/ssa/rewritegeneric.go b/src/cmd/compile/internal/ssa/rewritegeneric.go
index 0c091c7..a5d8a4d 100644
--- a/src/cmd/compile/internal/ssa/rewritegeneric.go
+++ b/src/cmd/compile/internal/ssa/rewritegeneric.go
@@ -47,6 +47,10 @@
return rewriteValuegeneric_OpConstString(v, config)
case OpConvert:
return rewriteValuegeneric_OpConvert(v, config)
+ case OpDiv64:
+ return rewriteValuegeneric_OpDiv64(v, config)
+ case OpDiv64u:
+ return rewriteValuegeneric_OpDiv64u(v, config)
case OpEq16:
return rewriteValuegeneric_OpEq16(v, config)
case OpEq32:
@@ -167,6 +171,10 @@
return rewriteValuegeneric_OpLsh8x64(v, config)
case OpLsh8x8:
return rewriteValuegeneric_OpLsh8x8(v, config)
+ case OpMod64:
+ return rewriteValuegeneric_OpMod64(v, config)
+ case OpMod64u:
+ return rewriteValuegeneric_OpMod64u(v, config)
case OpMul16:
return rewriteValuegeneric_OpMul16(v, config)
case OpMul32:
@@ -1053,6 +1061,215 @@
}
return false
}
+func rewriteValuegeneric_OpDiv64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c > 0 && smagic64ok(c) && smagic64m(c) > 0
+ // result: (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c > 0 && smagic64ok(c) && smagic64m(c) > 0) {
+ break
+ }
+ v.reset(OpSub64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v1 := b.NewValue0(v.Line, OpHmul64, t)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = smagic64m(c)
+ v1.AddArg(v2)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = smagic64s(c)
+ v0.AddArg(v3)
+ v.AddArg(v0)
+ v4 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v4.AddArg(x)
+ v5 := b.NewValue0(v.Line, OpConst64, t)
+ v5.AuxInt = 63
+ v4.AddArg(v5)
+ v.AddArg(v4)
+ return true
+ }
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c > 0 && smagic64ok(c) && smagic64m(c) < 0
+ // result: (Sub64 <t> (Rsh64x64 <t> (Add64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c > 0 && smagic64ok(c) && smagic64m(c) < 0) {
+ break
+ }
+ v.reset(OpSub64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v1 := b.NewValue0(v.Line, OpAdd64, t)
+ v2 := b.NewValue0(v.Line, OpHmul64, t)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = smagic64m(c)
+ v2.AddArg(v3)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v1.AddArg(x)
+ v0.AddArg(v1)
+ v4 := b.NewValue0(v.Line, OpConst64, t)
+ v4.AuxInt = smagic64s(c)
+ v0.AddArg(v4)
+ v.AddArg(v0)
+ v5 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v5.AddArg(x)
+ v6 := b.NewValue0(v.Line, OpConst64, t)
+ v6.AuxInt = 63
+ v5.AddArg(v6)
+ v.AddArg(v5)
+ return true
+ }
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c < 0 && smagic64ok(c) && smagic64m(c) > 0
+ // result: (Neg64 <t> (Sub64 <t> (Rsh64x64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63]))))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c < 0 && smagic64ok(c) && smagic64m(c) > 0) {
+ break
+ }
+ v.reset(OpNeg64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpSub64, t)
+ v1 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v2 := b.NewValue0(v.Line, OpHmul64, t)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = smagic64m(c)
+ v2.AddArg(v3)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v4 := b.NewValue0(v.Line, OpConst64, t)
+ v4.AuxInt = smagic64s(c)
+ v1.AddArg(v4)
+ v0.AddArg(v1)
+ v5 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v5.AddArg(x)
+ v6 := b.NewValue0(v.Line, OpConst64, t)
+ v6.AuxInt = 63
+ v5.AddArg(v6)
+ v0.AddArg(v5)
+ v.AddArg(v0)
+ return true
+ }
+ // match: (Div64 <t> x (Const64 [c]))
+ // cond: c < 0 && smagic64ok(c) && smagic64m(c) < 0
+ // result: (Neg64 <t> (Sub64 <t> (Rsh64x64 <t> (Add64 <t> (Hmul64 <t> (Const64 <t> [smagic64m(c)]) x) x) (Const64 <t> [smagic64s(c)])) (Rsh64x64 <t> x (Const64 <t> [63]))))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(c < 0 && smagic64ok(c) && smagic64m(c) < 0) {
+ break
+ }
+ v.reset(OpNeg64)
+ v.Type = t
+ v0 := b.NewValue0(v.Line, OpSub64, t)
+ v1 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v2 := b.NewValue0(v.Line, OpAdd64, t)
+ v3 := b.NewValue0(v.Line, OpHmul64, t)
+ v4 := b.NewValue0(v.Line, OpConst64, t)
+ v4.AuxInt = smagic64m(c)
+ v3.AddArg(v4)
+ v3.AddArg(x)
+ v2.AddArg(v3)
+ v2.AddArg(x)
+ v1.AddArg(v2)
+ v5 := b.NewValue0(v.Line, OpConst64, t)
+ v5.AuxInt = smagic64s(c)
+ v1.AddArg(v5)
+ v0.AddArg(v1)
+ v6 := b.NewValue0(v.Line, OpRsh64x64, t)
+ v6.AddArg(x)
+ v7 := b.NewValue0(v.Line, OpConst64, t)
+ v7.AuxInt = 63
+ v6.AddArg(v7)
+ v0.AddArg(v6)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
+func rewriteValuegeneric_OpDiv64u(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Div64u <t> x (Const64 [c]))
+ // cond: umagic64ok(c) && !umagic64a(c)
+ // result: (Rsh64Ux64 (Hmul64u <t> (Const64 <t> [umagic64m(c)]) x) (Const64 <t> [umagic64s(c)]))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(umagic64ok(c) && !umagic64a(c)) {
+ break
+ }
+ v.reset(OpRsh64Ux64)
+ v0 := b.NewValue0(v.Line, OpHmul64u, t)
+ v1 := b.NewValue0(v.Line, OpConst64, t)
+ v1.AuxInt = umagic64m(c)
+ v0.AddArg(v1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = umagic64s(c)
+ v.AddArg(v2)
+ return true
+ }
+ // match: (Div64u <t> x (Const64 [c]))
+ // cond: umagic64ok(c) && umagic64a(c)
+ // result: (Rsh64Ux64 (Avg64u <t> (Hmul64u <t> x (Const64 <t> [umagic64m(c)])) x) (Const64 <t> [umagic64s(c)-1]))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(umagic64ok(c) && umagic64a(c)) {
+ break
+ }
+ v.reset(OpRsh64Ux64)
+ v0 := b.NewValue0(v.Line, OpAvg64u, t)
+ v1 := b.NewValue0(v.Line, OpHmul64u, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = umagic64m(c)
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v0.AddArg(x)
+ v.AddArg(v0)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = umagic64s(c) - 1
+ v.AddArg(v3)
+ return true
+ }
+ return false
+}
func rewriteValuegeneric_OpEq16(v *Value, config *Config) bool {
b := v.Block
_ = b
@@ -3061,6 +3278,72 @@
}
return false
}
+func rewriteValuegeneric_OpMod64(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Mod64 <t> x (Const64 [c]))
+ // cond: smagic64ok(c)
+ // result: (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(smagic64ok(c)) {
+ break
+ }
+ v.reset(OpSub64)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpMul64, t)
+ v1 := b.NewValue0(v.Line, OpDiv64, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = c
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = c
+ v0.AddArg(v3)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
+func rewriteValuegeneric_OpMod64u(v *Value, config *Config) bool {
+ b := v.Block
+ _ = b
+ // match: (Mod64u <t> x (Const64 [c]))
+ // cond: umagic64ok(c)
+ // result: (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
+ for {
+ t := v.Type
+ x := v.Args[0]
+ if v.Args[1].Op != OpConst64 {
+ break
+ }
+ c := v.Args[1].AuxInt
+ if !(umagic64ok(c)) {
+ break
+ }
+ v.reset(OpSub64)
+ v.AddArg(x)
+ v0 := b.NewValue0(v.Line, OpMul64, t)
+ v1 := b.NewValue0(v.Line, OpDiv64u, t)
+ v1.AddArg(x)
+ v2 := b.NewValue0(v.Line, OpConst64, t)
+ v2.AuxInt = c
+ v1.AddArg(v2)
+ v0.AddArg(v1)
+ v3 := b.NewValue0(v.Line, OpConst64, t)
+ v3.AuxInt = c
+ v0.AddArg(v3)
+ v.AddArg(v0)
+ return true
+ }
+ return false
+}
func rewriteValuegeneric_OpMul16(v *Value, config *Config) bool {
b := v.Block
_ = b