cmd/compile: intrinsify math.RoundToEven on s390x
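The new math.RoundToEven function can be implemented as a single FIDBR
instruction on s390x (FIDBR with mode operand 4), in the same way as the
existing Floor, Ceil, Trunc and Round intrinsics.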
name old time/op new time/op delta
RoundToEven 5.32ns ± 1% 0.86ns ± 1% -83.86% (p=0.000 n=10+10)
Change-Id: Iaf597e57a0d1085961701e3c75ff4f6f6dcebb5f
Reviewed-on: https://go-review.googlesource.com/74350
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/gc/asm_test.go b/src/cmd/compile/internal/gc/asm_test.go
index 4320628..c609e00 100644
--- a/src/cmd/compile/internal/gc/asm_test.go
+++ b/src/cmd/compile/internal/gc/asm_test.go
@@ -1634,6 +1634,14 @@
pos: []string{"\tFIDBR\t[$]5"},
},
{
+ fn: `
+ func roundToEven(x float64) float64 {
+ return math.RoundToEven(x)
+ }
+ `,
+ pos: []string{"\tFIDBR\t[$]4"},
+ },
+ {
// check that stack store is optimized away
fn: `
func $() int {
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 9eeeb35..233c639 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2807,6 +2807,11 @@
return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
},
sys.S390X)
+ addF("math", "RoundToEven",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
+ },
+ sys.S390X)
addF("math", "Abs",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 21bd728..a3908e7 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -108,11 +108,12 @@
(Bswap32 x) -> (MOVWBR x)
// math package intrinsics
-(Sqrt x) -> (FSQRT x)
-(Floor x) -> (FIDBR [7] x)
-(Ceil x) -> (FIDBR [6] x)
-(Trunc x) -> (FIDBR [5] x)
-(Round x) -> (FIDBR [1] x)
+(Sqrt x) -> (FSQRT x)
+(Floor x) -> (FIDBR [7] x)
+(Ceil x) -> (FIDBR [6] x)
+(Trunc x) -> (FIDBR [5] x)
+(RoundToEven x) -> (FIDBR [4] x)
+(Round x) -> (FIDBR [1] x)
// Atomic loads.
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 0ad582b..d36910e 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -268,10 +268,11 @@
// ±∞ → ±∞ (sign preserved)
// ±0 → ±0 (sign preserved)
// NaN → NaN
- {name: "Floor", argLength: 1}, // round arg0 toward -∞
- {name: "Ceil", argLength: 1}, // round arg0 toward +∞
- {name: "Trunc", argLength: 1}, // round arg0 toward 0
- {name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
+ {name: "Floor", argLength: 1}, // round arg0 toward -∞
+ {name: "Ceil", argLength: 1}, // round arg0 toward +∞
+ {name: "Trunc", argLength: 1}, // round arg0 toward 0
+ {name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
+ {name: "RoundToEven", argLength: 1}, // round arg0 to nearest, ties to even
// Modify the sign bit
{name: "Abs", argLength: 1}, // absolute value arg0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index a18cf78..9d44e1a 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1901,6 +1901,7 @@
OpCeil
OpTrunc
OpRound
+ OpRoundToEven
OpAbs
OpCopysign
OpPhi
@@ -23319,6 +23320,11 @@
generic: true,
},
{
+ name: "RoundToEven",
+ argLen: 1,
+ generic: true,
+ },
+ {
name: "Abs",
argLen: 1,
generic: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index 9237bfc..0c7dd17 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -383,6 +383,8 @@
return rewriteValueS390X_OpRound32F_0(v)
case OpRound64F:
return rewriteValueS390X_OpRound64F_0(v)
+ case OpRoundToEven:
+ return rewriteValueS390X_OpRoundToEven_0(v)
case OpRsh16Ux16:
return rewriteValueS390X_OpRsh16Ux16_0(v)
case OpRsh16Ux32:
@@ -5028,6 +5030,18 @@
return true
}
}
+func rewriteValueS390X_OpRoundToEven_0(v *Value) bool { // rewrites a generic RoundToEven value into an S390X FIDBR value in place
+ // match: (RoundToEven x)
+ // cond:
+ // result: (FIDBR [4] x)
+ for { // the body returns unconditionally, so this runs exactly once
+ x := v.Args[0] // capture the operand before v is reset
+ v.reset(OpS390XFIDBR)
+ v.AuxInt = 4 // mode operand that the S390X rules pair with RoundToEven (Floor=7, Ceil=6, Trunc=5, Round=1)
+ v.AddArg(x)
+ return true // the rule has no condition, so the rewrite always applies
+ }
+}
func rewriteValueS390X_OpRsh16Ux16_0(v *Value) bool {
b := v.Block
_ = b