cmd/compile: intrinsify math.RoundToEven on s390x

The new math.RoundToEven function can be implemented as a single FIDBR
instruction on s390x, using rounding mode 4 (round to nearest, ties to
even).

name         old time/op  new time/op  delta
RoundToEven  5.32ns ± 1%  0.86ns ± 1%  -83.86%  (p=0.000 n=10+10)

Change-Id: Iaf597e57a0d1085961701e3c75ff4f6f6dcebb5f
Reviewed-on: https://go-review.googlesource.com/74350
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/gc/asm_test.go b/src/cmd/compile/internal/gc/asm_test.go
index 4320628..c609e00 100644
--- a/src/cmd/compile/internal/gc/asm_test.go
+++ b/src/cmd/compile/internal/gc/asm_test.go
@@ -1634,6 +1634,14 @@
 		pos: []string{"\tFIDBR\t[$]5"},
 	},
 	{
+		fn: `
+		func roundToEven(x float64) float64 {
+			return math.RoundToEven(x)
+		}
+		`,
+		pos: []string{"\tFIDBR\t[$]4"},
+	},
+	{
 		// check that stack store is optimized away
 		fn: `
 		func $() int {
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 9eeeb35..233c639 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -2807,6 +2807,11 @@
 			return s.newValue1(ssa.OpRound, types.Types[TFLOAT64], args[0])
 		},
 		sys.S390X)
+	addF("math", "RoundToEven",
+		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			return s.newValue1(ssa.OpRoundToEven, types.Types[TFLOAT64], args[0])
+		},
+		sys.S390X)
 	addF("math", "Abs",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpAbs, types.Types[TFLOAT64], args[0])
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 21bd728..a3908e7 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -108,11 +108,12 @@
 (Bswap32 x) -> (MOVWBR x)
 
 // math package intrinsics
-(Sqrt  x) -> (FSQRT x)
-(Floor x) -> (FIDBR [7] x)
-(Ceil  x) -> (FIDBR [6] x)
-(Trunc x) -> (FIDBR [5] x)
-(Round x) -> (FIDBR [1] x)
+(Sqrt        x) -> (FSQRT x)
+(Floor       x) -> (FIDBR [7] x)
+(Ceil        x) -> (FIDBR [6] x)
+(Trunc       x) -> (FIDBR [5] x)
+(RoundToEven x) -> (FIDBR [4] x)
+(Round       x) -> (FIDBR [1] x)
 
 // Atomic loads.
 (AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 0ad582b..d36910e 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -268,10 +268,11 @@
 	//   ±∞  → ±∞ (sign preserved)
 	//   ±0  → ±0 (sign preserved)
 	//   NaN → NaN
-	{name: "Floor", argLength: 1}, // round arg0 toward -∞
-	{name: "Ceil", argLength: 1},  // round arg0 toward +∞
-	{name: "Trunc", argLength: 1}, // round arg0 toward 0
-	{name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
+	{name: "Floor", argLength: 1},       // round arg0 toward -∞
+	{name: "Ceil", argLength: 1},        // round arg0 toward +∞
+	{name: "Trunc", argLength: 1},       // round arg0 toward 0
+	{name: "Round", argLength: 1},       // round arg0 to nearest, ties away from 0
+	{name: "RoundToEven", argLength: 1}, // round arg0 to nearest, ties to even
 
 	// Modify the sign bit
 	{name: "Abs", argLength: 1},      // absolute value arg0
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index a18cf78..9d44e1a 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1901,6 +1901,7 @@
 	OpCeil
 	OpTrunc
 	OpRound
+	OpRoundToEven
 	OpAbs
 	OpCopysign
 	OpPhi
@@ -23319,6 +23320,11 @@
 		generic: true,
 	},
 	{
+		name:    "RoundToEven",
+		argLen:  1,
+		generic: true,
+	},
+	{
 		name:    "Abs",
 		argLen:  1,
 		generic: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteS390X.go b/src/cmd/compile/internal/ssa/rewriteS390X.go
index 9237bfc..0c7dd17 100644
--- a/src/cmd/compile/internal/ssa/rewriteS390X.go
+++ b/src/cmd/compile/internal/ssa/rewriteS390X.go
@@ -383,6 +383,8 @@
 		return rewriteValueS390X_OpRound32F_0(v)
 	case OpRound64F:
 		return rewriteValueS390X_OpRound64F_0(v)
+	case OpRoundToEven:
+		return rewriteValueS390X_OpRoundToEven_0(v)
 	case OpRsh16Ux16:
 		return rewriteValueS390X_OpRsh16Ux16_0(v)
 	case OpRsh16Ux32:
@@ -5028,6 +5030,18 @@
 		return true
 	}
 }
+func rewriteValueS390X_OpRoundToEven_0(v *Value) bool {
+	// match: (RoundToEven x)
+	// cond:
+	// result: (FIDBR [4] x)
+	for {
+		x := v.Args[0]
+		v.reset(OpS390XFIDBR)
+		v.AuxInt = 4
+		v.AddArg(x)
+		return true
+	}
+}
 func rewriteValueS390X_OpRsh16Ux16_0(v *Value) bool {
 	b := v.Block
 	_ = b