cmd/compile: add s390x intrinsics for Ceil, Floor, Round and Trunc
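Ceil, Floor and Trunc are pre-existing intrinsics. Round is a new
function and has been added as an intrinsic in this CL. On s390x each
of the four functions can be implemented as a single 'LOAD FP INTEGER'
instruction, FIDBR; only the rounding mode, carried in the op's aux
value, differs between them (7 for Floor, 6 for Ceil, 5 for Trunc and
1 for Round).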
name old time/op new time/op delta
Ceil 2.34ns ± 0% 0.85ns ± 0% -63.74% (p=0.000 n=5+4)
Floor 2.33ns ± 0% 0.85ns ± 1% -63.35% (p=0.008 n=5+5)
Round 4.23ns ± 0% 0.85ns ± 0% -79.89% (p=0.000 n=5+4)
Trunc 2.35ns ± 0% 0.85ns ± 0% -63.83% (p=0.029 n=4+4)
Change-Id: Idee7ba24a2899d12bf9afee4eedd6b4aaad3c510
Reviewed-on: https://go-review.googlesource.com/63890
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index 8a627e7..d03ca32 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -107,7 +107,12 @@
(Bswap64 x) -> (MOVDBR x)
(Bswap32 x) -> (MOVWBR x)
-(Sqrt x) -> (FSQRT x)
+// math package intrinsics
+(Sqrt x) -> (FSQRT x)
+(Floor x) -> (FIDBR [7] x)
+(Ceil x) -> (FIDBR [6] x)
+(Trunc x) -> (FIDBR [5] x)
+(Round x) -> (FIDBR [1] x)
// Atomic loads.
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 2a08a27..b330398 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -206,6 +206,17 @@
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0
{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0
+ // Round to integer, float64 only.
+ //
+ // aux | rounding mode
+ // ----+-----------------------------------
+ // 1 | round to nearest, ties away from 0
+ // 4 | round to nearest, ties to even
+ // 5 | round toward 0
+ // 6 | round toward +∞
+ // 7 | round toward -∞
+ {name: "FIDBR", argLength: 1, reg: fp11, asm: "FIDBR", aux: "Int8"},
+
{name: "FMOVSload", argLength: 2, reg: fpload, asm: "FMOVS", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp32 load
{name: "FMOVDload", argLength: 2, reg: fpload, asm: "FMOVD", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"}, // fp64 load
{name: "FMOVSconst", reg: fp01, asm: "FMOVS", aux: "Float32", rematerializeable: true}, // fp32 constant
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 6f8d10a..2967d29 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -255,10 +255,23 @@
{name: "PopCount32", argLength: 1}, // Count bits in arg[0]
{name: "PopCount64", argLength: 1}, // Count bits in arg[0]
- {name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
- {name: "Floor", argLength: 1}, // floor(arg0), float64 only
- {name: "Ceil", argLength: 1}, // ceil(arg0), float64 only
- {name: "Trunc", argLength: 1}, // trunc(arg0), float64 only
+ // Square root, float64 only.
+ // Special cases:
+ // +∞ → +∞
+ // ±0 → ±0 (sign preserved)
+ // x<0 → NaN
+ // NaN → NaN
+ {name: "Sqrt", argLength: 1}, // √arg0
+
+ // Round to integer, float64 only.
+ // Special cases:
+ // ±∞ → ±∞ (sign preserved)
+ // ±0 → ±0 (sign preserved)
+ // NaN → NaN
+ {name: "Floor", argLength: 1}, // round arg0 toward -∞
+ {name: "Ceil", argLength: 1}, // round arg0 toward +∞
+ {name: "Trunc", argLength: 1}, // round arg0 toward 0
+ {name: "Round", argLength: 1}, // round arg0 to nearest, ties away from 0
// Data movement, max argument length for Phi is indefinite so just pick
// a really large number