| // Copyright 2025 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| #include "textflag.h" |
| |
| #define NearZero 0x3e30000000000000 // float64 bit pattern of 2**-28 |
| #define PosInf 0x7ff0000000000000 |
| #define FracMask 0x000fffffffffffff |
| #define C1 0x3cb0000000000000 // float64 bit pattern of 2**-52, moved into the multiplier register on a denormal path |
| |
| DATA exprodata<>+0(SB)/8, $0.0 |
| DATA exprodata<>+8(SB)/8, $0.5 |
| DATA exprodata<>+16(SB)/8, $1.0 |
| DATA exprodata<>+24(SB)/8, $2.0 |
| DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // Ln2Hi: high part of ln(2) |
| DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // Ln2Lo: low part of ln(2), Ln2Hi + Ln2Lo ~= ln(2) |
| DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // Log2e = 1/ln(2) |
| DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // Overflow: archExp returns +Inf for x above this |
| DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // Underflow: archExp returns 0 for x below this |
| DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // Overflow2: archExp2 returns +Inf for x above this |
| DATA exprodata<>+80(SB)/8, $-1.0740e+03 // Underflow2: archExp2 returns 0 for x below this |
| DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // NearZero (2**-28): archExp returns 1 + x when the magnitude of x is below this |
| GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 |
| |
| DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // P1: first of five coefficients in c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5)))), t = r*r |
| DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // P2 |
| DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // P3 |
| DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // P4 |
| DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // P5: innermost coefficient, combined with P4 first |
| GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 |
| |
| // archExp returns e**x, the base-e exponential of x. |
| // This is an assembly implementation of the method used for function Exp in file exp.go. |
| // |
| // Special cases, checked in this order: |
| //	x is NaN             -> x is returned unchanged |
| //	x > Overflow         -> +Inf |
| //	x < Underflow        -> 0 |
| //	fabs(x) < NearZero   -> 1 + x |
| // |
| // Otherwise x is reduced to x = k*ln2 + r, exp(r) is approximated as y, |
| // and the result y * 2**k is assembled by an inlined Ldexp. |
| // |
| // func archExp(x float64) float64 |
| TEXT ·archExp(SB),$0-16 |
| MOVD x+0(FP), F0 // F0 = x |
| |
| MOVV $exprodata<>+0(SB), R10 // R10 = base of the scalar constant table |
| MOVD 56(R10), F1 // Overflow |
| MOVD 64(R10), F2 // Underflow |
| MOVD 88(R10), F3 // NearZero |
| MOVD 16(R10), F17 // 1.0, also the default Ldexp multiplier m |
| |
| CMPEQD F0, F0, FCC0 |
| BFPF isNaN // x = NaN, return NaN |
| |
| CMPGTD F0, F1, FCC0 |
| BFPT overflow // x > Overflow, return PosInf |
| |
| CMPGTD F2, F0, FCC0 |
| BFPT underflow // x < Underflow, return 0 |
| |
| ABSD F0, F5 |
| CMPGTD F3, F5, FCC0 |
| BFPT nearzero // fabs(x) < NearZero, return 1 + x |
| |
| // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2 |
| // computed as r = hi - lo for extra precision. |
| MOVD 0(R10), F5 // 0.0 |
| MOVD 8(R10), F3 // 0.5 |
| MOVD 48(R10), F2 // Log2e |
| CMPGTD F0, F5, FCC0 |
| BFPT add // x > 0 |
| sub: |
| FMSUBD F3, F2, F0, F3 // Log2e*x - 0.5 |
| JMP 2(PC) |
| add: |
| FMADDD F3, F2, F0, F3 // Log2e*x + 0.5 |
| |
| // k = truncation toward zero of Log2e*x +/- 0.5 |
| FTINTRZVD F3, F4 // float64 -> int64 |
| MOVV F4, R5 // R5 = int(k) |
| FFINTDV F4, F3 // int64 -> float64 |
| |
| MOVD 32(R10), F4 // Ln2Hi |
| MOVD 40(R10), F5 // Ln2Lo |
| FNMSUBD F0, F3, F4, F4 // hi = x - k*Ln2Hi |
| MULD F3, F5, F5 // lo = k*Ln2Lo |
| SUBD F5, F4, F6 // r = hi - lo |
| MULD F6, F6, F7 // t = r*r |
| |
| // compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5)))) |
| MOVV $expmultirodata<>+0(SB), R11 |
| MOVD 32(R11), F8 // P5 |
| MOVD 24(R11), F9 // P4 |
| FMADDD F9, F8, F7, F13 // P4 + t*P5 |
| MOVD 16(R11), F10 // P3 |
| FMADDD F10, F13, F7, F13 |
| MOVD 8(R11), F11 // P2 |
| FMADDD F11, F13, F7, F13 |
| MOVD 0(R11), F12 // P1 |
| FMADDD F12, F13, F7, F13 |
| FNMSUBD F6, F13, F7, F13 // c = r - t*(...) |
| |
| // compute y = 1 - ((lo - (r*c)/(2 - c)) - hi) |
| MOVD 24(R10), F14 // 2.0 |
| SUBD F13, F14, F14 // 2 - c |
| MULD F6, F13, F15 // r*c |
| DIVD F14, F15, F15 // (r*c)/(2 - c) |
| SUBD F15, F5, F15 // lo - (r*c)/(2 - c) |
| SUBD F4, F15, F15 // ... - hi |
| SUBD F15, F17, F16 // y = 1 - ... |
| |
| // inline Ldexp(y, k), benefit: |
| // 1, no parameter pass overhead. |
| // 2, skip unnecessary checks for Inf/NaN/Zero |
| MOVV F16, R4 |
| MOVV $FracMask, R9 |
| AND R9, R4, R6 // fraction |
| SRLV $52, R4, R7 // exponent |
| ADDV R5, R7 // biased exponent of y * 2**k |
| MOVV $1, R12 |
| BGE R7, R12, normal |
| // denormal: an exponent field below 1 cannot be encoded directly, so |
| // build y * 2**(k+53) and scale it back by m = 2**-53, the same |
| // adjustment Ldexp in the math package uses. An adjustment of 52 is |
| // not enough: for x just above Underflow (e.g. x = -745) the sum is -52, |
| // which would leave an exponent field of 0, drop the implicit leading 1 |
| // and round a result that should be 2**-1074 down to 0. |
| ADDV $53, R7 |
| MOVV $0x3ca0000000000000, R8 // float64 bit pattern of 2**-53 |
| MOVV R8, F17 // m = 2**-53 |
| normal: |
| SLLV $52, R7 |
| OR R7, R6, R4 |
| MOVV R4, F0 |
| MULD F17, F0 // return m * x |
| MOVD F0, ret+8(FP) |
| RET |
| nearzero: |
| ADDD F17, F0, F0 // 1 + x |
| isNaN: |
| MOVD F0, ret+8(FP) |
| RET |
| underflow: |
| MOVV R0, ret+8(FP) // R0 is the zero register: return +0 |
| RET |
| overflow: |
| MOVV $PosInf, R4 |
| MOVV R4, ret+8(FP) |
| RET |
| |
| |
| // Exp2 returns 2**x, the base-2 exponential of x. |
| // This is an assembly implementation of the method used for function Exp2 in file exp.go. |
| // Special cases, checked first: NaN is returned unchanged, x > Overflow2 returns +Inf, x < Underflow2 returns 0. |
| // func archExp2(x float64) float64 |
| TEXT ·archExp2(SB),$0-16 |
| MOVD x+0(FP), F0 // F0 = x |
| |
| MOVV $exprodata<>+0(SB), R10 |
| MOVD 72(R10), F1 // F1 = Overflow2 |
| MOVD 80(R10), F2 // F2 = Underflow2 |
| MOVD 88(R10), F3 // F3 = NearZero; NOTE(review): F3 is overwritten below before it is read, so this load looks unused |
| |
| CMPEQD F0, F0, FCC0 |
| BFPF isNaN // x = NaN, return NaN |
| |
| CMPGTD F0, F1, FCC0 |
| BFPT overflow // x > Overflow2, return PosInf |
| |
| CMPGTD F2, F0, FCC0 |
| BFPT underflow // x < Underflow2, return 0 |
| |
| // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2 |
| // computed as r = hi - lo for extra precision: k = trunc(x +/- 0.5) in R5, t = x - k, hi = t*Ln2Hi (F4), lo = -t*Ln2Lo (F5), r in F6, r*r in F7. |
| MOVD 0(R10), F10 |
| MOVD 8(R10), F2 |
| CMPGTD F0, F10, FCC0 |
| BFPT add |
| sub: |
| SUBD F2, F0, F3 // x - 0.5, taken when x is not greater than 0 |
| JMP 2(PC) |
| add: |
| ADDD F2, F0, F3 // x + 0.5, taken when x > 0 |
| |
| FTINTRZVD F3, F4 |
| MOVV F4, R5 |
| FFINTDV F4, F3 |
| |
| MOVD 32(R10), F4 |
| MOVD 40(R10), F5 |
| SUBD F3, F0, F3 |
| MULD F3, F4 |
| FNMSUBD F10, F3, F5, F5 |
| SUBD F5, F4, F6 |
| MULD F6, F6, F7 |
| |
| // compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5)))) with t = r*r; the result ends up in F13 |
| MOVV $expmultirodata<>+0(SB), R11 |
| MOVD 32(R11), F8 |
| MOVD 24(R11), F9 |
| FMADDD F9, F8, F7, F13 |
| MOVD 16(R11), F10 |
| FMADDD F10, F13, F7, F13 |
| MOVD 8(R11), F11 |
| FMADDD F11, F13, F7, F13 |
| MOVD 0(R11), F12 |
| FMADDD F12, F13, F7, F13 |
| FNMSUBD F6, F13, F7, F13 |
| |
| // compute y = 1 - ((lo - (r*c)/(2 - c)) - hi); the result ends up in F16, and F17 is loaded with 1.0 |
| MOVD 24(R10), F14 |
| SUBD F13, F14, F14 |
| MULD F6, F13, F15 |
| DIVD F14, F15 |
| |
| MOVD 16(R10), F17 |
| SUBD F15, F5, F15 |
| SUBD F4, F15, F15 |
| SUBD F15, F17, F16 |
| |
| // inline Ldexp(y, k): add k (R5) to the exponent field of y and rebuild the float64; benefit: |
| // 1, no parameter pass overhead. |
| // 2, skip unnecessary checks for Inf/NaN/Zero |
| MOVV F16, R4 |
| MOVV $FracMask, R9 |
| SRLV $52, R4, R7 // exponent field of y, shifted down to bit 0 |
| AND R9, R4, R6 // fraction bits of y |
| ADDV R5, R7 |
| MOVV $1, R12 |
| BGE R7, R12, normal |
| |
| ADDV $52, R7 // denormal: raise the exponent by 52 and switch the multiplier in F17 from 1.0 to C1 (2**-52) |
| MOVV $C1, R8 |
| MOVV R8, F17 |
| normal: |
| SLLV $52, R7 |
| OR R7, R6, R4 |
| MOVV R4, F0 |
| MULD F17, F0 |
| isNaN: |
| MOVD F0, ret+8(FP) |
| RET |
| underflow: |
| MOVV R0, ret+8(FP) |
| RET |
| overflow: |
| MOVV $PosInf, R4 |
| MOVV R4, ret+8(FP) |
| RET |