blob: 9f4eaa28d5ae22d05b33848313469297be1699a8 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
DATA ·atanrodataL8<> + 0(SB)/8, $0.199999999999554423E+00
DATA ·atanrodataL8<> + 8(SB)/8, $0.111111110136634272E+00
DATA ·atanrodataL8<> + 16(SB)/8, $-.142857142828026806E+00
DATA ·atanrodataL8<> + 24(SB)/8, $-.333333333333330928E+00
DATA ·atanrodataL8<> + 32(SB)/8, $0.769228118888682505E-01
DATA ·atanrodataL8<> + 40(SB)/8, $0.588059263575587687E-01
DATA ·atanrodataL8<> + 48(SB)/8, $-.666641501287528609E-01
DATA ·atanrodataL8<> + 56(SB)/8, $-.909090711945939878E-01
DATA ·atanrodataL8<> + 64(SB)/8, $0.472329433805024762E-01
DATA ·atanrodataL8<> + 72(SB)/8, $0.366935664549587481E-01
DATA ·atanrodataL8<> + 80(SB)/8, $-.422172007412067035E-01
DATA ·atanrodataL8<> + 88(SB)/8, $-.299856214685512712E-01
DATA ·atanrodataL8<> + 96(SB)/8, $0.220852012160300086E-01
DATA ·atanrodataL8<> + 104(SB)/8, $0.726338160757602439E-02
DATA ·atanrodataL8<> + 112(SB)/8, $0.843488472994227321E-03
DATA ·atanrodataL8<> + 120(SB)/8, $0.134893651284712515E-04
DATA ·atanrodataL8<> + 128(SB)/8, $-.525380587584426406E-01
DATA ·atanrodataL8<> + 136(SB)/8, $-.139950258898989925E-01
DATA ·atanrodataL8<> + 144(SB)/8, $-.291935324869629616E-02
DATA ·atanrodataL8<> + 152(SB)/8, $-.154797890856877418E-03
GLOBL ·atanrodataL8<> + 0(SB), RODATA, $160
DATA ·atanxpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b
DATA ·atanxpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b
DATA ·atanxpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b
DATA ·atanxpi2h<> + 24(SB)/4, $0xc00330e4e4fa7b1b
GLOBL ·atanxpi2h<> + 0(SB), RODATA, $32
DATA ·atanxpim<> + 0(SB)/8, $0x3ff4f42b00000000
GLOBL ·atanxpim<> + 0(SB), RODATA, $8
DATA ·atanxmone<> + 0(SB)/8, $-1.0
GLOBL ·atanxmone<> + 0(SB), RODATA, $8
// Atan returns the arctangent, in radians, of the argument.
//
// Special cases are:
// Atan(±0) = ±0
// Atan(±Inf) = ±Pi/2Pi
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
TEXT ·atanAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
//special case Atan(±0) = ±0
FMOVD $(0.0), F1
FCMPU F0, F1
BEQ atanIsZero
MOVD $·atanrodataL8<>+0(SB), R5
MOVH $0x3FE0, R3
WORD $0xB3CD0010 //lgdr %r1,%f0
WORD $0xEC1120BF //risbgn %r1,%r1,64-32,128+63,64+0+32
BYTE $0x60
BYTE $0x59
RLL $16, R1, R2
ANDW $0x7FF0, R2
MOVW R2, R6
MOVW R3, R7
CMPUBLE R6, R7, L6
MOVD $·atanxmone<>+0(SB), R3
FMOVD 0(R3), F2
WFDDB V0, V2, V0
WORD $0xEC113FBF //risbg %r1,%r1,64-1,128+63,64+32+1
BYTE $0x61
BYTE $0x55
MOVD $·atanxpi2h<>+0(SB), R3
MOVWZ R1, R1
SLD $3, R1, R1
WORD $0x68813000 //ld %f8,0(%r1,%r3)
L6:
WFMDB V0, V0, V2
FMOVD 152(R5), F6
FMOVD 144(R5), F1
FMOVD 136(R5), F7
VLEG $0, 128(R5), V16
FMOVD 120(R5), F4
FMOVD 112(R5), F5
WFMADB V2, V4, V6, V4
WFMADB V2, V5, V1, V5
WFMDB V2, V2, V6
FMOVD 104(R5), F3
FMOVD 96(R5), F1
WFMADB V2, V3, V7, V3
MOVH $0x3FE0, R1
FMOVD 88(R5), F7
WFMADB V2, V1, V7, V1
FMOVD 80(R5), F7
WFMADB V6, V3, V1, V3
WFMADB V6, V4, V5, V4
WFMDB V6, V6, V1
FMOVD 72(R5), F5
WFMADB V2, V5, V7, V5
FMOVD 64(R5), F7
WFMADB V2, V7, V16, V7
VLEG $0, 56(R5), V16
WFMADB V6, V5, V7, V5
WFMADB V1, V4, V3, V4
FMOVD 48(R5), F7
FMOVD 40(R5), F3
WFMADB V2, V3, V7, V3
FMOVD 32(R5), F7
WFMADB V2, V7, V16, V7
VLEG $0, 24(R5), V16
WFMADB V1, V4, V5, V4
FMOVD 16(R5), F5
WFMADB V6, V3, V7, V3
FMOVD 8(R5), F7
WFMADB V2, V7, V5, V7
FMOVD 0(R5), F5
WFMADB V2, V5, V16, V5
WFMADB V1, V4, V3, V4
WFMADB V6, V7, V5, V6
FMUL F0, F2
FMADD F4, F1, F6
FMADD F6, F2, F0
MOVW R2, R6
MOVW R1, R7
CMPUBLE R6, R7, L1
MOVD $·atanxpim<>+0(SB), R1
WORD $0xED801000 //madb %f0,%f8,0(%r1)
BYTE $0x00
BYTE $0x1E
L1:
atanIsZero:
FMOVD F0, ret+8(FP)
RET