blob: 7e9d469cc6e0d2d81fa32186772f8a8ba7acb930 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
#define Neg2p11 0xC000E147AE147AE1
#define Pos15 0x402E
// Minimax polynomial coefficients and other constants
DATA ·erfcrodataL38<> + 0(SB)/8, $.234875460637085087E-01
DATA ·erfcrodataL38<> + 8(SB)/8, $.234469449299256284E-01
DATA ·erfcrodataL38<> + 16(SB)/8, $-.606918710392844955E-04
DATA ·erfcrodataL38<> + 24(SB)/8, $-.198827088077636213E-04
DATA ·erfcrodataL38<> + 32(SB)/8, $.257805645845475331E-06
DATA ·erfcrodataL38<> + 40(SB)/8, $-.184427218110620284E-09
DATA ·erfcrodataL38<> + 48(SB)/8, $.122408098288933181E-10
DATA ·erfcrodataL38<> + 56(SB)/8, $.484691106751495392E-07
DATA ·erfcrodataL38<> + 64(SB)/8, $-.150147637632890281E-08
DATA ·erfcrodataL38<> + 72(SB)/8, $23.999999999973521625
DATA ·erfcrodataL38<> + 80(SB)/8, $27.226017111108365754
DATA ·erfcrodataL38<> + 88(SB)/8, $-2.0
DATA ·erfcrodataL38<> + 96(SB)/8, $0.100108802034478228E+00
DATA ·erfcrodataL38<> + 104(SB)/8, $0.244588413746558125E+00
DATA ·erfcrodataL38<> + 112(SB)/8, $-.669188879646637174E-01
DATA ·erfcrodataL38<> + 120(SB)/8, $0.151311447000953551E-01
DATA ·erfcrodataL38<> + 128(SB)/8, $-.284720833493302061E-02
DATA ·erfcrodataL38<> + 136(SB)/8, $0.455491239358743212E-03
DATA ·erfcrodataL38<> + 144(SB)/8, $-.631850539280720949E-04
DATA ·erfcrodataL38<> + 152(SB)/8, $0.772532660726086679E-05
DATA ·erfcrodataL38<> + 160(SB)/8, $-.843706007150936940E-06
DATA ·erfcrodataL38<> + 168(SB)/8, $-.735330214904227472E-08
DATA ·erfcrodataL38<> + 176(SB)/8, $0.753002008837084967E-09
DATA ·erfcrodataL38<> + 184(SB)/8, $0.832482036660624637E-07
DATA ·erfcrodataL38<> + 192(SB)/8, $-0.75
DATA ·erfcrodataL38<> + 200(SB)/8, $.927765678007128609E-01
DATA ·erfcrodataL38<> + 208(SB)/8, $.903621209344751506E-01
DATA ·erfcrodataL38<> + 216(SB)/8, $-.344203375025257265E-02
DATA ·erfcrodataL38<> + 224(SB)/8, $-.869243428221791329E-03
DATA ·erfcrodataL38<> + 232(SB)/8, $.174699813107105603E-03
DATA ·erfcrodataL38<> + 240(SB)/8, $.649481036316130000E-05
DATA ·erfcrodataL38<> + 248(SB)/8, $-.895265844897118382E-05
DATA ·erfcrodataL38<> + 256(SB)/8, $.135970046909529513E-05
DATA ·erfcrodataL38<> + 264(SB)/8, $.277617717014748015E-06
DATA ·erfcrodataL38<> + 272(SB)/8, $.810628018408232910E-08
DATA ·erfcrodataL38<> + 280(SB)/8, $.210430084693497985E-07
DATA ·erfcrodataL38<> + 288(SB)/8, $-.342138077525615091E-08
DATA ·erfcrodataL38<> + 296(SB)/8, $-.165467946798610800E-06
DATA ·erfcrodataL38<> + 304(SB)/8, $5.999999999988412824
DATA ·erfcrodataL38<> + 312(SB)/8, $.468542210149072159E-01
DATA ·erfcrodataL38<> + 320(SB)/8, $.465343528567604256E-01
DATA ·erfcrodataL38<> + 328(SB)/8, $-.473338083650201733E-03
DATA ·erfcrodataL38<> + 336(SB)/8, $-.147220659069079156E-03
DATA ·erfcrodataL38<> + 344(SB)/8, $.755284723554388339E-05
DATA ·erfcrodataL38<> + 352(SB)/8, $.116158570631428789E-05
DATA ·erfcrodataL38<> + 360(SB)/8, $-.155445501551602389E-06
DATA ·erfcrodataL38<> + 368(SB)/8, $-.616940119847805046E-10
DATA ·erfcrodataL38<> + 376(SB)/8, $-.728705590727563158E-10
DATA ·erfcrodataL38<> + 384(SB)/8, $-.983452460354586779E-08
DATA ·erfcrodataL38<> + 392(SB)/8, $.365156164194346316E-08
DATA ·erfcrodataL38<> + 400(SB)/8, $11.999999999996530775
DATA ·erfcrodataL38<> + 408(SB)/8, $0.467773498104726584E-02
DATA ·erfcrodataL38<> + 416(SB)/8, $0.206669853540920535E-01
DATA ·erfcrodataL38<> + 424(SB)/8, $0.413339707081841473E-01
DATA ·erfcrodataL38<> + 432(SB)/8, $0.482229658262131320E-01
DATA ·erfcrodataL38<> + 440(SB)/8, $0.344449755901841897E-01
DATA ·erfcrodataL38<> + 448(SB)/8, $0.130890907240765465E-01
DATA ·erfcrodataL38<> + 456(SB)/8, $-.459266344100642687E-03
DATA ·erfcrodataL38<> + 464(SB)/8, $-.337888800856913728E-02
DATA ·erfcrodataL38<> + 472(SB)/8, $-.159103061687062373E-02
DATA ·erfcrodataL38<> + 480(SB)/8, $-.501128905515922644E-04
DATA ·erfcrodataL38<> + 488(SB)/8, $0.262775855852903132E-03
DATA ·erfcrodataL38<> + 496(SB)/8, $0.103860982197462436E-03
DATA ·erfcrodataL38<> + 504(SB)/8, $-.548835785414200775E-05
DATA ·erfcrodataL38<> + 512(SB)/8, $-.157075054646618214E-04
DATA ·erfcrodataL38<> + 520(SB)/8, $-.480056366276045110E-05
DATA ·erfcrodataL38<> + 528(SB)/8, $0.198263013759701555E-05
DATA ·erfcrodataL38<> + 536(SB)/8, $-.224394262958888780E-06
DATA ·erfcrodataL38<> + 544(SB)/8, $-.321853693146683428E-06
DATA ·erfcrodataL38<> + 552(SB)/8, $0.445073894984683537E-07
DATA ·erfcrodataL38<> + 560(SB)/8, $0.660425940000555729E-06
DATA ·erfcrodataL38<> + 568(SB)/8, $2.0
DATA ·erfcrodataL38<> + 576(SB)/8, $8.63616855509444462538e-78
DATA ·erfcrodataL38<> + 584(SB)/8, $1.00000000000000222044
DATA ·erfcrodataL38<> + 592(SB)/8, $0.500000000000004237e+00
DATA ·erfcrodataL38<> + 600(SB)/8, $0.416666664838056960e-01
DATA ·erfcrodataL38<> + 608(SB)/8, $0.166666666630345592e+00
DATA ·erfcrodataL38<> + 616(SB)/8, $0.138926439368309441e-02
DATA ·erfcrodataL38<> + 624(SB)/8, $0.833349307718286047e-02
DATA ·erfcrodataL38<> + 632(SB)/8, $-.693147180558298714e+00
DATA ·erfcrodataL38<> + 640(SB)/8, $-.164659495826017651e-11
DATA ·erfcrodataL38<> + 648(SB)/8, $.179001151181866548E+00
DATA ·erfcrodataL38<> + 656(SB)/8, $-.144269504088896339e+01
DATA ·erfcrodataL38<> + 664(SB)/8, $+281475245147134.9375
DATA ·erfcrodataL38<> + 672(SB)/8, $.163116780021877404E+00
DATA ·erfcrodataL38<> + 680(SB)/8, $-.201574395828120710E-01
DATA ·erfcrodataL38<> + 688(SB)/8, $-.185726336009394125E-02
DATA ·erfcrodataL38<> + 696(SB)/8, $.199349204957273749E-02
DATA ·erfcrodataL38<> + 704(SB)/8, $-.554902415532606242E-03
DATA ·erfcrodataL38<> + 712(SB)/8, $-.638914789660242846E-05
DATA ·erfcrodataL38<> + 720(SB)/8, $-.424441522653742898E-04
DATA ·erfcrodataL38<> + 728(SB)/8, $.827967511921486190E-04
DATA ·erfcrodataL38<> + 736(SB)/8, $.913965446284062654E-05
DATA ·erfcrodataL38<> + 744(SB)/8, $.277344791076320853E-05
DATA ·erfcrodataL38<> + 752(SB)/8, $-.467239678927239526E-06
DATA ·erfcrodataL38<> + 760(SB)/8, $.344814065920419986E-07
DATA ·erfcrodataL38<> + 768(SB)/8, $-.366013491552527132E-05
DATA ·erfcrodataL38<> + 776(SB)/8, $.181242810023783439E-05
DATA ·erfcrodataL38<> + 784(SB)/8, $2.999999999991234567
DATA ·erfcrodataL38<> + 792(SB)/8, $1.0
GLOBL ·erfcrodataL38<> + 0(SB), RODATA, $800
// Table of log correction terms
DATA ·erfctab2069<> + 0(SB)/8, $0.442737824274138381e-01
DATA ·erfctab2069<> + 8(SB)/8, $0.263602189790660309e-01
DATA ·erfctab2069<> + 16(SB)/8, $0.122565642281703586e-01
DATA ·erfctab2069<> + 24(SB)/8, $0.143757052860721398e-02
DATA ·erfctab2069<> + 32(SB)/8, $-.651375034121276075e-02
DATA ·erfctab2069<> + 40(SB)/8, $-.119317678849450159e-01
DATA ·erfctab2069<> + 48(SB)/8, $-.150868749549871069e-01
DATA ·erfctab2069<> + 56(SB)/8, $-.161992609578469234e-01
DATA ·erfctab2069<> + 64(SB)/8, $-.154492360403337917e-01
DATA ·erfctab2069<> + 72(SB)/8, $-.129850717389178721e-01
DATA ·erfctab2069<> + 80(SB)/8, $-.892902649276657891e-02
DATA ·erfctab2069<> + 88(SB)/8, $-.338202636596794887e-02
DATA ·erfctab2069<> + 96(SB)/8, $0.357266307045684762e-02
DATA ·erfctab2069<> + 104(SB)/8, $0.118665304327406698e-01
DATA ·erfctab2069<> + 112(SB)/8, $0.214434994118118914e-01
DATA ·erfctab2069<> + 120(SB)/8, $0.322580645161290314e-01
GLOBL ·erfctab2069<> + 0(SB), RODATA, $128
// Erfc returns the complementary error function of the argument.
//
// Special cases are:
// Erfc(+Inf) = 0
// Erfc(-Inf) = 2
// Erfc(NaN) = NaN
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
// This assembly implementation handles inputs in the range [-2.11, +15].
// For all other inputs we call the generic Go implementation.
TEXT ·erfcAsm(SB), NOSPLIT|NOFRAME, $0-16
MOVD x+0(FP), R1
MOVD $Neg2p11, R2
CMPUBGT R1, R2, usego
FMOVD x+0(FP), F0
MOVD $·erfcrodataL38<>+0(SB), R9
FMOVD F0, F2
SRAD $48, R1
MOVH R1, R2
ANDW $0x7FFF, R1
MOVH $Pos15, R3
CMPW R1, R3
BGT usego
MOVH $0x3FFF, R3
MOVW R1, R6
MOVW R3, R7
CMPBGT R6, R7, L2
MOVH $0x3FEF, R3
MOVW R3, R7
CMPBGT R6, R7, L3
MOVH $0x2FFF, R2
MOVW R2, R7
CMPBGT R6, R7, L4
FMOVD 792(R9), F0
WFSDB V2, V0, V2
FMOVD F2, ret+8(FP)
RET
L2:
LTDBR F0, F0
MOVH $0x0, R4
BLTU L3
FMOVD F0, F1
L9:
MOVH $0x400F, R3
MOVW R1, R6
MOVW R3, R7
CMPBGT R6, R7, L10
FMOVD 784(R9), F3
FSUB F1, F3
VLEG $0, 776(R9), V20
WFDDB V1, V3, V6
VLEG $0, 768(R9), V18
FMOVD 760(R9), F7
FMOVD 752(R9), F5
VLEG $0, 744(R9), V16
FMOVD 736(R9), F3
FMOVD 728(R9), F2
FMOVD 720(R9), F4
WFMDB V6, V6, V1
FMUL F0, F0
MOVH $0x0, R3
WFMADB V1, V7, V20, V7
WFMADB V1, V5, V18, V5
WFMADB V1, V7, V16, V7
WFMADB V1, V5, V3, V5
WFMADB V1, V7, V4, V7
WFMADB V1, V5, V2, V5
FMOVD 712(R9), F2
WFMADB V1, V7, V2, V7
FMOVD 704(R9), F2
WFMADB V1, V5, V2, V5
FMOVD 696(R9), F2
WFMADB V1, V7, V2, V7
FMOVD 688(R9), F2
MOVH $0x0, R1
WFMADB V1, V5, V2, V5
FMOVD 680(R9), F2
WFMADB V1, V7, V2, V7
FMOVD 672(R9), F2
WFMADB V1, V5, V2, V1
FMOVD 664(R9), F3
WFMADB V6, V7, V1, V7
FMOVD 656(R9), F5
FMOVD 648(R9), F2
WFMADB V0, V5, V3, V5
WFMADB V6, V7, V2, V7
L11:
LGDR F5, R6
WFSDB V0, V0, V2
WORD $0xED509298 //sdb %f5,.L55-.L38(%r9)
BYTE $0x00
BYTE $0x1B
FMOVD 640(R9), F6
FMOVD 632(R9), F4
WFMSDB V5, V6, V2, V6
WFMSDB V5, V4, V0, V4
FMOVD 624(R9), F2
FADD F6, F4
FMOVD 616(R9), F0
FMOVD 608(R9), F6
WFMADB V4, V0, V2, V0
FMOVD 600(R9), F3
WFMDB V4, V4, V2
MOVH R6,R6
ADD R6, R3
WFMADB V4, V3, V6, V3
FMOVD 592(R9), F6
WFMADB V0, V2, V3, V0
FMOVD 584(R9), F3
WFMADB V4, V6, V3, V6
RISBGZ $57, $60, $3, R3, R12
WFMADB V2, V0, V6, V0
MOVD $·erfctab2069<>+0(SB), R5
WORD $0x682C5000 //ld %f2,0(%r12,%r5)
FMADD F2, F4, F4
RISBGN $0, $15, $48, R3, R4
WFMADB V4, V0, V2, V4
LDGR R4, F2
FMADD F4, F2, F2
MOVW R2, R6
CMPBLE R6, $0, L20
MOVW R1, R6
CMPBEQ R6, $0, L21
WORD $0xED709240 //mdb %f7,.L66-.L38(%r9)
BYTE $0x00
BYTE $0x1C
L21:
FMUL F7, F2
L1:
FMOVD F2, ret+8(FP)
RET
L3:
LTDBR F0, F0
BLTU L30
FMOVD 568(R9), F2
WFSDB V0, V2, V0
L8:
WFMDB V0, V0, V4
FMOVD 560(R9), F2
FMOVD 552(R9), F6
FMOVD 544(R9), F1
WFMADB V4, V6, V2, V6
FMOVD 536(R9), F2
WFMADB V4, V1, V2, V1
FMOVD 528(R9), F3
FMOVD 520(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 512(R9), F3
FMOVD 504(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 496(R9), F3
FMOVD 488(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 480(R9), F3
FMOVD 472(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 464(R9), F3
FMOVD 456(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 448(R9), F3
FMOVD 440(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 432(R9), F3
FMOVD 424(R9), F2
WFMADB V4, V6, V3, V6
WFMADB V4, V1, V2, V1
FMOVD 416(R9), F3
FMOVD 408(R9), F2
WFMADB V4, V6, V3, V6
FMADD F1, F4, F2
FMADD F6, F0, F2
MOVW R2, R6
CMPBGE R6, $0, L1
FMOVD 568(R9), F0
WFSDB V2, V0, V2
BR L1
L10:
MOVH $0x401F, R3
MOVW R1, R6
MOVW R3, R7
CMPBLE R6, R7, L36
MOVH $0x402F, R3
MOVW R3, R7
CMPBGT R6, R7, L13
FMOVD 400(R9), F3
FSUB F1, F3
VLEG $0, 392(R9), V20
WFDDB V1, V3, V6
VLEG $0, 384(R9), V18
FMOVD 376(R9), F2
FMOVD 368(R9), F4
VLEG $0, 360(R9), V16
FMOVD 352(R9), F7
FMOVD 344(R9), F3
FMUL F0, F0
WFMDB V6, V6, V1
FMOVD 656(R9), F5
MOVH $0x0, R3
WFMADB V1, V2, V20, V2
WFMADB V1, V4, V18, V4
WFMADB V1, V2, V16, V2
WFMADB V1, V4, V7, V4
WFMADB V1, V2, V3, V2
FMOVD 336(R9), F3
WFMADB V1, V4, V3, V4
FMOVD 328(R9), F3
WFMADB V1, V2, V3, V2
FMOVD 320(R9), F3
WFMADB V1, V4, V3, V1
FMOVD 312(R9), F7
WFMADB V6, V2, V1, V2
MOVH $0x0, R1
FMOVD 664(R9), F3
FMADD F2, F6, F7
WFMADB V0, V5, V3, V5
BR L11
L35:
WORD $0xB3130010 //lcdbr %f1,%f0
BR L9
L36:
FMOVD 304(R9), F3
FSUB F1, F3
VLEG $0, 296(R9), V20
WFDDB V1, V3, V6
FMOVD 288(R9), F5
FMOVD 280(R9), F1
FMOVD 272(R9), F2
VLEG $0, 264(R9), V18
VLEG $0, 256(R9), V16
FMOVD 248(R9), F3
FMOVD 240(R9), F4
WFMDB V6, V6, V7
FMUL F0, F0
MOVH $0x0, R3
FMADD F5, F7, F1
WFMADB V7, V2, V20, V2
WFMADB V7, V1, V18, V1
WFMADB V7, V2, V16, V2
WFMADB V7, V1, V3, V1
WFMADB V7, V2, V4, V2
FMOVD 232(R9), F4
WFMADB V7, V1, V4, V1
FMOVD 224(R9), F4
WFMADB V7, V2, V4, V2
FMOVD 216(R9), F4
WFMADB V7, V1, V4, V1
FMOVD 208(R9), F4
MOVH $0x0, R1
WFMADB V7, V2, V4, V7
FMOVD 656(R9), F5
WFMADB V6, V1, V7, V1
FMOVD 664(R9), F3
FMOVD 200(R9), F7
WFMADB V0, V5, V3, V5
FMADD F1, F6, F7
BR L11
L4:
FMOVD 192(R9), F1
FMADD F0, F0, F1
FMOVD 184(R9), F3
WFMDB V1, V1, V0
FMOVD 176(R9), F4
FMOVD 168(R9), F6
WFMADB V0, V4, V3, V4
FMOVD 160(R9), F3
WFMADB V0, V6, V3, V6
FMOVD 152(R9), F5
FMOVD 144(R9), F3
WFMADB V0, V4, V5, V4
WFMADB V0, V6, V3, V6
FMOVD 136(R9), F5
FMOVD 128(R9), F3
WFMADB V0, V4, V5, V4
WFMADB V0, V6, V3, V6
FMOVD 120(R9), F5
FMOVD 112(R9), F3
WFMADB V0, V4, V5, V4
WFMADB V0, V6, V3, V6
FMOVD 104(R9), F5
FMOVD 96(R9), F3
WFMADB V0, V4, V5, V4
WFMADB V0, V6, V3, V0
FMOVD F2, F6
FMADD F4, F1, F0
WORD $0xED609318 //sdb %f6,.L39-.L38(%r9)
BYTE $0x00
BYTE $0x1B
WFMSDB V2, V0, V6, V2
FMOVD F2, ret+8(FP)
RET
L30:
WORD $0xED009238 //adb %f0,.L67-.L38(%r9)
BYTE $0x00
BYTE $0x1A
BR L8
L20:
FMOVD 88(R9), F0
WFMADB V7, V2, V0, V2
WORD $0xB3130022 //lcdbr %f2,%f2
FMOVD F2, ret+8(FP)
RET
L13:
MOVH $0x403A, R3
MOVW R1, R6
MOVW R3, R7
CMPBLE R6, R7, L4
WORD $0xED109050 //cdb %f1,.L128-.L38(%r9)
BYTE $0x00
BYTE $0x19
BGE L37
BVS L37
FMOVD 72(R9), F6
FSUB F1, F6
MOVH $0x1000, R3
FDIV F1, F6
MOVH $0x1000, R1
L17:
WFMDB V6, V6, V1
FMOVD 64(R9), F2
FMOVD 56(R9), F4
FMOVD 48(R9), F3
WFMADB V1, V3, V2, V3
FMOVD 40(R9), F2
WFMADB V1, V2, V4, V2
FMOVD 32(R9), F4
WFMADB V1, V3, V4, V3
FMOVD 24(R9), F4
WFMADB V1, V2, V4, V2
FMOVD 16(R9), F4
WFMADB V1, V3, V4, V3
FMOVD 8(R9), F4
WFMADB V1, V2, V4, V1
FMUL F0, F0
WFMADB V3, V6, V1, V3
FMOVD 656(R9), F5
FMOVD 664(R9), F4
FMOVD 0(R9), F7
WFMADB V0, V5, V4, V5
FMADD F6, F3, F7
BR L11
L14:
FMOVD 72(R9), F6
FSUB F1, F6
MOVH $0x403A, R3
FDIV F1, F6
MOVW R1, R6
MOVW R3, R7
CMPBEQ R6, R7, L23
MOVH $0x0, R3
MOVH $0x0, R1
BR L17
L37:
WFCEDBS V0, V0, V0
BVS L1
MOVW R2, R6
CMPBLE R6, $0, L18
MOVH $0x7FEF, R2
MOVW R1, R6
MOVW R2, R7
CMPBGT R6, R7, L24
WORD $0xA5400010 //iihh %r4,16
LDGR R4, F2
FMUL F2, F2
BR L1
L23:
MOVH $0x1000, R3
MOVH $0x1000, R1
BR L17
L24:
FMOVD $0, F2
BR L1
L18:
MOVH $0x7FEF, R2
MOVW R1, R6
MOVW R2, R7
CMPBGT R6, R7, L25
WORD $0xA5408010 //iihh %r4,32784
FMOVD 568(R9), F2
LDGR R4, F0
FMADD F2, F0, F2
BR L1
L25:
FMOVD 568(R9), F2
BR L1
usego:
BR ·erfc(SB)