// blob: 20d9fe6f55cfda3abb93fa0b490d38a8e4eed236 [file] [log] [blame]
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
#include "textflag.h"
// Minimax polynomial coefficients and other constants
//
// ·cbrtrodataL9 is a read-only table of nine float64 values (72 bytes).
// cbrtAsm keeps its address in R9 and reads the entries by byte offset.
//
// Offsets 0-40: values loaded by cbrtAsm (into F3, F5, F3, F1, F4, F6
// respectively) during the floating-point evaluation that follows the
// table lookups.
DATA ·cbrtrodataL9<> + 0(SB)/8, $-.00016272731015974436E+00
DATA ·cbrtrodataL9<> + 8(SB)/8, $0.66639548758285293179E+00
DATA ·cbrtrodataL9<> + 16(SB)/8, $0.55519402697349815993E+00
DATA ·cbrtrodataL9<> + 24(SB)/8, $0.49338566048766782004E+00
DATA ·cbrtrodataL9<> + 32(SB)/8, $0.45208160036325611486E+00
DATA ·cbrtrodataL9<> + 40(SB)/8, $0.43099892837778637816E+00
// Offset 48: 1 + 2^-12, loaded into F5.
DATA ·cbrtrodataL9<> + 48(SB)/8, $1.000244140625
// Offset 56: 1/3, loaded into F1.
DATA ·cbrtrodataL9<> + 56(SB)/8, $0.33333333333333333333E+00
// Offset 64: 2^96. Non-zero inputs whose exponent field is zero are
// multiplied by this value (mdb with displacement 64 off R9) before the
// table lookups.
DATA ·cbrtrodataL9<> + 64(SB)/8, $79228162514264337593543950336.
GLOBL ·cbrtrodataL9<> + 0(SB), RODATA, $72
// Index tables
//
// ·cbrttab32069: 80 one-byte entries, read by cbrtAsm with a single-byte
// zero-extending load (llc). The byte index is
//	((s & 7) << 4) | m
// where s is the sum of the halfwords fetched from ·cbrttab12067 and
// ·cbrttab22068, and m is the top four fraction bits of the input's high
// word. The byte that is read is then added to s.
//
// Several literals are written without their leading zero digit; each DATA
// directive still defines a full 8 bytes (e.g. 0x404030303020202 is
// 0x0404030303020202). On big-endian s390x the most significant byte of
// each literal is the lowest-addressed table entry.
DATA ·cbrttab32069<> + 0(SB)/8, $0x404030303020202
DATA ·cbrttab32069<> + 8(SB)/8, $0x101010101000000
DATA ·cbrttab32069<> + 16(SB)/8, $0x808070706060605
DATA ·cbrttab32069<> + 24(SB)/8, $0x505040404040303
DATA ·cbrttab32069<> + 32(SB)/8, $0xe0d0c0c0b0b0b0a
DATA ·cbrttab32069<> + 40(SB)/8, $0xa09090908080808
DATA ·cbrttab32069<> + 48(SB)/8, $0x11111010100f0f0f
DATA ·cbrttab32069<> + 56(SB)/8, $0xe0e0e0e0e0d0d0d
DATA ·cbrttab32069<> + 64(SB)/8, $0x1515141413131312
DATA ·cbrttab32069<> + 72(SB)/8, $0x1212111111111010
GLOBL ·cbrttab32069<> + 0(SB), RODATA, $80
// ·cbrttab22068: 64 halfword (2-byte) entries, 128 bytes in total.
// cbrtAsm adds one entry (ah) to the halfword it fetched from
// ·cbrttab12067. The entry is selected by the low six bits of the input's
// biased exponent field, scaled by two to form a byte offset.
//
// Literals shorter than 16 hex digits have implicit leading zeros; each
// DATA directive defines 8 bytes, i.e. four halfwords. On big-endian s390x
// the most significant halfword of each literal is the lowest-addressed
// entry.
DATA ·cbrttab22068<> + 0(SB)/8, $0x151015001420141
DATA ·cbrttab22068<> + 8(SB)/8, $0x140013201310130
DATA ·cbrttab22068<> + 16(SB)/8, $0x122012101200112
DATA ·cbrttab22068<> + 24(SB)/8, $0x111011001020101
DATA ·cbrttab22068<> + 32(SB)/8, $0x10000f200f100f0
DATA ·cbrttab22068<> + 40(SB)/8, $0xe200e100e000d2
DATA ·cbrttab22068<> + 48(SB)/8, $0xd100d000c200c1
DATA ·cbrttab22068<> + 56(SB)/8, $0xc000b200b100b0
DATA ·cbrttab22068<> + 64(SB)/8, $0xa200a100a00092
DATA ·cbrttab22068<> + 72(SB)/8, $0x91009000820081
DATA ·cbrttab22068<> + 80(SB)/8, $0x80007200710070
DATA ·cbrttab22068<> + 88(SB)/8, $0x62006100600052
DATA ·cbrttab22068<> + 96(SB)/8, $0x51005000420041
DATA ·cbrttab22068<> + 104(SB)/8, $0x40003200310030
DATA ·cbrttab22068<> + 112(SB)/8, $0x22002100200012
DATA ·cbrttab22068<> + 120(SB)/8, $0x11001000020001
GLOBL ·cbrttab22068<> + 0(SB), RODATA, $128
// ·cbrttab12067: 64 halfword (2-byte) entries, 128 bytes in total.
// cbrtAsm loads one entry (lh) selected by the top six bits of the input's
// high word, i.e. the sign bit followed by the five most significant bits
// of the biased exponent, scaled by two to form a byte offset.
//
// Entries 0-31 (offsets 0-63) are reached when the sign bit is clear.
// Entries 32-63 (offsets 64-127) are reached when it is set; they repeat
// the first half with bit 15 set (0x53e1 becomes 0xd3e1, and so on).
DATA ·cbrttab12067<> + 0(SB)/8, $0x53e1529051324fe1
DATA ·cbrttab12067<> + 8(SB)/8, $0x4e904d324be14a90
DATA ·cbrttab12067<> + 16(SB)/8, $0x493247e146904532
DATA ·cbrttab12067<> + 24(SB)/8, $0x43e1429041323fe1
DATA ·cbrttab12067<> + 32(SB)/8, $0x3e903d323be13a90
DATA ·cbrttab12067<> + 40(SB)/8, $0x393237e136903532
DATA ·cbrttab12067<> + 48(SB)/8, $0x33e1329031322fe1
DATA ·cbrttab12067<> + 56(SB)/8, $0x2e902d322be12a90
DATA ·cbrttab12067<> + 64(SB)/8, $0xd3e1d290d132cfe1
DATA ·cbrttab12067<> + 72(SB)/8, $0xce90cd32cbe1ca90
DATA ·cbrttab12067<> + 80(SB)/8, $0xc932c7e1c690c532
DATA ·cbrttab12067<> + 88(SB)/8, $0xc3e1c290c132bfe1
DATA ·cbrttab12067<> + 96(SB)/8, $0xbe90bd32bbe1ba90
DATA ·cbrttab12067<> + 104(SB)/8, $0xb932b7e1b690b532
DATA ·cbrttab12067<> + 112(SB)/8, $0xb3e1b290b132afe1
DATA ·cbrttab12067<> + 120(SB)/8, $0xae90ad32abe1aa90
GLOBL ·cbrttab12067<> + 0(SB), RODATA, $128
// Cbrt returns the cube root of the argument.
//
// Special cases are:
//	Cbrt(±0) = ±0
//	Cbrt(±Inf) = ±Inf
//	Cbrt(NaN) = NaN
//
// The algorithm used is minimax polynomial approximation
// with coefficients determined with a Remez exchange algorithm.
//
// Frame layout ($0-16): no locals; x is the float64 argument at 0(FP) and
// ret is the float64 result at 8(FP).
//
// Outline:
//  1. Classify x by the high 32 bits of its bit pattern with the sign
//     removed. Inf/NaN and ±0 are returned unchanged.
//  2. Inputs with a zero exponent field are first multiplied by the
//     constant at offset 64 of the table (2^96), and R4 is set to 0x200 so
//     that the scaling is compensated for in the starting value.
//  3. Three table lookups build the top 16 bits of a starting value.
//  4. The starting value is refined with floating-point arithmetic using
//     the constants at offsets 0-56 of ·cbrtrodataL9.
//
// Registers: F0 = x (later the result), R9 = &cbrtrodataL9, R2 = high word
// of the (possibly scaled) input, R4 = 0 or 0x200, R1/R3/R5-R7 scratch,
// F1-F6 scratch.
//
// Several instructions are emitted as raw WORD/BYTE opcodes; the trailing
// comment on each gives the z/Architecture mnemonic it encodes. Six-byte
// instructions take one WORD followed by two BYTE directives.
TEXT ·cbrtAsm(SB), NOSPLIT, $0-16
FMOVD x+0(FP), F0
MOVD $·cbrtrodataL9<>+0(SB), R9
// R2 = raw 64-bit pattern of x.
WORD $0xB3CD0020 //lgdr %r2, %f0
// R3 = 0x000FFFFF: the largest high word whose exponent field is zero.
WORD $0xC039000F //iilf %r3,1048575
BYTE $0xFF
BYTE $0xFF
// R2 = high word of x (sign, 11 exponent bits, top 20 fraction bits).
SRAD $32, R2
// R1 = low 31 bits of R2, i.e. the high word with the sign bit cleared.
WORD $0xB9170012 //llgtr %r1,%r2
MOVW R1, R6
MOVW R3, R7
// Exponent field is zero: x is ±0 or subnormal.
CMPBLE R6, R7, L2
// R3 = 0x7FEFFFFF: the largest high word of a finite value.
WORD $0xC0397FEF //iilf %r3,2146435071
BYTE $0xFF
BYTE $0xFF
MOVW R3, R7
// Finite, normal x: continue at L8. Anything larger is ±Inf or NaN and
// falls through to L1.
CMPBLE R6, R7, L8
// L1: return the input unchanged (±Inf, NaN, and ±0 via the BEQ below).
L1:
FMOVD F0, ret+8(FP)
RET
// L3 is not referenced anywhere in this function.
L3:
// L2: zero-exponent inputs.
L2:
// Load-and-test sets the condition code from F0; a zero of either sign is
// returned as is.
WORD $0xB3120000 //ltdbr %f0,%f0
BEQ L1
// F2 = x * 2^96 (the displacement in the encoding is 0x040 = offset 64).
// F0 keeps the original x.
FMOVD F0, F2
WORD $0xED209040 //mdb %f2,.L10-.L9(%r9)
BYTE $0x00
BYTE $0x1C
// R4 = 0x200 is added to the 16-bit starting value below; this is +32 in
// its exponent field. NOTE(review): presumably this undoes the 2^96 scaling
// (cube root of 2^96 is 2^32) - confirm.
MOVH $0x200, R4
// R2 = high word of the scaled value.
WORD $0xB3CD0022 //lgdr %r2, %f2
SRAD $32, R2
// L4: common path. R2 = high word of the value used for the lookups,
// R4 = exponent adjustment (0 on the normal path, 0x200 after scaling).
L4:
// R3 = 2 * (top six bits of the high word: sign + exponent bits 10-6),
// a byte offset into the halfword table ·cbrttab12067.
WORD $0xEC3239BE //risbg %r3,%r2,57,128+62,64-25
BYTE $0x27
BYTE $0x55
MOVD $·cbrttab12067<>+0(SB), R1
// R1 = halfword entry from ·cbrttab12067.
WORD $0x48131000 //lh %r1,0(%r3,%r1)
// R3 = 2 * (low six bits of the exponent field), a byte offset into the
// halfword table ·cbrttab22068.
WORD $0xEC3239BE //risbg %r3,%r2,57,128+62,64-19
BYTE $0x2D
BYTE $0x55
MOVD $·cbrttab22068<>+0(SB), R5
// R2 = top four fraction bits of the high word (all other bits zeroed).
WORD $0xEC223CBF //risbgn %r2,%r2,64-4,128+63,64+44+4
BYTE $0x70
BYTE $0x59
// R1 += halfword entry from ·cbrttab22068.
WORD $0x4A135000 //ah %r1,0(%r3,%r5)
// R3 = R1 (the summed table value), freeing R1 for the next table address.
BYTE $0x18 //lr %r3,%r1
BYTE $0x31
MOVD $·cbrttab32069<>+0(SB), R1
// F1 = 1/3, F5 = 1 + 2^-12 (table offsets 56 and 48).
FMOVD 56(R9), F1
FMOVD 48(R9), F5
// R2 |= (R3 & 7) << 4: the low three bits of the summed value become
// bits 4-6 of the byte index into ·cbrttab32069.
WORD $0xEC23393B //rosbg %r2,%r3,57,59,4
BYTE $0x04
BYTE $0x56
// R1 = zero-extended byte from ·cbrttab32069.
WORD $0xE3121000 //llc %r1,0(%r2,%r1)
BYTE $0x00
BYTE $0x94
// R1 = table byte + summed halfwords + exponent adjustment.
ADDW R3, R1
ADDW R4, R1
// Shift left by 48 in total so the low 16 bits of R1 become the top 16
// bits (sign, exponent, leading fraction bits) of a float64 pattern.
SLW $16, R1, R1
SLD $32, R1, R1
// F2 = starting value built from the tables (overwrites the scaled copy
// of x on the subnormal path).
// NOTE(review): working the tables by hand for x = 1 and x = 8 gives 1.0
// and 0.5, so this looks like an approximation of x^(-1/3) - confirm.
WORD $0xB3C10021 //ldgr %f2,%r1
// Refinement of the starting value using x (F0) and 1/3 (F1), followed by
// evaluation of the polynomial. Coefficient loads from offsets 40, 32, 24,
// 16, 8 and 0 of the constant table are interleaved with the arithmetic.
// NOTE(review): the exact expression depends on the Go s390x operand order
// for the fused multiply-add/subtract forms (last operand is assumed to be
// the destination) - verify before changing the instruction order.
WFMDB V2, V2, V4
WFMDB V4, V0, V6
WFMSDB V4, V6, V2, V4
FMOVD 40(R9), F6
FMSUB F1, F4, F2
FMOVD 32(R9), F4
WFMDB V2, V2, V3
FMOVD 24(R9), F1
FMUL F3, F0
FMOVD 16(R9), F3
WFMADB V2, V0, V5, V2
FMOVD 8(R9), F5
FMADD F6, F2, F4
WFMADB V2, V1, V3, V1
WFMDB V2, V2, V6
FMOVD 0(R9), F3
WFMADB V4, V6, V1, V4
WFMADB V2, V5, V3, V2
FMADD F4, F6, F2
FMADD F2, F0, F0
// Store the result held in F0.
FMOVD F0, ret+8(FP)
RET
// L8: finite, normal input. No scaling was applied, so the exponent
// adjustment is zero; R2 still holds the high word of x from the entry code.
L8:
MOVH $0x0, R4
BR L4