math: implement trignometric range reduction for huge arguments
This change implements Payne-Hanek range reduction by Pi/4
to properly calculate trigonometric functions of huge arguments.
The implementation is based on:
"ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
K. C. Ng et al, March 24, 1992
The major difference with the reference is that the simulated
multi-precision calculation of x*B is implemented using 64-bit
integer arithmetic rather than floating point to ease extraction
of the relevant bits of 4/Pi.
The assembly implementations for 386 were removed since the trigonometric
instructions only use a 66-bit representation of Pi internally for
reduction. It is not possible to use these instructions and maintain
accuracy without a prior accurate reduction in software as recommended
by Intel.
Fixes #6794
Change-Id: I31bf1369e0578891d738c5473447fe9b10560196
Reviewed-on: https://go-review.googlesource.com/c/153059
Reviewed-by: Robert Griesemer <gri@golang.org>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
diff --git a/src/cmd/go/go_test.go b/src/cmd/go/go_test.go
index d16ab3d..e2ddc58 100644
--- a/src/cmd/go/go_test.go
+++ b/src/cmd/go/go_test.go
@@ -1771,11 +1771,11 @@
if runtime.Compiler != "gccgo" {
// Check the list is in dependency order.
tg.run("list", "-deps", "math")
- want := "internal/cpu\nunsafe\nmath\n"
+ want := "internal/cpu\nunsafe\nmath/bits\nmath\n"
out := tg.stdout.String()
if !strings.Contains(out, "internal/cpu") {
// Some systems don't use internal/cpu.
- want = "unsafe\nmath\n"
+ want = "unsafe\nmath/bits\nmath\n"
}
if tg.stdout.String() != want {
t.Fatalf("list -deps math: wrong order\nhave %q\nwant %q", tg.stdout.String(), want)
diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go
index 3a70991..2c29a3e 100644
--- a/src/go/build/deps_test.go
+++ b/src/go/build/deps_test.go
@@ -61,7 +61,7 @@
// L1 adds simple functions and strings processing,
// but not Unicode tables.
- "math": {"internal/cpu", "unsafe"},
+ "math": {"internal/cpu", "unsafe", "math/bits"},
"math/bits": {"unsafe"},
"math/cmplx": {"math"},
"math/rand": {"L0", "math"},
diff --git a/src/math/all_test.go b/src/math/all_test.go
index 6a6d8bf..5716048 100644
--- a/src/math/all_test.go
+++ b/src/math/all_test.go
@@ -175,6 +175,48 @@
-2.51772931436786954751e-01,
-7.3924135157173099849e-01,
}
+
+// Inputs to test trig_reduce
+var trigHuge = []float64{
+ 1 << 120,
+ 1 << 240,
+ 1 << 480,
+ 1234567891234567 << 180,
+ 1234567891234567 << 300,
+ MaxFloat64,
+}
+
+// Results for trigHuge[i] calculated with https://github.com/robpike/ivy
+// using 4096 bits of working precision. Values requiring less than
+// 102 decimal digits (1 << 120, 1 << 240, 1 << 480, 1234567891234567 << 180)
+// were confirmed via https://keisan.casio.com/
+var cosHuge = []float64{
+ -0.92587902285483787,
+ 0.93601042593353793,
+ -0.28282777640193788,
+ -0.14616431394103619,
+ -0.79456058210671406,
+ -0.99998768942655994,
+}
+
+var sinHuge = []float64{
+ 0.37782010936075202,
+ -0.35197227524865778,
+ 0.95917070894368716,
+ 0.98926032637023618,
+ -0.60718488235646949,
+ 0.00496195478918406,
+}
+
+var tanHuge = []float64{
+ -0.40806638884180424,
+ -0.37603456702698076,
+ -3.39135965054779932,
+ -6.76813854009065030,
+ 0.76417695016604922,
+ -0.00496201587444489,
+}
+
var cosh = []float64{
7.2668796942212842775517446e+01,
1.1479413465659254502011135e+03,
@@ -3026,6 +3068,84 @@
}
}
+// Check that trigReduce matches the standard reduction results for input values
+// below reduceThreshold.
+func TestTrigReduce(t *testing.T) {
+ inputs := make([]float64, len(vf))
+ // all of the standard inputs
+ copy(inputs, vf)
+ // all of the large inputs
+ large := float64(100000 * Pi)
+ for _, v := range vf {
+ inputs = append(inputs, v+large)
+ }
+ // Also test some special inputs, Pi and right below the reduceThreshold
+ inputs = append(inputs, Pi, Nextafter(ReduceThreshold, 0))
+ for _, x := range inputs {
+ // reduce the value to compare
+ j, z := TrigReduce(x)
+ xred := float64(j)*(Pi/4) + z
+
+ if f, fred := Sin(x), Sin(xred); !close(f, fred) {
+ t.Errorf("Sin(trigReduce(%g)) != Sin(%g), got %g, want %g", x, x, fred, f)
+ }
+ if f, fred := Cos(x), Cos(xred); !close(f, fred) {
+ t.Errorf("Cos(trigReduce(%g)) != Cos(%g), got %g, want %g", x, x, fred, f)
+ }
+ if f, fred := Tan(x), Tan(xred); !close(f, fred) {
+ t.Errorf(" Tan(trigReduce(%g)) != Tan(%g), got %g, want %g", x, x, fred, f)
+ }
+ f, g := Sincos(x)
+ fred, gred := Sincos(xred)
+ if !close(f, fred) || !close(g, gred) {
+ t.Errorf(" Sincos(trigReduce(%g)) != Sincos(%g), got %g, %g, want %g, %g", x, x, fred, gred, f, g)
+ }
+ }
+}
+
+// Check that trig values of huge angles return accurate results.
+// This confirms that argument reduction works for very large values
+// up to MaxFloat64.
+func TestHugeCos(t *testing.T) {
+ for i := 0; i < len(trigHuge); i++ {
+ f1 := cosHuge[i]
+ f2 := Cos(trigHuge[i])
+ if !close(f1, f2) {
+ t.Errorf("Cos(%g) = %g, want %g", trigHuge[i], f2, f1)
+ }
+ }
+}
+
+func TestHugeSin(t *testing.T) {
+ for i := 0; i < len(trigHuge); i++ {
+ f1 := sinHuge[i]
+ f2 := Sin(trigHuge[i])
+ if !close(f1, f2) {
+ t.Errorf("Sin(%g) = %g, want %g", trigHuge[i], f2, f1)
+ }
+ }
+}
+
+func TestHugeSinCos(t *testing.T) {
+ for i := 0; i < len(trigHuge); i++ {
+ f1, g1 := sinHuge[i], cosHuge[i]
+ f2, g2 := Sincos(trigHuge[i])
+ if !close(f1, f2) || !close(g1, g2) {
+ t.Errorf("Sincos(%g) = %g, %g, want %g, %g", trigHuge[i], f2, g2, f1, g1)
+ }
+ }
+}
+
+func TestHugeTan(t *testing.T) {
+ for i := 0; i < len(trigHuge); i++ {
+ f1 := tanHuge[i]
+ f2 := Tan(trigHuge[i])
+ if !close(f1, f2) {
+ t.Errorf("Tan(%g) = %g, want %g", trigHuge[i], f2, f1)
+ }
+ }
+}
+
// Check that math constants are accepted by compiler
// and have right value (assumes strconv.ParseFloat works).
// https://golang.org/issue/201
diff --git a/src/math/export_test.go b/src/math/export_test.go
index 368308e..5f15bdb 100644
--- a/src/math/export_test.go
+++ b/src/math/export_test.go
@@ -9,3 +9,5 @@
var Exp2Go = exp2
var HypotGo = hypot
var SqrtGo = sqrt
+var ReduceThreshold = reduceThreshold
+var TrigReduce = trigReduce
diff --git a/src/math/sin.go b/src/math/sin.go
index 929cac3..cc8b136 100644
--- a/src/math/sin.go
+++ b/src/math/sin.go
@@ -118,10 +118,9 @@
func cos(x float64) float64 {
const (
- PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
- PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
- PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
- M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi
+ PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
+ PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
+ PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
)
// special cases
switch {
@@ -133,15 +132,23 @@
sign := false
x = Abs(x)
- j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle
- y := float64(j) // integer part of x/(Pi/4), as float
+ var j uint64
+ var y, z float64
+ if x >= reduceThreshold {
+ j, z = trigReduce(x)
+ } else {
+ j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+ y = float64(j) // integer part of x/(Pi/4), as float
- // map zeros to origin
- if j&1 == 1 {
- j++
- y++
+ // map zeros to origin
+ if j&1 == 1 {
+ j++
+ y++
+ }
+ j &= 7 // octant modulo 2Pi radians (360 degrees)
+ z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
}
- j &= 7 // octant modulo 2Pi radians (360 degrees)
+
if j > 3 {
j -= 4
sign = !sign
@@ -150,7 +157,6 @@
sign = !sign
}
- z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
zz := z * z
if j == 1 || j == 2 {
y = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
@@ -173,10 +179,9 @@
func sin(x float64) float64 {
const (
- PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
- PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
- PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
- M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi
+ PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
+ PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
+ PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
)
// special cases
switch {
@@ -193,22 +198,27 @@
sign = true
}
- j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle
- y := float64(j) // integer part of x/(Pi/4), as float
+ var j uint64
+ var y, z float64
+ if x >= reduceThreshold {
+ j, z = trigReduce(x)
+ } else {
+ j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+ y = float64(j) // integer part of x/(Pi/4), as float
- // map zeros to origin
- if j&1 == 1 {
- j++
- y++
+ // map zeros to origin
+ if j&1 == 1 {
+ j++
+ y++
+ }
+ j &= 7 // octant modulo 2Pi radians (360 degrees)
+ z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
}
- j &= 7 // octant modulo 2Pi radians (360 degrees)
// reflect in x axis
if j > 3 {
sign = !sign
j -= 4
}
-
- z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
zz := z * z
if j == 1 || j == 2 {
y = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
diff --git a/src/math/sin_386.s b/src/math/sin_386.s
index 45d12e0..cf7679d 100644
--- a/src/math/sin_386.s
+++ b/src/math/sin_386.s
@@ -6,42 +6,8 @@
// func Cos(x float64) float64
TEXT ·Cos(SB),NOSPLIT,$0
- FMOVD x+0(FP), F0 // F0=x
- FCOS // F0=cos(x) if -2**63 < x < 2**63
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE 3(PC) // jump if x outside range
- FMOVDP F0, ret+8(FP)
- RET
- FLDPI // F0=Pi, F1=x
- FADDD F0, F0 // F0=2*Pi, F1=x
- FXCHD F0, F1 // F0=x, F1=2*Pi
- FPREM1 // F0=reduced_x, F1=2*Pi
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE -3(PC) // jump if reduction incomplete
- FMOVDP F0, F1 // F0=reduced_x
- FCOS // F0=cos(reduced_x)
- FMOVDP F0, ret+8(FP)
- RET
+ JMP ·cos(SB)
// func Sin(x float64) float64
TEXT ·Sin(SB),NOSPLIT,$0
- FMOVD x+0(FP), F0 // F0=x
- FSIN // F0=sin(x) if -2**63 < x < 2**63
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE 3(PC) // jump if x outside range
- FMOVDP F0, ret+8(FP)
- RET
- FLDPI // F0=Pi, F1=x
- FADDD F0, F0 // F0=2*Pi, F1=x
- FXCHD F0, F1 // F0=x, F1=2*Pi
- FPREM1 // F0=reduced_x, F1=2*Pi
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE -3(PC) // jump if reduction incomplete
- FMOVDP F0, F1 // F0=reduced_x
- FSIN // F0=sin(reduced_x)
- FMOVDP F0, ret+8(FP)
- RET
+ JMP ·sin(SB)
diff --git a/src/math/sincos.go b/src/math/sincos.go
index 3ae193a..c002db6 100644
--- a/src/math/sincos.go
+++ b/src/math/sincos.go
@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
-// +build !386
-
package math
// Coefficients _sin[] and _cos[] are found in pkg/math/sin.go.
@@ -16,10 +14,9 @@
// Sincos(NaN) = NaN, NaN
func Sincos(x float64) (sin, cos float64) {
const (
- PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
- PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
- PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
- M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi
+ PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
+ PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
+ PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
)
// special cases
switch {
@@ -36,14 +33,21 @@
sinSign = true
}
- j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle
- y := float64(j) // integer part of x/(Pi/4), as float
+ var j uint64
+ var y, z float64
+ if x >= reduceThreshold {
+ j, z = trigReduce(x)
+ } else {
+ j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+ y = float64(j) // integer part of x/(Pi/4), as float
- if j&1 == 1 { // map zeros to origin
- j++
- y++
+ if j&1 == 1 { // map zeros to origin
+ j++
+ y++
+ }
+ j &= 7 // octant modulo 2Pi radians (360 degrees)
+ z = ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
}
- j &= 7 // octant modulo 2Pi radians (360 degrees)
if j > 3 { // reflect in x axis
j -= 4
sinSign, cosSign = !sinSign, !cosSign
@@ -52,7 +56,6 @@
cosSign = !cosSign
}
- z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
zz := z * z
cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
diff --git a/src/math/sincos_386.go b/src/math/sincos_386.go
deleted file mode 100644
index 38bb050..0000000
--- a/src/math/sincos_386.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package math
-
-// Sincos returns Sin(x), Cos(x).
-//
-// Special cases are:
-// Sincos(±0) = ±0, 1
-// Sincos(±Inf) = NaN, NaN
-// Sincos(NaN) = NaN, NaN
-func Sincos(x float64) (sin, cos float64)
diff --git a/src/math/sincos_386.s b/src/math/sincos_386.s
deleted file mode 100644
index f700a4f..0000000
--- a/src/math/sincos_386.s
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-// func Sincos(x float64) (sin, cos float64)
-TEXT ·Sincos(SB),NOSPLIT,$0
- FMOVD x+0(FP), F0 // F0=x
- FSINCOS // F0=cos(x), F1=sin(x) if -2**63 < x < 2**63
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE 4(PC) // jump if x outside range
- FMOVDP F0, cos+16(FP) // F0=sin(x)
- FMOVDP F0, sin+8(FP)
- RET
- FLDPI // F0=Pi, F1=x
- FADDD F0, F0 // F0=2*Pi, F1=x
- FXCHD F0, F1 // F0=x, F1=2*Pi
- FPREM1 // F0=reduced_x, F1=2*Pi
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE -3(PC) // jump if reduction incomplete
- FMOVDP F0, F1 // F0=reduced_x
- FSINCOS // F0=cos(reduced_x), F1=sin(reduced_x)
- FMOVDP F0, cos+16(FP) // F0=sin(reduced_x)
- FMOVDP F0, sin+8(FP)
- RET
diff --git a/src/math/tan.go b/src/math/tan.go
index aa2fb37..0d5394c 100644
--- a/src/math/tan.go
+++ b/src/math/tan.go
@@ -83,10 +83,9 @@
func tan(x float64) float64 {
const (
- PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
- PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
- PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
- M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi
+ PI4A = 7.85398125648498535156E-1 // 0x3fe921fb40000000, Pi/4 split into three parts
+ PI4B = 3.77489470793079817668E-8 // 0x3e64442d00000000,
+ PI4C = 2.69515142907905952645E-15 // 0x3ce8469898cc5170,
)
// special cases
switch {
@@ -102,17 +101,22 @@
x = -x
sign = true
}
+ var j uint64
+ var y, z float64
+ if x >= reduceThreshold {
+ j, z = trigReduce(x)
+ } else {
+ j = uint64(x * (4 / Pi)) // integer part of x/(Pi/4), as integer for tests on the phase angle
+ y = float64(j) // integer part of x/(Pi/4), as float
- j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle
- y := float64(j) // integer part of x/(Pi/4), as float
+ /* map zeros and singularities to origin */
+ if j&1 == 1 {
+ j++
+ y++
+ }
- /* map zeros and singularities to origin */
- if j&1 == 1 {
- j++
- y++
+ z = ((x - y*PI4A) - y*PI4B) - y*PI4C
}
-
- z := ((x - y*PI4A) - y*PI4B) - y*PI4C
zz := z * z
if zz > 1e-14 {
diff --git a/src/math/tan_386.s b/src/math/tan_386.s
index cb65a3f..4e44c26 100644
--- a/src/math/tan_386.s
+++ b/src/math/tan_386.s
@@ -6,23 +6,4 @@
// func Tan(x float64) float64
TEXT ·Tan(SB),NOSPLIT,$0
- FMOVD x+0(FP), F0 // F0=x
- FPTAN // F0=1, F1=tan(x) if -2**63 < x < 2**63
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE 4(PC) // jump if x outside range
- FMOVDP F0, F0 // F0=tan(x)
- FMOVDP F0, ret+8(FP)
- RET
- FLDPI // F0=Pi, F1=x
- FADDD F0, F0 // F0=2*Pi, F1=x
- FXCHD F0, F1 // F0=x, F1=2*Pi
- FPREM1 // F0=reduced_x, F1=2*Pi
- FSTSW AX // AX=status word
- ANDW $0x0400, AX
- JNE -3(PC) // jump if reduction incomplete
- FMOVDP F0, F1 // F0=reduced_x
- FPTAN // F0=1, F1=tan(reduced_x)
- FMOVDP F0, F0 // F0=tan(reduced_x)
- FMOVDP F0, ret+8(FP)
- RET
+ JMP ·tan(SB)
diff --git a/src/math/trig_reduce.go b/src/math/trig_reduce.go
new file mode 100644
index 0000000..7bc72e9
--- /dev/null
+++ b/src/math/trig_reduce.go
@@ -0,0 +1,94 @@
+// Copyright 2018 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package math
+
+import (
+ "math/bits"
+)
+
+// reduceThreshold is the maximum value where the reduction using Pi/4
+// in 3 float64 parts still gives accurate results. Above this
+// threshold Payne-Hanek range reduction must be used.
+const reduceThreshold = (1 << 52) / (4 / Pi)
+
+// trigReduce implements Payne-Hanek range reduction by Pi/4
+// for x > 0. It returns the integer part mod 8 (j) and
+// the fractional part (z) of x / (Pi/4).
+// The implementation is based on:
+// "ARGUMENT REDUCTION FOR HUGE ARGUMENTS: Good to the Last Bit"
+// K. C. Ng et al, March 24, 1992
+// The simulated multi-precision calculation of x*B uses 64-bit integer arithmetic.
+func trigReduce(x float64) (j uint64, z float64) {
+ const PI4 = Pi / 4
+ if x < PI4 {
+ return 0, x
+ }
+ // Extract out the integer and exponent such that,
+ // x = ix * 2 ** exp.
+ ix := Float64bits(x)
+ exp := int(ix>>shift&mask) - bias - shift
+ ix &^= mask << shift
+ ix |= 1 << shift
+ // Use the exponent to extract the 3 appropriate uint64 digits from mPi4,
+ // B ~ (z0, z1, z2), such that the product leading digit has the exponent -61.
+ // Note, exp >= -53 since x >= PI4 and exp < 971 for maximum float64.
+ digit, bitshift := uint(exp+61)/64, uint(exp+61)%64
+ z0 := (mPi4[digit] << bitshift) | (mPi4[digit+1] >> (64 - bitshift))
+ z1 := (mPi4[digit+1] << bitshift) | (mPi4[digit+2] >> (64 - bitshift))
+ z2 := (mPi4[digit+2] << bitshift) | (mPi4[digit+3] >> (64 - bitshift))
+ // Multiply mantissa by the digits and extract the upper two digits (hi, lo).
+ z2hi, _ := bits.Mul64(z2, ix)
+ z1hi, z1lo := bits.Mul64(z1, ix)
+ z0lo := z0 * ix
+ lo, c := bits.Add64(z1lo, z2hi, 0)
+ hi, _ := bits.Add64(z0lo, z1hi, c)
+ // The top 3 bits are j.
+ j = hi >> 61
+ // Extract the fraction and find its magnitude.
+ hi = hi<<3 | lo>>61
+ lz := uint(bits.LeadingZeros64(hi))
+ e := uint64(bias - (lz + 1))
+ // Clear implicit mantissa bit and shift into place.
+ hi = (hi << (lz + 1)) | (lo >> (64 - (lz + 1)))
+ hi >>= 64 - shift
+ // Include the exponent and convert to a float.
+ hi |= e << shift
+ z = Float64frombits(hi)
+ // Map zeros to origin.
+ if j&1 == 1 {
+ j++
+ j &= 7
+ z--
+ }
+ // Multiply the fractional part by pi/4.
+ return j, z * PI4
+}
+
+// mPi4 is the binary digits of 4/pi as a uint64 array,
+// that is, 4/pi = Sum mPi4[i]*2^(-64*i)
+// 19 64-bit digits gives 1153 bits of precision to handle
+// the largest possible float64 exponent.
+var mPi4 = [...]uint64{
+ 0x0000000000000001,
+ 0x45f306dc9c882a53,
+ 0xf84eafa3ea69bb81,
+ 0xb6c52b3278872083,
+ 0xfca2c757bd778ac3,
+ 0x6e48dc74849ba5c0,
+ 0x0c925dd413a32439,
+ 0xfc3bd63962534e7d,
+ 0xd1046bea5d768909,
+ 0xd338e04d68befc82,
+ 0x7323ac7306a673e9,
+ 0x3908bf177bf25076,
+ 0x3ff12fffbc0b301f,
+ 0xde5e2316b414da3e,
+ 0xda6cfd9e4f96136e,
+ 0x9e8c7ecd3cbfd45a,
+ 0xea4f758fd7cbe2f6,
+ 0x7a0e73ef14a525d4,
+ 0xd7f6bf623f1aba10,
+ 0xac06608df8f6d757,
+}