math: faster Sincos

Sincos via sincos.go is 35.4 ns/op, via sincos_amd64.s is 37.4 ns/op on 2.53 GHz Intel Core 2 Duo (Mac OS X).

R=rsc, golang-dev
CC=golang-dev
https://golang.org/cl/5447045
diff --git a/src/pkg/math/Makefile b/src/pkg/math/Makefile
index 5472dc1..ad012b64c 100644
--- a/src/pkg/math/Makefile
+++ b/src/pkg/math/Makefile
@@ -15,7 +15,6 @@
 	exp_amd64.$O\
 	hypot_amd64.$O\
 	log_amd64.$O\
-	sincos_amd64.$O\
 	sqrt_amd64.$O\
 
 OFILES_386=\
diff --git a/src/pkg/math/sincos.go b/src/pkg/math/sincos.go
index e8261bc..f5412fd 100644
--- a/src/pkg/math/sincos.go
+++ b/src/pkg/math/sincos.go
@@ -4,9 +4,66 @@
 
 package math
 
+// Coefficients _sin[] and _cos[] are found in pkg/math/sin.go.
+
 // Sincos(x) returns Sin(x), Cos(x).
 //
 // Special conditions are:
+//	Sincos(±0) = ±0, 1
 //	Sincos(±Inf) = NaN, NaN
 //	Sincos(NaN) = NaN, NaN
-func Sincos(x float64) (sin, cos float64) { return Sin(x), Cos(x) }
+func Sincos(x float64) (sin, cos float64) {
+	const (
+		PI4A = 7.85398125648498535156E-1                             // 0x3fe921fb40000000, Pi/4 split into three parts
+		PI4B = 3.77489470793079817668E-8                             // 0x3e64442d00000000,
+		PI4C = 2.69515142907905952645E-15                            // 0x3ce8469898cc5170,
+		M4PI = 1.273239544735162542821171882678754627704620361328125 // 4/pi
+	)
+	// TODO(rsc): Remove manual inlining of IsNaN, IsInf
+	// when compiler does it for us
+	// special cases
+	switch {
+	case x == 0:
+		return x, 1 // return ±0.0, 1.0
+	case x != x || x < -MaxFloat64 || x > MaxFloat64: // IsNaN(x) || IsInf(x, 0):
+		return NaN(), NaN()
+	}
+
+	// make argument positive
+	sinSign, cosSign := false, false
+	if x < 0 {
+		x = -x
+		sinSign = true
+	}
+
+	j := int64(x * M4PI) // integer part of x/(Pi/4), as integer for tests on the phase angle
+	y := float64(j)      // integer part of x/(Pi/4), as float
+
+	if j&1 == 1 { // map zeros to origin
+		j += 1
+		y += 1
+	}
+	j &= 7     // octant modulo 2Pi radians (360 degrees)
+	if j > 3 { // reflect in x axis
+		j -= 4
+		sinSign, cosSign = !sinSign, !cosSign
+	}
+	if j > 1 {
+		cosSign = !cosSign
+	}
+
+	z := ((x - y*PI4A) - y*PI4B) - y*PI4C // Extended precision modular arithmetic
+	zz := z * z
+	cos = 1.0 - 0.5*zz + zz*zz*((((((_cos[0]*zz)+_cos[1])*zz+_cos[2])*zz+_cos[3])*zz+_cos[4])*zz+_cos[5])
+	sin = z + z*zz*((((((_sin[0]*zz)+_sin[1])*zz+_sin[2])*zz+_sin[3])*zz+_sin[4])*zz+_sin[5])
+	if j == 1 || j == 2 {
+		sin, cos = cos, sin
+	}
+	if cosSign {
+		cos = -cos
+	}
+	if sinSign {
+		sin = -sin
+	}
+	return
+}