cmd/internal/gc: use hardware instruction for math.Sqrt (amd64/arm)

I first prototyped this change in Sept 2011, and I discarded it
because it made no difference in the obvious benchmark loop.
It still makes no difference in the obvious benchmark loop,
but in a less obvious one, doing some extra computation
around the calls to Sqrt, not making the call does have a
significant effect.

benchmark                 old ns/op     new ns/op     delta
BenchmarkSqrt             4.56          4.57          +0.22%
BenchmarkSqrtIndirect     4.56          4.56          +0.00%
BenchmarkSqrtGo           69.4          69.4          +0.00%
BenchmarkSqrtPrime        4417          3647          -17.43%

This is a warmup for using hardware expansions for some
calls to 1-line assembly routines in the runtime (for example getg).

Change-Id: Ie66be23f8c09d0f7dc4ddd7ca8a93cfce28f55a4
Reviewed-on: https://go-review.googlesource.com/8356
Reviewed-by: Rob Pike <r@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/src/math/all_test.go b/src/math/all_test.go
index c07ac74..84061be 100644
--- a/src/math/all_test.go
+++ b/src/math/all_test.go
@@ -2977,15 +2977,56 @@
 	}
 }
 
+var Global float64
+
 func BenchmarkSqrt(b *testing.B) {
+	x, y := 0.0, 10.0
 	for i := 0; i < b.N; i++ {
-		Sqrt(10)
+		x += Sqrt(y)
 	}
+	Global = x
+}
+
+func BenchmarkSqrtIndirect(b *testing.B) {
+	x, y := 0.0, 10.0
+	f := Sqrt
+	for i := 0; i < b.N; i++ {
+		x += f(y)
+	}
+	Global = x
 }
 
 func BenchmarkSqrtGo(b *testing.B) {
+	x, y := 0.0, 10.0
 	for i := 0; i < b.N; i++ {
-		SqrtGo(10)
+		x += SqrtGo(y)
+	}
+	Global = x
+}
+
+func isPrime(i int) bool {
+	// Yes, this is a dumb way to write this code,
+	// but calling Sqrt repeatedly in this way demonstrates
+	// the benefit of using a direct SQRT instruction on systems
+	// that have one, whereas the obvious loop seems not to
+	// demonstrate such a benefit.
+	for j := 2; float64(j) <= Sqrt(float64(i)); j++ {
+		if i%j == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+func BenchmarkSqrtPrime(b *testing.B) {
+	any := false
+	for i := 0; i < b.N; i++ {
+		if isPrime(100003) {
+			any = true
+		}
+	}
+	if any {
+		Global = 1
 	}
 }