cmd/internal/gc: use hardware instruction for math.Sqrt (amd64/arm)
I first prototyped this change in Sept 2011, and I discarded it
because it made no difference in the obvious benchmark loop.
It still makes no difference in the obvious benchmark loop,
but in a less obvious one that does some extra computation
around the calls to Sqrt, avoiding the function call does have
a significant effect.
benchmark                 old ns/op     new ns/op     delta
BenchmarkSqrt             4.56          4.57          +0.22%
BenchmarkSqrtIndirect     4.56          4.56          +0.00%
BenchmarkSqrtGo           69.4          69.4          +0.00%
BenchmarkSqrtPrime        4417          3647          -17.43%
This is a warmup for using hardware expansions for some
calls to 1-line assembly routines in the runtime (for example getg).
Change-Id: Ie66be23f8c09d0f7dc4ddd7ca8a93cfce28f55a4
Reviewed-on: https://go-review.googlesource.com/8356
Reviewed-by: Rob Pike <r@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/src/cmd/internal/gc/cgen.go b/src/cmd/internal/gc/cgen.go
index b3524c2..d3921f7 100644
--- a/src/cmd/internal/gc/cgen.go
+++ b/src/cmd/internal/gc/cgen.go
@@ -409,6 +409,15 @@
cgen_norm(n, &n1, res)
return
+ case OSQRT:
+ var n1 Node
+ Regalloc(&n1, nl.Type, res)
+ Cgen(n.Left, &n1)
+ Thearch.Gins(Thearch.Optoas(OSQRT, nl.Type), &n1, &n1)
+ Thearch.Gmove(&n1, res)
+ Regfree(&n1)
+ return
+
// symmetric binary
case OAND,
OOR,
diff --git a/src/cmd/internal/gc/gen.go b/src/cmd/internal/gc/gen.go
index caae2f1..e0659fc 100644
--- a/src/cmd/internal/gc/gen.go
+++ b/src/cmd/internal/gc/gen.go
@@ -1002,6 +1002,9 @@
case ORETURN, ORETJMP:
cgen_ret(n)
+ case OSQRT:
+ cgen_discard(n.Left)
+
case OCHECKNIL:
Cgen_checknil(n.Left)
diff --git a/src/cmd/internal/gc/syntax.go b/src/cmd/internal/gc/syntax.go
index 8f5b85d..671a624 100644
--- a/src/cmd/internal/gc/syntax.go
+++ b/src/cmd/internal/gc/syntax.go
@@ -293,7 +293,7 @@
OREGISTER // a register, such as AX.
OINDREG // offset plus indirect of a register, such as 8(SP).
- // 386/amd64-specific opcodes
+ // arch-specific opcodes
OCMP // compare: ACMP.
ODEC // decrement: ADEC.
OINC // increment: AINC.
@@ -303,6 +303,7 @@
ORROTC // right rotate-carry: ARCR.
ORETJMP // return to other function
OPS // compare parity set (for x86 NaN check)
+ OSQRT // sqrt(float64), on systems that have hw support
OEND
)
diff --git a/src/cmd/internal/gc/walk.go b/src/cmd/internal/gc/walk.go
index c10201a..a0a29d3 100644
--- a/src/cmd/internal/gc/walk.go
+++ b/src/cmd/internal/gc/walk.go
@@ -622,6 +622,16 @@
walkexpr(&n.Left, init)
walkexprlist(n.List, init)
+ if n.Left.Op == ONAME && n.Left.Sym.Name == "Sqrt" && n.Left.Sym.Pkg.Path == "math" {
+ switch Thearch.Thechar {
+ case '5', '6':
+ n.Op = OSQRT
+ n.Left = n.List.N
+ n.List = nil
+ goto ret
+ }
+ }
+
ll := ascompatte(int(n.Op), n, n.Isddd, getinarg(t), n.List, 0, init)
n.List = reorder1(ll)
goto ret