math, cmd/internal/gc, cmd/7g: enable SQRT inlining, add assembly implementation

benchmark                 old ns/op     new ns/op     delta
BenchmarkSqrt             474           16.5          -96.52%
BenchmarkSqrtIndirect     476           38.1          -92.00%
BenchmarkSqrtGo           484           483           -0.21%

Change-Id: I5ad0132feda0d3275a884523b9e79d83db4fc726
Signed-off-by: Shenghou Ma <minux@golang.org>
Reviewed-on: https://go-review.googlesource.com/8465
Reviewed-by: David Crawshaw <crawshaw@golang.org>
diff --git a/src/cmd/7g/gsubr.go b/src/cmd/7g/gsubr.go
index 2d2bdb7..cea1e92 100644
--- a/src/cmd/7g/gsubr.go
+++ b/src/cmd/7g/gsubr.go
@@ -938,6 +938,9 @@
 
 	case gc.ODIV<<16 | gc.TFLOAT64:
 		a = arm64.AFDIVD
+
+	case gc.OSQRT<<16 | gc.TFLOAT64:
+		a = arm64.AFSQRTD
 	}
 
 	return a
diff --git a/src/cmd/7g/prog.go b/src/cmd/7g/prog.go
index 2763e27..733c405fa 100644
--- a/src/cmd/7g/prog.go
+++ b/src/cmd/7g/prog.go
@@ -60,18 +60,19 @@
 	arm64.ACMP:   {gc.SizeQ | gc.LeftRead | gc.RegRead, 0, 0, 0},
 
 	// Floating point.
-	arm64.AFADDD: {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFADDS: {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFSUBD: {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFSUBS: {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFNEGD: {gc.SizeD | gc.LeftRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFNEGS: {gc.SizeF | gc.LeftRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFMULD: {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFMULS: {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFDIVD: {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFDIVS: {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
-	arm64.AFCMPD: {gc.SizeD | gc.LeftRead | gc.RegRead, 0, 0, 0},
-	arm64.AFCMPS: {gc.SizeF | gc.LeftRead | gc.RegRead, 0, 0, 0},
+	arm64.AFADDD:  {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFADDS:  {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFSUBD:  {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFSUBS:  {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFNEGD:  {gc.SizeD | gc.LeftRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFNEGS:  {gc.SizeF | gc.LeftRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFSQRTD: {gc.SizeD | gc.LeftRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFMULD:  {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFMULS:  {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFDIVD:  {gc.SizeD | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFDIVS:  {gc.SizeF | gc.LeftRead | gc.RegRead | gc.RightWrite, 0, 0, 0},
+	arm64.AFCMPD:  {gc.SizeD | gc.LeftRead | gc.RegRead, 0, 0, 0},
+	arm64.AFCMPS:  {gc.SizeF | gc.LeftRead | gc.RegRead, 0, 0, 0},
 
 	// float -> integer
 	arm64.AFCVTZSD:  {gc.SizeD | gc.LeftRead | gc.RightWrite | gc.Conv, 0, 0, 0},
diff --git a/src/cmd/internal/gc/walk.go b/src/cmd/internal/gc/walk.go
index a0a29d3..c845f78 100644
--- a/src/cmd/internal/gc/walk.go
+++ b/src/cmd/internal/gc/walk.go
@@ -624,7 +624,7 @@
 
 		if n.Left.Op == ONAME && n.Left.Sym.Name == "Sqrt" && n.Left.Sym.Pkg.Path == "math" {
 			switch Thearch.Thechar {
-			case '5', '6':
+			case '5', '6', '7':
 				n.Op = OSQRT
 				n.Left = n.List.N
 				n.List = nil