runtime: reduce stack usage for asmcgocall on PPC64

Define it as ABIInternal, so the result does not take space on
stack.

Also use R10 as a temporary register for arithmetics on SP, so it
is hidden from the assembler's SP delta calculation, which is
irrelevant anyway as we are on the system stack.

Change-Id: I8fed467601c19cad2d7afab26978246d15ce3147
Reviewed-on: https://go-review.googlesource.com/c/go/+/580918
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index ff9b736..97c2a37 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -615,9 +615,9 @@
 // Call fn(arg) on the scheduler stack,
 // aligned appropriately for the gcc ABI.
 // See cgocall.go for more details.
-TEXT ·asmcgocall(SB),NOSPLIT,$0-20
-	MOVD	fn+0(FP), R3
-	MOVD	arg+8(FP), R4
+TEXT ·asmcgocall<ABIInternal>(SB),NOSPLIT,$0-20
+	// R3 = fn
+	// R4 = arg
 
 	MOVD	R1, R7		// save original stack pointer
 	CMP	$0, g
@@ -651,8 +651,11 @@
 #endif
 	// Save room for two of our pointers, plus the callee
 	// save area that lives on the caller stack.
-	SUB	$(asmcgocallSaveOffset+16), R1
-	RLDCR	$0, R1, $~15, R1	// 16-byte alignment for gcc ABI
+	// Do arithmetics in R10 to hide from the assembler
+	// counting it as SP delta, which is irrelevant as we are
+	// on the system stack.
+	SUB	$(asmcgocallSaveOffset+16), R1, R10
+	RLDCR	$0, R10, $~15, R1	// 16-byte alignment for gcc ABI
 	MOVD	R5, (asmcgocallSaveOffset+8)(R1)	// save old g on stack
 	MOVD	(g_stack+stack_hi)(R5), R5
 	SUB	R7, R5
@@ -689,7 +692,7 @@
 	MOVD	R5, R1
 	BL	runtime·save_g(SB)
 
-	MOVW	R3, ret+16(FP)
+	// ret = R3
 	RET
 
 nosave:
@@ -703,8 +706,8 @@
 	// Using this code for all "already on system stack" calls exercises it more,
 	// which should help keep it correct.
 
-	SUB	$(asmcgocallSaveOffset+8), R1
-	RLDCR	$0, R1, $~15, R1		// 16-byte alignment for gcc ABI
+	SUB	$(asmcgocallSaveOffset+8), R1, R10
+	RLDCR	$0, R10, $~15, R1		// 16-byte alignment for gcc ABI
 	MOVD	R7, asmcgocallSaveOffset(R1)	// Save original stack pointer.
 
 	MOVD	R3, R12		// fn
@@ -724,7 +727,7 @@
 #ifndef GOOS_aix
 	MOVD	24(R1), R2
 #endif
-	MOVW	R3, ret+16(FP)
+	// ret = R3
 	RET
 
 // func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)