runtime/cgo: save correct floating point registers on s390x
When transitioning from C code to Go code we must respect the C
calling convention. On s390x this means that r6-r13, r15 and f8-f15
must be saved and restored by functions that use them.
On s390x we were saving the wrong set of floating point registers
(f0, f2, f4 and f6) rather than f8-f15, which meant that Go code
could clobber registers that C code expects to be preserved. This
CL modifies the crosscall functions on s390x to save/restore the
correct floating point registers. It also removes the floating point
saves from runtime·addmoduledata, which does not make any calls.
Fixes #18035.
Change-Id: I5cc6f552c893a4e677669c8891521bf735492e97
Reviewed-on: https://go-review.googlesource.com/33571
Reviewed-by: Ian Lance Taylor <iant@golang.org>
diff --git a/src/runtime/asm_s390x.s b/src/runtime/asm_s390x.s
index 026c18c..c2212a5 100644
--- a/src/runtime/asm_s390x.s
+++ b/src/runtime/asm_s390x.s
@@ -1305,13 +1305,9 @@
// However, since this function is only called once per loaded module
// performance is unimportant.
TEXT runtime·addmoduledata(SB),NOSPLIT|NOFRAME,$0-0
- // Save R6-R15, F0, F2, F4 and F6 in the
- // register save area of the calling function
+ // Save R6-R15 in the register save area of the calling function.
+ // Don't bother saving F8-F15 as we aren't doing any calls.
STMG R6, R15, 48(R15)
- FMOVD F0, 128(R15)
- FMOVD F2, 136(R15)
- FMOVD F4, 144(R15)
- FMOVD F6, 152(R15)
// append the argument (passed in R2, as per the ELF ABI) to the
// moduledata linked list.
@@ -1319,12 +1315,8 @@
MOVD R2, moduledata_next(R1)
MOVD R2, runtime·lastmoduledatap(SB)
- // Restore R6-R15, F0, F2, F4 and F6
+ // Restore R6-R15.
LMG 48(R15), R6, R15
- FMOVD F0, 128(R15)
- FMOVD F2, 136(R15)
- FMOVD F4, 144(R15)
- FMOVD F6, 152(R15)
RET
TEXT ·checkASM(SB),NOSPLIT,$0-1
diff --git a/src/runtime/cgo/asm_s390x.s b/src/runtime/cgo/asm_s390x.s
index ae688b6..7eab8f6 100644
--- a/src/runtime/cgo/asm_s390x.s
+++ b/src/runtime/cgo/asm_s390x.s
@@ -8,36 +8,46 @@
// func crosscall2(fn func(a unsafe.Pointer, n int32, ctxt uintptr), a unsafe.Pointer, n int32, ctxt uintptr)
// Saves C callee-saved registers and calls fn with three arguments.
TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0
- // Start with standard C stack frame layout and linkage
+ // Start with standard C stack frame layout and linkage.
- // Save R6-R15, F0, F2, F4 and F6 in the
- // register save area of the calling function
+ // Save R6-R15 in the register save area of the calling function.
STMG R6, R15, 48(R15)
- FMOVD F0, 128(R15)
- FMOVD F2, 136(R15)
- FMOVD F4, 144(R15)
- FMOVD F6, 152(R15)
- // Initialize Go ABI environment
- XOR R0, R0
+ // Allocate 96 bytes on the stack.
+ MOVD $-96(R15), R15
+
+ // Save F8-F15 in our stack frame.
+ FMOVD F8, 32(R15)
+ FMOVD F9, 40(R15)
+ FMOVD F10, 48(R15)
+ FMOVD F11, 56(R15)
+ FMOVD F12, 64(R15)
+ FMOVD F13, 72(R15)
+ FMOVD F14, 80(R15)
+ FMOVD F15, 88(R15)
+
+ // Initialize Go ABI environment.
BL runtime·load_g(SB)
- // Allocate 32 bytes on the stack
- SUB $32, R15
-
MOVD R3, 8(R15) // arg1
MOVW R4, 16(R15) // arg2
MOVD R5, 24(R15) // arg3
BL (R2) // fn(arg1, arg2, arg3)
- ADD $32, R15
+ FMOVD 32(R15), F8
+ FMOVD 40(R15), F9
+ FMOVD 48(R15), F10
+ FMOVD 56(R15), F11
+ FMOVD 64(R15), F12
+ FMOVD 72(R15), F13
+ FMOVD 80(R15), F14
+ FMOVD 88(R15), F15
- // Restore R6-R15, F0, F2, F4 and F6
+ // De-allocate stack frame.
+ MOVD $96(R15), R15
+
+ // Restore R6-R15.
LMG 48(R15), R6, R15
- FMOVD F0, 128(R15)
- FMOVD F2, 136(R15)
- FMOVD F4, 144(R15)
- FMOVD F6, 152(R15)
RET
diff --git a/src/runtime/cgo/gcc_s390x.S b/src/runtime/cgo/gcc_s390x.S
index db654e4..614de4b 100644
--- a/src/runtime/cgo/gcc_s390x.S
+++ b/src/runtime/cgo/gcc_s390x.S
@@ -6,38 +6,48 @@
* void crosscall_s390x(void (*fn)(void), void *g)
*
* Calling into the go tool chain, where all registers are caller save.
- * Called from standard s390x C ABI, where r6-r13, r15, and f0, f2, f4 and f6 are
+ * Called from standard s390x C ABI, where r6-r13, r15, and f8-f15 are
* callee-save, so they must be saved explicitly.
*/
.globl crosscall_s390x
crosscall_s390x:
- /*
- * save r6-r15, f0, f2, f4 and f6 in the
- * register save area of the calling function
- */
- stmg %r6, %r15, 48(%r15)
- stdy %f0, 128(%r15)
- stdy %f2, 136(%r15)
- stdy %f4, 144(%r15)
- stdy %f6, 152(%r15)
+ /* save r6-r15 in the register save area of the calling function */
+ stmg %r6, %r15, 48(%r15)
- /* set r0 to 0 */
- xgr %r0, %r0
+ /* allocate 64 bytes of stack space to save f8-f15 */
+ lay %r15, -64(%r15)
+
+ /* save callee-saved floating point registers */
+ std %f8, 0(%r15)
+ std %f9, 8(%r15)
+ std %f10, 16(%r15)
+ std %f11, 24(%r15)
+ std %f12, 32(%r15)
+ std %f13, 40(%r15)
+ std %f14, 48(%r15)
+ std %f15, 56(%r15)
/* restore g pointer */
- lgr %r13, %r3
+ lgr %r13, %r3
- /* grow stack 8 bytes and call fn */
- agfi %r15, -8
+ /* call fn */
basr %r14, %r2
- agfi %r15, 8
- /* restore registers */
- lmg %r6, %r15, 48(%r15)
- ldy %f0, 128(%r15)
- ldy %f2, 136(%r15)
- ldy %f4, 144(%r15)
- ldy %f6, 152(%r15)
+ /* restore floating point registers */
+ ld %f8, 0(%r15)
+ ld %f9, 8(%r15)
+ ld %f10, 16(%r15)
+ ld %f11, 24(%r15)
+ ld %f12, 32(%r15)
+ ld %f13, 40(%r15)
+ ld %f14, 48(%r15)
+ ld %f15, 56(%r15)
+
+ /* de-allocate stack frame */
+ la %r15, 64(%r15)
+
+ /* restore general purpose registers */
+ lmg %r6, %r15, 48(%r15)
br %r14 /* restored by lmg */