runtime,runtime/cgo: save callee-saved FP registers on arm

Other GOARCHs already handle their callee-saved FP registers, but
arm did not. Without this change, programs that mix cgo and floating
point code might fail in mysterious and hard-to-debug ways.

There are no floating point registers when GOARM=5, so skip saving
and restoring them when runtime.goarm < 6.

darwin/arm doesn't support GOARM=5, so the check is left out of
rt0_darwin_arm.s.

Fixes #14876

Change-Id: I6bcb90a76df3664d8ba1f33123a74b1eb2c9f8b2
Reviewed-on: https://go-review.googlesource.com/23140
Run-TryBot: Elias Naur <elias.naur@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Minux Ma <minux@golang.org>
diff --git a/src/runtime/cgo/asm_arm.s b/src/runtime/cgo/asm_arm.s
index 08472b6..0f35422 100644
--- a/src/runtime/cgo/asm_arm.s
+++ b/src/runtime/cgo/asm_arm.s
@@ -16,8 +16,40 @@
 	 *  Additionally, runtime·load_g will clobber R0, so we need to save R0
 	 *  nevertheless.
 	 */
+	SUB	$(8*9), R13 // Reserve space for the floating point registers.
 	MOVM.WP	[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, g, R11, R12, R14], (R13)
+
+	// Skip floating point registers on GOARM < 6.
+	MOVB    runtime·goarm(SB), R11
+	CMP $6, R11
+	BLT skipfpsave
+	MOVD	F8, (14*4+8*1)(R13)
+	MOVD	F9, (14*4+8*2)(R13)
+	MOVD	F10, (14*4+8*3)(R13)
+	MOVD	F11, (14*4+8*4)(R13)
+	MOVD	F12, (14*4+8*5)(R13)
+	MOVD	F13, (14*4+8*6)(R13)
+	MOVD	F14, (14*4+8*7)(R13)
+	MOVD	F15, (14*4+8*8)(R13)
+
+skipfpsave:
 	BL	runtime·load_g(SB)
 	MOVW	R15, R14 // R15 is PC.
 	MOVW	0(R13), R15
-	MOVM.IAW	(R13), [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, g, R11, R12, R15]
+
+	MOVB    runtime·goarm(SB), R11
+	CMP $6, R11
+	BLT skipfprest
+	MOVD	(14*4+8*1)(R13), F8
+	MOVD	(14*4+8*2)(R13), F9
+	MOVD	(14*4+8*3)(R13), F10
+	MOVD	(14*4+8*4)(R13), F11
+	MOVD	(14*4+8*5)(R13), F12
+	MOVD	(14*4+8*6)(R13), F13
+	MOVD	(14*4+8*7)(R13), F14
+	MOVD	(14*4+8*8)(R13), F15
+
+skipfprest:
+	MOVM.IAW	(R13), [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, g, R11, R12, R14]
+	ADD	$(8*9), R13
+	MOVW	R14, R15