sync/atomic: fix check64

The LDREXD and STREXD instructions require
aligned addresses, and the ARM stack is not
guaranteed to be aligned during the check.
This may cause other problems later (on the ARM
not all 64-bit pointers may be 64-bit aligned)
but at least the check is correct now.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/4564053
diff --git a/src/pkg/sync/atomic/asm_arm.s b/src/pkg/sync/atomic/asm_arm.s
index 3363bbc..448a98a 100644
--- a/src/pkg/sync/atomic/asm_arm.s
+++ b/src/pkg/sync/atomic/asm_arm.s
@@ -87,11 +87,14 @@
 // which will make uses of the 64-bit atomic operations loop forever.
 // If things are working, set okLDREXD to avoid future checks.
 // https://bugs.launchpad.net/qemu/+bug/670883.
-TEXT	check64<>(SB),7,$8
+TEXT	check64<>(SB),7,$16
 	MOVW	$10, R1
+	// 8-aligned stack address scratch space.
+	MOVW	$8(SP), R3
+	AND	$~7, R3
 loop:
-	LDREXD	(SP), R2
-	STREXD	R2, (SP), R0
+	LDREXD	(R3), R2
+	STREXD	R2, (R3), R0
 	CMP	$0, R0
 	BEQ	ok
 	SUB	$1, R1