runtime, sync/atomic: fix arm cas

Works around bug in kernel implementation on old ARM5 kernels.
Bug was fixed on 26 Nov 2007 (between 2.6.23 and 2.6.24) but
old kernels persist.

Fixes #1750.

R=dfc, golang-dev
CC=golang-dev
https://golang.org/cl/4436072
diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s
index d866b0e..2b5365b 100644
--- a/src/pkg/runtime/linux/arm/sys.s
+++ b/src/pkg/runtime/linux/arm/sys.s
@@ -258,11 +258,22 @@
 TEXT runtime·cas(SB),7,$0
 	MOVW	valptr+0(FP), R2
 	MOVW	old+4(FP), R0
+casagain:	// retry entry; cascheck reloads R2 and R0 before branching back here
 	MOVW	new+8(FP), R1
 	BL	cas<>(SB)
-	MOVW	$0, R0
-	MOVW.CS	$1, R0
+	BCC	cascheck	// carry clear (the case that used to return 0 outright): verify first
+	MOVW $1, R0	// carry set: return 1, as before
 	RET
+cascheck:
+	// Kernel lies; double-check: only return 0 if *valptr really differs from old.
+	MOVW	valptr+0(FP), R2	// reload valptr; NOTE(review): presumably cas<> may clobber R2 - confirm
+	MOVW	old+4(FP), R0	// reload old; NOTE(review): presumably cas<> may clobber R0 - confirm
+	MOVW	0(R2), R3	// R3 = value currently stored at *valptr
+	CMP	R0, R3
+	BEQ	casagain	// *valptr still equals old: retry the cas
+	MOVW $0, R0	// *valptr differs from old: return 0
+	RET
+
 
 TEXT runtime·casp(SB),7,$0
 	B	runtime·cas(SB)