runtime: add atomic xchg64
It will be handy for network poller.

R=golang-dev, rsc
CC=golang-dev
https://golang.org/cl/7429048
diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s
index 9879584..696befd 100644
--- a/src/pkg/runtime/asm_amd64.s
+++ b/src/pkg/runtime/asm_amd64.s
@@ -442,6 +442,12 @@
 	XCHGL	AX, 0(BX)
 	RET
 
+TEXT runtime·xchg64(SB), 7, $0
+	MOVQ	8(SP), BX
+	MOVQ	16(SP), AX
+	XCHGQ	AX, 0(BX)
+	RET
+
 TEXT runtime·procyield(SB),7,$0
 	MOVL	8(SP), AX
 again:
diff --git a/src/pkg/runtime/atomic_386.c b/src/pkg/runtime/atomic_386.c
index 79b7cbf..1046eb8 100644
--- a/src/pkg/runtime/atomic_386.c
+++ b/src/pkg/runtime/atomic_386.c
@@ -30,3 +30,16 @@
 	}
 	return old+v;
 }
+
+#pragma textflag 7
+uint64
+runtime·xchg64(uint64 volatile* addr, uint64 v)
+{
+	uint64 old;
+
+	old = *addr;
+	while(!runtime·cas64(addr, &old, v)) {
+		// nothing
+	}
+	return old;
+}
diff --git a/src/pkg/runtime/atomic_arm.c b/src/pkg/runtime/atomic_arm.c
index 0b54840..9193d59 100644
--- a/src/pkg/runtime/atomic_arm.c
+++ b/src/pkg/runtime/atomic_arm.c
@@ -123,6 +123,19 @@
 
 #pragma textflag 7
 uint64
+runtime·xchg64(uint64 volatile *addr, uint64 v)
+{
+	uint64 res;
+
+	runtime·lock(LOCK(addr));
+	res = *addr;
+	*addr = v;
+	runtime·unlock(LOCK(addr));
+	return res;
+}
+
+#pragma textflag 7
+uint64
 runtime·atomicload64(uint64 volatile *addr)
 {
 	uint64 res;
diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c
index 4d57cba..d3ee2a0 100644
--- a/src/pkg/runtime/runtime.c
+++ b/src/pkg/runtime/runtime.c
@@ -156,6 +156,10 @@
 		runtime·throw("xadd64 failed");
 	if(runtime·atomicload64(&z64) != (2ull<<40)+2)
 		runtime·throw("xadd64 failed");
+	if(runtime·xchg64(&z64, (3ull<<40)+3) != (2ull<<40)+2)
+		runtime·throw("xchg64 failed");
+	if(runtime·atomicload64(&z64) != (3ull<<40)+3)
+		runtime·throw("xchg64 failed");
 }
 
 void
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 585d6a5..8ed1843 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -691,6 +691,7 @@
 uint32	runtime·xadd(uint32 volatile*, int32);
 uint64	runtime·xadd64(uint64 volatile*, int64);
 uint32	runtime·xchg(uint32 volatile*, uint32);
+uint64	runtime·xchg64(uint64 volatile*, uint64);
 uint32	runtime·atomicload(uint32 volatile*);
 void	runtime·atomicstore(uint32 volatile*, uint32);
 void	runtime·atomicstore64(uint64 volatile*, uint64);