runtime, sync: use __atomic intrinsics instead of __sync

GCC has supported the __atomic intrinsics since 4.7.  They are better
than the __sync intrinsics in that they specify a memory model and,
more importantly for our purposes, they are reliably implemented
either in the compiler or in libatomic.

Fixes https://gcc.gnu.org/PR52084

Change-Id: I27333d59e7c795c1e2a6de63eb5eab67d9fe13b2
Reviewed-on: https://go-review.googlesource.com/c/160820
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Reviewed-by: Than McIntosh <thanm@google.com>
diff --git a/libgo/Makefile.am b/libgo/Makefile.am
index b9ff88c..1d62f11 100644
--- a/libgo/Makefile.am
+++ b/libgo/Makefile.am
@@ -471,7 +471,6 @@
 	runtime/proc.c \
 	runtime/runtime_c.c \
 	runtime/stack.c \
-	runtime/thread.c \
 	runtime/yield.c \
 	$(rtems_task_variable_add_file) \
 	$(runtime_getncpu_file)
diff --git a/libgo/Makefile.in b/libgo/Makefile.in
index abda064..04cce14 100644
--- a/libgo/Makefile.in
+++ b/libgo/Makefile.in
@@ -252,8 +252,8 @@
 	runtime/go-unsetenv.lo runtime/go-unwind.lo \
 	runtime/go-varargs.lo runtime/env_posix.lo runtime/panic.lo \
 	runtime/print.lo runtime/proc.lo runtime/runtime_c.lo \
-	runtime/stack.lo runtime/thread.lo runtime/yield.lo \
-	$(am__objects_1) $(am__objects_2)
+	runtime/stack.lo runtime/yield.lo $(am__objects_1) \
+	$(am__objects_2)
 am_libgo_llgo_la_OBJECTS = $(am__objects_3)
 libgo_llgo_la_OBJECTS = $(am_libgo_llgo_la_OBJECTS)
 AM_V_lt = $(am__v_lt_@AM_V@)
@@ -897,7 +897,6 @@
 	runtime/proc.c \
 	runtime/runtime_c.c \
 	runtime/stack.c \
-	runtime/thread.c \
 	runtime/yield.c \
 	$(rtems_task_variable_add_file) \
 	$(runtime_getncpu_file)
@@ -1373,8 +1372,6 @@
 	runtime/$(DEPDIR)/$(am__dirstamp)
 runtime/stack.lo: runtime/$(am__dirstamp) \
 	runtime/$(DEPDIR)/$(am__dirstamp)
-runtime/thread.lo: runtime/$(am__dirstamp) \
-	runtime/$(DEPDIR)/$(am__dirstamp)
 runtime/yield.lo: runtime/$(am__dirstamp) \
 	runtime/$(DEPDIR)/$(am__dirstamp)
 runtime/rtems-task-variable-add.lo: runtime/$(am__dirstamp) \
@@ -1449,7 +1446,6 @@
 @AMDEP_TRUE@@am__include@ @am__quote@runtime/$(DEPDIR)/rtems-task-variable-add.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@runtime/$(DEPDIR)/runtime_c.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@runtime/$(DEPDIR)/stack.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@runtime/$(DEPDIR)/thread.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@runtime/$(DEPDIR)/yield.Plo@am__quote@
 
 .c.o:
diff --git a/libgo/config.h.in b/libgo/config.h.in
index d47969d..bbb2516 100644
--- a/libgo/config.h.in
+++ b/libgo/config.h.in
@@ -246,22 +246,6 @@
 /* Define to 1 if <math.h> defines struct exception */
 #undef HAVE_STRUCT_EXCEPTION
 
-/* Define to 1 if the compiler provides the __sync_add_and_fetch function for
-   uint64 */
-#undef HAVE_SYNC_ADD_AND_FETCH_8
-
-/* Define to 1 if the compiler provides the __sync_bool_compare_and_swap
-   function for uint32 */
-#undef HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4
-
-/* Define to 1 if the compiler provides the __sync_bool_compare_and_swap
-   function for uint64 */
-#undef HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8
-
-/* Define to 1 if the compiler provides the __sync_fetch_and_add function for
-   uint32 */
-#undef HAVE_SYNC_FETCH_AND_ADD_4
-
 /* Define to 1 if you have the `sync_file_range' function. */
 #undef HAVE_SYNC_FILE_RANGE
 
diff --git a/libgo/configure b/libgo/configure
index 4f68400..138a9c6 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -15135,122 +15135,6 @@
 
 LIBS="$LIBS_hold"
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __sync_bool_compare_and_swap_4" >&5
-$as_echo_n "checking for __sync_bool_compare_and_swap_4... " >&6; }
-if ${libgo_cv_func___sync_bool_compare_and_swap_4+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-typedef unsigned int uint32  __attribute__ ((mode (SI)));
-uint32 i;
-int main() { return __sync_bool_compare_and_swap (&i, 0, 1); }
-
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  libgo_cv_func___sync_bool_compare_and_swap_4=yes
-else
-  libgo_cv_func___sync_bool_compare_and_swap_4=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgo_cv_func___sync_bool_compare_and_swap_4" >&5
-$as_echo "$libgo_cv_func___sync_bool_compare_and_swap_4" >&6; }
-if test "$libgo_cv_func___sync_bool_compare_and_swap_4" = "yes"; then
-
-$as_echo "#define HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4 1" >>confdefs.h
-
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __sync_bool_compare_and_swap_8" >&5
-$as_echo_n "checking for __sync_bool_compare_and_swap_8... " >&6; }
-if ${libgo_cv_func___sync_bool_compare_and_swap_8+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-typedef unsigned int uint64  __attribute__ ((mode (DI)));
-uint64 i;
-int main() { return __sync_bool_compare_and_swap (&i, 0, 1); }
-
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  libgo_cv_func___sync_bool_compare_and_swap_8=yes
-else
-  libgo_cv_func___sync_bool_compare_and_swap_8=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgo_cv_func___sync_bool_compare_and_swap_8" >&5
-$as_echo "$libgo_cv_func___sync_bool_compare_and_swap_8" >&6; }
-if test "$libgo_cv_func___sync_bool_compare_and_swap_8" = "yes"; then
-
-$as_echo "#define HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8 1" >>confdefs.h
-
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __sync_fetch_and_add_4" >&5
-$as_echo_n "checking for __sync_fetch_and_add_4... " >&6; }
-if ${libgo_cv_func___sync_fetch_and_add_4+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-typedef unsigned int uint32  __attribute__ ((mode (SI)));
-uint32 i;
-int main() { return __sync_fetch_and_add (&i, 1); }
-
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  libgo_cv_func___sync_fetch_and_add_4=yes
-else
-  libgo_cv_func___sync_fetch_and_add_4=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgo_cv_func___sync_fetch_and_add_4" >&5
-$as_echo "$libgo_cv_func___sync_fetch_and_add_4" >&6; }
-if test "$libgo_cv_func___sync_fetch_and_add_4" = "yes"; then
-
-$as_echo "#define HAVE_SYNC_FETCH_AND_ADD_4 1" >>confdefs.h
-
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __sync_add_and_fetch_8" >&5
-$as_echo_n "checking for __sync_add_and_fetch_8... " >&6; }
-if ${libgo_cv_func___sync_add_and_fetch_8+:} false; then :
-  $as_echo_n "(cached) " >&6
-else
-  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h.  */
-
-typedef unsigned int uint64  __attribute__ ((mode (DI)));
-uint64 i;
-int main() { return __sync_add_and_fetch (&i, 1); }
-
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
-  libgo_cv_func___sync_add_and_fetch_8=yes
-else
-  libgo_cv_func___sync_add_and_fetch_8=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
-    conftest$ac_exeext conftest.$ac_ext
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $libgo_cv_func___sync_add_and_fetch_8" >&5
-$as_echo "$libgo_cv_func___sync_add_and_fetch_8" >&6; }
-if test "$libgo_cv_func___sync_add_and_fetch_8" = "yes"; then
-
-$as_echo "#define HAVE_SYNC_ADD_AND_FETCH_8 1" >>confdefs.h
-
-fi
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler supports -minline-all-stringops" >&5
 $as_echo_n "checking whether compiler supports -minline-all-stringops... " >&6; }
 if ${libgo_cv_c_stringops+:} false; then :
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 2b7e191..2b452ec 100644
--- a/libgo/configure.ac
+++ b/libgo/configure.ac
@@ -578,62 +578,6 @@
 AC_CHECK_FUNCS(matherr)
 LIBS="$LIBS_hold"
 
-AC_CACHE_CHECK([for __sync_bool_compare_and_swap_4],
-[libgo_cv_func___sync_bool_compare_and_swap_4],
-[AC_LINK_IFELSE([AC_LANG_SOURCE([
-typedef unsigned int uint32  __attribute__ ((mode (SI)));
-uint32 i;
-int main() { return __sync_bool_compare_and_swap (&i, 0, 1); }
-])],
-[libgo_cv_func___sync_bool_compare_and_swap_4=yes],
-[libgo_cv_func___sync_bool_compare_and_swap_4=no])])
-if test "$libgo_cv_func___sync_bool_compare_and_swap_4" = "yes"; then
-  AC_DEFINE(HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4, 1,
-    [Define to 1 if the compiler provides the __sync_bool_compare_and_swap function for uint32])
-fi
-
-AC_CACHE_CHECK([for __sync_bool_compare_and_swap_8],
-[libgo_cv_func___sync_bool_compare_and_swap_8],
-[AC_LINK_IFELSE([AC_LANG_SOURCE([
-typedef unsigned int uint64  __attribute__ ((mode (DI)));
-uint64 i;
-int main() { return __sync_bool_compare_and_swap (&i, 0, 1); }
-])],
-[libgo_cv_func___sync_bool_compare_and_swap_8=yes],
-[libgo_cv_func___sync_bool_compare_and_swap_8=no])])
-if test "$libgo_cv_func___sync_bool_compare_and_swap_8" = "yes"; then
-  AC_DEFINE(HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8, 1,
-    [Define to 1 if the compiler provides the __sync_bool_compare_and_swap function for uint64])
-fi
-
-AC_CACHE_CHECK([for __sync_fetch_and_add_4],
-[libgo_cv_func___sync_fetch_and_add_4],
-[AC_LINK_IFELSE([AC_LANG_SOURCE([
-typedef unsigned int uint32  __attribute__ ((mode (SI)));
-uint32 i;
-int main() { return __sync_fetch_and_add (&i, 1); }
-])],
-[libgo_cv_func___sync_fetch_and_add_4=yes],
-[libgo_cv_func___sync_fetch_and_add_4=no])])
-if test "$libgo_cv_func___sync_fetch_and_add_4" = "yes"; then
-  AC_DEFINE(HAVE_SYNC_FETCH_AND_ADD_4, 1,
-    [Define to 1 if the compiler provides the __sync_fetch_and_add function for uint32])
-fi
-
-AC_CACHE_CHECK([for __sync_add_and_fetch_8],
-[libgo_cv_func___sync_add_and_fetch_8],
-[AC_LINK_IFELSE([AC_LANG_SOURCE([
-typedef unsigned int uint64  __attribute__ ((mode (DI)));
-uint64 i;
-int main() { return __sync_add_and_fetch (&i, 1); }
-])],
-[libgo_cv_func___sync_add_and_fetch_8=yes],
-[libgo_cv_func___sync_add_and_fetch_8=no])])
-if test "$libgo_cv_func___sync_add_and_fetch_8" = "yes"; then
-  AC_DEFINE(HAVE_SYNC_ADD_AND_FETCH_8, 1,
-    [Define to 1 if the compiler provides the __sync_add_and_fetch function for uint64])
-fi
-
 dnl For x86 we want to use the -minline-all-stringops option to avoid
 dnl forcing a stack split when calling memcpy and friends.
 AC_CACHE_CHECK([whether compiler supports -minline-all-stringops],
diff --git a/libgo/go/runtime/testdata/testprogcgo/lockosthread.c b/libgo/go/runtime/testdata/testprogcgo/lockosthread.c
index b10cc4f..9a8c057 100644
--- a/libgo/go/runtime/testdata/testprogcgo/lockosthread.c
+++ b/libgo/go/runtime/testdata/testprogcgo/lockosthread.c
@@ -9,5 +9,5 @@
 uint32_t threadExited;
 
 void setExited(void *x) {
-	__sync_fetch_and_add(&threadExited, 1);
+	__atomic_add_fetch(&threadExited, 1, __ATOMIC_SEQ_CST);
 }
diff --git a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go
index f803511..28b094f 100644
--- a/libgo/go/runtime/testdata/testprogcgo/threadpprof.go
+++ b/libgo/go/runtime/testdata/testprogcgo/threadpprof.go
@@ -50,13 +50,13 @@
 	arg->buf[0] = (uintptr_t)(cpuHogThread) + 0x10;
 	arg->buf[1] = (uintptr_t)(cpuHogThread2) + 0x4;
 	arg->buf[2] = 0;
-	__sync_add_and_fetch(&cpuHogThreadCount, 1);
+	__atomic_add_fetch(&cpuHogThreadCount, 1, __ATOMIC_SEQ_CST);
 }
 
 // getCPUHogThreadCount fetches the number of times we've seen cpuHogThread
 // in the traceback.
 int getCPUHogThreadCount() {
-	return __sync_add_and_fetch(&cpuHogThreadCount, 0);
+	return __atomic_load_n(&cpuHogThreadCount, __ATOMIC_SEQ_CST);
 }
 
 static void* cpuHogDriver(void* arg __attribute__ ((unused))) {
diff --git a/libgo/go/runtime/testdata/testprogcgo/tracebackctxt_c.c b/libgo/go/runtime/testdata/testprogcgo/tracebackctxt_c.c
index f02b7ca..ff6895e 100644
--- a/libgo/go/runtime/testdata/testprogcgo/tracebackctxt_c.c
+++ b/libgo/go/runtime/testdata/testprogcgo/tracebackctxt_c.c
@@ -49,18 +49,18 @@
 static int contextCount;
 
 int getContextCount() {
-	return __sync_add_and_fetch(&contextCount, 0);
+	return __atomic_load_n(&contextCount, __ATOMIC_SEQ_CST);
 }
 
 void tcContext(void* parg) {
 	struct cgoContextArg* arg = (struct cgoContextArg*)(parg);
 	if (arg->context == 0) {
-		arg->context = __sync_add_and_fetch(&contextCount, 1);
+		arg->context = __atomic_add_fetch(&contextCount, 1, __ATOMIC_SEQ_CST);
 	} else {
-		if (arg->context != __sync_add_and_fetch(&contextCount, 0)) {
+		if (arg->context != __atomic_load_n(&contextCount, __ATOMIC_SEQ_CST)) {
 			abort();
 		}
-		__sync_sub_and_fetch(&contextCount, 1);
+		__atomic_sub_fetch(&contextCount, 1, __ATOMIC_SEQ_CST);
 	}
 }
 
diff --git a/libgo/go/sync/atomic/atomic.c b/libgo/go/sync/atomic/atomic.c
index 6cc730f..25e439d 100644
--- a/libgo/go/sync/atomic/atomic.c
+++ b/libgo/go/sync/atomic/atomic.c
@@ -69,7 +69,8 @@
 _Bool
 CompareAndSwapInt32 (int32_t *val, int32_t old, int32_t new)
 {
-  return __sync_bool_compare_and_swap (val, old, new);
+  return __atomic_compare_exchange_n (val, &old, new, false, __ATOMIC_SEQ_CST,
+				      __ATOMIC_RELAXED);
 }
 
 _Bool CompareAndSwapInt64 (int64_t *, int64_t, int64_t)
@@ -81,7 +82,8 @@
 {
   if (((uintptr_t) val & 7) != 0)
     val = NULL;
-  return __sync_bool_compare_and_swap (val, old, new);
+  return __atomic_compare_exchange_n (val, &old, new, false, __ATOMIC_SEQ_CST,
+				      __ATOMIC_RELAXED);
 }
 
 _Bool CompareAndSwapUint32 (uint32_t *, uint32_t, uint32_t)
@@ -91,7 +93,8 @@
 _Bool
 CompareAndSwapUint32 (uint32_t *val, uint32_t old, uint32_t new)
 {
-  return __sync_bool_compare_and_swap (val, old, new);
+  return __atomic_compare_exchange_n (val, &old, new, false, __ATOMIC_SEQ_CST,
+				      __ATOMIC_RELAXED);
 }
 
 _Bool CompareAndSwapUint64 (uint64_t *, uint64_t, uint64_t)
@@ -103,7 +106,8 @@
 {
   if (((uintptr_t) val & 7) != 0)
     val = NULL;
-  return __sync_bool_compare_and_swap (val, old, new);
+  return __atomic_compare_exchange_n (val, &old, new, false, __ATOMIC_SEQ_CST,
+				      __ATOMIC_RELAXED);
 }
 
 _Bool CompareAndSwapUintptr (uintptr_t *, uintptr_t, uintptr_t)
@@ -113,7 +117,8 @@
 _Bool
 CompareAndSwapUintptr (uintptr_t *val, uintptr_t old, uintptr_t new)
 {
-  return __sync_bool_compare_and_swap (val, old, new);
+  return __atomic_compare_exchange_n (val, &old, new, false, __ATOMIC_SEQ_CST,
+				      __ATOMIC_RELAXED);
 }
 
 int32_t AddInt32 (int32_t *, int32_t)
@@ -123,7 +128,7 @@
 int32_t
 AddInt32 (int32_t *val, int32_t delta)
 {
-  return __sync_add_and_fetch (val, delta);
+  return __atomic_add_fetch (val, delta, __ATOMIC_SEQ_CST);
 }
 
 uint32_t AddUint32 (uint32_t *, uint32_t)
@@ -133,7 +138,7 @@
 uint32_t
 AddUint32 (uint32_t *val, uint32_t delta)
 {
-  return __sync_add_and_fetch (val, delta);
+  return __atomic_add_fetch (val, delta, __ATOMIC_SEQ_CST);
 }
 
 int64_t AddInt64 (int64_t *, int64_t)
@@ -145,7 +150,7 @@
 {
   if (((uintptr_t) val & 7) != 0)
     val = NULL;
-  return __sync_add_and_fetch (val, delta);
+  return __atomic_add_fetch (val, delta, __ATOMIC_SEQ_CST);
 }
 
 uint64_t AddUint64 (uint64_t *, uint64_t)
@@ -157,7 +162,7 @@
 {
   if (((uintptr_t) val & 7) != 0)
     val = NULL;
-  return __sync_add_and_fetch (val, delta);
+  return __atomic_add_fetch (val, delta, __ATOMIC_SEQ_CST);
 }
 
 uintptr_t AddUintptr (uintptr_t *, uintptr_t)
@@ -167,7 +172,7 @@
 uintptr_t
 AddUintptr (uintptr_t *val, uintptr_t delta)
 {
-  return __sync_add_and_fetch (val, delta);
+  return __atomic_add_fetch (val, delta, __ATOMIC_SEQ_CST);
 }
 
 int32_t LoadInt32 (int32_t *addr)
@@ -177,12 +182,7 @@
 int32_t
 LoadInt32 (int32_t *addr)
 {
-  int32_t v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, v))
-    v = *addr;
-  return v;
+  return __atomic_load_n (addr, __ATOMIC_SEQ_CST);
 }
 
 int64_t LoadInt64 (int64_t *addr)
@@ -192,14 +192,9 @@
 int64_t
 LoadInt64 (int64_t *addr)
 {
-  int64_t v;
-
   if (((uintptr_t) addr & 7) != 0)
     panicmem ();
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, v))
-    v = *addr;
-  return v;
+  return __atomic_load_n (addr, __ATOMIC_SEQ_CST);
 }
 
 uint32_t LoadUint32 (uint32_t *addr)
@@ -209,12 +204,7 @@
 uint32_t
 LoadUint32 (uint32_t *addr)
 {
-  uint32_t v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, v))
-    v = *addr;
-  return v;
+  return __atomic_load_n (addr, __ATOMIC_SEQ_CST);
 }
 
 uint64_t LoadUint64 (uint64_t *addr)
@@ -224,14 +214,9 @@
 uint64_t
 LoadUint64 (uint64_t *addr)
 {
-  uint64_t v;
-
   if (((uintptr_t) addr & 7) != 0)
     panicmem ();
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, v))
-    v = *addr;
-  return v;
+  return __atomic_load_n (addr, __ATOMIC_SEQ_CST);
 }
 
 uintptr_t LoadUintptr (uintptr_t *addr)
@@ -241,12 +226,7 @@
 uintptr_t
 LoadUintptr (uintptr_t *addr)
 {
-  uintptr_t v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, v))
-    v = *addr;
-  return v;
+  return __atomic_load_n (addr, __ATOMIC_SEQ_CST);
 }
 
 void *LoadPointer (void **addr)
@@ -256,12 +236,7 @@
 void *
 LoadPointer (void **addr)
 {
-  void *v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, v))
-    v = *addr;
-  return v;
+  return __atomic_load_n (addr, __ATOMIC_SEQ_CST);
 }
 
 void StoreInt32 (int32_t *addr, int32_t val)
@@ -271,11 +246,7 @@
 void
 StoreInt32 (int32_t *addr, int32_t val)
 {
-  int32_t v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, val))
-    v = *addr;
+  __atomic_store_n (addr, val, __ATOMIC_SEQ_CST);
 }
 
 void StoreInt64 (int64_t *addr, int64_t val)
@@ -285,13 +256,9 @@
 void
 StoreInt64 (int64_t *addr, int64_t val)
 {
-  int64_t v;
-
   if (((uintptr_t) addr & 7) != 0)
     panicmem ();
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, val))
-    v = *addr;
+  __atomic_store_n (addr, val, __ATOMIC_SEQ_CST);
 }
 
 void StoreUint32 (uint32_t *addr, uint32_t val)
@@ -301,11 +268,7 @@
 void
 StoreUint32 (uint32_t *addr, uint32_t val)
 {
-  uint32_t v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, val))
-    v = *addr;
+  __atomic_store_n (addr, val, __ATOMIC_SEQ_CST);
 }
 
 void StoreUint64 (uint64_t *addr, uint64_t val)
@@ -315,13 +278,9 @@
 void
 StoreUint64 (uint64_t *addr, uint64_t val)
 {
-  uint64_t v;
-
   if (((uintptr_t) addr & 7) != 0)
     panicmem ();
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, val))
-    v = *addr;
+  __atomic_store_n (addr, val, __ATOMIC_SEQ_CST);
 }
 
 void StoreUintptr (uintptr_t *addr, uintptr_t val)
@@ -331,9 +290,5 @@
 void
 StoreUintptr (uintptr_t *addr, uintptr_t val)
 {
-  uintptr_t v;
-
-  v = *addr;
-  while (! __sync_bool_compare_and_swap (addr, v, val))
-    v = *addr;
+  __atomic_store_n (addr, val, __ATOMIC_SEQ_CST);
 }
diff --git a/libgo/go/sync/cas.c b/libgo/go/sync/cas.c
deleted file mode 100644
index 7571c64..0000000
--- a/libgo/go/sync/cas.c
+++ /dev/null
@@ -1,17 +0,0 @@
-/* cas.c -- implement sync.cas for Go.
-
-   Copyright 2009 The Go Authors. All rights reserved.
-   Use of this source code is governed by a BSD-style
-   license that can be found in the LICENSE file.  */
-
-#include <stdint.h>
-
-#include "runtime.h"
-
-_Bool cas (int32_t *, int32_t, int32_t) __asm__ (GOSYM_PREFIX "libgo_sync.sync.cas");
-
-_Bool
-cas (int32_t *ptr, int32_t old, int32_t new)
-{
-  return __sync_bool_compare_and_swap (ptr, old, new);
-}
diff --git a/libgo/misc/cgo/test/issue7978.go b/libgo/misc/cgo/test/issue7978.go
index b057e3e..f0809d3 100644
--- a/libgo/misc/cgo/test/issue7978.go
+++ b/libgo/misc/cgo/test/issue7978.go
@@ -12,33 +12,18 @@
 
 void issue7978cb(void);
 
-#if defined(__APPLE__) && defined(__arm__)
-// on Darwin/ARM, libSystem doesn't provide implementation of the __sync_fetch_and_add
-// primitive, and although gcc supports it, it doesn't inline its definition.
-// Clang could inline its definition, so we require clang on Darwin/ARM.
-#if defined(__clang__)
-#define HAS_SYNC_FETCH_AND_ADD 1
-#else
-#define HAS_SYNC_FETCH_AND_ADD 0
-#endif
-#else
-#define HAS_SYNC_FETCH_AND_ADD 1
-#endif
-
 // use ugly atomic variable sync since that doesn't require calling back into
 // Go code or OS dependencies
 static void issue7978c(uint32_t *sync) {
-#if HAS_SYNC_FETCH_AND_ADD
-	while(__sync_fetch_and_add(sync, 0) != 0)
+	while(__atomic_load_n(sync, __ATOMIC_SEQ_CST) != 0)
 		;
-	__sync_fetch_and_add(sync, 1);
-	while(__sync_fetch_and_add(sync, 0) != 2)
+	__atomic_add_fetch(sync, 1, __ATOMIC_SEQ_CST);
+	while(__atomic_load_n(sync, __ATOMIC_SEQ_CST) != 2)
 		;
 	issue7978cb();
-	__sync_fetch_and_add(sync, 1);
-	while(__sync_fetch_and_add(sync, 0) != 6)
+	__atomic_add_fetch(sync, 1, __ATOMIC_SEQ_CST);
+	while(__atomic_load_n(sync, __ATOMIC_SEQ_CST) != 6)
 		;
-#endif
 }
 */
 import "C"
@@ -111,9 +96,6 @@
 	if runtime.Compiler == "gccgo" {
 		t.Skip("gccgo can not do stack traces of C code")
 	}
-	if C.HAS_SYNC_FETCH_AND_ADD == 0 {
-		t.Skip("clang required for __sync_fetch_and_add support on darwin/arm")
-	}
 	debug.SetTraceback("2")
 	issue7978sync = 0
 	go issue7978go()
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index 3bb1e55..5da34fb 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -276,22 +276,8 @@
 int32	runtime_round2(int32 x); // round x up to a power of 2.
 
 // atomic operations
-#define runtime_cas(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_cas64(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-#define runtime_casp(pval, old, new) __sync_bool_compare_and_swap (pval, old, new)
-// Don't confuse with XADD x86 instruction,
-// this one is actually 'addx', that is, add-and-fetch.
-#define runtime_xadd(p, v) __sync_add_and_fetch (p, v)
-#define runtime_xadd64(p, v) __sync_add_and_fetch (p, v)
-#define runtime_xchg(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_xchg64(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_xchgp(p, v) __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST)
+#define runtime_xadd(p, v) __atomic_add_fetch (p, v, __ATOMIC_SEQ_CST)
 #define runtime_atomicload(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstore(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicstore64(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
-#define runtime_atomicload64(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicloadp(p) __atomic_load_n (p, __ATOMIC_SEQ_CST)
-#define runtime_atomicstorep(p, v) __atomic_store_n (p, v, __ATOMIC_SEQ_CST)
 
 void runtime_setg(G*)
   __asm__ (GOSYM_PREFIX "runtime.setg");
diff --git a/libgo/runtime/thread.c b/libgo/runtime/thread.c
deleted file mode 100644
index 83ee006..0000000
--- a/libgo/runtime/thread.c
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright 2010 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include <errno.h>
-#include <signal.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include "runtime.h"
-#include "go-assert.h"
-
-/* For targets which don't have the required sync support.  Really
-   these should be provided by gcc itself.  FIXME.  */
-
-#if !defined (HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4) || !defined (HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8) || !defined (HAVE_SYNC_FETCH_AND_ADD_4) || !defined (HAVE_SYNC_ADD_AND_FETCH_8)
-
-static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
-
-#endif
-
-#ifndef HAVE_SYNC_BOOL_COMPARE_AND_SWAP_4
-
-_Bool
-__sync_bool_compare_and_swap_4 (uint32*, uint32, uint32)
-  __attribute__ ((visibility ("hidden")));
-
-_Bool
-__sync_bool_compare_and_swap_4 (uint32* ptr, uint32 old, uint32 new)
-{
-  int i;
-  _Bool ret;
-
-  i = pthread_mutex_lock (&sync_lock);
-  __go_assert (i == 0);
-
-  if (*ptr != old)
-    ret = 0;
-  else
-    {
-      *ptr = new;
-      ret = 1;
-    }
-
-  i = pthread_mutex_unlock (&sync_lock);
-  __go_assert (i == 0);
-
-  return ret;
-}
-
-#endif
-
-#ifndef HAVE_SYNC_BOOL_COMPARE_AND_SWAP_8
-
-_Bool
-__sync_bool_compare_and_swap_8 (uint64*, uint64, uint64)
-  __attribute__ ((visibility ("hidden")));
-
-_Bool
-__sync_bool_compare_and_swap_8 (uint64* ptr, uint64 old, uint64 new)
-{
-  int i;
-  _Bool ret;
-
-  i = pthread_mutex_lock (&sync_lock);
-  __go_assert (i == 0);
-
-  if (*ptr != old)
-    ret = 0;
-  else
-    {
-      *ptr = new;
-      ret = 1;
-    }
-
-  i = pthread_mutex_unlock (&sync_lock);
-  __go_assert (i == 0);
-
-  return ret;
-}
-
-#endif
-
-#ifndef HAVE_SYNC_FETCH_AND_ADD_4
-
-uint32
-__sync_fetch_and_add_4 (uint32*, uint32)
-  __attribute__ ((visibility ("hidden")));
-
-uint32
-__sync_fetch_and_add_4 (uint32* ptr, uint32 add)
-{
-  int i;
-  uint32 ret;
-
-  i = pthread_mutex_lock (&sync_lock);
-  __go_assert (i == 0);
-
-  ret = *ptr;
-  *ptr += add;
-
-  i = pthread_mutex_unlock (&sync_lock);
-  __go_assert (i == 0);
-
-  return ret;
-}
-
-#endif
-
-#ifndef HAVE_SYNC_ADD_AND_FETCH_8
-
-uint64
-__sync_add_and_fetch_8 (uint64*, uint64)
-  __attribute__ ((visibility ("hidden")));
-
-uint64
-__sync_add_and_fetch_8 (uint64* ptr, uint64 add)
-{
-  int i;
-  uint64 ret;
-
-  i = pthread_mutex_lock (&sync_lock);
-  __go_assert (i == 0);
-
-  *ptr += add;
-  ret = *ptr;
-
-  i = pthread_mutex_unlock (&sync_lock);
-  __go_assert (i == 0);
-
-  return ret;
-}
-
-#endif
-
-uintptr
-runtime_memlimit(void)
-{
-	struct rlimit rl;
-	uintptr used;
-
-	if(getrlimit(RLIMIT_AS, &rl) != 0)
-		return 0;
-	if(rl.rlim_cur >= 0x7fffffff)
-		return 0;
-
-	// Estimate our VM footprint excluding the heap.
-	// Not an exact science: use size of binary plus
-	// some room for thread stacks.
-	used = (64<<20);
-	if(used >= rl.rlim_cur)
-		return 0;
-
-	// If there's not at least 16 MB left, we're probably
-	// not going to be able to do much.  Treat as no limit.
-	rl.rlim_cur -= used;
-	if(rl.rlim_cur < (16<<20))
-		return 0;
-
-	return rl.rlim_cur - used;
-}