cmd/cgo, runtime/cgo: support ppc64

This implements support for calls to and from C in the ppc64 C ABI, as
well as supporting functionality such as an entry point from the
dynamic linker.

Change-Id: I68da6df50d5638cb1a3d3fef773fb412d7bf631a
Reviewed-on: https://go-review.googlesource.com/2009
Reviewed-by: Russ Cox <rsc@golang.org>
diff --git a/src/runtime/cgo/asm_ppc64x.s b/src/runtime/cgo/asm_ppc64x.s
new file mode 100644
index 0000000..0c08a1d
--- /dev/null
+++ b/src/runtime/cgo/asm_ppc64x.s
@@ -0,0 +1,124 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include "textflag.h"
+
+/*
+ * void crosscall2(void (*fn)(void*, int32), void*, int32)
+ * Save registers (saveregs2), call fn with its two arguments at 8(R1) and 16(R1), then restore.
+ * crosscall2 obeys the C ABI; fn obeys the Go ABI.
+ */
+TEXT crosscall2(SB),NOSPLIT,$-8
+	// TODO(austin): ABI v1 (fn is probably a function descriptor)
+
+	// Start with standard C stack frame layout and linkage
+	MOVD	LR, R0
+	MOVD	R0, 16(R1)	// Save LR in caller's frame
+	MOVD	R2, 24(R1)	// Save TOC in caller's frame
+
+	BL	saveregs2<>(SB)	// LR is already stored at 16(R1), so this BL may overwrite it
+
+	MOVDU	R1, (-288-3*8)(R1)	// New frame: 288-byte register save area + 3 words; old R1 is stored at the new 0(R1)
+
+	// Initialize Go ABI environment
+	BL	runtime·reginit(SB)	// NOTE(review): R3-R5 are read after these two calls, so both must preserve them - confirm
+	BL	runtime·load_g(SB)
+
+	MOVD	R3, CTR	// R3 holds fn; it is called through CTR below
+	MOVD	R4, 8(R1)	// fn's first argument (the void*)
+	MOVD	R5, 16(R1)	// fn's second argument (the int32)
+	BL	(CTR)
+
+	ADD	$(288+3*8), R1	// Drop the frame allocated by MOVDU above
+
+	BL	restoreregs2<>(SB)	// Reload everything saveregs2 stored below R1
+
+	MOVD	24(R1), R2	// Restore TOC from caller's frame
+	MOVD	16(R1), R0	// Reload the saved LR ...
+	MOVD	R0, LR	// ... and put it back so RET returns to the C caller
+	RET
+
+TEXT saveregs2<>(SB),NOSPLIT,$-8	// Store R14-R29, g, R31 and F14-F31 into the 288 bytes just below R1
+	// O=-288; for R in R{14..31}; do echo "\tMOVD\t$R, $O(R1)"|sed s/R30/g/; ((O+=8)); done; for F in F{14..31}; do echo "\tFMOVD\t$F, $O(R1)"; ((O+=8)); done
+	MOVD	R14, -288(R1)	// 18 integer slots: -288 .. -152
+	MOVD	R15, -280(R1)
+	MOVD	R16, -272(R1)
+	MOVD	R17, -264(R1)
+	MOVD	R18, -256(R1)
+	MOVD	R19, -248(R1)
+	MOVD	R20, -240(R1)
+	MOVD	R21, -232(R1)
+	MOVD	R22, -224(R1)
+	MOVD	R23, -216(R1)
+	MOVD	R24, -208(R1)
+	MOVD	R25, -200(R1)
+	MOVD	R26, -192(R1)
+	MOVD	R27, -184(R1)
+	MOVD	R28, -176(R1)
+	MOVD	R29, -168(R1)
+	MOVD	g, -160(R1)	// g takes the R30 slot (the generator script rewrites R30 to g)
+	MOVD	R31, -152(R1)
+	FMOVD	F14, -144(R1)	// 18 floating-point slots: -144 .. -8
+	FMOVD	F15, -136(R1)
+	FMOVD	F16, -128(R1)
+	FMOVD	F17, -120(R1)
+	FMOVD	F18, -112(R1)
+	FMOVD	F19, -104(R1)
+	FMOVD	F20, -96(R1)
+	FMOVD	F21, -88(R1)
+	FMOVD	F22, -80(R1)
+	FMOVD	F23, -72(R1)
+	FMOVD	F24, -64(R1)
+	FMOVD	F25, -56(R1)
+	FMOVD	F26, -48(R1)
+	FMOVD	F27, -40(R1)
+	FMOVD	F28, -32(R1)
+	FMOVD	F29, -24(R1)
+	FMOVD	F30, -16(R1)
+	FMOVD	F31, -8(R1)
+
+	RET
+
+TEXT restoreregs2<>(SB),NOSPLIT,$-8	// Reload R14-R29, g, R31 and F14-F31 from the 288 bytes just below R1
+	// O=-288; for R in R{14..31}; do echo "\tMOVD\t$O(R1), $R"|sed s/R30/g/; ((O+=8)); done; for F in F{14..31}; do echo "\tFMOVD\t$O(R1), $F"; ((O+=8)); done
+	MOVD	-288(R1), R14	// Same offsets as saveregs2, so R1 must have the value it had there
+	MOVD	-280(R1), R15
+	MOVD	-272(R1), R16
+	MOVD	-264(R1), R17
+	MOVD	-256(R1), R18
+	MOVD	-248(R1), R19
+	MOVD	-240(R1), R20
+	MOVD	-232(R1), R21
+	MOVD	-224(R1), R22
+	MOVD	-216(R1), R23
+	MOVD	-208(R1), R24
+	MOVD	-200(R1), R25
+	MOVD	-192(R1), R26
+	MOVD	-184(R1), R27
+	MOVD	-176(R1), R28
+	MOVD	-168(R1), R29
+	MOVD	-160(R1), g	// g takes the R30 slot (the generator script rewrites R30 to g)
+	MOVD	-152(R1), R31
+	FMOVD	-144(R1), F14	// 18 floating-point slots: -144 .. -8
+	FMOVD	-136(R1), F15
+	FMOVD	-128(R1), F16
+	FMOVD	-120(R1), F17
+	FMOVD	-112(R1), F18
+	FMOVD	-104(R1), F19
+	FMOVD	-96(R1), F20
+	FMOVD	-88(R1), F21
+	FMOVD	-80(R1), F22
+	FMOVD	-72(R1), F23
+	FMOVD	-64(R1), F24
+	FMOVD	-56(R1), F25
+	FMOVD	-48(R1), F26
+	FMOVD	-40(R1), F27
+	FMOVD	-32(R1), F28
+	FMOVD	-24(R1), F29
+	FMOVD	-16(R1), F30
+	FMOVD	-8(R1), F31
+
+	RET
diff --git a/src/runtime/cgo/gcc_linux_ppc64x.c b/src/runtime/cgo/gcc_linux_ppc64x.c
new file mode 100644
index 0000000..b176295
--- /dev/null
+++ b/src/runtime/cgo/gcc_linux_ppc64x.c
@@ -0,0 +1,70 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+#include <pthread.h>
+#include <string.h>
+#include <signal.h>
+#include "libcgo.h"
+
+static void *threadentry(void*);	// start routine handed to pthread_create; defined at the end of this file
+
+void (*x_cgo_inittls)(void **tlsg, void **tlsbase);	// neither assigned nor called anywhere in this file
+static void (*setg_gcc)(void*);	// stored by x_cgo_init, invoked by threadentry
+
+void
+x_cgo_init(G *g, void (*setg)(void*), void **tlsbase)
+{
+	pthread_attr_t dflt;	// freshly initialized attributes, i.e. the pthread defaults
+	size_t stacksz;	// stack size reported by those attributes
+	// stacklo is estimated as: address of a local - stack size + 4096.
+	pthread_attr_init(&dflt);
+	pthread_attr_getstacksize(&dflt, &stacksz);
+	g->stacklo = (uintptr)&dflt - stacksz + 4096;
+	pthread_attr_destroy(&dflt);
+	setg_gcc = setg;	// kept so threadentry can call it on each new thread; tlsbase is unused
+}
+
+void
+_cgo_sys_thread_start(ThreadStart *ts)
+{
+	pthread_attr_t thread_attr;
+	sigset_t everything, saved;
+	pthread_t tid;
+	size_t stack_size;
+	int rc;
+
+	// Block every signal in this thread around pthread_create; the previous mask is reinstated below.
+	sigfillset(&everything);
+	pthread_sigmask(SIG_SETMASK, &everything, &saved);
+
+	pthread_attr_init(&thread_attr);
+	pthread_attr_getstacksize(&thread_attr, &stack_size);
+	// stacklo stays 0 and stackhi carries the size; the runtime finishes the bounds ("mstack" in the old comment, presumably mstart - confirm).
+	ts->g->stackhi = stack_size;
+	rc = pthread_create(&tid, &thread_attr, threadentry, ts);
+	pthread_sigmask(SIG_SETMASK, &saved, nil);
+
+	if (rc != 0) {
+		fatalf("pthread_create failed: %s", strerror(rc));
+	}
+}
+
+extern void crosscall_ppc64(void (*fn)(void), void *g);	// assembly trampoline from gcc_ppc64x.S
+
+static void*
+threadentry(void *v)
+{
+	// Copy the start record onto this thread's stack, then release the original.
+	ThreadStart start = *(ThreadStart*)v;
+	free(v);
+
+	// Hand this thread's g to the setg callback recorded by x_cgo_init (saves g in C TLS).
+	setg_gcc((void*)start.g);
+
+	// Run start.fn through the trampoline, passing g as its second argument.
+	crosscall_ppc64(start.fn, (void*)start.g);
+	return nil;
+}
diff --git a/src/runtime/cgo/gcc_ppc64x.S b/src/runtime/cgo/gcc_ppc64x.S
new file mode 100644
index 0000000..fc20277
--- /dev/null
+++ b/src/runtime/cgo/gcc_ppc64x.S
@@ -0,0 +1,140 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build ppc64 ppc64le
+
+/*
+ * EXT(s) spells C symbol s for the assembler: _s on Apple targets (underscore prefix), plain s elsewhere.
+ */
+#if defined(__APPLE__)
+#define EXT(s) _##s
+#else
+#define EXT(s) s
+#endif
+
+/*
+ * void crosscall_ppc64(void (*fn)(void), void *g)
+ * Call fn with g installed in r30, preserving the C caller's registers.
+ * Calling into the 9g tool chain, where all registers are caller save.
+ * Called from standard ppc64 C ABI, where r2, r14-r31, f14-f31 are
+ * callee-save, so they must be saved explicitly.
+ */
+.globl EXT(crosscall_ppc64)
+EXT(crosscall_ppc64):
+	// Start with standard C stack frame layout and linkage
+	mflr	%r0
+	std	%r0, 16(%r1)	// Save LR in caller's frame
+	std	%r2, 24(%r1)	// Save TOC in caller's frame
+	bl	saveregs	// LR is already stored at 16(r1), so this bl may overwrite it
+	stdu	%r1, -296(%r1)	// New frame: 288-byte register save area + 8; old r1 is stored at the new 0(r1)
+
+	// Set up Go ABI constant registers
+	bl	_cgo_reginit	// NOTE(review): r3 and r4 are read after this call, so it must preserve them - confirm
+
+	// Restore g pointer (r30 in Go ABI, which may have been clobbered by C)
+	mr	%r30, %r4	// r4 is the g argument
+
+	// Call fn
+	mtctr	%r3	// r3 is the fn argument
+	bctrl
+
+	addi	%r1, %r1, 296	// Drop the frame allocated by stdu above
+	bl	restoreregs	// Reload everything saveregs stored below r1
+	ld	%r2, 24(%r1)	// Restore TOC from caller's frame
+	ld	%r0, 16(%r1)	// Reload the saved LR ...
+	mtlr	%r0	// ... and put it back so blr returns to the C caller
+	blr
+
+saveregs:	// Store r14-r31 and f14-f31 into the 288 bytes just below r1
+	// Save callee-save registers
+	// O=-288; for R in %r{14..31}; do echo "\tstd\t$R, $O(%r1)"; ((O+=8)); done; for F in %f{14..31}; do echo "\tstfd\t$F, $O(%r1)"; ((O+=8)); done
+	std	%r14, -288(%r1)	// 18 integer slots: -288 .. -152
+	std	%r15, -280(%r1)
+	std	%r16, -272(%r1)
+	std	%r17, -264(%r1)
+	std	%r18, -256(%r1)
+	std	%r19, -248(%r1)
+	std	%r20, -240(%r1)
+	std	%r21, -232(%r1)
+	std	%r22, -224(%r1)
+	std	%r23, -216(%r1)
+	std	%r24, -208(%r1)
+	std	%r25, -200(%r1)
+	std	%r26, -192(%r1)
+	std	%r27, -184(%r1)
+	std	%r28, -176(%r1)
+	std	%r29, -168(%r1)
+	std	%r30, -160(%r1)	// r30 is the Go g register; the caller's value is saved here
+	std	%r31, -152(%r1)
+	stfd	%f14, -144(%r1)	// 18 floating-point slots: -144 .. -8
+	stfd	%f15, -136(%r1)
+	stfd	%f16, -128(%r1)
+	stfd	%f17, -120(%r1)
+	stfd	%f18, -112(%r1)
+	stfd	%f19, -104(%r1)
+	stfd	%f20, -96(%r1)
+	stfd	%f21, -88(%r1)
+	stfd	%f22, -80(%r1)
+	stfd	%f23, -72(%r1)
+	stfd	%f24, -64(%r1)
+	stfd	%f25, -56(%r1)
+	stfd	%f26, -48(%r1)
+	stfd	%f27, -40(%r1)
+	stfd	%f28, -32(%r1)
+	stfd	%f29, -24(%r1)
+	stfd	%f30, -16(%r1)
+	stfd	%f31, -8(%r1)
+
+	blr
+
+restoreregs:	// Reload r14-r31 and f14-f31 from the 288 bytes just below r1
+	// O=-288; for R in %r{14..31}; do echo "\tld\t$R, $O(%r1)"; ((O+=8)); done; for F in %f{14..31}; do echo "\tlfd\t$F, $O(%r1)"; ((O+=8)); done
+	ld	%r14, -288(%r1)	// Same offsets as saveregs, so r1 must have the value it had there
+	ld	%r15, -280(%r1)
+	ld	%r16, -272(%r1)
+	ld	%r17, -264(%r1)
+	ld	%r18, -256(%r1)
+	ld	%r19, -248(%r1)
+	ld	%r20, -240(%r1)
+	ld	%r21, -232(%r1)
+	ld	%r22, -224(%r1)
+	ld	%r23, -216(%r1)
+	ld	%r24, -208(%r1)
+	ld	%r25, -200(%r1)
+	ld	%r26, -192(%r1)
+	ld	%r27, -184(%r1)
+	ld	%r28, -176(%r1)
+	ld	%r29, -168(%r1)
+	ld	%r30, -160(%r1)	// puts back the caller's r30, which crosscall_ppc64 overwrote with g
+	ld	%r31, -152(%r1)
+	lfd	%f14, -144(%r1)	// 18 floating-point slots: -144 .. -8
+	lfd	%f15, -136(%r1)
+	lfd	%f16, -128(%r1)
+	lfd	%f17, -120(%r1)
+	lfd	%f18, -112(%r1)
+	lfd	%f19, -104(%r1)
+	lfd	%f20, -96(%r1)
+	lfd	%f21, -88(%r1)
+	lfd	%f22, -80(%r1)
+	lfd	%f23, -72(%r1)
+	lfd	%f24, -64(%r1)
+	lfd	%f25, -56(%r1)
+	lfd	%f26, -48(%r1)
+	lfd	%f27, -40(%r1)
+	lfd	%f28, -32(%r1)
+	lfd	%f29, -24(%r1)
+	lfd	%f30, -16(%r1)
+	lfd	%f31, -8(%r1)
+
+	blr
+
+.globl EXT(__stack_chk_fail_local)
+EXT(__stack_chk_fail_local):
+1:
+	// TODO(austin): no real handler yet; this symbol currently just spins in place
+	b 1b	// branch back to local label 1 above: an infinite loop
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif