cmd/internal/obj, runtime: add NOFRAME flag to suppress stack frame set up on ppc64x

Replace the confusing game where a frame size of $-8 would suppress the
implicit setting up of a stack frame with a nice explicit flag.

The code to set up the function prologue is still a little confusing but better
than it was.

Change-Id: I1d49278ff42c6bc734ebfb079998b32bc53f8d9a
Reviewed-on: https://go-review.googlesource.com/15670
Reviewed-by: Minux Ma <minux@golang.org>
diff --git a/misc/cgo/test/issue9400/asm_ppc64x.s b/misc/cgo/test/issue9400/asm_ppc64x.s
index 7dfe37e..9f80087 100644
--- a/misc/cgo/test/issue9400/asm_ppc64x.s
+++ b/misc/cgo/test/issue9400/asm_ppc64x.s
@@ -7,7 +7,7 @@
 
 #include "textflag.h"
 
-TEXT ·RewindAndSetgid(SB),NOSPLIT,$-8-0
+TEXT ·RewindAndSetgid(SB),NOSPLIT|NOFRAME,$0-0
 	// Rewind stack pointer so anything that happens on the stack
 	// will clobber the test pattern created by the caller
 	ADD	$(1024 * 8), R1
diff --git a/src/cmd/internal/obj/ppc64/asm9.go b/src/cmd/internal/obj/ppc64/asm9.go
index 745da9e..43ff683 100644
--- a/src/cmd/internal/obj/ppc64/asm9.go
+++ b/src/cmd/internal/obj/ppc64/asm9.go
@@ -412,7 +412,7 @@
 		return
 	}
 	ctxt.Cursym = cursym
-	ctxt.Autosize = int32(p.To.Offset + 8)
+	ctxt.Autosize = int32(p.To.Offset)
 
 	if oprange[AANDN&obj.AMask].start == nil {
 		buildop(ctxt)
diff --git a/src/cmd/internal/obj/ppc64/obj9.go b/src/cmd/internal/obj/ppc64/obj9.go
index d609f98..e1f3435 100644
--- a/src/cmd/internal/obj/ppc64/obj9.go
+++ b/src/cmd/internal/obj/ppc64/obj9.go
@@ -124,6 +124,19 @@
 
 	p := cursym.Text
 	textstksiz := p.To.Offset
+	if textstksiz == -8 {
+		// Compatibility hack.
+		p.From3.Offset |= obj.NOFRAME
+		textstksiz = 0
+	}
+	if textstksiz%8 != 0 {
+		ctxt.Diag("frame size %d not a multiple of 8", textstksiz)
+	}
+	if p.From3.Offset&obj.NOFRAME != 0 {
+		if textstksiz != 0 {
+			ctxt.Diag("NOFRAME functions must have a frame size of 0, not %d", textstksiz)
+		}
+	}
 
 	cursym.Args = p.To.Val.(int32)
 	cursym.Locals = int32(textstksiz)
@@ -314,13 +327,20 @@
 		case obj.ATEXT:
 			mov = AMOVD
 			aoffset = 0
-			autosize = int32(textstksiz + 8)
-			if (p.Mark&LEAF != 0) && autosize <= 8 {
-				autosize = 0
-			} else if autosize&4 != 0 {
-				autosize += 4
+			autosize = int32(textstksiz)
+
+			if p.Mark&LEAF != 0 && autosize == 0 && p.From3.Offset&obj.NOFRAME == 0 {
+				// A leaf function with no locals has no frame.
+				p.From3.Offset |= obj.NOFRAME
 			}
-			p.To.Offset = int64(autosize) - 8
+
+			if p.From3.Offset&obj.NOFRAME == 0 {
+				// If there is a stack frame at all, it includes
+				// space to save the LR.
+				autosize += 8
+			}
+
+			p.To.Offset = int64(autosize)
 
 			if p.From3.Offset&obj.NOSPLIT == 0 {
 				p = stacksplit(ctxt, p, autosize) // emit split check
@@ -344,11 +364,9 @@
 					q.Spadj = +autosize
 				}
 			} else if cursym.Text.Mark&LEAF == 0 {
-				if ctxt.Debugvlog != 0 {
-					fmt.Fprintf(ctxt.Bso, "save suppressed in: %s\n", cursym.Name)
-					ctxt.Bso.Flush()
-				}
-
+				// A very few functions that do not return to their caller
+				// (e.g. gogo) are not identified as leaves but still have
+				// no frame.
 				cursym.Text.Mark |= LEAF
 			}
 
diff --git a/src/cmd/internal/obj/textflag.go b/src/cmd/internal/obj/textflag.go
index 77766c9..3d3b3b8 100644
--- a/src/cmd/internal/obj/textflag.go
+++ b/src/cmd/internal/obj/textflag.go
@@ -39,4 +39,9 @@
 	// Allocate a word of thread local storage and store the offset from the
 	// thread local base to the thread local storage in this variable.
 	TLSBSS = 256
+
+	// Do not insert instructions to allocate a stack frame for this function.
+	// Only valid on functions that declare a frame size of 0.
+	// TODO(mwhudson): only implemented for ppc64x at present.
+	NOFRAME = 512
 )
diff --git a/src/runtime/asm_ppc64x.s b/src/runtime/asm_ppc64x.s
index 6646dd8..999f7ee 100644
--- a/src/runtime/asm_ppc64x.s
+++ b/src/runtime/asm_ppc64x.s
@@ -85,19 +85,19 @@
 DATA	runtime·mainPC+0(SB)/8,$runtime·main(SB)
 GLOBL	runtime·mainPC(SB),RODATA,$8
 
-TEXT runtime·breakpoint(SB),NOSPLIT,$-8-0
+TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R0, 2(R0) // TODO: TD
 	RET
 
-TEXT runtime·asminit(SB),NOSPLIT,$-8-0
+TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
 	RET
 
-TEXT _cgo_reginit(SB),NOSPLIT,$-8-0
+TEXT _cgo_reginit(SB),NOSPLIT|NOFRAME,$0-0
 	// crosscall_ppc64 and crosscall2 need to reginit, but can't
 	// get at the 'runtime.reginit' symbol.
 	BR	runtime·reginit(SB)
 
-TEXT runtime·reginit(SB),NOSPLIT,$-8-0
+TEXT runtime·reginit(SB),NOSPLIT|NOFRAME,$0-0
 	// set R0 to zero, it's expected by the toolchain
 	XOR R0, R0
 	// initialize essential FP registers
@@ -114,7 +114,7 @@
 
 // void gosave(Gobuf*)
 // save state in Gobuf; setjmp
-TEXT runtime·gosave(SB), NOSPLIT, $-8-8
+TEXT runtime·gosave(SB), NOSPLIT|NOFRAME, $0-8
 	MOVD	buf+0(FP), R3
 	MOVD	R1, gobuf_sp(R3)
 	MOVD	LR, R31
@@ -127,7 +127,7 @@
 
 // void gogo(Gobuf*)
 // restore state from Gobuf; longjmp
-TEXT runtime·gogo(SB), NOSPLIT, $-8-8
+TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
 	MOVD	buf+0(FP), R5
 	MOVD	gobuf_g(R5), g	// make sure g is not nil
 	BL	runtime·save_g(SB)
@@ -151,7 +151,7 @@
 // Switch to m->g0's stack, call fn(g).
 // Fn must never return.  It should gogo(&g->sched)
 // to keep running g.
-TEXT runtime·mcall(SB), NOSPLIT, $-8-8
+TEXT runtime·mcall(SB), NOSPLIT|NOFRAME, $0-8
 	// Save caller state in g->sched
 	MOVD	R1, (g_sched+gobuf_sp)(g)
 	MOVD	LR, R31
@@ -262,7 +262,7 @@
 // the top of a stack (for example, morestack calling newstack
 // calling the scheduler calling newm calling gc), so we must
 // record an argument size. For that purpose, it has no arguments.
-TEXT runtime·morestack(SB),NOSPLIT,$-8-0
+TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
 	// Cannot grow scheduler stack (m->g0).
 	MOVD	g_m(g), R7
 	MOVD	m_g0(R7), R8
@@ -300,7 +300,7 @@
 	// is still in this function, and not the beginning of the next.
 	UNDEF
 
-TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0
+TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R0, R11
 	BR	runtime·morestack(SB)
 
@@ -340,7 +340,7 @@
 TEXT reflect·call(SB), NOSPLIT, $0-0
 	BR	·reflectcall(SB)
 
-TEXT ·reflectcall(SB), NOSPLIT, $-8-32
+TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-32
 	MOVWZ argsize+24(FP), R3
 	// NOTE(rsc): No call16, because CALLFN needs four words
 	// of argument space to invoke callwritebarrier.
@@ -511,10 +511,10 @@
 TEXT runtime·casuintptr(SB), NOSPLIT, $0-25
 	BR	runtime·cas64(SB)
 
-TEXT runtime·atomicloaduintptr(SB), NOSPLIT, $-8-16
+TEXT runtime·atomicloaduintptr(SB), NOSPLIT|NOFRAME, $0-16
 	BR	runtime·atomicload64(SB)
 
-TEXT runtime·atomicloaduint(SB), NOSPLIT, $-8-16
+TEXT runtime·atomicloaduint(SB), NOSPLIT|NOFRAME, $0-16
 	BR	runtime·atomicload64(SB)
 
 TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16
@@ -669,7 +669,7 @@
 // 1. grab stored LR for caller
 // 2. sub 4 bytes to get back to BL deferreturn
 // 3. BR to fn
-TEXT runtime·jmpdefer(SB), NOSPLIT, $-8-16
+TEXT runtime·jmpdefer(SB), NOSPLIT|NOFRAME, $0-16
 	MOVD	0(R1), R31
 	SUB	$4, R31
 	MOVD	R31, LR
@@ -682,7 +682,7 @@
 	BR	(CTR)
 
 // Save state of caller into g->sched. Smashes R31.
-TEXT gosave<>(SB),NOSPLIT,$-8
+TEXT gosave<>(SB),NOSPLIT|NOFRAME,$0
 	MOVD	LR, R31
 	MOVD	R31, (g_sched+gobuf_pc)(g)
 	MOVD	R1, (g_sched+gobuf_sp)(g)
@@ -873,7 +873,7 @@
 
 // void setg_gcc(G*); set g in C TLS.
 // Must obey the gcc calling convention.
-TEXT setg_gcc<>(SB),NOSPLIT,$-8-0
+TEXT setg_gcc<>(SB),NOSPLIT|NOFRAME,$0-0
 	// The standard prologue clobbers R31, which is callee-save in
 	// the C ABI, so we have to use $-8-0 and save LR ourselves.
 	MOVD	LR, R4
@@ -921,7 +921,7 @@
 	MOVD	R3, ret+8(FP)
 	RET
 
-TEXT runtime·abort(SB),NOSPLIT,$-8-0
+TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R0
 	UNDEF
 
@@ -958,16 +958,16 @@
 	RET
 
 // AES hashing not implemented for ppc64
-TEXT runtime·aeshash(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshash(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
-TEXT runtime·aeshash32(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshash32(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
-TEXT runtime·aeshash64(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshash64(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
-TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0
+TEXT runtime·aeshashstr(SB),NOSPLIT|NOFRAME,$0-0
 	MOVW	(R0), R1
 
-TEXT runtime·memeq(SB),NOSPLIT,$-8-25
+TEXT runtime·memeq(SB),NOSPLIT|NOFRAME,$0-25
 	MOVD	a+0(FP), R3
 	MOVD	b+8(FP), R4
 	MOVD	size+16(FP), R5
@@ -1115,7 +1115,7 @@
 	MOVD	R3, ret+24(FP)
 	RET
 
-TEXT runtime·cmpstring(SB),NOSPLIT,$-4-40
+TEXT runtime·cmpstring(SB),NOSPLIT|NOFRAME,$0-40
 	MOVD	s1_base+0(FP), R5
 	MOVD	s1_len+8(FP), R3
 	MOVD	s2_base+16(FP), R6
@@ -1123,7 +1123,7 @@
 	MOVD	$ret+32(FP), R7
 	BR	runtime·cmpbody<>(SB)
 
-TEXT bytes·Compare(SB),NOSPLIT,$-4-56
+TEXT bytes·Compare(SB),NOSPLIT|NOFRAME,$0-56
 	MOVD	s1+0(FP), R5
 	MOVD	s1+8(FP), R3
 	MOVD	s2+24(FP), R6
@@ -1140,7 +1140,7 @@
 //
 // On exit:
 // R5, R6, R8, R9 and R10 are clobbered
-TEXT runtime·cmpbody<>(SB),NOSPLIT,$-4-0
+TEXT runtime·cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
 	CMP	R5, R6
 	BEQ	samebytes // same starting pointers; compare lengths
 	SUB	$1, R5
@@ -1191,7 +1191,7 @@
 
 // Called from cgo wrappers, this function returns g->m->curg.stack.hi.
 // Must obey the gcc calling convention.
-TEXT _cgo_topofstack(SB),NOSPLIT,$-8
+TEXT _cgo_topofstack(SB),NOSPLIT|NOFRAME,$0
 	// g (R30) and R31 are callee-save in the C ABI, so save them
 	MOVD	g, R4
 	MOVD	R31, R5
@@ -1209,7 +1209,7 @@
 
 // The top-most function running on a goroutine
 // returns to goexit+PCQuantum.
-TEXT runtime·goexit(SB),NOSPLIT,$-8-0
+TEXT runtime·goexit(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	R0, R0	// NOP
 	BL	runtime·goexit1(SB)	// does not return
 	// traceback from goexit1 must hit code range of goexit
diff --git a/src/runtime/atomic_ppc64x.s b/src/runtime/atomic_ppc64x.s
index 28c5bf3..47769e6 100644
--- a/src/runtime/atomic_ppc64x.s
+++ b/src/runtime/atomic_ppc64x.s
@@ -7,7 +7,7 @@
 #include "textflag.h"
 
 // uint32 runtime·atomicload(uint32 volatile* addr)
-TEXT ·atomicload(SB),NOSPLIT,$-8-12
+TEXT ·atomicload(SB),NOSPLIT|NOFRAME,$0-12
 	MOVD	addr+0(FP), R3
 	SYNC
 	MOVWZ	0(R3), R3
@@ -18,7 +18,7 @@
 	RET
 
 // uint64 runtime·atomicload64(uint64 volatile* addr)
-TEXT ·atomicload64(SB),NOSPLIT,$-8-16
+TEXT ·atomicload64(SB),NOSPLIT|NOFRAME,$0-16
 	MOVD	addr+0(FP), R3
 	SYNC
 	MOVD	0(R3), R3
@@ -29,7 +29,7 @@
 	RET
 
 // void *runtime·atomicloadp(void *volatile *addr)
-TEXT ·atomicloadp(SB),NOSPLIT,$-8-16
+TEXT ·atomicloadp(SB),NOSPLIT|NOFRAME,$0-16
 	MOVD	addr+0(FP), R3
 	SYNC
 	MOVD	0(R3), R3
@@ -39,7 +39,7 @@
 	MOVD	R3, ret+8(FP)
 	RET
 
-TEXT ·publicationBarrier(SB),NOSPLIT,$-8-0
+TEXT ·publicationBarrier(SB),NOSPLIT|NOFRAME,$0-0
 	// LWSYNC is the "export" barrier recommended by Power ISA
 	// v2.07 book II, appendix B.2.2.2.
 	// LWSYNC is a load/load, load/store, and store/store barrier.
diff --git a/src/runtime/cgo/asm_ppc64x.s b/src/runtime/cgo/asm_ppc64x.s
index 0c08a1d..f310471 100644
--- a/src/runtime/cgo/asm_ppc64x.s
+++ b/src/runtime/cgo/asm_ppc64x.s
@@ -11,7 +11,7 @@
  * Save registers and call fn with two arguments.
  * crosscall2 obeys the C ABI; fn obeys the Go ABI.
  */
-TEXT crosscall2(SB),NOSPLIT,$-8
+TEXT crosscall2(SB),NOSPLIT|NOFRAME,$0
 	// TODO(austin): ABI v1 (fn is probably a function descriptor)
 
 	// Start with standard C stack frame layout and linkage
@@ -41,7 +41,7 @@
 	MOVD	R0, LR
 	RET
 
-TEXT saveregs2<>(SB),NOSPLIT,$-8
+TEXT saveregs2<>(SB),NOSPLIT|NOFRAME,$0
 	// O=-288; for R in R{14..31}; do echo "\tMOVD\t$R, $O(R1)"|sed s/R30/g/; ((O+=8)); done; for F in F{14..31}; do echo "\tFMOVD\t$F, $O(R1)"; ((O+=8)); done
 	MOVD	R14, -288(R1)
 	MOVD	R15, -280(R1)
@@ -82,7 +82,7 @@
 
 	RET
 
-TEXT restoreregs2<>(SB),NOSPLIT,$-8
+TEXT restoreregs2<>(SB),NOSPLIT|NOFRAME,$0
 	// O=-288; for R in R{14..31}; do echo "\tMOVD\t$O(R1), $R"|sed s/R30/g/; ((O+=8)); done; for F in F{14..31}; do echo "\tFMOVD\t$O(R1), $F"; ((O+=8)); done
 	MOVD	-288(R1), R14
 	MOVD	-280(R1), R15
diff --git a/src/runtime/duff_ppc64x.s b/src/runtime/duff_ppc64x.s
index 14bc33e..c8204c4 100644
--- a/src/runtime/duff_ppc64x.s
+++ b/src/runtime/duff_ppc64x.s
@@ -6,7 +6,7 @@
 
 #include "textflag.h"
 
-TEXT runtime·duffzero(SB), NOSPLIT, $-8-0
+TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0
 	MOVDU	R0, 8(R3)
 	MOVDU	R0, 8(R3)
 	MOVDU	R0, 8(R3)
diff --git a/src/runtime/memclr_ppc64x.s b/src/runtime/memclr_ppc64x.s
index 90e2748..442faa2 100644
--- a/src/runtime/memclr_ppc64x.s
+++ b/src/runtime/memclr_ppc64x.s
@@ -7,7 +7,7 @@
 #include "textflag.h"
 
 // void runtime·memclr(void*, uintptr)
-TEXT runtime·memclr(SB),NOSPLIT,$0-16
+TEXT runtime·memclr(SB),NOSPLIT|NOFRAME,$0-16
 	MOVD	ptr+0(FP), R3
 	MOVD	n+8(FP), R4
 	SRADCC	$3, R4, R6	// R6 is the number of words to zero
diff --git a/src/runtime/memmove_ppc64x.s b/src/runtime/memmove_ppc64x.s
index 72c90de..b6d0b85 100644
--- a/src/runtime/memmove_ppc64x.s
+++ b/src/runtime/memmove_ppc64x.s
@@ -7,7 +7,7 @@
 #include "textflag.h"
 
 // void runtime·memmove(void*, void*, uintptr)
-TEXT runtime·memmove(SB), NOSPLIT, $-8-24
+TEXT runtime·memmove(SB), NOSPLIT|NOFRAME, $0-24
 	MOVD	to+0(FP), R3
 	MOVD	from+8(FP), R4
 	MOVD	n+16(FP), R5
diff --git a/src/runtime/mkduff.go b/src/runtime/mkduff.go
index 9187666..821906c 100644
--- a/src/runtime/mkduff.go
+++ b/src/runtime/mkduff.go
@@ -173,11 +173,11 @@
 	// R0: always zero
 	// R3 (aka REGRT1): ptr to memory to be zeroed - 8
 	// On return, R3 points to the last zeroed dword.
-	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT, $-8-0")
+	fmt.Fprintln(w, "TEXT runtime·duffzero(SB), NOSPLIT|NOFRAME, $0-0")
 	for i := 0; i < 128; i++ {
 		fmt.Fprintln(w, "\tMOVDU\tR0, 8(R3)")
 	}
-	fmt.Fprintln(w, "\tRETURN")
+	fmt.Fprintln(w, "\tRET")
 }
 
 func copyPPC64x(w io.Writer) {
diff --git a/src/runtime/sys_linux_ppc64x.s b/src/runtime/sys_linux_ppc64x.s
index 01575f8..a5dafd4 100644
--- a/src/runtime/sys_linux_ppc64x.s
+++ b/src/runtime/sys_linux_ppc64x.s
@@ -47,17 +47,17 @@
 #define SYS_clock_gettime	246
 #define SYS_epoll_create1	315
 
-TEXT runtime·exit(SB),NOSPLIT,$-8-4
+TEXT runtime·exit(SB),NOSPLIT|NOFRAME,$0-4
 	MOVW	code+0(FP), R3
 	SYSCALL	$SYS_exit_group
 	RET
 
-TEXT runtime·exit1(SB),NOSPLIT,$-8-4
+TEXT runtime·exit1(SB),NOSPLIT|NOFRAME,$0-4
 	MOVW	code+0(FP), R3
 	SYSCALL	$SYS_exit
 	RET
 
-TEXT runtime·open(SB),NOSPLIT,$-8-20
+TEXT runtime·open(SB),NOSPLIT|NOFRAME,$0-20
 	MOVD	name+0(FP), R3
 	MOVW	mode+8(FP), R4
 	MOVW	perm+12(FP), R5
@@ -67,7 +67,7 @@
 	MOVW	R3, ret+16(FP)
 	RET
 
-TEXT runtime·closefd(SB),NOSPLIT,$-8-12
+TEXT runtime·closefd(SB),NOSPLIT|NOFRAME,$0-12
 	MOVW	fd+0(FP), R3
 	SYSCALL	$SYS_close
 	BVC	2(PC)
@@ -75,7 +75,7 @@
 	MOVW	R3, ret+8(FP)
 	RET
 
-TEXT runtime·write(SB),NOSPLIT,$-8-28
+TEXT runtime·write(SB),NOSPLIT|NOFRAME,$0-28
 	MOVD	fd+0(FP), R3
 	MOVD	p+8(FP), R4
 	MOVW	n+16(FP), R5
@@ -85,7 +85,7 @@
 	MOVW	R3, ret+24(FP)
 	RET
 
-TEXT runtime·read(SB),NOSPLIT,$-8-28
+TEXT runtime·read(SB),NOSPLIT|NOFRAME,$0-28
 	MOVW	fd+0(FP), R3
 	MOVD	p+8(FP), R4
 	MOVW	n+16(FP), R5
@@ -95,7 +95,7 @@
 	MOVW	R3, ret+24(FP)
 	RET
 
-TEXT runtime·getrlimit(SB),NOSPLIT,$-8-20
+TEXT runtime·getrlimit(SB),NOSPLIT|NOFRAME,$0-20
 	MOVW	kind+0(FP), R3
 	MOVD	limit+8(FP), R4
 	SYSCALL	$SYS_ugetrlimit
@@ -126,28 +126,28 @@
 	MOVW	R3, ret+0(FP)
 	RET
 
-TEXT runtime·raise(SB),NOSPLIT,$-8
+TEXT runtime·raise(SB),NOSPLIT|NOFRAME,$0
 	SYSCALL	$SYS_gettid
 	MOVW	R3, R3	// arg 1 tid
 	MOVW	sig+0(FP), R4	// arg 2
 	SYSCALL	$SYS_tkill
 	RET
 
-TEXT runtime·raiseproc(SB),NOSPLIT,$-8
+TEXT runtime·raiseproc(SB),NOSPLIT|NOFRAME,$0
 	SYSCALL	$SYS_getpid
 	MOVW	R3, R3	// arg 1 pid
 	MOVW	sig+0(FP), R4	// arg 2
 	SYSCALL	$SYS_kill
 	RET
 
-TEXT runtime·setitimer(SB),NOSPLIT,$-8-24
+TEXT runtime·setitimer(SB),NOSPLIT|NOFRAME,$0-24
 	MOVW	mode+0(FP), R3
 	MOVD	new+8(FP), R4
 	MOVD	old+16(FP), R5
 	SYSCALL	$SYS_setitimer
 	RET
 
-TEXT runtime·mincore(SB),NOSPLIT,$-8-28
+TEXT runtime·mincore(SB),NOSPLIT|NOFRAME,$0-28
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	MOVD	dst+16(FP), R5
@@ -182,7 +182,7 @@
 	MOVD	R3, ret+0(FP)
 	RET
 
-TEXT runtime·rtsigprocmask(SB),NOSPLIT,$-8-28
+TEXT runtime·rtsigprocmask(SB),NOSPLIT|NOFRAME,$0-28
 	MOVW	sig+0(FP), R3
 	MOVD	new+8(FP), R4
 	MOVD	old+16(FP), R5
@@ -192,7 +192,7 @@
 	MOVD	R0, 0xf1(R0)	// crash
 	RET
 
-TEXT runtime·rt_sigaction(SB),NOSPLIT,$-8-36
+TEXT runtime·rt_sigaction(SB),NOSPLIT|NOFRAME,$0-36
 	MOVD	sig+0(FP), R3
 	MOVD	new+8(FP), R4
 	MOVD	old+16(FP), R5
@@ -215,7 +215,7 @@
 TEXT runtime·sigtramp(SB),NOSPLIT,$64
 #else
 // function descriptor for the real sigtramp
-TEXT runtime·sigtramp(SB),NOSPLIT,$-8
+TEXT runtime·sigtramp(SB),NOSPLIT|NOFRAME,$0
 	DWORD	$runtime·_sigtramp(SB)
 	DWORD	$0
 	DWORD	$0
@@ -239,7 +239,7 @@
 	BL	(CTR)
 	RET
 
-TEXT runtime·mmap(SB),NOSPLIT,$-8
+TEXT runtime·mmap(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	MOVW	prot+16(FP), R5
@@ -251,7 +251,7 @@
 	MOVD	R3, ret+32(FP)
 	RET
 
-TEXT runtime·munmap(SB),NOSPLIT,$-8
+TEXT runtime·munmap(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	SYSCALL	$SYS_munmap
@@ -259,7 +259,7 @@
 	MOVD	R0, 0xf3(R0)
 	RET
 
-TEXT runtime·madvise(SB),NOSPLIT,$-8
+TEXT runtime·madvise(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVD	n+8(FP), R4
 	MOVW	flags+16(FP), R5
@@ -269,7 +269,7 @@
 
 // int64 futex(int32 *uaddr, int32 op, int32 val,
 //	struct timespec *timeout, int32 *uaddr2, int32 val2);
-TEXT runtime·futex(SB),NOSPLIT,$-8
+TEXT runtime·futex(SB),NOSPLIT|NOFRAME,$0
 	MOVD	addr+0(FP), R3
 	MOVW	op+8(FP), R4
 	MOVW	val+12(FP), R5
@@ -281,7 +281,7 @@
 	RET
 
 // int64 clone(int32 flags, void *stk, M *mp, G *gp, void (*fn)(void));
-TEXT runtime·clone(SB),NOSPLIT,$-8
+TEXT runtime·clone(SB),NOSPLIT|NOFRAME,$0
 	MOVW	flags+0(FP), R3
 	MOVD	stk+8(FP), R4
 
@@ -344,7 +344,7 @@
 	SYSCALL	$SYS_exit
 	BR	-2(PC)	// keep exiting
 
-TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
+TEXT runtime·sigaltstack(SB),NOSPLIT|NOFRAME,$0
 	MOVD	new+0(FP), R3
 	MOVD	old+8(FP), R4
 	SYSCALL	$SYS_sigaltstack
@@ -352,11 +352,11 @@
 	MOVD	R0, 0xf1(R0)  // crash
 	RET
 
-TEXT runtime·osyield(SB),NOSPLIT,$-8
+TEXT runtime·osyield(SB),NOSPLIT|NOFRAME,$0
 	SYSCALL	$SYS_sched_yield
 	RET
 
-TEXT runtime·sched_getaffinity(SB),NOSPLIT,$-8
+TEXT runtime·sched_getaffinity(SB),NOSPLIT|NOFRAME,$0
 	MOVD	pid+0(FP), R3
 	MOVD	len+8(FP), R4
 	MOVD	buf+16(FP), R5
@@ -365,21 +365,21 @@
 	RET
 
 // int32 runtime·epollcreate(int32 size);
-TEXT runtime·epollcreate(SB),NOSPLIT,$-8
+TEXT runtime·epollcreate(SB),NOSPLIT|NOFRAME,$0
 	MOVW    size+0(FP), R3
 	SYSCALL	$SYS_epoll_create
 	MOVW	R3, ret+8(FP)
 	RET
 
 // int32 runtime·epollcreate1(int32 flags);
-TEXT runtime·epollcreate1(SB),NOSPLIT,$-8
+TEXT runtime·epollcreate1(SB),NOSPLIT|NOFRAME,$0
 	MOVW	flags+0(FP), R3
 	SYSCALL	$SYS_epoll_create1
 	MOVW	R3, ret+8(FP)
 	RET
 
 // func epollctl(epfd, op, fd int32, ev *epollEvent) int
-TEXT runtime·epollctl(SB),NOSPLIT,$-8
+TEXT runtime·epollctl(SB),NOSPLIT|NOFRAME,$0
 	MOVW	epfd+0(FP), R3
 	MOVW	op+4(FP), R4
 	MOVW	fd+8(FP), R5
@@ -389,7 +389,7 @@
 	RET
 
 // int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
-TEXT runtime·epollwait(SB),NOSPLIT,$-8
+TEXT runtime·epollwait(SB),NOSPLIT|NOFRAME,$0
 	MOVW	epfd+0(FP), R3
 	MOVD	ev+8(FP), R4
 	MOVW	nev+16(FP), R5
@@ -399,7 +399,7 @@
 	RET
 
 // void runtime·closeonexec(int32 fd);
-TEXT runtime·closeonexec(SB),NOSPLIT,$-8
+TEXT runtime·closeonexec(SB),NOSPLIT|NOFRAME,$0
 	MOVW    fd+0(FP), R3  // fd
 	MOVD    $2, R4  // F_SETFD
 	MOVD    $1, R5  // FD_CLOEXEC
diff --git a/src/runtime/textflag.h b/src/runtime/textflag.h
index f2690c9..dbf3d99 100644
--- a/src/runtime/textflag.h
+++ b/src/runtime/textflag.h
@@ -24,3 +24,7 @@
 // Allocate a word of thread local storage and store the offset from the
 // thread local base to the thread local storage in this variable.
 #define TLSBSS	256
+// Do not insert instructions to allocate a stack frame for this function.
+// Only valid on functions that declare a frame size of 0.
+// TODO(mwhudson): only implemented for ppc64x at present.
+#define NOFRAME 512
diff --git a/src/runtime/tls_ppc64x.s b/src/runtime/tls_ppc64x.s
index 127e702..d930718 100644
--- a/src/runtime/tls_ppc64x.s
+++ b/src/runtime/tls_ppc64x.s
@@ -22,7 +22,7 @@
 // If !iscgo, this is a no-op.
 //
 // NOTE: setg_gcc<> assume this clobbers only R31.
-TEXT runtime·save_g(SB),NOSPLIT,$-8-0
+TEXT runtime·save_g(SB),NOSPLIT|NOFRAME,$0-0
 	MOVB	runtime·iscgo(SB), R31
 	CMP	R31, $0
 	BEQ	nocgo
@@ -50,7 +50,7 @@
 // usual Go registers aren't set up.
 //
 // NOTE: _cgo_topofstack assumes this only clobbers g (R30), and R31.
-TEXT runtime·load_g(SB),NOSPLIT,$-8-0
+TEXT runtime·load_g(SB),NOSPLIT|NOFRAME,$0-0
 	MOVD	$runtime·tls_g(SB), R31
 	// R13 is the C ABI TLS base pointer + 0x7000
 	ADD	R13, R31