changed 5c calling convention to use stack exclusively for in
params. a number of fixes to assembly routines that assumed R0
had the first arg. one stack offset fix, arm pushes the link
register on stack top.

go/test: passes 65% (235/364) tests

R=rsc
APPROVED=rsc
DELTA=20  (11 added, 0 deleted, 9 changed)
OCL=34809
CL=34812
diff --git a/src/cmd/5g/ggen.c b/src/cmd/5g/ggen.c
index 00b44b0..9f4cc35 100644
--- a/src/cmd/5g/ggen.c
+++ b/src/cmd/5g/ggen.c
@@ -275,7 +275,7 @@
 	nod.val.u.reg = REGSP;
 	nod.addable = 1;
 
-	nod.xoffset = fp->width;
+	nod.xoffset = fp->width + 4; // +4: saved lr at 0(SP)
 	nod.type = fp->type;
 	cgen_as(res, &nod);
 }
@@ -305,7 +305,7 @@
 	nod1.val.u.reg = REGSP;
 	nod1.addable = 1;
 
-	nod1.xoffset = fp->width;
+	nod1.xoffset = fp->width + 4; // +4: saved lr at 0(SP)
 	nod1.type = fp->type;
 
 	if(res->op != OREGISTER) {
diff --git a/src/cmd/5g/gobj.c b/src/cmd/5g/gobj.c
index 8cb1a54..bc29ba2 100644
--- a/src/cmd/5g/gobj.c
+++ b/src/cmd/5g/gobj.c
@@ -621,6 +621,9 @@
 void
 genembedtramp(Type *rcvr, Type *method, Sym *newnam)
 {
+	// TODO(kaib): re-implement genembedtramp
+	genwrapper(rcvr, method, newnam);
+/*
 	Sym *e;
 	int c, d, o;
 	Prog *p;
@@ -692,6 +695,7 @@
 //print("4. %P\n", p);
 
 	pc->as = ARET;	// overwrite AEND
+*/
 }
 
 void
diff --git a/src/cmd/5l/5.out.h b/src/cmd/5l/5.out.h
index 3901803..f78b3f6 100644
--- a/src/cmd/5l/5.out.h
+++ b/src/cmd/5l/5.out.h
@@ -38,7 +38,8 @@
 #define	ALLTHUMBS	(1<<3)
 
 #define	REGRET		0
-#define	REGARG		0
+/* -1 disables use of REGARG */
+#define	REGARG		-1
 /* compiler allocates R1 up as temps */
 /* compiler allocates register variables R3 up */
 #define	REGEXT		10
diff --git a/src/pkg/runtime/arm/asm.s b/src/pkg/runtime/arm/asm.s
index c43b6ee..18e17be 100644
--- a/src/pkg/runtime/arm/asm.s
+++ b/src/pkg/runtime/arm/asm.s
@@ -92,6 +92,7 @@
 // uintptr gosave(Gobuf*)
 // save state in Gobuf; setjmp
 TEXT gosave(SB), 7, $0
+	MOVW	0(FP), R0
 	MOVW	SP, gobuf_sp(R0)
 	MOVW	LR, gobuf_pc(R0)
 	MOVW	g, gobuf_g(R0)
@@ -101,8 +102,8 @@
 // void gogo(Gobuf*, uintptr)
 // restore state from Gobuf; longjmp
 TEXT	gogo(SB), 7, $0
-	MOVW	R0, R1			// gobuf
-	MOVW	8(SP), R0		// return 2nd arg
+	MOVW	0(FP), R1			// gobuf
+	MOVW	4(FP), R0		// return 2nd arg
 	MOVW	gobuf_g(R1), g
 	MOVW	0(g), R2		// make sure g != nil
 	MOVW	gobuf_sp(R1), SP	// restore SP
@@ -113,7 +114,8 @@
 // (call fn, returning to state in Gobuf)
 // using frame size $-4 means do not save LR on stack.
 TEXT gogocall(SB), 7, $-4
-	MOVW	8(SP), R1		// fn
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1		// fn
 	MOVW	gobuf_g(R0), g
 	MOVW	0(g), R2		// make sure g != nil
 	MOVW	gobuf_sp(R0), SP	// restore SP
@@ -222,6 +224,7 @@
 #define	STREX(a,v,r)	WORD	$(0xe<<28|0x01800f90 | (a)<<16 | (r)<<12 | (v)<<0)
 
 TEXT	cas+0(SB),0,$12		/* r0 holds p */
+	MOVW	0(FP), R0
 	MOVW	ov+4(FP), R1
 	MOVW	nv+8(FP), R2
 spin:
@@ -253,7 +256,7 @@
 //	JMP	AX	// but first run the deferred function
 
 TEXT	sys·memclr(SB),7,$20
-// R0 = addr and passes implicitly to memset
+	MOVW	0(FP), R0
 	MOVW	$0, R1		// c = 0
 	MOVW	R1, -16(SP)
 	MOVW	4(FP), R1	// n
diff --git a/src/pkg/runtime/arm/vlop.s b/src/pkg/runtime/arm/vlop.s
index 56771ec..2c1d583 100644
--- a/src/pkg/runtime/arm/vlop.s
+++ b/src/pkg/runtime/arm/vlop.s
@@ -31,6 +31,7 @@
 /* replaced use of R10 by R11 because the former can be the data segment base register */
 
 TEXT	_mulv(SB), $0
+	MOVW	0(FP), R0
 	MOVW	8(FP), R2		/* l0 */
 	MOVW	4(FP), R3	  /* h0 */
 	MOVW	16(FP), R4	  /* l1 */
diff --git a/src/pkg/runtime/linux/arm/sys.s b/src/pkg/runtime/linux/arm/sys.s
index c61d08f..d7eb43e 100644
--- a/src/pkg/runtime/linux/arm/sys.s
+++ b/src/pkg/runtime/linux/arm/sys.s
@@ -12,8 +12,9 @@
 #define SYS_mmap2 (SYS_BASE + 192)
 
 TEXT write(SB),7,$0
-	MOVW	8(SP), R1
-	MOVW	12(SP), R2
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	8(FP), R2
     	SWI	$SYS_write
 	RET
 
@@ -22,6 +23,7 @@
 	SWI	$SYS_exit
 
 TEXT sys·mmap(SB),7,$0
+	MOVW	0(FP), R0
 	MOVW	4(FP), R1
 	MOVW	8(FP), R2
 	MOVW	12(FP), R3