cmd/cc, runtime: convert C compilers to use Go calling convention

To date, the C compilers and Go compilers differed only in how
values were returned from functions. This made it difficult to call
Go from C or C from Go if return values were involved. It also made
assembly called from Go and assembly called from C different.

This CL changes the C compiler to use the Go conventions, passing
results on the stack, after the arguments.
[Exception: this does not apply to variadic C functions (those
declared with ...), because you can't know where on the stack the
arguments end.]

By doing this, the CL makes it possible to rewrite C functions into Go
one at a time, without worrying about which languages call that
function or which languages it calls.

This CL also updates all the assembly files in package runtime to use
the new conventions. Argument references of the form 40(SP) have
been rewritten to the form name+10(FP) instead, and there are now
Go func prototypes for every assembly function called from C or Go.
This means that 'go vet runtime' checks effectively every assembly
function, and go vet's output was used to automate the bulk of the
conversion.

Some functions, like seek and nsec on Plan 9, needed to be rewritten.

Many assembly routines called from C were reading arguments
incorrectly, using MOVL instead of MOVQ or vice versa, especially on
the less used systems like openbsd.
These were found by go vet and have been corrected too.
If we're lucky, this may reduce flakiness on those systems.

Tested on:
        darwin/386
        darwin/amd64
        linux/arm
        linux/386
        linux/amd64
If this breaks another system, the bug is almost certainly in the
sys_$GOOS_$GOARCH.s file, since the rest of the CL is tested
by the combination of the above systems.

LGTM=dvyukov, iant
R=golang-codereviews, 0intro, dave, alex.brainman, dvyukov, iant
CC=golang-codereviews, josharian, r
https://golang.org/cl/135830043
diff --git a/src/cmd/5c/cgen.c b/src/cmd/5c/cgen.c
index 08ed360..5ea8eea 100644
--- a/src/cmd/5c/cgen.c
+++ b/src/cmd/5c/cgen.c
@@ -46,7 +46,7 @@
 	}
 	if(n == Z || n->type == T)
 		return;
-	if(typesuv[n->type->etype]) {
+	if(typesuv[n->type->etype] && (n->op != OFUNC || nn != Z)) {
 		sugen(n, nn, n->type->width);
 		return;
 	}
@@ -75,7 +75,7 @@
 	if(r != Z && r->complex >= FNX)
 	switch(o) {
 	default:
-		regret(&nod, r);
+		regret(&nod, r, 0, 0);
 		cgen(r, &nod);
 
 		regsalloc(&nod1, r);
@@ -107,7 +107,7 @@
 		if(l->addable >= INDEXED && l->complex < FNX) {
 			if(nn != Z || r->addable < INDEXED) {
 				if(r->complex >= FNX && nn == Z)
-					regret(&nod, r);
+					regret(&nod, r, 0, 0);
 				else
 					regalloc(&nod, r, nn);
 				cgen(r, &nod);
@@ -348,7 +348,7 @@
 			if(l->op != OIND)
 				diag(n, "bad function call");
 
-			regret(&nod, l->left);
+			regret(&nod, l->left, 0, 0);
 			cgen(l->left, &nod);
 			regsalloc(&nod1, l->left);
 			gopcode(OAS, &nod, Z, &nod1);
@@ -377,11 +377,11 @@
 		if(REGARG >= 0)
 			if(o != reg[REGARG])
 				reg[REGARG]--;
-		if(nn != Z) {
-			regret(&nod, n);
-			gopcode(OAS, &nod, Z, nn);
+		regret(&nod, n, l->type, 1);
+		if(nn != Z)
+			gmove(&nod, nn);
+		if(nod.op == OREGISTER)
 			regfree(&nod);
-		}
 		break;
 
 	case OIND:
@@ -823,7 +823,7 @@
 		if(true)
 			o = comrel[relindex(o)];
 		if(l->complex >= FNX && r->complex >= FNX) {
-			regret(&nod, r);
+			regret(&nod, r, 0, 0);
 			cgenrel(r, &nod);
 			regsalloc(&nod1, r);
 			gopcode(OAS, &nod, Z, &nod1);
@@ -957,7 +957,7 @@
 		if(nn != Z && side(nn)) {
 			nod1 = *n;
 			nod1.type = typ(TIND, n->type);
-			regret(&nod2, &nod1);
+			regret(&nod2, &nod1, 0, 0);
 			lcgen(nn, &nod2);
 			regsalloc(&nod0, &nod1);
 			gopcode(OAS, &nod2, Z, &nod0);
@@ -1036,6 +1036,20 @@
 		break;
 
 	case OFUNC:
+		if(!hasdotdotdot(n->left->type)) {
+			cgen(n, Z);
+			if(nn != Z) {
+				curarg -= n->type->width;
+				regret(&nod1, n, n->left->type, 1);
+				if(nn->complex >= FNX) {
+					regsalloc(&nod2, n);
+					cgen(&nod1, &nod2);
+					nod1 = nod2;
+				}
+				cgen(&nod1, nn);
+			}
+			break;
+		}
 		if(nn == Z) {
 			sugen(n, nodrat, w);
 			break;
diff --git a/src/cmd/5c/gc.h b/src/cmd/5c/gc.h
index 166900c..7417b7d 100644
--- a/src/cmd/5c/gc.h
+++ b/src/cmd/5c/gc.h
@@ -210,7 +210,7 @@
 void	xcom(Node*);
 int	bcomplex(Node*, Node*);
 Prog*	gtext(Sym*, int32);
-vlong	argsize(void);
+vlong	argsize(int);
 
 /*
  * cgen.c
@@ -236,7 +236,7 @@
 Node*	nod32const(vlong);
 Node*	nodfconst(double);
 void	nodreg(Node*, Node*, int);
-void	regret(Node*, Node*);
+void	regret(Node*, Node*, Type*, int);
 int	tmpreg(void);
 void	regalloc(Node*, Node*, Node*);
 void	regfree(Node*);
diff --git a/src/cmd/5c/sgen.c b/src/cmd/5c/sgen.c
index efcc043..a36612c 100644
--- a/src/cmd/5c/sgen.c
+++ b/src/cmd/5c/sgen.c
@@ -36,7 +36,7 @@
 {
 	int32 a;
 
-	a = argsize();
+	a = argsize(1);
 	if((textflag & NOSPLIT) != 0 && stkoff >= 128)
 		yyerror("stack frame too large for NOSPLIT function");
 
diff --git a/src/cmd/5c/txt.c b/src/cmd/5c/txt.c
index a753510..af40220 100644
--- a/src/cmd/5c/txt.c
+++ b/src/cmd/5c/txt.c
@@ -274,15 +274,43 @@
 }
 
 void
-regret(Node *n, Node *nn)
+regret(Node *n, Node *nn, Type *t, int mode)
 {
 	int r;
 
-	r = REGRET;
-	if(typefd[nn->type->etype])
-		r = FREGRET+NREG;
-	nodreg(n, nn, r);
-	reg[r]++;
+	if(mode == 0 || hasdotdotdot(t) || nn->type->width == 0) {
+		r = REGRET;
+		if(typefd[nn->type->etype])
+			r = FREGRET+NREG;
+		nodreg(n, nn, r);
+		reg[r]++;
+		return;
+	}
+	
+	if(mode == 1) {
+		// fetch returned value after call.
+		// already called gargs, so curarg is set.
+		curarg = (curarg+3) & ~3;
+		regaalloc(n, nn);
+		return;
+	}
+	
+	if(mode == 2) {
+		// store value to be returned.
+		// must compute arg offset.
+		if(t->etype != TFUNC)
+			fatal(Z, "bad regret func %T", t);
+		*n = *nn;
+		n->op = ONAME;
+		n->class = CPARAM;
+		n->sym = slookup(".ret");
+		n->complex = nodret->complex;
+		n->xoffset = argsize(0);
+		n->addable = 20;
+		return;
+	}
+	
+	fatal(Z, "bad regret");
 }
 
 int
diff --git a/src/cmd/6c/cgen.c b/src/cmd/6c/cgen.c
index bdef76f..b66c6ad 100644
--- a/src/cmd/6c/cgen.c
+++ b/src/cmd/6c/cgen.c
@@ -51,7 +51,7 @@
 	}
 	if(n == Z || n->type == T)
 		return;
-	if(typesu[n->type->etype]) {
+	if(typesu[n->type->etype] && (n->op != OFUNC || nn != Z)) {
 		sugen(n, nn, n->type->width);
 		return;
 	}
@@ -88,7 +88,7 @@
 		if(cond(o) && typesu[l->type->etype])
 			break;
 
-		regret(&nod, r);
+		regret(&nod, r, 0, 0);
 		cgen(r, &nod);
 
 		regsalloc(&nod1, r);
@@ -135,7 +135,7 @@
 		if(!hardleft) {
 			if(nn != Z || r->addable < INDEXED || hardconst(r)) {
 				if(r->complex >= FNX && nn == Z)
-					regret(&nod, r);
+					regret(&nod, r, 0, 0);
 				else
 					regalloc(&nod, r, nn);
 				cgen(r, &nod);
@@ -929,7 +929,7 @@
 			if(l->op != OIND)
 				diag(n, "bad function call");
 
-			regret(&nod, l->left);
+			regret(&nod, l->left, 0, 0);
 			cgen(l->left, &nod);
 			regsalloc(&nod1, l->left);
 			gmove(&nod, &nod1);
@@ -956,11 +956,13 @@
 		gpcdata(PCDATA_ArgSize, -1);
 		if(REGARG >= 0 && reg[REGARG])
 			reg[REGARG]--;
-		if(nn != Z) {
-			regret(&nod, n);
+		regret(&nod, n, l->type, 1); // update maxarg if nothing else
+		gpcdata(PCDATA_ArgSize, curarg);
+		gpcdata(PCDATA_ArgSize, -1);
+		if(nn != Z)
 			gmove(&nod, nn);
+		if(nod.op == OREGISTER)
 			regfree(&nod);
-		}
 		break;
 
 	case OIND:
@@ -1382,7 +1384,7 @@
 		if(true)
 			o = comrel[relindex(o)];
 		if(l->complex >= FNX && r->complex >= FNX) {
-			regret(&nod, r);
+			regret(&nod, r, 0, 0);
 			cgen(r, &nod);
 			regsalloc(&nod1, r);
 			gmove(&nod, &nod1);
@@ -1535,7 +1537,7 @@
 		if(nn != Z && side(nn)) {
 			nod1 = *n;
 			nod1.type = typ(TIND, n->type);
-			regret(&nod2, &nod1);
+			regret(&nod2, &nod1, 0, 0);
 			lcgen(nn, &nod2);
 			regsalloc(&nod0, &nod1);
 			cgen(&nod2, &nod0);
@@ -1617,6 +1619,20 @@
 		break;
 
 	case OFUNC:
+		if(!hasdotdotdot(n->left->type)) {
+			cgen(n, Z);
+			if(nn != Z) {
+				curarg -= n->type->width;
+				regret(&nod1, n, n->left->type, 1);
+				if(nn->complex >= FNX) {
+					regsalloc(&nod2, n);
+					cgen(&nod1, &nod2);
+					nod1 = nod2;
+				}
+				cgen(&nod1, nn);
+			}
+			break;
+		}
 		if(nn == Z) {
 			sugen(n, nodrat, w);
 			break;
diff --git a/src/cmd/6c/gc.h b/src/cmd/6c/gc.h
index bc4e36c..aa9d95d 100644
--- a/src/cmd/6c/gc.h
+++ b/src/cmd/6c/gc.h
@@ -210,7 +210,7 @@
 void	indx(Node*);
 int	bcomplex(Node*, Node*);
 Prog*	gtext(Sym*, int32);
-vlong	argsize(void);
+vlong	argsize(int);
 
 /*
  * cgen.c
@@ -239,7 +239,7 @@
 Node*	nodgconst(vlong, Type*);
 int	nodreg(Node*, Node*, int);
 int	isreg(Node*, int);
-void	regret(Node*, Node*);
+void	regret(Node*, Node*, Type*, int);
 void	regalloc(Node*, Node*, Node*);
 void	regfree(Node*);
 void	regialloc(Node*, Node*, Node*);
diff --git a/src/cmd/6c/sgen.c b/src/cmd/6c/sgen.c
index c048e78..d995101 100644
--- a/src/cmd/6c/sgen.c
+++ b/src/cmd/6c/sgen.c
@@ -36,7 +36,7 @@
 {
 	vlong v;
 
-	v = ((uvlong)argsize() << 32) | (stkoff & 0xffffffff);
+	v = ((uvlong)argsize(1) << 32) | (stkoff & 0xffffffff);
 	if((textflag & NOSPLIT) && stkoff >= 128)
 		yyerror("stack frame too large for NOSPLIT function");
 
diff --git a/src/cmd/6c/txt.c b/src/cmd/6c/txt.c
index 4d07436..3bdbf41 100644
--- a/src/cmd/6c/txt.c
+++ b/src/cmd/6c/txt.c
@@ -351,15 +351,43 @@
 }
 
 void
-regret(Node *n, Node *nn)
+regret(Node *n, Node *nn, Type *t, int mode)
 {
 	int r;
+	
+	if(mode == 0 || hasdotdotdot(t) || nn->type->width == 0) {
+		r = REGRET;
+		if(typefd[nn->type->etype])
+			r = FREGRET;
+		nodreg(n, nn, r);
+		reg[r]++;
+		return;
+	}
+	
+	if(mode == 1) {
+		// fetch returned value after call.
+		// already called gargs, so curarg is set.
+		curarg = (curarg+7) & ~7;
+		regaalloc(n, nn);
+		return;
+	}
 
-	r = REGRET;
-	if(typefd[nn->type->etype])
-		r = FREGRET;
-	nodreg(n, nn, r);
-	reg[r]++;
+	if(mode == 2) {
+		// store value to be returned.
+		// must compute arg offset.
+		if(t->etype != TFUNC)
+			fatal(Z, "bad regret func %T", t);
+		*n = *nn;
+		n->op = ONAME;
+		n->class = CPARAM;
+		n->sym = slookup(".ret");
+		n->complex = nodret->complex;
+		n->addable = 20;
+		n->xoffset = argsize(0);
+		return;
+	}
+	
+	fatal(Z, "bad regret");	
 }
 
 void
diff --git a/src/cmd/8c/cgen.c b/src/cmd/8c/cgen.c
index f541022..8ac8e36 100644
--- a/src/cmd/8c/cgen.c
+++ b/src/cmd/8c/cgen.c
@@ -49,7 +49,7 @@
 	}
 	if(n == Z || n->type == T)
 		return;
-	if(typesuv[n->type->etype]) {
+	if(typesuv[n->type->etype] && (n->op != OFUNC || nn != Z)) {
 		sugen(n, nn, n->type->width);
 		return;
 	}
@@ -86,7 +86,7 @@
 		if(cond(o) && typesuv[l->type->etype])
 			break;
 
-		regret(&nod, r);
+		regret(&nod, r, 0, 0);
 		cgen(r, &nod);
 
 		regsalloc(&nod1, r);
@@ -147,7 +147,7 @@
 		if(!hardleft) {
 			if(nn != Z || r->addable < INDEXED) {
 				if(r->complex >= FNX && nn == Z)
-					regret(&nod, r);
+					regret(&nod, r, 0, 0);
 				else
 					regalloc(&nod, r, nn);
 				cgen(r, &nod);
@@ -922,7 +922,7 @@
 			if(l->op != OIND)
 				diag(n, "bad function call");
 
-			regret(&nod, l->left);
+			regret(&nod, l->left, 0, 0);
 			cgen(l->left, &nod);
 			regsalloc(&nod1, l->left);
 			gmove(&nod, &nod1);
@@ -949,12 +949,12 @@
 		gpcdata(PCDATA_ArgSize, -1);
 		if(REGARG >= 0 && reg[REGARG])
 			reg[REGARG]--;
-		if(nn != Z) {
-			regret(&nod, n);
+		regret(&nod, n, l->type, 1); // update maxarg if nothing else
+		if(nn != Z)
 			gmove(&nod, nn);
+		if(nod.op == OREGISTER)
 			regfree(&nod);
-		} else
-		if(typefd[n->type->etype])
+		if(nn == Z && hasdotdotdot(l->type) && typefd[n->type->etype])
 			gins(AFMOVDP, &fregnode0, &fregnode0);
 		break;
 
@@ -1374,7 +1374,7 @@
 		if(true)
 			o = comrel[relindex(o)];
 		if(l->complex >= FNX && r->complex >= FNX) {
-			regret(&nod, r);
+			regret(&nod, r, 0, 0);
 			cgen(r, &nod);
 			regsalloc(&nod1, r);
 			gmove(&nod, &nod1);
@@ -1567,7 +1567,7 @@
 		if(nn != Z && side(nn)) {
 			nod1 = *n;
 			nod1.type = typ(TIND, n->type);
-			regret(&nod2, &nod1);
+			regret(&nod2, &nod1, 0, 0);
 			lcgen(nn, &nod2);
 			regsalloc(&nod0, &nod1);
 			cgen(&nod2, &nod0);
@@ -1649,6 +1649,20 @@
 		break;
 
 	case OFUNC:
+		if(!hasdotdotdot(n->left->type)) {
+			cgen(n, Z);
+			if(nn != Z) {
+				curarg -= n->type->width;
+				regret(&nod1, n, n->left->type, 1);
+				if(nn->complex >= FNX) {
+					regsalloc(&nod2, n);
+					cgen(&nod1, &nod2);
+					nod1 = nod2;
+				}
+				cgen(&nod1, nn);
+			}
+			break;
+		}
 		if(nn == Z) {
 			sugen(n, nodrat, w);
 			break;
diff --git a/src/cmd/8c/gc.h b/src/cmd/8c/gc.h
index 9c4613f..aa3888d 100644
--- a/src/cmd/8c/gc.h
+++ b/src/cmd/8c/gc.h
@@ -210,7 +210,7 @@
 void	indx(Node*);
 int	bcomplex(Node*, Node*);
 Prog*	gtext(Sym*, int32);
-vlong	argsize(void);
+vlong	argsize(int);
 
 /*
  * cgen.c
@@ -244,7 +244,7 @@
 Node*	nodfconst(double);
 int	nodreg(Node*, Node*, int);
 int	isreg(Node*, int);
-void	regret(Node*, Node*);
+void	regret(Node*, Node*, Type*, int);
 void	regalloc(Node*, Node*, Node*);
 void	regfree(Node*);
 void	regialloc(Node*, Node*, Node*);
diff --git a/src/cmd/8c/sgen.c b/src/cmd/8c/sgen.c
index 069bbc1..d647010 100644
--- a/src/cmd/8c/sgen.c
+++ b/src/cmd/8c/sgen.c
@@ -35,7 +35,7 @@
 {
 	int32 a;
 
-	a = argsize();
+	a = argsize(1);
 	if((textflag & NOSPLIT) != 0 && stkoff >= 128)
 		yyerror("stack frame too large for NOSPLIT function");
 
diff --git a/src/cmd/8c/txt.c b/src/cmd/8c/txt.c
index 25082de..7f87a0a0 100644
--- a/src/cmd/8c/txt.c
+++ b/src/cmd/8c/txt.c
@@ -311,15 +311,43 @@
 }
 
 void
-regret(Node *n, Node *nn)
+regret(Node *n, Node *nn, Type *t, int mode)
 {
 	int r;
 
-	r = REGRET;
-	if(typefd[nn->type->etype])
-		r = FREGRET;
-	nodreg(n, nn, r);
-	reg[r]++;
+	if(mode == 0 || hasdotdotdot(t) || nn->type->width == 0) {
+		r = REGRET;
+		if(typefd[nn->type->etype])
+			r = FREGRET;
+		nodreg(n, nn, r);
+		reg[r]++;
+		return;
+	}
+	
+	if(mode == 1) {
+		// fetch returned value after call.
+		// already called gargs, so curarg is set.
+		curarg = (curarg+3) & ~3;
+		regaalloc(n, nn);
+		return;
+	}
+	
+	if(mode == 2) {
+		// store value to be returned.
+		// must compute arg offset.
+		if(t->etype != TFUNC)
+			fatal(Z, "bad regret func %T", t);
+		*n = *nn;
+		n->op = ONAME;
+		n->class = CPARAM;
+		n->sym = slookup(".retx");
+		n->complex = 0;
+		n->addable = 20;
+		n->xoffset = argsize(0);
+		return;
+	}
+	
+	fatal(Z, "bad regret");
 }
 
 void
diff --git a/src/cmd/api/goapi.go b/src/cmd/api/goapi.go
index 54c84b4..c3ab9c5 100644
--- a/src/cmd/api/goapi.go
+++ b/src/cmd/api/goapi.go
@@ -385,6 +385,7 @@
 			" mcache struct{}; bucket struct{}; sudog struct{}; g struct{};" +
 			" hchan struct{}; chantype struct{}; waitq struct{};" +
 			" note struct{}; wincallbackcontext struct{};" +
+			" gobuf struct{}; funcval struct{};" +
 			"); " +
 			"const ( cb_max = 2000 )"
 		f, err = parser.ParseFile(fset, filename, src, 0)
diff --git a/src/cmd/cc/cc.h b/src/cmd/cc/cc.h
index c8aac12..1dae5ac 100644
--- a/src/cmd/cc/cc.h
+++ b/src/cmd/cc/cc.h
@@ -794,7 +794,7 @@
 int32	exreg(Type*);
 int32	align(int32, Type*, int, int32*);
 int32	maxround(int32, int32);
-int	hasdotdotdot(void);
+int	hasdotdotdot(Type*);
 void    linkarchinit(void);
 
 extern	schar	ewidth[];
diff --git a/src/cmd/cc/dcl.c b/src/cmd/cc/dcl.c
index 051a6c0..7cda9f9 100644
--- a/src/cmd/cc/dcl.c
+++ b/src/cmd/cc/dcl.c
@@ -697,7 +697,8 @@
 {
 	Type *t;
 
-	autoffset = align(0, thisfn->link, Aarg0, nil);
+	if(hasdotdotdot(thisfn->link))
+		autoffset = align(0, thisfn->link, Aarg0, nil);
 	stkoff = 0;
 	for(; n->left != Z; n = n->left) {
 		if(n->op != OFUNC || n->left->op != ONAME)
diff --git a/src/cmd/cc/pgen.c b/src/cmd/cc/pgen.c
index 0ee1378..53410a1 100644
--- a/src/cmd/cc/pgen.c
+++ b/src/cmd/cc/pgen.c
@@ -56,24 +56,24 @@
 }
 
 int
-hasdotdotdot(void)
+hasdotdotdot(Type *t)
 {
-	Type *t;
-
-	for(t=thisfn->down; t!=T; t=t->down)
+	for(t=t->down; t!=T; t=t->down)
 		if(t->etype == TDOT)
 			return 1;
 	return 0;
 }
 
 vlong
-argsize(void)
+argsize(int doret)
 {
 	Type *t;
 	int32 s;
 
 //print("t=%T\n", thisfn);
-	s = align(0, thisfn->link, Aarg0, nil);
+	s = 0;
+	if(hasdotdotdot(thisfn))
+		s = align(s, thisfn->link, Aarg0, nil);
 	for(t=thisfn->down; t!=T; t=t->down) {
 		switch(t->etype) {
 		case TVOID:
@@ -93,6 +93,14 @@
 		s = (s+7) & ~7;
 	else
 		s = (s+3) & ~3;
+	if(doret && thisfn->link->etype != TVOID) {
+		s = align(s, thisfn->link, Aarg1, nil);
+		s = align(s, thisfn->link, Aarg2, nil);
+		if(thechar == '6')
+			s = (s+7) & ~7;
+		else
+			s = (s+3) & ~3;
+	}
 	return s;
 }
 
@@ -129,7 +137,7 @@
 	 * generate funcdata symbol for this function.
 	 * data is filled in at the end of codgen().
 	 */
-	isvarargs = hasdotdotdot();
+	isvarargs = hasdotdotdot(thisfn);
 	gcargs = nil;
 	if(!isvarargs)
 		gcargs = makefuncdatasym("gcargs·%d", FUNCDATA_ArgsPointerMaps);
@@ -212,7 +220,7 @@
 void
 gen(Node *n)
 {
-	Node *l, nod;
+	Node *l, nod, nod1;
 	Prog *sp, *spc, *spb;
 	Case *cn;
 	long sbc, scc;
@@ -273,14 +281,26 @@
 			gbranch(ORETURN);
 			break;
 		}
+		if(typecmplx[n->type->etype] && !hasdotdotdot(thisfn)) {
+			regret(&nod, n, thisfn, 2);
+			sugen(l, &nod, n->type->width);
+			noretval(3);
+			gbranch(ORETURN);
+			break;
+		}
 		if(typecmplx[n->type->etype]) {
 			sugen(l, nodret, n->type->width);
 			noretval(3);
 			gbranch(ORETURN);
 			break;
 		}
-		regret(&nod, n);
+		regret(&nod1, n, thisfn, 2);
+		nod = nod1;
+		if(nod.op != OREGISTER)
+			regalloc(&nod, n, Z);
 		cgen(l, &nod);
+		if(nod1.op != OREGISTER)
+			gmove(&nod, &nod1);
 		regfree(&nod);
 		if(typefd[n->type->etype])
 			noretval(1);
@@ -729,9 +749,11 @@
 	symoffset = 0;
 	gextern(sym, nodconst(1), symoffset, 4);
 	symoffset += 4;
-	argbytes = (argsize() + ewidth[TIND] - 1);
+	argbytes = (argsize(1) + ewidth[TIND] - 1);
 	bv = bvalloc((argbytes  / ewidth[TIND]) * BitsPerPointer);
-	argoffset = align(0, fn->link, Aarg0, nil);
+	argoffset = 0;
+	if(hasdotdotdot(thisfn))
+		argoffset = align(0, fn->link, Aarg0, nil);
 	if(argoffset > 0) {
 		// The C calling convention returns structs by copying them to a
 		// location pointed to by a hidden first argument.  This first
diff --git a/src/pkg/runtime/alg.go b/src/pkg/runtime/alg.go
index ff296b6..be6eaac 100644
--- a/src/pkg/runtime/alg.go
+++ b/src/pkg/runtime/alg.go
@@ -43,6 +43,9 @@
 
 // in asm_*.s
 func aeshash(p unsafe.Pointer, s, h uintptr) uintptr
+func aeshash32(p unsafe.Pointer, s, h uintptr) uintptr
+func aeshash64(p unsafe.Pointer, s, h uintptr) uintptr
+func aeshashstr(p unsafe.Pointer, s, h uintptr) uintptr
 
 func memhash(p unsafe.Pointer, s, h uintptr) uintptr {
 	if !nacl && use_aeshash {
diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s
index 2163c91..d52eca3 100644
--- a/src/pkg/runtime/asm_386.s
+++ b/src/pkg/runtime/asm_386.s
@@ -134,8 +134,8 @@
 // void gosave(Gobuf*)
 // save state in Gobuf; setjmp
 TEXT runtime·gosave(SB), NOSPLIT, $0-4
-	MOVL	4(SP), AX		// gobuf
-	LEAL	4(SP), BX		// caller's SP
+	MOVL	buf+0(FP), AX		// gobuf
+	LEAL	buf+0(FP), BX		// caller's SP
 	MOVL	BX, gobuf_sp(AX)
 	MOVL	0(SP), BX		// caller's PC
 	MOVL	BX, gobuf_pc(AX)
@@ -149,7 +149,7 @@
 // void gogo(Gobuf*)
 // restore state from Gobuf; longjmp
 TEXT runtime·gogo(SB), NOSPLIT, $0-4
-	MOVL	4(SP), BX		// gobuf
+	MOVL	buf+0(FP), BX		// gobuf
 	MOVL	gobuf_g(BX), DX
 	MOVL	0(DX), CX		// make sure g != nil
 	get_tls(CX)
@@ -174,7 +174,7 @@
 	MOVL	g(CX), AX	// save state in g->sched
 	MOVL	0(SP), BX	// caller's PC
 	MOVL	BX, (g_sched+gobuf_pc)(AX)
-	LEAL	4(SP), BX	// caller's SP
+	LEAL	fn+0(FP), BX	// caller's SP
 	MOVL	BX, (g_sched+gobuf_sp)(AX)
 	MOVL	AX, (g_sched+gobuf_g)(AX)
 
@@ -318,7 +318,7 @@
 	// restore when returning from f.
 	MOVL	0(SP), AX	// our caller's PC
 	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
-	LEAL	4(SP), AX	// our caller's SP
+	LEAL	fv+0(FP), AX	// our caller's SP
 	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
 	MOVL	g(CX), AX
 	MOVL	AX, (m_morebuf+gobuf_g)(BX)
@@ -334,9 +334,9 @@
 	// If it turns out that f needs a larger frame than
 	// the default stack, f's usual stack growth prolog will
 	// allocate a new segment (and recopy the arguments).
-	MOVL	4(SP), AX	// fn
-	MOVL	8(SP), DX	// arg frame
-	MOVL	12(SP), CX	// arg size
+	MOVL	fv+0(FP), AX	// fn
+	MOVL	addr+4(FP), DX	// arg frame
+	MOVL	size+8(FP), CX	// arg size
 
 	MOVL	AX, m_cret(BX)	// f's PC
 	MOVL	DX, m_moreargp(BX)	// f's argument pointer
@@ -481,7 +481,6 @@
 	MOVL	$0, 0x1004	// crash if oldstack returns
 	RET
 
-
 // bool cas(int32 *val, int32 old, int32 new)
 // Atomically:
 //	if(*val == old){
@@ -489,16 +488,18 @@
 //		return 1;
 //	}else
 //		return 0;
-TEXT runtime·cas(SB), NOSPLIT, $0-12
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
-	MOVL	12(SP), CX
+TEXT runtime·cas(SB), NOSPLIT, $0-13
+	MOVL	ptr+0(FP), BX
+	MOVL	old+4(FP), AX
+	MOVL	new+8(FP), CX
 	LOCK
 	CMPXCHGL	CX, 0(BX)
-	JZ 3(PC)
+	JZ 4(PC)
 	MOVL	$0, AX
+	MOVB	AX, ret+12(FP)
 	RET
 	MOVL	$1, AX
+	MOVB	AX, ret+12(FP)
 	RET
 
 // bool runtime·cas64(uint64 *val, uint64 old, uint64 new)
@@ -509,19 +510,21 @@
 //	} else {
 //		return 0;
 //	}
-TEXT runtime·cas64(SB), NOSPLIT, $0-20
-	MOVL	4(SP), BP
-	MOVL	8(SP), AX
-	MOVL	12(SP), DX
-	MOVL	16(SP), BX
-	MOVL	20(SP), CX
+TEXT runtime·cas64(SB), NOSPLIT, $0-21
+	MOVL	ptr+0(FP), BP
+	MOVL	old_lo+4(FP), AX
+	MOVL	old_hi+8(FP), DX
+	MOVL	new_lo+12(FP), BX
+	MOVL	new_hi+16(FP), CX
 	LOCK
 	CMPXCHG8B	0(BP)
 	JNZ	cas64_fail
 	MOVL	$1, AX
+	MOVB	AX, ret+20(FP)
 	RET
 cas64_fail:
 	MOVL	$0, AX
+	MOVB	AX, ret+20(FP)
 	RET
 
 // bool casp(void **p, void *old, void *new)
@@ -531,45 +534,50 @@
 //		return 1;
 //	}else
 //		return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-12
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
-	MOVL	12(SP), CX
+TEXT runtime·casp(SB), NOSPLIT, $0-13
+	MOVL	ptr+0(FP), BX
+	MOVL	old+4(FP), AX
+	MOVL	new+8(FP), CX
 	LOCK
 	CMPXCHGL	CX, 0(BX)
-	JZ 3(PC)
+	JZ 4(PC)
 	MOVL	$0, AX
+	MOVB	AX, ret+12(FP)
 	RET
 	MOVL	$1, AX
+	MOVB	AX, ret+12(FP)
 	RET
 
 // uint32 xadd(uint32 volatile *val, int32 delta)
 // Atomically:
 //	*val += delta;
 //	return *val;
-TEXT runtime·xadd(SB), NOSPLIT, $0-8
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
+TEXT runtime·xadd(SB), NOSPLIT, $0-12
+	MOVL	ptr+0(FP), BX
+	MOVL	delta+4(FP), AX
 	MOVL	AX, CX
 	LOCK
 	XADDL	AX, 0(BX)
 	ADDL	CX, AX
+	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime·xchg(SB), NOSPLIT, $0-8
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
+TEXT runtime·xchg(SB), NOSPLIT, $0-12
+	MOVL	ptr+0(FP), BX
+	MOVL	new+4(FP), AX
 	XCHGL	AX, 0(BX)
+	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime·xchgp(SB), NOSPLIT, $0-8
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
+TEXT runtime·xchgp(SB), NOSPLIT, $0-12
+	MOVL	ptr+0(FP), BX
+	MOVL	new+4(FP), AX
 	XCHGL	AX, 0(BX)
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·procyield(SB),NOSPLIT,$0-0
-	MOVL	4(SP), AX
+	MOVL	cycles+0(FP), AX
 again:
 	PAUSE
 	SUBL	$1, AX
@@ -577,23 +585,21 @@
 	RET
 
 TEXT runtime·atomicstorep(SB), NOSPLIT, $0-8
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
+	MOVL	ptr+0(FP), BX
+	MOVL	val+4(FP), AX
 	XCHGL	AX, 0(BX)
 	RET
 
 TEXT runtime·atomicstore(SB), NOSPLIT, $0-8
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
+	MOVL	ptr+0(FP), BX
+	MOVL	val+4(FP), AX
 	XCHGL	AX, 0(BX)
 	RET
 
 // uint64 atomicload64(uint64 volatile* addr);
-// so actually
-// void atomicload64(uint64 *res, uint64 volatile *addr);
-TEXT runtime·atomicload64(SB), NOSPLIT, $0-8
-	MOVL	4(SP), BX
-	MOVL	8(SP), AX
+TEXT runtime·atomicload64(SB), NOSPLIT, $0-12
+	MOVL	ptr+0(FP), AX
+	LEAL	ret_lo+4(FP), BX
 	// MOVQ (%EAX), %MM0
 	BYTE $0x0f; BYTE $0x6f; BYTE $0x00
 	// MOVQ %MM0, 0(%EBX)
@@ -604,7 +610,7 @@
 
 // void runtime·atomicstore64(uint64 volatile* addr, uint64 v);
 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-12
-	MOVL	4(SP), AX
+	MOVL	ptr+0(FP), AX
 	// MOVQ and EMMS were introduced on the Pentium MMX.
 	// MOVQ 0x8(%ESP), %MM0
 	BYTE $0x0f; BYTE $0x6f; BYTE $0x44; BYTE $0x24; BYTE $0x08
@@ -620,7 +626,7 @@
 	RET
 
 // void	runtime·atomicor8(byte volatile*, byte);
-TEXT runtime·atomicor8(SB), NOSPLIT, $0-8
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
 	MOVL	ptr+0(FP), AX
 	MOVB	val+4(FP), BX
 	LOCK
@@ -633,8 +639,8 @@
 // 2. sub 5 bytes from the callers return
 // 3. jmp to the argument
 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
-	MOVL	4(SP), DX	// fn
-	MOVL	8(SP), BX	// caller sp
+	MOVL	fv+0(FP), DX	// fn
+	MOVL	argp+4(FP), BX	// caller sp
 	LEAL	-4(BX), SP	// caller sp after CALL
 	SUBL	$5, (SP)	// return to CALL again
 	MOVL	0(DX), BX
@@ -804,7 +810,7 @@
 	RET
 
 // void setg(G*); set g. for use by needm.
-TEXT runtime·setg(SB), NOSPLIT, $0-8
+TEXT runtime·setg(SB), NOSPLIT, $0-4
 	MOVL	gg+0(FP), BX
 #ifdef GOOS_windows
 	CMPL	BX, $0
@@ -839,9 +845,10 @@
 	INT	$3
 	RET
 
-TEXT runtime·getcallerpc(SB),NOSPLIT,$0-4
-	MOVL	x+0(FP),AX		// addr of first arg
+TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
+	MOVL	argp+0(FP),AX		// addr of first arg
 	MOVL	-4(AX),AX		// get calling pc
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
@@ -851,13 +858,14 @@
 	RET
 
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
-	MOVL	x+0(FP),AX		// addr of first arg
-	MOVL	x+4(FP), BX
+	MOVL	argp+0(FP),AX		// addr of first arg
+	MOVL	pc+4(FP), BX
 	MOVL	BX, -4(AX)		// set calling pc
 	RET
 
-TEXT runtime·getcallersp(SB), NOSPLIT, $0-4
-	MOVL	sp+0(FP), AX
+TEXT runtime·getcallersp(SB), NOSPLIT, $0-8
+	MOVL	argp+0(FP), AX
+	MOVL	AX, ret+4(FP)
 	RET
 
 // func gogetcallersp(p unsafe.Pointer) uintptr
@@ -868,11 +876,10 @@
 
 // int64 runtime·cputicks(void), so really
 // void runtime·cputicks(int64 *ticks)
-TEXT runtime·cputicks(SB),NOSPLIT,$0-4
+TEXT runtime·cputicks(SB),NOSPLIT,$0-8
 	RDTSC
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
@@ -976,7 +983,7 @@
 	AESENC	X2, X0
 	AESENC	X3, X0
 	AESENC	X2, X0
-	MOVL	X0, res+12(FP)
+	MOVL	X0, ret+12(FP)
 	RET
 
 TEXT runtime·aeshash32(SB),NOSPLIT,$0-16
@@ -987,7 +994,7 @@
 	AESENC	runtime·aeskeysched+0(SB), X0
 	AESENC	runtime·aeskeysched+16(SB), X0
 	AESENC	runtime·aeskeysched+0(SB), X0
-	MOVL	X0, res+12(FP)
+	MOVL	X0, ret+12(FP)
 	RET
 
 TEXT runtime·aeshash64(SB),NOSPLIT,$0-16
@@ -998,7 +1005,7 @@
 	AESENC	runtime·aeskeysched+0(SB), X0
 	AESENC	runtime·aeskeysched+16(SB), X0
 	AESENC	runtime·aeskeysched+0(SB), X0
-	MOVL	X0, res+12(FP)
+	MOVL	X0, ret+12(FP)
 	RET
 
 // simple mask to get rid of data in the high part of the register.
@@ -1309,12 +1316,12 @@
 	RET
 
 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
-	MOVL	s1+0(FP), SI
-	MOVL	s1+4(FP), BX
-	MOVL	s2+8(FP), DI
-	MOVL	s2+12(FP), DX
+	MOVL	s1_base+0(FP), SI
+	MOVL	s1_len+4(FP), BX
+	MOVL	s2_base+8(FP), DI
+	MOVL	s2_len+12(FP), DX
 	CALL	runtime·cmpbody(SB)
-	MOVL	AX, res+16(FP)
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT bytes·Compare(SB),NOSPLIT,$0-28
@@ -1323,7 +1330,7 @@
 	MOVL	s2+12(FP), DI
 	MOVL	s2+16(FP), DX
 	CALL	runtime·cmpbody(SB)
-	MOVL	AX, res+24(FP)
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT bytes·IndexByte(SB),NOSPLIT,$0
diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s
index c53e2d3..70e2225 100644
--- a/src/pkg/runtime/asm_amd64.s
+++ b/src/pkg/runtime/asm_amd64.s
@@ -125,8 +125,8 @@
 // void gosave(Gobuf*)
 // save state in Gobuf; setjmp
 TEXT runtime·gosave(SB), NOSPLIT, $0-8
-	MOVQ	8(SP), AX		// gobuf
-	LEAQ	8(SP), BX		// caller's SP
+	MOVQ	buf+0(FP), AX		// gobuf
+	LEAQ	buf+0(FP), BX		// caller's SP
 	MOVQ	BX, gobuf_sp(AX)
 	MOVQ	0(SP), BX		// caller's PC
 	MOVQ	BX, gobuf_pc(AX)
@@ -140,7 +140,7 @@
 // void gogo(Gobuf*)
 // restore state from Gobuf; longjmp
 TEXT runtime·gogo(SB), NOSPLIT, $0-8
-	MOVQ	8(SP), BX		// gobuf
+	MOVQ	buf+0(FP), BX		// gobuf
 	MOVQ	gobuf_g(BX), DX
 	MOVQ	0(DX), CX		// make sure g != nil
 	get_tls(CX)
@@ -165,7 +165,7 @@
 	MOVQ	g(CX), AX	// save state in g->sched
 	MOVQ	0(SP), BX	// caller's PC
 	MOVQ	BX, (g_sched+gobuf_pc)(AX)
-	LEAQ	8(SP), BX	// caller's SP
+	LEAQ	fn+0(FP), BX	// caller's SP
 	MOVQ	BX, (g_sched+gobuf_sp)(AX)
 	MOVQ	AX, (g_sched+gobuf_g)(AX)
 
@@ -297,7 +297,7 @@
 	// restore when returning from f.
 	MOVQ	0(SP), AX	// our caller's PC
 	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
-	LEAQ	8(SP), AX	// our caller's SP
+	LEAQ	fv+0(FP), AX	// our caller's SP
 	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
 	MOVQ	g(CX), AX
 	MOVQ	AX, (m_morebuf+gobuf_g)(BX)
@@ -314,9 +314,9 @@
 	// If it turns out that f needs a larger frame than
 	// the default stack, f's usual stack growth prolog will
 	// allocate a new segment (and recopy the arguments).
-	MOVQ	8(SP), AX	// fn
-	MOVQ	16(SP), DX	// arg frame
-	MOVL	24(SP), CX	// arg size
+	MOVQ	fv+0(FP), AX	// fn
+	MOVQ	addr+8(FP), DX	// arg frame
+	MOVL	size+16(FP), CX	// arg size
 
 	MOVQ	AX, m_cret(BX)	// f's PC
 	MOVQ	DX, m_moreargp(BX)	// argument frame pointer
@@ -584,16 +584,18 @@
 //		return 1;
 //	} else
 //		return 0;
-TEXT runtime·cas(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), BX
-	MOVL	16(SP), AX
-	MOVL	20(SP), CX
+TEXT runtime·cas(SB), NOSPLIT, $0-17
+	MOVQ	ptr+0(FP), BX
+	MOVL	old+8(FP), AX
+	MOVL	new+12(FP), CX
 	LOCK
 	CMPXCHGL	CX, 0(BX)
-	JZ 3(PC)
+	JZ 4(PC)
 	MOVL	$0, AX
+	MOVB	AX, ret+16(FP)
 	RET
 	MOVL	$1, AX
+	MOVB	AX, ret+16(FP)
 	RET
 
 // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
@@ -604,17 +606,19 @@
 //	} else {
 //		return 0;
 //	}
-TEXT runtime·cas64(SB), NOSPLIT, $0-24
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
-	MOVQ	24(SP), CX
+TEXT runtime·cas64(SB), NOSPLIT, $0-25
+	MOVQ	ptr+0(FP), BX
+	MOVQ	old+8(FP), AX
+	MOVQ	new+16(FP), CX
 	LOCK
 	CMPXCHGQ	CX, 0(BX)
 	JNZ	cas64_fail
 	MOVL	$1, AX
+	MOVB	AX, ret+24(FP)
 	RET
 cas64_fail:
 	MOVL	$0, AX
+	MOVB	AX, ret+24(FP)
 	RET
 
 // bool casp(void **val, void *old, void *new)
@@ -624,60 +628,67 @@
 //		return 1;
 //	} else
 //		return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-24
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
-	MOVQ	24(SP), CX
+TEXT runtime·casp(SB), NOSPLIT, $0-25
+	MOVQ	ptr+0(FP), BX
+	MOVQ	old+8(FP), AX
+	MOVQ	new+16(FP), CX
 	LOCK
 	CMPXCHGQ	CX, 0(BX)
-	JZ 3(PC)
+	JZ 4(PC)
 	MOVL	$0, AX
+	MOVB	AX, ret+24(FP)
 	RET
 	MOVL	$1, AX
+	MOVB	AX, ret+24(FP)
 	RET
 
 // uint32 xadd(uint32 volatile *val, int32 delta)
 // Atomically:
 //	*val += delta;
 //	return *val;
-TEXT runtime·xadd(SB), NOSPLIT, $0-12
-	MOVQ	8(SP), BX
-	MOVL	16(SP), AX
+TEXT runtime·xadd(SB), NOSPLIT, $0-20
+	MOVQ	ptr+0(FP), BX
+	MOVL	delta+8(FP), AX
 	MOVL	AX, CX
 	LOCK
 	XADDL	AX, 0(BX)
 	ADDL	CX, AX
+	MOVL	AX, ret+16(FP)
 	RET
 
-TEXT runtime·xadd64(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
+TEXT runtime·xadd64(SB), NOSPLIT, $0-24
+	MOVQ	ptr+0(FP), BX
+	MOVQ	delta+8(FP), AX
 	MOVQ	AX, CX
 	LOCK
 	XADDQ	AX, 0(BX)
 	ADDQ	CX, AX
+	MOVQ	AX, ret+16(FP)
 	RET
 
-TEXT runtime·xchg(SB), NOSPLIT, $0-12
-	MOVQ	8(SP), BX
-	MOVL	16(SP), AX
+TEXT runtime·xchg(SB), NOSPLIT, $0-20
+	MOVQ	ptr+0(FP), BX
+	MOVL	new+8(FP), AX
 	XCHGL	AX, 0(BX)
+	MOVL	AX, ret+16(FP)
 	RET
 
-TEXT runtime·xchg64(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
+TEXT runtime·xchg64(SB), NOSPLIT, $0-24
+	MOVQ	ptr+0(FP), BX
+	MOVQ	new+8(FP), AX
 	XCHGQ	AX, 0(BX)
+	MOVQ	AX, ret+16(FP)
 	RET
 
-TEXT runtime·xchgp(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
+TEXT runtime·xchgp(SB), NOSPLIT, $0-24
+	MOVQ	ptr+0(FP), BX
+	MOVQ	new+8(FP), AX
 	XCHGQ	AX, 0(BX)
+	MOVQ	AX, ret+16(FP)
 	RET
 
 TEXT runtime·procyield(SB),NOSPLIT,$0-0
-	MOVL	8(SP), AX
+	MOVL	cycles+0(FP), AX
 again:
 	PAUSE
 	SUBL	$1, AX
@@ -685,25 +696,25 @@
 	RET
 
 TEXT runtime·atomicstorep(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
+	MOVQ	ptr+0(FP), BX
+	MOVQ	val+8(FP), AX
 	XCHGQ	AX, 0(BX)
 	RET
 
 TEXT runtime·atomicstore(SB), NOSPLIT, $0-12
-	MOVQ	8(SP), BX
-	MOVL	16(SP), AX
+	MOVQ	ptr+0(FP), BX
+	MOVL	val+8(FP), AX
 	XCHGL	AX, 0(BX)
 	RET
 
 TEXT runtime·atomicstore64(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), BX
-	MOVQ	16(SP), AX
+	MOVQ	ptr+0(FP), BX
+	MOVQ	val+8(FP), AX
 	XCHGQ	AX, 0(BX)
 	RET
 
 // void	runtime·atomicor8(byte volatile*, byte);
-TEXT runtime·atomicor8(SB), NOSPLIT, $0-16
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-9
 	MOVQ	ptr+0(FP), AX
 	MOVB	val+8(FP), BX
 	LOCK
@@ -716,8 +727,8 @@
 // 2. sub 5 bytes from the callers return
 // 3. jmp to the argument
 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
-	MOVQ	8(SP), DX	// fn
-	MOVQ	16(SP), BX	// caller sp
+	MOVQ	fv+0(FP), DX	// fn
+	MOVQ	argp+8(FP), BX	// caller sp
 	LEAQ	-8(BX), SP	// caller sp after CALL
 	SUBQ	$5, (SP)	// return to CALL again
 	MOVQ	0(DX), BX
@@ -891,7 +902,7 @@
 	RET
 
 // void setg(G*); set g. for use by needm.
-TEXT runtime·setg(SB), NOSPLIT, $0-16
+TEXT runtime·setg(SB), NOSPLIT, $0-8
 	MOVQ	gg+0(FP), BX
 #ifdef GOOS_windows
 	CMPQ	BX, $0
@@ -925,9 +936,10 @@
 	INT	$3
 	RET
 
-TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
-	MOVQ	x+0(FP),AX		// addr of first arg
+TEXT runtime·getcallerpc(SB),NOSPLIT,$0-16	// 16 bytes: argp (8) + ret (8)
+	MOVQ	argp+0(FP),AX		// addr of first arg
 	MOVQ	-8(AX),AX		// get calling pc
+	MOVQ	AX, ret+8(FP)	// hand the pc back through the stack result slot
 	RET
 
 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16
@@ -937,13 +949,14 @@
 	RET
 
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
-	MOVQ	x+0(FP),AX		// addr of first arg
-	MOVQ	x+8(FP), BX
+	MOVQ	argp+0(FP),AX		// addr of first arg
+	MOVQ	pc+8(FP), BX	// pc value to write into the slot just below argp
 	MOVQ	BX, -8(AX)		// set calling pc
 	RET
 
-TEXT runtime·getcallersp(SB),NOSPLIT,$0-8
-	MOVQ	sp+0(FP), AX
+TEXT runtime·getcallersp(SB),NOSPLIT,$0-16	// 16 bytes: argp (8) + ret (8)
+	MOVQ	argp+0(FP), AX	// the argument value itself is what gets returned
+	MOVQ	AX, ret+8(FP)	// copy it, unchanged, into the result slot
 	RET
 
 // func gogetcallersp(p unsafe.Pointer) uintptr
@@ -957,6 +970,7 @@
 	RDTSC
 	SHLQ	$32, DX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
@@ -1057,7 +1071,7 @@
 	AESENC	runtime·aeskeysched+0(SB), X0
 	AESENC	runtime·aeskeysched+16(SB), X0
 	AESENC	runtime·aeskeysched+0(SB), X0
-	MOVQ	X0, res+24(FP)
+	MOVQ	X0, ret+24(FP)
 	RET
 
 TEXT runtime·aeshash64(SB),NOSPLIT,$0-32
@@ -1068,7 +1082,7 @@
 	AESENC	runtime·aeskeysched+0(SB), X0
 	AESENC	runtime·aeskeysched+16(SB), X0
 	AESENC	runtime·aeskeysched+0(SB), X0
-	MOVQ	X0, res+24(FP)
+	MOVQ	X0, ret+24(FP)
 	RET
 
 // simple mask to get rid of data in the high part of the register.
@@ -1266,12 +1280,12 @@
 	RET
 
 TEXT runtime·cmpstring(SB),NOSPLIT,$0-40
-	MOVQ	s1+0(FP), SI
-	MOVQ	s1+8(FP), BX
-	MOVQ	s2+16(FP), DI
-	MOVQ	s2+24(FP), DX
+	MOVQ	s1_base+0(FP), SI	// first word of s1 (the _base half of the string argument)
+	MOVQ	s1_len+8(FP), BX	// second word of s1 (the _len half)
+	MOVQ	s2_base+16(FP), DI	// first word of s2
+	MOVQ	s2_len+24(FP), DX	// second word of s2
 	CALL	runtime·cmpbody(SB)
-	MOVQ	AX, res+32(FP)
+	MOVQ	AX, ret+32(FP)	// AX as left by cmpbody becomes the result, stored after the 32 bytes of arguments
 	RET
 
 TEXT bytes·Compare(SB),NOSPLIT,$0-56
diff --git a/src/pkg/runtime/asm_amd64p32.s b/src/pkg/runtime/asm_amd64p32.s
index 4c039d7..83faff2 100644
--- a/src/pkg/runtime/asm_amd64p32.s
+++ b/src/pkg/runtime/asm_amd64p32.s
@@ -103,8 +103,8 @@
 // void gosave(Gobuf*)
 // save state in Gobuf; setjmp
 TEXT runtime·gosave(SB), NOSPLIT, $0-4
-	MOVL	b+0(FP), AX	// gobuf
-	LEAL	b+0(FP), BX	// caller's SP
+	MOVL	buf+0(FP), AX	// gobuf
+	LEAL	buf+0(FP), BX	// caller's SP
 	MOVL	BX, gobuf_sp(AX)
 	MOVL	0(SP), BX		// caller's PC
 	MOVL	BX, gobuf_pc(AX)
@@ -118,7 +118,7 @@
 // void gogo(Gobuf*)
 // restore state from Gobuf; longjmp
 TEXT runtime·gogo(SB), NOSPLIT, $0-4
-	MOVL	b+0(FP), BX		// gobuf
+	MOVL	buf+0(FP), BX		// gobuf
 	MOVL	gobuf_g(BX), DX
 	MOVL	0(DX), CX		// make sure g != nil
 	get_tls(CX)
@@ -266,7 +266,7 @@
 // with the desired args running the desired function.
 //
 // func call(fn *byte, arg *byte, argsize uint32).
-TEXT runtime·newstackcall(SB), NOSPLIT, $0-20
+TEXT runtime·newstackcall(SB), NOSPLIT, $0-12
 	get_tls(CX)
 	MOVL	g(CX), BX
 	MOVL	g_m(BX), BX
@@ -275,7 +275,7 @@
 	// restore when returning from f.
 	MOVL	0(SP), AX	// our caller's PC
 	MOVL	AX, (m_morebuf+gobuf_pc)(BX)
-	LEAL	8(SP), AX	// our caller's SP
+	LEAL	addr+4(FP), AX	// our caller's SP
 	MOVL	AX, (m_morebuf+gobuf_sp)(BX)
 	MOVL	g(CX), AX
 	MOVL	AX, (m_morebuf+gobuf_g)(BX)
@@ -292,9 +292,9 @@
 	// If it turns out that f needs a larger frame than
 	// the default stack, f's usual stack growth prolog will
 	// allocate a new segment (and recopy the arguments).
-	MOVL	8(SP), AX	// fn
-	MOVL	12(SP), DX	// arg frame
-	MOVL	16(SP), CX	// arg size
+	MOVL	fv+0(FP), AX	// fn
+	MOVL	addr+4(FP), DX	// arg frame
+	MOVL	size+8(FP), CX	// arg size
 
 	MOVQ	AX, m_cret(BX)	// f's PC
 	MOVL	DX, m_moreargp(BX)	// argument frame pointer
@@ -548,16 +548,18 @@
 //		return 1;
 //	} else
 //		return 0;
-TEXT runtime·cas(SB), NOSPLIT, $0-12
-	MOVL	val+0(FP), BX
+TEXT runtime·cas(SB), NOSPLIT, $0-17	// 12 bytes of arguments, then a 1-byte bool result at offset 16
+	MOVL	ptr+0(FP), BX	// address of the 32-bit word
 	MOVL	old+4(FP), AX
 	MOVL	new+8(FP), CX
 	LOCK
 	CMPXCHGL	CX, 0(BX)
-	JZ 3(PC)
+	JZ 4(PC)	// on a successful swap, skip the three-instruction failure path
 	MOVL	$0, AX
+	MOVB	AX, ret+16(FP)	// failure: result byte = 0
 	RET
 	MOVL	$1, AX
+	MOVB	AX, ret+16(FP)	// success: result byte = 1
 	RET
 
 // bool	runtime·cas64(uint64 *val, uint64 old, uint64 new)
@@ -568,17 +570,19 @@
 //	} else {
 //		return 0;
 //	}
-TEXT runtime·cas64(SB), NOSPLIT, $0-24
-	MOVL	val+0(FP), BX
+TEXT runtime·cas64(SB), NOSPLIT, $0-25	// 24 bytes of arguments, then a 1-byte bool result at offset 24
+	MOVL	ptr+0(FP), BX	// 4-byte pointer; old and new sit at the 8-byte-aligned offsets 8 and 16
 	MOVQ	old+8(FP), AX
 	MOVQ	new+16(FP), CX
 	LOCK
 	CMPXCHGQ	CX, 0(BX)
 	JNZ	cas64_fail
 	MOVL	$1, AX
+	MOVB	AX, ret+24(FP)	// success: result byte = 1
 	RET
 cas64_fail:
 	MOVL	$0, AX
+	MOVB	AX, ret+24(FP)	// failure: result byte = 0
 	RET
 
 // bool casp(void **val, void *old, void *new)
@@ -588,54 +592,60 @@
 //		return 1;
 //	} else
 //		return 0;
-TEXT runtime·casp(SB), NOSPLIT, $0-12
-	MOVL	val+0(FP), BX
+TEXT runtime·casp(SB), NOSPLIT, $0-17	// 12 bytes of arguments, then a 1-byte bool result at offset 16
+	MOVL	ptr+0(FP), BX	// address of the 4-byte pointer slot
 	MOVL	old+4(FP), AX
 	MOVL	new+8(FP), CX
 	LOCK
 	CMPXCHGL	CX, 0(BX)
-	JZ 3(PC)
+	JZ 4(PC)	// on a successful swap, skip the three-instruction failure path
 	MOVL	$0, AX
+	MOVB	AX, ret+16(FP)	// failure: result byte = 0
 	RET
 	MOVL	$1, AX
+	MOVB	AX, ret+16(FP)	// success: result byte = 1
 	RET
 
 // uint32 xadd(uint32 volatile *val, int32 delta)
 // Atomically:
 //	*val += delta;
 //	return *val;
-TEXT runtime·xadd(SB), NOSPLIT, $0-8
-	MOVL	val+0(FP), BX
+TEXT runtime·xadd(SB), NOSPLIT, $0-12	// 12 bytes: ptr (4) + delta (4) + ret (4)
+	MOVL	ptr+0(FP), BX	// address of the 32-bit word to update
 	MOVL	delta+4(FP), AX
 	MOVL	AX, CX
 	LOCK
 	XADDL	AX, 0(BX)
 	ADDL	CX, AX
+	MOVL	AX, ret+8(FP)	// AX = old value + delta; store it in the result slot
 	RET
 
-TEXT runtime·xadd64(SB), NOSPLIT, $0-16
-	MOVL	val+0(FP), BX
+TEXT runtime·xadd64(SB), NOSPLIT, $0-24	// ptr at 0 (4 bytes), delta at 8, 8-byte result at 16
+	MOVL	ptr+0(FP), BX	// address of the 64-bit word to update
 	MOVQ	delta+8(FP), AX
 	MOVQ	AX, CX
 	LOCK
 	XADDQ	AX, 0(BX)
 	ADDQ	CX, AX
+	MOVQ	AX, ret+16(FP)	// AX = old value + delta; store it in the result slot
 	RET
 
-TEXT runtime·xchg(SB), NOSPLIT, $0-8
-	MOVL	val+0(FP), BX
+TEXT runtime·xchg(SB), NOSPLIT, $0-12	// 12 bytes: ptr (4) + new (4) + ret (4)
+	MOVL	ptr+0(FP), BX	// address of the 32-bit word
 	MOVL	new+4(FP), AX
 	XCHGL	AX, 0(BX)
+	MOVL	AX, ret+8(FP)	// AX holds what XCHGL swapped out of memory; store it as the result
 	RET
 
-TEXT runtime·xchg64(SB), NOSPLIT, $0-16
-	MOVL	val+0(FP), BX
+TEXT runtime·xchg64(SB), NOSPLIT, $0-24	// ptr at 0 (4 bytes), new at 8, 8-byte result at 16
+	MOVL	ptr+0(FP), BX	// address of the 64-bit word
 	MOVQ	new+8(FP), AX
 	XCHGQ	AX, 0(BX)
+	MOVQ	AX, ret+16(FP)	// AX holds what XCHGQ swapped out of memory; store it as the result
 	RET
 
 TEXT runtime·procyield(SB),NOSPLIT,$0-0
-	MOVL	val+0(FP), AX
+	MOVL	cycles+0(FP), AX
 again:
 	PAUSE
 	SUBL	$1, AX
@@ -661,7 +671,7 @@
 	RET
 
 // void	runtime·atomicor8(byte volatile*, byte);
-TEXT runtime·atomicor8(SB), NOSPLIT, $0-8
+TEXT runtime·atomicor8(SB), NOSPLIT, $0-5
 	MOVL	ptr+0(FP), BX
 	MOVB	val+4(FP), AX
 	LOCK
@@ -673,9 +683,9 @@
 // 1. pop the caller
 // 2. sub 5 bytes from the callers return
 // 3. jmp to the argument
-TEXT runtime·jmpdefer(SB), NOSPLIT, $0-16
-	MOVL	fn+0(FP), DX
-	MOVL	callersp+4(FP), BX
+TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
+	MOVL	fv+0(FP), DX
+	MOVL	argp+4(FP), BX
 	LEAL	-8(BX), SP	// caller sp after CALL
 	SUBL	$5, (SP)	// return to CALL again
 	MOVL	0(DX), BX
@@ -695,7 +705,7 @@
 
 // void setg(G*); set g. for use by needm.
 // Not implemented.
-TEXT runtime·setg(SB), NOSPLIT, $0-8
+TEXT runtime·setg(SB), NOSPLIT, $0-4	// one 4-byte pointer argument, no result; the body never reads it
 	MOVL	0, AX
 	RET
 
@@ -726,9 +736,10 @@
 	STOSB
 	RET
 
-TEXT runtime·getcallerpc(SB),NOSPLIT,$0-8
-	MOVL	x+0(FP),AX		// addr of first arg
+TEXT runtime·getcallerpc(SB),NOSPLIT,$0-12	// argp occupies bytes 0..3; the 4-byte result sits at offset 8
+	MOVL	argp+0(FP),AX		// addr of first arg
 	MOVL	-8(AX),AX		// get calling pc
+	MOVL	AX, ret+8(FP)	// hand the pc back through the stack result slot
 	RET
 
 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-12
@@ -737,14 +748,15 @@
 	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
-	MOVL	x+0(FP),AX		// addr of first arg
+TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8	// 8 bytes: argp (4) + pc (4); no result
+	MOVL	argp+0(FP),AX		// addr of first arg
 	MOVL	pc+4(FP), BX		// pc to set
 	MOVQ	BX, -8(AX)		// set calling pc
 	RET
 
-TEXT runtime·getcallersp(SB),NOSPLIT,$0-8
-	MOVL	sp+0(FP), AX
+TEXT runtime·getcallersp(SB),NOSPLIT,$0-12	// argp occupies bytes 0..3; the 4-byte result sits at offset 8
+	MOVL	argp+0(FP), AX	// the argument value itself is what gets returned
+	MOVL	AX, ret+8(FP)	// copy it, unchanged, into the result slot
 	RET
 
 // func gogetcallersp(p unsafe.Pointer) uintptr
@@ -758,6 +770,7 @@
 	RDTSC
 	SHLQ	$32, DX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·gocputicks(SB),NOSPLIT,$0-8
@@ -784,16 +797,20 @@
 // write the implementations. Can copy and adjust the ones
 // in asm_amd64.s when the time comes.
 
-TEXT runtime·aeshash(SB),NOSPLIT,$0-24
+TEXT runtime·aeshash(SB),NOSPLIT,$0-20	// placeholder body: no hashing is performed
+	MOVL	AX, ret+16(FP)	// AX is never set in this stub, so the stored result is whatever AX held on entry
 	RET
 
-TEXT runtime·aeshashstr(SB),NOSPLIT,$0-24
+TEXT runtime·aeshashstr(SB),NOSPLIT,$0-20	// placeholder body: no hashing is performed
+	MOVL	AX, ret+16(FP)	// AX is never set in this stub, so the stored result is whatever AX held on entry
 	RET
 
-TEXT runtime·aeshash32(SB),NOSPLIT,$0-24
+TEXT runtime·aeshash32(SB),NOSPLIT,$0-20	// placeholder body: no hashing is performed
+	MOVL	AX, ret+16(FP)	// AX is never set in this stub, so the stored result is whatever AX held on entry
 	RET
 
-TEXT runtime·aeshash64(SB),NOSPLIT,$0-24
+TEXT runtime·aeshash64(SB),NOSPLIT,$0-20	// placeholder body: no hashing is performed
+	MOVL	AX, ret+16(FP)	// AX is never set in this stub, so the stored result is whatever AX held on entry
 	RET
 
 TEXT runtime·memeq(SB),NOSPLIT,$0-17
@@ -925,12 +942,12 @@
 	RET
 
 TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
-	MOVL	s1+0(FP), SI
-	MOVL	s1+4(FP), BX
-	MOVL	s2+8(FP), DI
-	MOVL	s2+12(FP), DX
+	MOVL	s1_base+0(FP), SI	// first word of s1 (the _base half of the string argument)
+	MOVL	s1_len+4(FP), BX	// second word of s1 (the _len half)
+	MOVL	s2_base+8(FP), DI	// first word of s2
+	MOVL	s2_len+12(FP), DX	// second word of s2
 	CALL	runtime·cmpbody(SB)
-	MOVL	AX, res+16(FP)
+	MOVL	AX, ret+16(FP)	// AX as left by cmpbody becomes the result, stored after the 16 bytes of arguments
 	RET
 
 TEXT bytes·Compare(SB),NOSPLIT,$0-28
diff --git a/src/pkg/runtime/asm_arm.s b/src/pkg/runtime/asm_arm.s
index 551ba0c..3ced211 100644
--- a/src/pkg/runtime/asm_arm.s
+++ b/src/pkg/runtime/asm_arm.s
@@ -468,7 +468,7 @@
 TEXT runtime·jmpdefer(SB), NOSPLIT, $0-8
 	MOVW	0(SP), LR
 	MOVW	$-4(LR), LR	// BL deferreturn
-	MOVW	fn+0(FP), R7
+	MOVW	fv+0(FP), R7
 	MOVW	argp+4(FP), SP
 	MOVW	$-4(SP), SP	// SP is 4 below argp, due to saved LR
 	MOVW	0(R7), R1
@@ -579,9 +579,6 @@
 	// the earlier calls.
 	//
 	// In the new goroutine, -8(SP) and -4(SP) are unused.
-	MOVW	fn+4(FP), R0
-	MOVW	frame+8(FP), R1
-	MOVW	framesize+12(FP), R2
 	MOVW	m_curg(R8), g
 	MOVW	(g_sched+gobuf_sp)(g), R4 // prepare stack as R4
 	MOVW	(g_sched+gobuf_pc)(g), R5
@@ -616,7 +613,7 @@
 	RET
 
 // void setg(G*); set g. for use by needm.
-TEXT runtime·setg(SB), NOSPLIT, $0-8
+TEXT runtime·setg(SB), NOSPLIT, $0-4
 	MOVW	gg+0(FP), g
 
 	// Save g to thread-local storage.
@@ -628,6 +625,7 @@
 
 TEXT runtime·getcallerpc(SB),NOSPLIT,$-4-4
 	MOVW	0(SP), R0
+	MOVW	R0, ret+4(FP)	// result goes in the word after the 4-byte argument; NOTE(review): the $-4-4 header still declares only 4 bytes — confirm this is intended
 	RET
 
 TEXT runtime·gogetcallerpc(SB),NOSPLIT,$-4-8
@@ -635,13 +633,14 @@
 	RET
 
 TEXT runtime·setcallerpc(SB),NOSPLIT,$-4-8
-	MOVW	x+4(FP), R0
+	MOVW	pc+4(FP), R0	// second argument: the pc value that is written to 0(SP) below
 	MOVW	R0, 0(SP)
 	RET
 
 TEXT runtime·getcallersp(SB),NOSPLIT,$-4-4
 	MOVW	0(FP), R0
 	MOVW	$-4(R0), R0
+	MOVW	R0, ret+4(FP)	// result (argument value minus 4) goes in the word after the argument; NOTE(review): the $-4-4 header still declares only 4 bytes — confirm
 	RET
 
 // func gogetcallersp(p unsafe.Pointer) uintptr
@@ -658,12 +657,6 @@
 	MOVW	$0, R0
 	MOVW	(R0), R1
 
-TEXT runtime·gocputicks(SB),NOSPLIT,$4-8
-	MOVW	$ret_lo+0(FP), R0
-	MOVW	R0, 4(R13)
-	BL      runtime·cputicks(SB)
-	RET
-
 // bool armcas(int32 *val, int32 old, int32 new)
 // Atomically:
 //	if(*val == old){
@@ -1264,3 +1257,7 @@
 	MOVW	R0, m_fastrand(R1)
 	MOVW	R0, ret+0(FP)
 	RET
+
+TEXT runtime·gocputicks(SB), NOSPLIT, $0	// no local frame; the whole body is a single jump
+	B runtime·cputicks(SB)	// plain branch (B, not BL): control passes to cputicks and never comes back to this function
+
diff --git a/src/pkg/runtime/memclr_plan9_amd64.s b/src/pkg/runtime/memclr_plan9_amd64.s
index 1fabcd5f..64f3c99 100644
--- a/src/pkg/runtime/memclr_plan9_amd64.s
+++ b/src/pkg/runtime/memclr_plan9_amd64.s
@@ -6,8 +6,8 @@
 
 // void runtime·memclr(void*, uintptr)
 TEXT runtime·memclr(SB),NOSPLIT,$0-16
-	MOVQ	addr+0(FP), DI
-	MOVQ	count+8(FP), CX
+	MOVQ	ptr+0(FP), DI
+	MOVQ	n+8(FP), CX
 	MOVQ	CX, BX
 	ANDQ	$7, BX
 	SHRQ	$3, CX
diff --git a/src/pkg/runtime/memmove_nacl_amd64p32.s b/src/pkg/runtime/memmove_nacl_amd64p32.s
index 1b57331..ba47e85 100644
--- a/src/pkg/runtime/memmove_nacl_amd64p32.s
+++ b/src/pkg/runtime/memmove_nacl_amd64p32.s
@@ -6,7 +6,7 @@
 
 TEXT runtime·memmove(SB), NOSPLIT, $0-12
 	MOVL	to+0(FP), DI
-	MOVL	fr+4(FP), SI
+	MOVL	from+4(FP), SI
 	MOVL	n+8(FP), BX
 
 	CMPL	SI, DI
diff --git a/src/pkg/runtime/memmove_plan9_386.s b/src/pkg/runtime/memmove_plan9_386.s
index 5ac5c27..4d5f7c6 100644
--- a/src/pkg/runtime/memmove_plan9_386.s
+++ b/src/pkg/runtime/memmove_plan9_386.s
@@ -27,7 +27,7 @@
 
 TEXT runtime·memmove(SB), NOSPLIT, $0-12
 	MOVL	to+0(FP), DI
-	MOVL	fr+4(FP), SI
+	MOVL	from+4(FP), SI
 	MOVL	n+8(FP), BX
 
 	// REP instructions have a high startup cost, so we handle small sizes
diff --git a/src/pkg/runtime/memmove_plan9_amd64.s b/src/pkg/runtime/memmove_plan9_amd64.s
index 3664e45..035d475 100644
--- a/src/pkg/runtime/memmove_plan9_amd64.s
+++ b/src/pkg/runtime/memmove_plan9_amd64.s
@@ -29,7 +29,7 @@
 TEXT runtime·memmove(SB), NOSPLIT, $0-24
 
 	MOVQ	to+0(FP), DI
-	MOVQ	fr+8(FP), SI
+	MOVQ	from+8(FP), SI
 	MOVQ	n+16(FP), BX
 
 	// REP instructions have a high startup cost, so we handle small sizes
diff --git a/src/pkg/runtime/os_darwin.go b/src/pkg/runtime/os_darwin.go
new file mode 100644
index 0000000..37ed55c
--- /dev/null
+++ b/src/pkg/runtime/os_darwin.go
@@ -0,0 +1,27 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func bsdthread_create(stk, mm, gg, fn unsafe.Pointer) int32 // every declaration in this list is body-less: the implementation lives outside this Go file
+func bsdthread_register() int32
+func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
+func mach_reply_port() uint32
+func mach_task_self() uint32
+func mach_thread_self() uint32
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func sigprocmask(sig int32, new, old unsafe.Pointer)
+func sigaction(mode uint32, new, old unsafe.Pointer) // the amd64 assembly loads mode+0(FP) as "arg 1 sig"
+func sigaltstack(new, old unsafe.Pointer)
+func sigtramp()
+func setitimer(mode int32, new, old unsafe.Pointer)
+func kqueue() int32
+func kevent(fd int32, ev1 unsafe.Pointer, nev1 int32, ev2 unsafe.Pointer, nev2 int32, ts unsafe.Pointer) int32
+func closeonexec(fd int32)
+func mach_semaphore_wait(sema uint32) int32
+func mach_semaphore_timedwait(sema, sec, nsec uint32) int32
+func mach_semaphore_signal(sema uint32) int32
+func mach_semaphore_signal_all(sema uint32) int32
diff --git a/src/pkg/runtime/os_dragonfly.go b/src/pkg/runtime/os_dragonfly.go
new file mode 100644
index 0000000..ec7ddef
--- /dev/null
+++ b/src/pkg/runtime/os_dragonfly.go
@@ -0,0 +1,21 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func lwp_create(param unsafe.Pointer) int32 // every declaration in this list is body-less: the implementation lives outside this Go file
+func sigaltstack(new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigprocmask(new, old unsafe.Pointer)
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func kqueue() int32
+func kevent(fd int32, ev1 unsafe.Pointer, nev1 int32, ev2 unsafe.Pointer, nev2 int32, ts unsafe.Pointer) int32
+func closeonexec(fd int32)
+func sys_umtx_sleep(addr unsafe.Pointer, val, timeout int32) int32
+func sys_umtx_wakeup(addr unsafe.Pointer, val int32) int32
diff --git a/src/pkg/runtime/os_freebsd.go b/src/pkg/runtime/os_freebsd.go
new file mode 100644
index 0000000..a973d3f
--- /dev/null
+++ b/src/pkg/runtime/os_freebsd.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func thr_new(param unsafe.Pointer, size int32) // body-less like every declaration in this list; NOTE(review): no result declared — confirm the implementation returns nothing
+func sigaltstack(new, old unsafe.Pointer)
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigprocmask(new, old unsafe.Pointer)
+func setitimer(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func kqueue() int32
+func kevent(fd int32, ev1 unsafe.Pointer, nev1 int32, ev2 unsafe.Pointer, nev2 int32, ts unsafe.Pointer) int32
+func closeonexec(fd int32)
+func sys_umtx_op(addr unsafe.Pointer, mode int32, val uint32, ptr2, ts unsafe.Pointer) int32
diff --git a/src/pkg/runtime/os_linux.go b/src/pkg/runtime/os_linux.go
new file mode 100644
index 0000000..fc82382
--- /dev/null
+++ b/src/pkg/runtime/os_linux.go
@@ -0,0 +1,22 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32 // every declaration in this list is body-less: the implementation lives outside this Go file
+func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
+func rt_sigaction(sig uintptr, new, old unsafe.Pointer, size uintptr) int32
+func sigaltstack(new, old unsafe.Pointer)
+func setitimer(mode int32, new, old unsafe.Pointer)
+func rtsigprocmask(sig int32, new, old unsafe.Pointer, size int32)
+func getrlimit(kind int32, limit unsafe.Pointer) int32
+func raise(sig int32)
+func epollcreate(size int32) int32
+func epollcreate1(flags int32) int32
+func epollctl(epfd, op, fd int32, ev unsafe.Pointer) int32
+func epollwait(epfd int32, ev unsafe.Pointer, nev, timeout int32) int32
+func closeonexec(fd int32)
+func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
diff --git a/src/pkg/runtime/os_nacl.go b/src/pkg/runtime/os_nacl.go
new file mode 100644
index 0000000..5b5bcf6
--- /dev/null
+++ b/src/pkg/runtime/os_nacl.go
@@ -0,0 +1,24 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func nacl_exception_stack(p unsafe.Pointer, size int32) int32 // every declaration in this list is body-less: the implementation lives outside this Go file
+func nacl_exception_handler(fn, arg unsafe.Pointer) int32
+func nacl_sem_create(flag int32) int32
+func nacl_sem_wait(sem int32) int32
+func nacl_sem_post(sem int32) int32
+func nacl_mutex_create(flag int32) int32
+func nacl_mutex_lock(mutex int32) int32
+func nacl_mutex_trylock(mutex int32) int32
+func nacl_mutex_unlock(mutex int32) int32
+func nacl_cond_create(flag int32) int32
+func nacl_cond_wait(cond, n int32) int32
+func nacl_cond_signal(cond int32) int32
+func nacl_cond_broadcast(cond int32) int32
+func nacl_cond_timed_wait_abs(cond, lock int32, ts unsafe.Pointer) // NOTE(review): the only nacl_* prototype here without an int32 result — confirm against the implementation
+func nacl_thread_create(fn, stk, tls, xx unsafe.Pointer) int32
+func nacl_nanosleep(ts, extra unsafe.Pointer) int32
diff --git a/src/pkg/runtime/os_netbsd.go b/src/pkg/runtime/os_netbsd.go
new file mode 100644
index 0000000..5cdf522
--- /dev/null
+++ b/src/pkg/runtime/os_netbsd.go
@@ -0,0 +1,23 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func setitimer(mode int32, new, old unsafe.Pointer) // every declaration in this list is body-less: the implementation lives outside this Go file
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigprocmask(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func lwp_tramp()
+func raise(sig int32)
+func kqueue() int32
+func kevent(fd int32, ev1 unsafe.Pointer, nev1 int32, ev2 unsafe.Pointer, nev2 int32, ts unsafe.Pointer) int32
+func closeonexec(fd int32)
+func getcontext(ctxt unsafe.Pointer)
+func lwp_create(ctxt unsafe.Pointer, flags uintptr, lwpid unsafe.Pointer) int32
+func lwp_park(abstime unsafe.Pointer, unpark int32, hint, unparkhint unsafe.Pointer) int32
+func lwp_unpark(lwp int32, hint unsafe.Pointer) int32
+func lwp_self() int32
diff --git a/src/pkg/runtime/os_openbsd.go b/src/pkg/runtime/os_openbsd.go
new file mode 100644
index 0000000..6bb6baa6
--- /dev/null
+++ b/src/pkg/runtime/os_openbsd.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func setitimer(mode int32, new, old unsafe.Pointer) // every declaration in this list is body-less: the implementation lives outside this Go file
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigprocmask(mode int32, new uint32) uint32 // takes the mask by value and returns a uint32, not a new/old pointer pair
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func raise(sig int32)
+func kqueue() int32
+func kevent(fd int32, ev1 unsafe.Pointer, nev1 int32, ev2 unsafe.Pointer, nev2 int32, ts unsafe.Pointer) int32
+func closeonexec(fd int32)
+func tfork(param unsafe.Pointer, psize uintptr, mm, gg, fn unsafe.Pointer) int64 // 64-bit result, unlike the int32 results of thrsleep/thrwakeup below
+func thrsleep(ident unsafe.Pointer, clock_id int32, tsp, lock, abort unsafe.Pointer) int32
+func thrwakeup(ident unsafe.Pointer, n int32) int32
diff --git a/src/pkg/runtime/os_plan9.go b/src/pkg/runtime/os_plan9.go
new file mode 100644
index 0000000..a50211a
--- /dev/null
+++ b/src/pkg/runtime/os_plan9.go
@@ -0,0 +1,24 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func pread(fd int32, buf unsafe.Pointer, nbytes int32, offset int64) int32 // every declaration in this list is body-less: the implementation lives outside this Go file
+func pwrite(fd int32, buf unsafe.Pointer, nbytes int32, offset int64) int32
+func seek(fd int32, offset int64, whence int32) int64
+func exits(msg *byte)
+func brk_(addr unsafe.Pointer) uintptr
+func sleep(ms int32) int32
+func rfork(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
+func plan9_semacquire(addr *uint32, block int32) int32
+func plan9_tsemacquire(addr *uint32, ms int32) int32
+func plan9_semrelease(addr *uint32, count int32) int32
+func notify(fn unsafe.Pointer) int32
+func noted(mode int32) int32
+func nsec(*int64) int64 // the only prototype here whose parameter is unnamed
+func sigtramp(ureg, msg unsafe.Pointer)
+func setfpmasks()
+func errstr() string
diff --git a/src/pkg/runtime/os_solaris.go b/src/pkg/runtime/os_solaris.go
new file mode 100644
index 0000000..72528c7
--- /dev/null
+++ b/src/pkg/runtime/os_solaris.go
@@ -0,0 +1,22 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func setitimer(mode int32, new, old unsafe.Pointer) // every declaration in this list is body-less: the implementation lives outside this Go file
+func sigaction(sig int32, new, old unsafe.Pointer)
+func sigaltstack(new, old unsafe.Pointer)
+func sigprocmask(mode int32, new, old unsafe.Pointer)
+func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+func getrlimit(kind int32, limit unsafe.Pointer) // NOTE(review): declared without a result here, while the dragonfly/freebsd/linux prototypes return int32 — confirm
+func asmsysvicall6(fn unsafe.Pointer)
+func miniterrno(fn unsafe.Pointer)
+func raise(sig int32)
+func getcontext(ctxt unsafe.Pointer)
+func tstart_sysvicall(mm unsafe.Pointer) uint32
+func nanotime1() int64
+func usleep1(usec uint32)
+func osyield1()
diff --git a/src/pkg/runtime/os_windows.go b/src/pkg/runtime/os_windows.go
new file mode 100644
index 0000000..188ca32
--- /dev/null
+++ b/src/pkg/runtime/os_windows.go
@@ -0,0 +1,12 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+func asmstdcall(fn unsafe.Pointer) // every declaration in this list is body-less: the implementation lives outside this Go file
+func getlasterror() uint32
+func setlasterror(err uint32)
+func usleep1(usec uint32)
diff --git a/src/pkg/runtime/stubs.go b/src/pkg/runtime/stubs.go
index e3e14ca..5f396aa 100644
--- a/src/pkg/runtime/stubs.go
+++ b/src/pkg/runtime/stubs.go
@@ -218,3 +218,48 @@
 
 func traceback(pc, sp, lr uintptr, gp *g)
 func tracebackothers(gp *g)
+
+func cgocallback(fn, frame unsafe.Pointer, framesize uintptr) // every declaration in this list is body-less: the implementation lives outside this Go file
+func gogo(buf *gobuf)
+func gosave(buf *gobuf)
+func open(name *byte, mode, perm int32) int32
+func read(fd int32, p unsafe.Pointer, n int32) int32
+func write(fd uintptr, p unsafe.Pointer, n int32) int32 // fd is uintptr here, while read and close take int32
+func close(fd int32) int32
+func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
+func jmpdefer(fv *funcval, argp unsafe.Pointer)
+func exit1(code int32)
+func asminit()
+func getcallersp(argp unsafe.Pointer) uintptr
+func cas(ptr *uint32, old, new uint32) bool // the amd64p32 assembly stores this bool result as a single byte (MOVB)
+func cas64(ptr *uint64, old, new uint64) bool
+func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+func xadd(ptr *uint32, delta int32) uint32
+func xadd64(ptr *uint64, delta int64) uint64
+func xchg(ptr *uint32, new uint32) uint32
+func xchg64(ptr *uint64, new uint64) uint64
+func xchgp(ptr *unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
+func atomicstore(ptr *uint32, val uint32)
+func atomicstore64(ptr *uint64, val uint64)
+func atomicstorep(ptr *unsafe.Pointer, val unsafe.Pointer)
+func atomicload(ptr *uint32) uint32
+func atomicload64(ptr *uint64) uint64
+func atomicloadp(ptr *unsafe.Pointer) unsafe.Pointer
+func atomicor8(ptr *uint8, val uint8)
+func setg(gg *g)
+func exit(code int32)
+func breakpoint()
+func asmcgocall(fn, arg unsafe.Pointer)
+func nanotime() int64
+func usleep(usec uint32)
+func cputicks() int64
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+func munmap(addr unsafe.Pointer, n uintptr)
+func madvise(addr unsafe.Pointer, n uintptr, flags int32)
+func setcallerpc(argp unsafe.Pointer, pc uintptr)
+func getcallerpc(argp unsafe.Pointer) uintptr
+func newstackcall(fv *funcval, addr unsafe.Pointer, size uint32)
+func procyield(cycles uint32)
+func osyield()
+func cgocallback_gofunc(fv *funcval, frame unsafe.Pointer, framesize uintptr)
+func cmpstring(s1, s2 string) int // the assembly addresses each string as a _base/_len pair of words
diff --git a/src/pkg/runtime/sys_darwin_386.s b/src/pkg/runtime/sys_darwin_386.s
index a702d9b..1ec694d 100644
--- a/src/pkg/runtime/sys_darwin_386.s
+++ b/src/pkg/runtime/sys_darwin_386.s
@@ -28,21 +28,25 @@
 TEXT runtime·open(SB),NOSPLIT,$0
 	MOVL	$5, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)	// AX after the trap is stored as the result, past the three 4-byte arguments
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$0
 	MOVL	$6, AX
 	INT	$0x80
+	MOVL	AX, ret+4(FP)	// AX after the trap is stored as the result, past the single 4-byte argument
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$0
 	MOVL	$3, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)	// AX after the trap is stored as the result, past the three 4-byte arguments
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$0
 	MOVL	$4, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)	// AX after the trap is stored as the result, past the three 4-byte arguments
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$16
@@ -59,6 +63,7 @@
 TEXT runtime·mmap(SB),NOSPLIT,$0
 	MOVL	$197, AX
 	INT	$0x80
+	MOVL	AX, ret+24(FP)	// AX after the trap is stored as the result, past the six 4-byte arguments
 	RET
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
@@ -206,9 +211,8 @@
 // void nanotime(int64 *nsec)
 TEXT runtime·nanotime(SB),NOSPLIT,$0
 	CALL	runtime·now(SB)
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)	// low 32 bits of the 64-bit result
+	MOVL	DX, ret_hi+4(FP)	// high 32 bits; the value is stored directly in the frame, not through a pointer argument
 	RET
 
 TEXT runtime·sigprocmask(SB),NOSPLIT,$0
@@ -315,7 +319,7 @@
 TEXT runtime·bsdthread_create(SB),NOSPLIT,$32
 	MOVL	$360, AX
 	// 0(SP) is where the caller PC would be; kernel skips it
-	MOVL	func+12(FP), BX
+	MOVL	fn+12(FP), BX
 	MOVL	BX, 4(SP)	// func
 	MOVL	mm+4(FP), BX
 	MOVL	BX, 8(SP)	// arg
@@ -325,10 +329,12 @@
 	MOVL	BX, 16(SP)	// pthread
 	MOVL	$0x1000000, 20(SP)	// flags = PTHREAD_START_CUSTOM
 	INT	$0x80
-	JAE	3(PC)
+	JAE	4(PC)
 	NEGL	AX
+	MOVL	AX, ret+16(FP)
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+16(FP)
 	RET
 
 // The thread that bsdthread_create creates starts executing here,
@@ -382,10 +388,12 @@
 	MOVL	$0, 20(SP)	// targetconc_ptr
 	MOVL	$0, 24(SP)	// dispatchqueue_offset
 	INT	$0x80
-	JAE	3(PC)
+	JAE	4(PC)
 	NEGL	AX
+	MOVL	AX, ret+0(FP)
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // Invoke Mach system call.
@@ -408,16 +416,19 @@
 TEXT runtime·mach_msg_trap(SB),NOSPLIT,$0
 	MOVL	$-31, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+28(FP)	// AX after sysenter is stored as the result, past the seven 4-byte arguments
 	RET
 
 TEXT runtime·mach_reply_port(SB),NOSPLIT,$0
 	MOVL	$-26, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+0(FP)	// no arguments, so the result slot is at offset 0
 	RET
 
 TEXT runtime·mach_task_self(SB),NOSPLIT,$0
 	MOVL	$-28, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+0(FP)	// no arguments, so the result slot is at offset 0
 	RET
 
 // Mach provides trap versions of the semaphore ops,
@@ -427,24 +438,28 @@
 TEXT runtime·mach_semaphore_wait(SB),NOSPLIT,$0
 	MOVL	$-36, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+4(FP)	// AX after sysenter is stored as the result, past the single 4-byte argument
 	RET
 
 // uint32 mach_semaphore_timedwait(uint32, uint32, uint32)
 TEXT runtime·mach_semaphore_timedwait(SB),NOSPLIT,$0
 	MOVL	$-38, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+12(FP)	// AX after sysenter is stored as the result, past the three 4-byte arguments
 	RET
 
 // uint32 mach_semaphore_signal(uint32)
 TEXT runtime·mach_semaphore_signal(SB),NOSPLIT,$0
 	MOVL	$-33, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+4(FP)	// AX after sysenter is stored as the result, past the single 4-byte argument
 	RET
 
 // uint32 mach_semaphore_signal_all(uint32)
 TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
 	MOVL	$-34, AX
 	CALL	runtime·sysenter(SB)
+	MOVL	AX, ret+4(FP)	// AX after sysenter is stored as the result, past the single 4-byte argument
 	RET
 
 // setldt(int entry, int address, int limit)
@@ -486,10 +501,12 @@
 TEXT runtime·sysctl(SB),NOSPLIT,$0
 	MOVL	$202, AX
 	INT	$0x80
-	JAE	3(PC)
+	JAE	4(PC)	// carry clear: jump over the three-instruction error path to MOVL $0
 	NEGL	AX
+	MOVL	AX, ret+24(FP)	// error path: the result is the negated AX
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+24(FP)	// success path: the result is 0
 	RET
 
 // int32 runtime·kqueue(void);
@@ -498,6 +515,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
@@ -506,6 +524,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·closeonexec(int32 fd);
diff --git a/src/pkg/runtime/sys_darwin_amd64.s b/src/pkg/runtime/sys_darwin_amd64.s
index 23995db7..5f0d9df 100644
--- a/src/pkg/runtime/sys_darwin_amd64.s
+++ b/src/pkg/runtime/sys_darwin_amd64.s
@@ -16,7 +16,7 @@
 
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 exit status
+	MOVL	code+0(FP), DI		// arg 1 exit status
 	MOVL	$(0x2000000+1), AX	// syscall entry
 	SYSCALL
 	MOVL	$0xf1, 0xf1  // crash
@@ -25,40 +25,44 @@
 // Exit this OS thread (like pthread_exit, which eventually
 // calls __bsdthread_terminate).
 TEXT runtime·exit1(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 exit status
+	MOVL	code+0(FP), DI		// arg 1 exit status
 	MOVL	$(0x2000000+361), AX	// syscall entry
 	SYSCALL
 	MOVL	$0xf1, 0xf1  // crash
 	RET
 
 TEXT runtime·open(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 pathname
-	MOVL	16(SP), SI		// arg 2 flags
-	MOVL	20(SP), DX		// arg 3 mode
+	MOVQ	name+0(FP), DI		// arg 1 pathname
+	MOVL	mode+8(FP), SI		// arg 2 flags
+	MOVL	perm+12(FP), DX		// arg 3 mode
 	MOVL	$(0x2000000+5), AX	// syscall entry
 	SYSCALL
+	MOVL	AX, ret+16(FP)	// AX after SYSCALL is stored as the 32-bit result, past the 16 bytes of arguments
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 fd
+	MOVL	fd+0(FP), DI		// arg 1 fd
 	MOVL	$(0x2000000+6), AX	// syscall entry
 	SYSCALL
+	MOVL	AX, ret+8(FP)	// AX after SYSCALL is stored as the 32-bit result at offset 8
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVL	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$(0x2000000+3), AX	// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)	// AX after SYSCALL is stored as the 32-bit result at offset 24
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVQ	fd+0(FP), DI		// arg 1 fd (loaded as a full 8-byte word; the Go prototype declares fd as uintptr)
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$(0x2000000+4), AX	// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)	// AX after SYSCALL is stored as the 32-bit result at offset 24
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$24
@@ -72,17 +76,17 @@
 	RET
 
 TEXT runtime·setitimer(SB), NOSPLIT, $0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVL	mode+0(FP), DI	// first argument (32-bit)
+	MOVQ	new+8(FP), SI	// second argument (pointer)
+	MOVQ	old+16(FP), DX	// third argument (pointer); nothing is stored back after SYSCALL
 	MOVL	$(0x2000000+83), AX	// syscall entry
 	SYSCALL
 	RET
 
 TEXT runtime·madvise(SB), NOSPLIT, $0
-	MOVQ	8(SP), DI		// arg 1 addr
-	MOVQ	16(SP), SI		// arg 2 len
-	MOVL	24(SP), DX		// arg 3 advice
+	MOVQ	addr+0(FP), DI		// arg 1 addr
+	MOVQ	n+8(FP), SI		// arg 2 len
+	MOVL	flags+16(FP), DX		// arg 3 advice
 	MOVL	$(0x2000000+75), AX	// syscall entry madvise
 	SYSCALL
 	// ignore failure - maybe pages are locked
@@ -99,8 +103,7 @@
 #define	gtod_ns_base	0x70
 #define	gtod_sec_base	0x78
 
-// int64 nanotime(void)
-TEXT runtime·nanotime(SB), NOSPLIT, $32
+TEXT nanotime<>(SB), NOSPLIT, $32
 	MOVQ	$0x7fffffe00000, BP	/* comm page base */
 	// Loop trying to take a consistent snapshot
 	// of the time parameters.
@@ -149,9 +152,14 @@
 	ADDQ	DX, AX
 	RET
 
+TEXT runtime·nanotime(SB),NOSPLIT,$0-8	// wrapper with no arguments and an 8-byte result
+	CALL	nanotime<>(SB)	// file-local helper; its last instructions leave the value in AX
+	MOVQ	AX, ret+0(FP)	// copy AX into the stack result slot
+	RET
+
 // func now() (sec int64, nsec int32)
-TEXT time·now(SB),NOSPLIT,$0
-	CALL	runtime·nanotime(SB)
+TEXT time·now(SB),NOSPLIT,$8
+	CALL	nanotime<>(SB)
 
 	// generated code for
 	//	func f(x uint64) (uint64, uint64) { return x/1000000000, x%100000000 }
@@ -169,9 +177,9 @@
 	RET
 
 TEXT runtime·sigprocmask(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVL	sig+0(FP), DI
+	MOVQ	new+8(FP), SI
+	MOVQ	old+16(FP), DX
 	MOVL	$(0x2000000+329), AX  // pthread_sigmask (on OS X, sigprocmask==entire process)
 	SYSCALL
 	JCC	2(PC)
@@ -179,11 +187,11 @@
 	RET
 
 TEXT runtime·sigaction(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 sig
-	MOVQ	16(SP), SI		// arg 2 act
-	MOVQ	24(SP), DX		// arg 3 oact
-	MOVQ	24(SP), CX		// arg 3 oact
-	MOVQ	24(SP), R10		// arg 3 oact
+	MOVL	mode+0(FP), DI		// arg 1 sig
+	MOVQ	new+8(FP), SI		// arg 2 act
+	MOVQ	old+16(FP), DX		// arg 3 oact
+	MOVQ	old+16(FP), CX		// arg 3 oact
+	MOVQ	old+16(FP), R10		// arg 3 oact
 	MOVL	$(0x2000000+46), AX	// syscall entry
 	SYSCALL
 	JCC	2(PC)
@@ -234,19 +242,20 @@
 	INT $3	// not reached
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 addr
-	MOVQ	16(SP), SI		// arg 2 len
-	MOVL	24(SP), DX		// arg 3 prot
-	MOVL	28(SP), R10		// arg 4 flags
-	MOVL	32(SP), R8		// arg 5 fid
-	MOVL	36(SP), R9		// arg 6 offset
+	MOVQ	addr+0(FP), DI		// arg 1 addr
+	MOVQ	n+8(FP), SI		// arg 2 len
+	MOVL	prot+16(FP), DX		// arg 3 prot
+	MOVL	flags+20(FP), R10		// arg 4 flags
+	MOVL	fd+24(FP), R8		// arg 5 fd
+	MOVL	off+28(FP), R9		// arg 6 offset
 	MOVL	$(0x2000000+197), AX	// syscall entry
 	SYSCALL
+	MOVQ	AX, ret+32(FP)	// full 64-bit AX is stored as the result
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 addr
-	MOVQ	16(SP), SI		// arg 2 len
+	MOVQ	addr+0(FP), DI		// arg 1 addr
+	MOVQ	n+8(FP), SI		// arg 2 len
 	MOVL	$(0x2000000+73), AX	// syscall entry
 	SYSCALL
 	JCC	2(PC)
@@ -293,10 +302,12 @@
 	MOVQ	$0, R9	// paranoia
 	MOVQ	$(0x2000000+360), AX	// bsdthread_create
 	SYSCALL
-	JCC 3(PC)
+	JCC 4(PC)	// carry clear: skip NEGQ/MOVL/RET and land on MOVL $0, AX
 	NEGQ	AX
+	MOVL	AX, ret+32(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+32(FP)	// carry clear: store 0 as the result
 	RET
 
 // The thread that bsdthread_create creates starts executing here,
@@ -346,42 +357,48 @@
 	MOVQ	$0, R9	// dispatchqueue_offset
 	MOVQ	$(0x2000000+366), AX	// bsdthread_register
 	SYSCALL
-	JCC 3(PC)
+	JCC 4(PC)	// carry clear: skip NEGQ/MOVL/RET and land on MOVL $0, AX
 	NEGQ	AX
+	MOVL	AX, ret+0(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+0(FP)	// carry clear: store 0 as the result
 	RET
 
 // Mach system calls use 0x1000000 instead of the BSD's 0x2000000.
 
 // uint32 mach_msg_trap(void*, uint32, uint32, uint32, uint32, uint32, uint32)
 TEXT runtime·mach_msg_trap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVL	16(SP), SI
-	MOVL	20(SP), DX
-	MOVL	24(SP), R10
-	MOVL	28(SP), R8
-	MOVL	32(SP), R9
-	MOVL	36(SP), R11
+	MOVQ	h+0(FP), DI
+	MOVL	op+8(FP), SI
+	MOVL	send_size+12(FP), DX
+	MOVL	rcv_size+16(FP), R10
+	MOVL	rcv_name+20(FP), R8
+	MOVL	timeout+24(FP), R9
+	MOVL	notify+28(FP), R11	// pushed below as the seventh argument
 	PUSHQ	R11	// seventh arg, on stack
 	MOVL	$(0x1000000+31), AX	// mach_msg_trap
 	SYSCALL
 	POPQ	R11
+	MOVL	AX, ret+32(FP)	// stored after POPQ has undone the PUSHQ above
 	RET
 
 TEXT runtime·mach_task_self(SB),NOSPLIT,$0
 	MOVL	$(0x1000000+28), AX	// task_self_trap
 	SYSCALL
+	MOVL	AX, ret+0(FP)
 	RET
 
 TEXT runtime·mach_thread_self(SB),NOSPLIT,$0
 	MOVL	$(0x1000000+27), AX	// thread_self_trap
 	SYSCALL
+	MOVL	AX, ret+0(FP)
 	RET
 
 TEXT runtime·mach_reply_port(SB),NOSPLIT,$0
 	MOVL	$(0x1000000+26), AX	// mach_reply_port
 	SYSCALL
+	MOVL	AX, ret+0(FP)
 	RET
 
 // Mach provides trap versions of the semaphore ops,
@@ -389,32 +406,36 @@
 
 // uint32 mach_semaphore_wait(uint32)
 TEXT runtime·mach_semaphore_wait(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
+	MOVL	sema+0(FP), DI
 	MOVL	$(0x1000000+36), AX	// semaphore_wait_trap
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 // uint32 mach_semaphore_timedwait(uint32, uint32, uint32)
 TEXT runtime·mach_semaphore_timedwait(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVL	12(SP), SI
-	MOVL	16(SP), DX
+	MOVL	sema+0(FP), DI
+	MOVL	sec+4(FP), SI
+	MOVL	nsec+8(FP), DX
 	MOVL	$(0x1000000+38), AX	// semaphore_timedwait_trap
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 // uint32 mach_semaphore_signal(uint32)
 TEXT runtime·mach_semaphore_signal(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
+	MOVL	sema+0(FP), DI
 	MOVL	$(0x1000000+33), AX	// semaphore_signal_trap
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 // uint32 mach_semaphore_signal_all(uint32)
 TEXT runtime·mach_semaphore_signal_all(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
+	MOVL	sema+0(FP), DI
 	MOVL	$(0x1000000+34), AX	// semaphore_signal_all_trap
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 // set tls base to DI
@@ -431,18 +452,20 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVL	16(SP), SI
-	MOVQ	24(SP), DX
-	MOVQ	32(SP), R10
-	MOVQ	40(SP), R8
-	MOVQ	48(SP), R9
+	MOVQ	mib+0(FP), DI
+	MOVL	miblen+8(FP), SI
+	MOVQ	out+16(FP), DX
+	MOVQ	size+24(FP), R10
+	MOVQ	dst+32(FP), R8
+	MOVQ	ndst+40(FP), R9
 	MOVL	$(0x2000000+202), AX	// syscall entry
 	SYSCALL
-	JCC 3(PC)
+	JCC 4(PC)	// carry clear: skip NEGQ/MOVL/RET and land on MOVL $0, AX
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+48(FP)	// carry clear: store 0 as the result
 	RET
 
 // int32 runtime·kqueue(void);
@@ -454,25 +477,27 @@
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
 TEXT runtime·kevent(SB),NOSPLIT,$0
-	MOVL    8(SP), DI
-	MOVQ    16(SP), SI
-	MOVL    24(SP), DX
-	MOVQ    32(SP), R10
-	MOVL    40(SP), R8
-	MOVQ    48(SP), R9
+	MOVL    fd+0(FP), DI
+	MOVQ    ev1+8(FP), SI
+	MOVL    nev1+16(FP), DX
+	MOVQ    ev2+24(FP), R10
+	MOVL    nev2+32(FP), R8
+	MOVQ    ts+40(FP), R9
 	MOVL	$(0x2000000+363), AX
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)	// reached both from the JCC above and after NEGQ
 	RET
 
 // void runtime·closeonexec(int32 fd);
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
-	MOVL    8(SP), DI  // fd
+	MOVL    fd+0(FP), DI  // fd
 	MOVQ    $2, SI  // F_SETFD
 	MOVQ    $1, DX  // FD_CLOEXEC
 	MOVL	$(0x2000000+92), AX  // fcntl
diff --git a/src/pkg/runtime/sys_dragonfly_386.s b/src/pkg/runtime/sys_dragonfly_386.s
index 0b8d219..bd8c8d8 100644
--- a/src/pkg/runtime/sys_dragonfly_386.s
+++ b/src/pkg/runtime/sys_dragonfly_386.s
@@ -14,6 +14,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·sys_umtx_wakeup(SB),NOSPLIT,$-4
@@ -21,11 +22,13 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·lwp_create(SB),NOSPLIT,$-4
 	MOVL	$495, AX		// lwp_create
 	INT	$0x80
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·lwp_start(SB),NOSPLIT,$0
@@ -81,26 +84,31 @@
 TEXT runtime·open(SB),NOSPLIT,$-4
 	MOVL	$5, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-4
 	MOVL	$6, AX
 	INT	$0x80
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-4
 	MOVL	$3, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-4
 	MOVL	$4, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$-4
 	MOVL	$194, AX
 	INT	$0x80
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$16
@@ -116,7 +124,7 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$36
-	LEAL	arg0+0(FP), SI
+	LEAL	addr+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - addr
@@ -131,6 +139,7 @@
 	STOSL
 	MOVL	$197, AX		// sys_mmap
 	INT	$0x80
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$-4
@@ -185,9 +194,8 @@
 	ADDL	BX, AX
 	ADCL	$0, DX
 
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 
@@ -302,7 +310,7 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$28
-	LEAL	arg0+0(FP), SI
+	LEAL	mib+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - name
@@ -313,10 +321,12 @@
 	MOVSL				// arg 6 - newlen
 	MOVL	$202, AX		// sys___sysctl
 	INT	$0x80
-	JCC	3(PC)
+	JCC	4(PC)	// carry clear: skip NEGL/MOVL/RET and land on MOVL $0, AX
 	NEGL	AX
+	MOVL	AX, ret+24(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+24(FP)	// carry clear: store 0 as the result
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$-4
@@ -327,9 +337,9 @@
 TEXT runtime·sigprocmask(SB),NOSPLIT,$16
 	MOVL	$0, 0(SP)		// syscall gap
 	MOVL	$3, 4(SP)		// arg 1 - how (SIG_SETMASK)
-	MOVL	args+0(FP), AX
+	MOVL	new+0(FP), AX
 	MOVL	AX, 8(SP)		// arg 2 - set
-	MOVL	args+4(FP), AX
+	MOVL	old+4(FP), AX
 	MOVL	AX, 12(SP)		// arg 3 - oset
 	MOVL	$340, AX		// sys_sigprocmask
 	INT	$0x80
@@ -343,6 +353,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
@@ -351,6 +362,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·closeonexec(int32 fd);
diff --git a/src/pkg/runtime/sys_dragonfly_amd64.s b/src/pkg/runtime/sys_dragonfly_amd64.s
index 25d2be3..1c279df 100644
--- a/src/pkg/runtime/sys_dragonfly_amd64.s
+++ b/src/pkg/runtime/sys_dragonfly_amd64.s
@@ -10,28 +10,31 @@
 #include "../../cmd/ld/textflag.h"
 	
 TEXT runtime·sys_umtx_sleep(SB),NOSPLIT,$0
-	MOVQ 8(SP), DI		// arg 1 - ptr
-	MOVL 16(SP), SI		// arg 2 - value
-	MOVL 20(SP), DX		// arg 3 - timeout
+	MOVQ addr+0(FP), DI		// arg 1 - ptr
+	MOVL val+8(FP), SI		// arg 2 - value
+	MOVL timeout+12(FP), DX		// arg 3 - timeout
 	MOVL $469, AX		// umtx_sleep
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·sys_umtx_wakeup(SB),NOSPLIT,$0
-	MOVQ 8(SP), DI		// arg 1 - ptr
-	MOVL 16(SP), SI		// arg 2 - count
+	MOVQ addr+0(FP), DI		// arg 1 - ptr
+	MOVL val+8(FP), SI		// arg 2 - count
 	MOVL $470, AX		// umtx_wakeup
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·lwp_create(SB),NOSPLIT,$0
-	MOVQ 8(SP), DI		// arg 1 - params
+	MOVQ param+0(FP), DI		// arg 1 - params
 	MOVL $495, AX		// lwp_create
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·lwp_start(SB),NOSPLIT,$0
@@ -54,54 +57,59 @@
 
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 exit status
+	MOVL	code+0(FP), DI		// arg 1 exit status
 	MOVL	$1, AX
 	SYSCALL
 	MOVL	$0xf1, 0xf1  // crash
 	RET
 
 TEXT runtime·exit1(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 exit status
+	MOVL	code+0(FP), DI		// arg 1 exit status
 	MOVL	$431, AX
 	SYSCALL
 	MOVL	$0xf1, 0xf1  // crash
 	RET
 
 TEXT runtime·open(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 pathname
-	MOVL	16(SP), SI		// arg 2 flags
-	MOVL	20(SP), DX		// arg 3 mode
+	MOVQ	name+0(FP), DI		// arg 1 pathname
+	MOVL	mode+8(FP), SI		// arg 2 flags
+	MOVL	perm+12(FP), DX		// arg 3 mode
 	MOVL	$5, AX
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
+	MOVL	fd+0(FP), DI		// arg 1 fd
 	MOVL	$6, AX
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVL	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$3, AX
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVQ	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$4, AX
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
+	MOVL	kind+0(FP), DI
+	MOVQ	limit+8(FP), SI
 	MOVL	$194, AX
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$16
@@ -115,9 +123,9 @@
 	RET
 
 TEXT runtime·setitimer(SB), NOSPLIT, $-8
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVL	mode+0(FP), DI
+	MOVQ	new+8(FP), SI
+	MOVQ	old+16(FP), DX
 	MOVL	$83, AX
 	SYSCALL
 	RET
@@ -148,12 +156,13 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·sigaction(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 sig
-	MOVQ	16(SP), SI		// arg 2 act
-	MOVQ	24(SP), DX		// arg 3 oact
+	MOVL	sig+0(FP), DI		// arg 1 sig
+	MOVQ	new+8(FP), SI		// arg 2 act
+	MOVQ	old+16(FP), DX		// arg 3 oact
 	MOVL	$342, AX
 	SYSCALL
 	JCC	2(PC)
@@ -194,23 +203,24 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - addr
-	MOVQ	16(SP), SI		// arg 2 - len
-	MOVL	24(SP), DX		// arg 3 - prot
-	MOVL	28(SP), R10		// arg 4 - flags
-	MOVL	32(SP), R8		// arg 5 - fd
-	MOVL	36(SP), R9
+	MOVQ	addr+0(FP), DI		// arg 1 - addr
+	MOVQ	n+8(FP), SI		// arg 2 - len
+	MOVL	prot+16(FP), DX		// arg 3 - prot
+	MOVL	flags+20(FP), R10		// arg 4 - flags
+	MOVL	fd+24(FP), R8		// arg 5 - fd
+	MOVL	off+28(FP), R9		// offset; copied to the stack below as arg 7
 	SUBQ	$16, SP
 	MOVQ	R9, 8(SP)		// arg 7 - offset (passed on stack)
 	MOVQ	$0, R9			// arg 6 - pad
 	MOVL	$197, AX
 	SYSCALL
 	ADDQ	$16, SP
+	MOVQ	AX, ret+32(FP)	// stored only after the ADDQ above has restored SP
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 addr
-	MOVQ	16(SP), SI		// arg 2 len
+	MOVQ	addr+0(FP), DI		// arg 1 addr
+	MOVQ	n+8(FP), SI		// arg 2 len
 	MOVL	$73, AX
 	SYSCALL
 	JCC	2(PC)
@@ -218,9 +228,9 @@
 	RET
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
+	MOVL	flags+16(FP), DX
 	MOVQ	$75, AX	// madvise
 	SYSCALL
 	// ignore failure - maybe pages are locked
@@ -266,18 +276,20 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - name
-	MOVL	16(SP), SI		// arg 2 - namelen
-	MOVQ	24(SP), DX		// arg 3 - oldp
-	MOVQ	32(SP), R10		// arg 4 - oldlenp
-	MOVQ	40(SP), R8		// arg 5 - newp
-	MOVQ	48(SP), R9		// arg 6 - newlen
+	MOVQ	mib+0(FP), DI		// arg 1 - name
+	MOVL	miblen+8(FP), SI		// arg 2 - namelen
+	MOVQ	out+16(FP), DX		// arg 3 - oldp
+	MOVQ	size+24(FP), R10		// arg 4 - oldlenp
+	MOVQ	dst+32(FP), R8		// arg 5 - newp
+	MOVQ	ndst+40(FP), R9		// arg 6 - newlen
 	MOVQ	$202, AX		// sys___sysctl
 	SYSCALL
-	JCC 3(PC)
+	JCC 4(PC)	// carry clear: skip NEGQ/MOVL/RET and land on MOVL $0, AX
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+48(FP)	// carry clear: store 0 as the result
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$-4
@@ -287,8 +299,8 @@
 
 TEXT runtime·sigprocmask(SB),NOSPLIT,$0
 	MOVL	$3, DI			// arg 1 - how (SIG_SETMASK)
-	MOVQ	8(SP), SI		// arg 2 - set
-	MOVQ	16(SP), DX		// arg 3 - oset
+	MOVQ	new+0(FP), SI		// arg 2 - set
+	MOVQ	old+8(FP), DX		// arg 3 - oset
 	MOVL	$340, AX		// sys_sigprocmask
 	SYSCALL
 	JAE	2(PC)
@@ -304,25 +316,27 @@
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
 TEXT runtime·kevent(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
-	MOVQ	32(SP), R10
-	MOVL	40(SP), R8
-	MOVQ	48(SP), R9
+	MOVL	fd+0(FP), DI
+	MOVQ	ev1+8(FP), SI
+	MOVL	nev1+16(FP), DX
+	MOVQ	ev2+24(FP), R10
+	MOVL	nev2+32(FP), R8
+	MOVQ	ts+40(FP), R9
 	MOVL	$363, AX
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd);
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
-	MOVL	8(SP), DI	// fd
+	MOVL	fd+0(FP), DI	// fd
 	MOVQ	$2, SI		// F_SETFD
 	MOVQ	$1, DX		// FD_CLOEXEC
 	MOVL	$92, AX		// fcntl
diff --git a/src/pkg/runtime/sys_freebsd_386.s b/src/pkg/runtime/sys_freebsd_386.s
index d2ce25f..929572f 100644
--- a/src/pkg/runtime/sys_freebsd_386.s
+++ b/src/pkg/runtime/sys_freebsd_386.s
@@ -12,6 +12,7 @@
 TEXT runtime·sys_umtx_op(SB),NOSPLIT,$-4
 	MOVL	$454, AX
 	INT	$0x80
+	MOVL	AX, ret+20(FP)
 	RET
 
 TEXT runtime·thr_new(SB),NOSPLIT,$-4
@@ -60,26 +61,31 @@
 TEXT runtime·open(SB),NOSPLIT,$-4
 	MOVL	$5, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-4
 	MOVL	$6, AX
 	INT	$0x80
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-4
 	MOVL	$3, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-4
 	MOVL	$4, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$-4
 	MOVL	$194, AX
 	INT	$0x80
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$16
@@ -98,7 +104,7 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$32
-	LEAL arg0+0(FP), SI
+	LEAL addr+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL
@@ -111,6 +117,7 @@
 	STOSL
 	MOVL	$477, AX
 	INT	$0x80
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$-4
@@ -167,9 +174,8 @@
 	ADDL	BX, AX
 	ADCL	$0, DX
 
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 
@@ -314,7 +320,7 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$28
-	LEAL	arg0+0(FP), SI
+	LEAL	mib+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - name
@@ -325,10 +331,12 @@
 	MOVSL				// arg 6 - newlen
 	MOVL	$202, AX		// sys___sysctl
 	INT	$0x80
-	JAE	3(PC)
+	JAE	4(PC)	// carry clear: skip NEGL/MOVL/RET and land on MOVL $0, AX
 	NEGL	AX
+	MOVL	AX, ret+24(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+24(FP)	// carry clear: store 0 as the result
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$-4
@@ -339,9 +347,9 @@
 TEXT runtime·sigprocmask(SB),NOSPLIT,$16
 	MOVL	$0, 0(SP)		// syscall gap
 	MOVL	$3, 4(SP)		// arg 1 - how (SIG_SETMASK)
-	MOVL	args+0(FP), AX
+	MOVL	new+0(FP), AX
 	MOVL	AX, 8(SP)		// arg 2 - set
-	MOVL	args+4(FP), AX
+	MOVL	old+4(FP), AX
 	MOVL	AX, 12(SP)		// arg 3 - oset
 	MOVL	$340, AX		// sys_sigprocmask
 	INT	$0x80
@@ -355,6 +363,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
@@ -363,6 +372,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·closeonexec(int32 fd);
diff --git a/src/pkg/runtime/sys_freebsd_amd64.s b/src/pkg/runtime/sys_freebsd_amd64.s
index 2c6e335..f119854 100644
--- a/src/pkg/runtime/sys_freebsd_amd64.s
+++ b/src/pkg/runtime/sys_freebsd_amd64.s
@@ -35,18 +35,19 @@
 #define SYSCALL MOVQ R10, CX; INT $0x80
 	
 TEXT runtime·sys_umtx_op(SB),NOSPLIT,$0
-	MOVQ 8(SP), DI
-	MOVL 16(SP), SI
-	MOVL 20(SP), DX
-	MOVQ 24(SP), R10
-	MOVQ 32(SP), R8
+	MOVQ addr+0(FP), DI
+	MOVL mode+8(FP), SI
+	MOVL val+12(FP), DX
+	MOVQ ptr2+16(FP), R10
+	MOVQ ts+24(FP), R8
 	MOVL $454, AX
 	SYSCALL
+	MOVL	AX, ret+32(FP)
 	RET
 
 TEXT runtime·thr_new(SB),NOSPLIT,$0
-	MOVQ 8(SP), DI
-	MOVQ 16(SP), SI
+	MOVQ param+0(FP), DI
+	MOVL size+8(FP), SI
 	MOVL $455, AX
 	SYSCALL
 	RET
@@ -71,54 +72,59 @@
 
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 exit status
+	MOVL	code+0(FP), DI		// arg 1 exit status
 	MOVL	$1, AX
 	SYSCALL
 	MOVL	$0xf1, 0xf1  // crash
 	RET
 
 TEXT runtime·exit1(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 exit status
+	MOVL	code+0(FP), DI		// arg 1 exit status
 	MOVL	$431, AX
 	SYSCALL
 	MOVL	$0xf1, 0xf1  // crash
 	RET
 
 TEXT runtime·open(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 pathname
-	MOVL	16(SP), SI		// arg 2 flags
-	MOVL	20(SP), DX		// arg 3 mode
+	MOVQ	name+0(FP), DI		// arg 1 pathname
+	MOVL	mode+8(FP), SI		// arg 2 flags
+	MOVL	perm+12(FP), DX		// arg 3 mode
 	MOVL	$5, AX
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
+	MOVL	fd+0(FP), DI		// arg 1 fd
 	MOVL	$6, AX
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVL	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$3, AX
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVQ	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$4, AX
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
+	MOVL	kind+0(FP), DI
+	MOVQ	limit+8(FP), SI
 	MOVL	$194, AX
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$16
@@ -134,9 +140,9 @@
 	RET
 
 TEXT runtime·setitimer(SB), NOSPLIT, $-8
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVL	mode+0(FP), DI
+	MOVQ	new+8(FP), SI
+	MOVQ	old+16(FP), DX
 	MOVL	$83, AX
 	SYSCALL
 	RET
@@ -169,12 +175,13 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·sigaction(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 sig
-	MOVQ	16(SP), SI		// arg 2 act
-	MOVQ	24(SP), DX		// arg 3 oact
+	MOVL	sig+0(FP), DI		// arg 1 sig
+	MOVQ	new+8(FP), SI		// arg 2 act
+	MOVQ	old+16(FP), DX		// arg 3 oact
 	MOVL	$416, AX
 	SYSCALL
 	JCC	2(PC)
@@ -215,19 +222,20 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 addr
-	MOVQ	16(SP), SI		// arg 2 len
-	MOVL	24(SP), DX		// arg 3 prot
-	MOVL	28(SP), R10		// arg 4 flags
-	MOVL	32(SP), R8		// arg 5 fid
-	MOVL	36(SP), R9		// arg 6 offset
+	MOVQ	addr+0(FP), DI		// arg 1 addr
+	MOVQ	n+8(FP), SI		// arg 2 len
+	MOVL	prot+16(FP), DX		// arg 3 prot
+	MOVL	flags+20(FP), R10		// arg 4 flags
+	MOVL	fd+24(FP), R8		// arg 5 fid
+	MOVL	off+28(FP), R9		// arg 6 offset
 	MOVL	$477, AX
 	SYSCALL
+	MOVQ	AX, ret+32(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 addr
-	MOVQ	16(SP), SI		// arg 2 len
+	MOVQ	addr+0(FP), DI		// arg 1 addr
+	MOVQ	n+8(FP), SI		// arg 2 len
 	MOVL	$73, AX
 	SYSCALL
 	JCC	2(PC)
@@ -235,9 +243,9 @@
 	RET
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
+	MOVL	flags+16(FP), DX
 	MOVQ	$75, AX	// madvise
 	SYSCALL
 	// ignore failure - maybe pages are locked
@@ -281,18 +289,20 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - name
-	MOVL	16(SP), SI		// arg 2 - namelen
-	MOVQ	24(SP), DX		// arg 3 - oldp
-	MOVQ	32(SP), R10		// arg 4 - oldlenp
-	MOVQ	40(SP), R8		// arg 5 - newp
-	MOVQ	48(SP), R9		// arg 6 - newlen
+	MOVQ	mib+0(FP), DI		// arg 1 - name
+	MOVL	miblen+8(FP), SI		// arg 2 - namelen
+	MOVQ	out+16(FP), DX		// arg 3 - oldp
+	MOVQ	size+24(FP), R10		// arg 4 - oldlenp
+	MOVQ	dst+32(FP), R8		// arg 5 - newp
+	MOVQ	ndst+40(FP), R9		// arg 6 - newlen
 	MOVQ	$202, AX		// sys___sysctl
 	SYSCALL
-	JCC 3(PC)
+	JCC 4(PC)	// carry clear: skip NEGQ/MOVL/RET and land on MOVL $0, AX
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)	// carry set: store the negated AX as the result
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+48(FP)	// carry clear: store 0 as the result
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$-4
@@ -302,8 +312,8 @@
 
 TEXT runtime·sigprocmask(SB),NOSPLIT,$0
 	MOVL	$3, DI			// arg 1 - how (SIG_SETMASK)
-	MOVQ	8(SP), SI		// arg 2 - set
-	MOVQ	16(SP), DX		// arg 3 - oset
+	MOVQ	new+0(FP), SI		// arg 2 - set
+	MOVQ	old+8(FP), DX		// arg 3 - oset
 	MOVL	$340, AX		// sys_sigprocmask
 	SYSCALL
 	JAE	2(PC)
@@ -319,25 +329,27 @@
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
 TEXT runtime·kevent(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
-	MOVQ	32(SP), R10
-	MOVL	40(SP), R8
-	MOVQ	48(SP), R9
+	MOVL	fd+0(FP), DI
+	MOVQ	ev1+8(FP), SI
+	MOVL	nev1+16(FP), DX
+	MOVQ	ev2+24(FP), R10
+	MOVL	nev2+32(FP), R8
+	MOVQ	ts+40(FP), R9
 	MOVL	$363, AX
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd);
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
-	MOVL	8(SP), DI	// fd
+	MOVL	fd+0(FP), DI	// fd
 	MOVQ	$2, SI		// F_SETFD
 	MOVQ	$1, DX		// FD_CLOEXEC
 	MOVL	$92, AX		// fcntl
diff --git a/src/pkg/runtime/sys_freebsd_arm.s b/src/pkg/runtime/sys_freebsd_arm.s
index dbb2583..da43871 100644
--- a/src/pkg/runtime/sys_freebsd_arm.s
+++ b/src/pkg/runtime/sys_freebsd_arm.s
@@ -48,6 +48,7 @@
 	SWI $0
 	SUB $20, R13
 	// BCS error
+	MOVW	R0, ret+20(FP)
 	RET
 
 TEXT runtime·thr_new(SB),NOSPLIT,$0
@@ -91,6 +92,7 @@
 	MOVW 8(FP), R2	// arg 3 perm
 	MOVW $SYS_open, R7
 	SWI $0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-8
@@ -99,6 +101,7 @@
 	MOVW 8(FP), R2	// arg 3 count
 	MOVW $SYS_read, R7
 	SWI $0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-8
@@ -107,12 +110,14 @@
 	MOVW 8(FP), R2	// arg 3 count
 	MOVW $SYS_write, R7
 	SWI $0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-8
 	MOVW 0(FP), R0	// arg 1 fd
 	MOVW $SYS_close, R7
 	SWI $0
+	MOVW	R0, ret+4(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$-8
@@ -120,6 +125,7 @@
 	MOVW 4(FP), R1
 	MOVW $SYS_getrlimit, R7
 	SWI $0
+	MOVW	R0, ret+8(FP)
 	RET
 
 TEXT runtime·raise(SB),NOSPLIT,$8
@@ -178,9 +184,8 @@
 	ADD.S R2, R0
 	ADC R4, R1
 
-	MOVW 0(FP), R3
-	MOVW R0, 0(R3)
-	MOVW R1, 4(R3)
+	MOVW R0, ret_lo+0(FP)
+	MOVW R1, ret_hi+4(FP)
 	RET
 
 TEXT runtime·sigaction(SB),NOSPLIT,$-8
@@ -247,6 +252,7 @@
 	SWI $0
 	SUB $4, R13
 	// TODO(dfc) error checking ?
+	MOVW	R0, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -307,6 +313,7 @@
 	SWI $0
 	SUB.CS $0, R0, R0
 	SUB $20, R13
+	MOVW	R0, ret+24(FP)
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$-4
@@ -329,6 +336,7 @@
 	MOVW $SYS_kqueue, R7
 	SWI $0
 	RSB.CS $0, R0
+	MOVW	R0, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
@@ -342,6 +350,7 @@
 	SWI $0
 	RSB.CS $0, R0
 	SUB $20, R13
+	MOVW	R0, ret+24(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd)
diff --git a/src/pkg/runtime/sys_linux_386.s b/src/pkg/runtime/sys_linux_386.s
index 3a8371c..ace5a18 100644
--- a/src/pkg/runtime/sys_linux_386.s
+++ b/src/pkg/runtime/sys_linux_386.s
@@ -11,53 +11,58 @@
 
 TEXT runtime·exit(SB),NOSPLIT,$0
 	MOVL	$252, AX	// syscall number
-	MOVL	4(SP), BX
+	MOVL	code+0(FP), BX
 	CALL	*runtime·_vdso(SB)
 	INT $3	// not reached
 	RET
 
 TEXT runtime·exit1(SB),NOSPLIT,$0
 	MOVL	$1, AX	// exit - exit the current os thread
-	MOVL	4(SP), BX
+	MOVL	code+0(FP), BX
 	CALL	*runtime·_vdso(SB)
 	INT $3	// not reached
 	RET
 
 TEXT runtime·open(SB),NOSPLIT,$0
 	MOVL	$5, AX		// syscall - open
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	name+0(FP), BX
+	MOVL	mode+4(FP), CX
+	MOVL	perm+8(FP), DX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$0
 	MOVL	$6, AX		// syscall - close
-	MOVL	4(SP), BX
+	MOVL	fd+0(FP), BX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$0
 	MOVL	$4, AX		// syscall - write
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	fd+0(FP), BX
+	MOVL	p+4(FP), CX
+	MOVL	n+8(FP), DX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$0
 	MOVL	$3, AX		// syscall - read
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	fd+0(FP), BX
+	MOVL	p+4(FP), CX
+	MOVL	n+8(FP), DX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$0
 	MOVL	$191, AX		// syscall - ugetrlimit
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
+	MOVL	kind+0(FP), BX
+	MOVL	limit+4(FP), CX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$8
@@ -87,20 +92,21 @@
 	CALL	*runtime·_vdso(SB)
 	RET
 
-TEXT runtime·setitimer(SB),NOSPLIT,$0-24
+TEXT runtime·setitimer(SB),NOSPLIT,$0-12
 	MOVL	$104, AX			// syscall - setitimer
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	mode+0(FP), BX
+	MOVL	new+4(FP), CX
+	MOVL	old+8(FP), DX
 	CALL	*runtime·_vdso(SB)
 	RET
 
-TEXT runtime·mincore(SB),NOSPLIT,$0-24
+TEXT runtime·mincore(SB),NOSPLIT,$0-16
 	MOVL	$218, AX			// syscall - mincore
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	addr+0(FP), BX
+	MOVL	n+4(FP), CX
+	MOVL	dst+8(FP), DX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+12(FP)
 	RET
 
 // func now() (sec int64, nsec int32)
@@ -137,17 +143,16 @@
 	ADDL	BX, AX
 	ADCL	$0, DX
 
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0
 	MOVL	$175, AX		// syscall entry
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
-	MOVL	16(SP), SI
+	MOVL	sig+0(FP), BX
+	MOVL	new+4(FP), CX
+	MOVL	old+8(FP), DX
+	MOVL	size+12(FP), SI
 	CALL	*runtime·_vdso(SB)
 	CMPL	AX, $0xfffff001
 	JLS	2(PC)
@@ -156,11 +161,12 @@
 
 TEXT runtime·rt_sigaction(SB),NOSPLIT,$0
 	MOVL	$174, AX		// syscall - rt_sigaction
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
-	MOVL	16(SP), SI
+	MOVL	sig+0(FP), BX
+	MOVL	new+4(FP), CX
+	MOVL	old+8(FP), DX
+	MOVL	size+12(FP), SI
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·sigtramp(SB),NOSPLIT,$44
@@ -212,24 +218,25 @@
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
 	MOVL	$192, AX	// mmap2
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
-	MOVL	16(SP), SI
-	MOVL	20(SP), DI
-	MOVL	24(SP), BP
+	MOVL	addr+0(FP), BX
+	MOVL	n+4(FP), CX
+	MOVL	prot+8(FP), DX
+	MOVL	flags+12(FP), SI
+	MOVL	fd+16(FP), DI
+	MOVL	off+20(FP), BP	// shifted right by 12 below before the call
 	SHRL	$12, BP
 	CALL	*runtime·_vdso(SB)
 	CMPL	AX, $0xfffff001
 	JLS	3(PC)
 	NOTL	AX
 	INCL	AX
+	MOVL	AX, ret+24(FP)	// reached both from the JLS above and after NOTL/INCL
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
 	MOVL	$91, AX	// munmap
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
+	MOVL	addr+0(FP), BX
+	MOVL	n+4(FP), CX
 	CALL	*runtime·_vdso(SB)
 	CMPL	AX, $0xfffff001
 	JLS	2(PC)
@@ -238,9 +245,9 @@
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
 	MOVL	$219, AX	// madvise
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	addr+0(FP), BX
+	MOVL	n+4(FP), CX
+	MOVL	flags+8(FP), DX
 	CALL	*runtime·_vdso(SB)
 	// ignore failure - maybe pages are locked
 	RET
@@ -249,13 +256,14 @@
 //	struct timespec *timeout, int32 *uaddr2, int32 val2);
 TEXT runtime·futex(SB),NOSPLIT,$0
 	MOVL	$240, AX	// futex
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
-	MOVL	16(SP), SI
-	MOVL	20(SP), DI
-	MOVL	24(SP), BP
+	MOVL	addr+0(FP), BX
+	MOVL	op+4(FP), CX
+	MOVL	val+8(FP), DX
+	MOVL	ts+12(FP), SI
+	MOVL	addr2+16(FP), DI
+	MOVL	val3+20(FP), BP
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
@@ -284,11 +292,12 @@
 
 	// In parent, return.
 	CMPL	AX, $0
-	JEQ	2(PC)
+	JEQ	3(PC)	// AX == 0: skip both the store and the RET below
+	MOVL	AX, ret+20(FP)	// AX != 0: store it as the result before returning
 	RET
 
 	// Paranoia: check that SP is as we expect.
-	MOVL	12(SP), BP
+	MOVL	mm+8(FP), BP
 	CMPL	BP, $1234
 	JEQ	2(PC)
 	INT	$3
@@ -299,8 +308,8 @@
 
 	// In child on new stack.  Reload registers (paranoia).
 	MOVL	0(SP), BX	// m
-	MOVL	4(SP), DX	// g
-	MOVL	8(SP), SI	// fn
+	MOVL	flags+0(FP), DX	// g (replaces the old 4(SP) read; the value loaded is g, not flags)
+	MOVL	stk+4(FP), SI	// fn (replaces the old 8(SP) read; the value loaded is fn, not stk)
 
 	MOVL	AX, m_procid(BX)	// save tid as m->procid
 
@@ -337,7 +346,6 @@
 	CALL	SI	// fn()
 	CALL	runtime·exit1(SB)
 	MOVL	$0x1234, 0x1005
-	RET
 
 TEXT runtime·sigaltstack(SB),NOSPLIT,$-8
 	MOVL	$186, AX	// sigaltstack
@@ -426,50 +434,55 @@
 
 TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
 	MOVL	$242, AX		// syscall - sched_getaffinity
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
+	MOVL	pid+0(FP), BX
+	MOVL	len+4(FP), CX
+	MOVL	buf+8(FP), DX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+12(FP)
 	RET
 
 // int32 runtime·epollcreate(int32 size);
 TEXT runtime·epollcreate(SB),NOSPLIT,$0
 	MOVL    $254, AX
-	MOVL	4(SP), BX
+	MOVL	size+0(FP), BX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+4(FP)
 	RET
 
 // int32 runtime·epollcreate1(int32 flags);
 TEXT runtime·epollcreate1(SB),NOSPLIT,$0
 	MOVL    $329, AX
-	MOVL	4(SP), BX
+	MOVL	flags+0(FP), BX
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+4(FP)
 	RET
 
 // int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
 TEXT runtime·epollctl(SB),NOSPLIT,$0
 	MOVL	$255, AX
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
-	MOVL	16(SP), SI
+	MOVL	epfd+0(FP), BX
+	MOVL	op+4(FP), CX
+	MOVL	fd+8(FP), DX
+	MOVL	ev+12(FP), SI
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+16(FP)
 	RET
 
 // int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
 TEXT runtime·epollwait(SB),NOSPLIT,$0
 	MOVL	$256, AX
-	MOVL	4(SP), BX
-	MOVL	8(SP), CX
-	MOVL	12(SP), DX
-	MOVL	16(SP), SI
+	MOVL	epfd+0(FP), BX
+	MOVL	ev+4(FP), CX
+	MOVL	nev+8(FP), DX
+	MOVL	timeout+12(FP), SI
 	CALL	*runtime·_vdso(SB)
+	MOVL	AX, ret+16(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd);
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
 	MOVL	$55, AX  // fcntl
-	MOVL	4(SP), BX  // fd
+	MOVL	fd+0(FP), BX  // fd
 	MOVL	$2, CX  // F_SETFD
 	MOVL	$1, DX  // FD_CLOEXEC
 	CALL	*runtime·_vdso(SB)
diff --git a/src/pkg/runtime/sys_linux_amd64.s b/src/pkg/runtime/sys_linux_amd64.s
index c402c86..f263ef3 100644
--- a/src/pkg/runtime/sys_linux_amd64.s
+++ b/src/pkg/runtime/sys_linux_amd64.s
@@ -9,53 +9,58 @@
 #include "zasm_GOOS_GOARCH.h"
 #include "../../cmd/ld/textflag.h"
 
-TEXT runtime·exit(SB),NOSPLIT,$0-8
-	MOVL	8(SP), DI
+TEXT runtime·exit(SB),NOSPLIT,$0-4
+	MOVL	code+0(FP), DI
 	MOVL	$231, AX	// exitgroup - force all os threads to exit
 	SYSCALL
 	RET
 
-TEXT runtime·exit1(SB),NOSPLIT,$0-8
-	MOVL	8(SP), DI
+TEXT runtime·exit1(SB),NOSPLIT,$0-4
+	MOVL	code+0(FP), DI
 	MOVL	$60, AX	// exit - exit the current os thread
 	SYSCALL
 	RET
 
-TEXT runtime·open(SB),NOSPLIT,$0-16
-	MOVQ	8(SP), DI
-	MOVL	16(SP), SI
-	MOVL	20(SP), DX
+TEXT runtime·open(SB),NOSPLIT,$0-20
+	MOVQ	name+0(FP), DI
+	MOVL	mode+8(FP), SI
+	MOVL	perm+12(FP), DX
 	MOVL	$2, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
-TEXT runtime·close(SB),NOSPLIT,$0-16
-	MOVL	8(SP), DI
+TEXT runtime·close(SB),NOSPLIT,$0-12
+	MOVL	fd+0(FP), DI
 	MOVL	$3, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime·write(SB),NOSPLIT,$0-24
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
+TEXT runtime·write(SB),NOSPLIT,$0-28
+	MOVQ	fd+0(FP), DI
+	MOVQ	p+8(FP), SI
+	MOVL	n+16(FP), DX
 	MOVL	$1, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
-TEXT runtime·read(SB),NOSPLIT,$0-24
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
+TEXT runtime·read(SB),NOSPLIT,$0-28
+	MOVL	fd+0(FP), DI
+	MOVQ	p+8(FP), SI
+	MOVL	n+16(FP), DX
 	MOVL	$0, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
-TEXT runtime·getrlimit(SB),NOSPLIT,$0-24
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
+TEXT runtime·getrlimit(SB),NOSPLIT,$0-20
+	MOVL	kind+0(FP), DI
+	MOVQ	limit+8(FP), SI
 	MOVL	$97, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$16
@@ -86,19 +91,20 @@
 	RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$0-24
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVL	mode+0(FP), DI
+	MOVQ	new+8(FP), SI
+	MOVQ	old+16(FP), DX
 	MOVL	$38, AX			// syscall entry
 	SYSCALL
 	RET
 
-TEXT runtime·mincore(SB),NOSPLIT,$0-24
-	MOVQ	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+TEXT runtime·mincore(SB),NOSPLIT,$0-28
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
+	MOVQ	dst+16(FP), DX
 	MOVL	$27, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 // func now() (sec int64, nsec int32)
@@ -145,6 +151,7 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 fallback_gtod_nt:
 	LEAQ	0(SP), DI
@@ -158,13 +165,14 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
-TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0-32
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
-	MOVL	32(SP), R10
+TEXT runtime·rtsigprocmask(SB),NOSPLIT,$0-28
+	MOVL	sig+0(FP), DI
+	MOVQ	new+8(FP), SI
+	MOVQ	old+16(FP), DX
+	MOVL	size+24(FP), R10
 	MOVL	$14, AX			// syscall entry
 	SYSCALL
 	CMPQ	AX, $0xfffffffffffff001
@@ -172,13 +180,14 @@
 	MOVL	$0xf1, 0xf1  // crash
 	RET
 
-TEXT runtime·rt_sigaction(SB),NOSPLIT,$0-32
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
-	MOVQ	32(SP), R10
+TEXT runtime·rt_sigaction(SB),NOSPLIT,$0-36
+	MOVQ	sig+0(FP), DI
+	MOVQ	new+8(FP), SI
+	MOVQ	old+16(FP), DX
+	MOVQ	size+24(FP), R10
 	MOVL	$13, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+32(FP)
 	RET
 
 TEXT runtime·sigtramp(SB),NOSPLIT,$64
@@ -220,12 +229,12 @@
 	INT $3	// not reached
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
-	MOVL	28(SP), R10
-	MOVL	32(SP), R8
-	MOVL	36(SP), R9
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
+	MOVL	prot+16(FP), DX
+	MOVL	flags+20(FP), R10
+	MOVL	fd+24(FP), R8
+	MOVL	off+28(FP), R9
 
 	MOVL	$9, AX			// mmap
 	SYSCALL
@@ -233,11 +242,12 @@
 	JLS	3(PC)
 	NOTQ	AX
 	INCQ	AX
+	MOVQ	AX, ret+32(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVQ	16(SP), SI
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
 	MOVQ	$11, AX	// munmap
 	SYSCALL
 	CMPQ	AX, $0xfffffffffffff001
@@ -246,9 +256,9 @@
 	RET
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVQ	addr+0(FP), DI
+	MOVQ	n+8(FP), SI
+	MOVL	flags+16(FP), DX
 	MOVQ	$28, AX	// madvise
 	SYSCALL
 	// ignore failure - maybe pages are locked
@@ -257,17 +267,18 @@
 // int64 futex(int32 *uaddr, int32 op, int32 val,
 //	struct timespec *timeout, int32 *uaddr2, int32 val2);
 TEXT runtime·futex(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVL	16(SP), SI
-	MOVL	20(SP), DX
-	MOVQ	24(SP), R10
-	MOVQ	32(SP), R8
-	MOVL	40(SP), R9
+	MOVQ	addr+0(FP), DI
+	MOVL	op+8(FP), SI
+	MOVL	val+12(FP), DX
+	MOVQ	ts+16(FP), R10
+	MOVQ	addr2+24(FP), R8
+	MOVL	val3+32(FP), R9
 	MOVL	$202, AX
 	SYSCALL
+	MOVL	AX, ret+40(FP)
 	RET
 
-// int64 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
+// int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
 TEXT runtime·clone(SB),NOSPLIT,$0
 	MOVL	flags+8(SP), DI
 	MOVQ	stack+16(SP), SI
@@ -283,7 +294,8 @@
 
 	// In parent, return.
 	CMPQ	AX, $0
-	JEQ	2(PC)
+	JEQ	3(PC)
+	MOVL	AX, ret+40(FP)
 	RET
 
 	// In child, on new stack.
@@ -342,50 +354,55 @@
 	RET
 
 TEXT runtime·sched_getaffinity(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI
-	MOVL	16(SP), SI
-	MOVQ	24(SP), DX
+	MOVQ	pid+0(FP), DI
+	MOVQ	len+8(FP), SI
+	MOVQ	buf+16(FP), DX
 	MOVL	$204, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·epollcreate(int32 size);
 TEXT runtime·epollcreate(SB),NOSPLIT,$0
-	MOVL    8(SP), DI
+	MOVL    size+0(FP), DI
 	MOVL    $213, AX                        // syscall entry
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 // int32 runtime·epollcreate1(int32 flags);
 TEXT runtime·epollcreate1(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
+	MOVL	flags+0(FP), DI
 	MOVL	$291, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 // int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev);
 TEXT runtime·epollctl(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVL	12(SP), SI
-	MOVL	16(SP), DX
-	MOVQ	24(SP), R10
+	MOVL	epfd+0(FP), DI
+	MOVL	op+4(FP), SI
+	MOVL	fd+8(FP), DX
+	MOVQ	ev+16(FP), R10
 	MOVL	$233, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout);
 TEXT runtime·epollwait(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
-	MOVL	28(SP), R10
+	MOVL	epfd+0(FP), DI
+	MOVQ	ev+8(FP), SI
+	MOVL	nev+16(FP), DX
+	MOVL	timeout+20(FP), R10
 	MOVL	$232, AX			// syscall entry
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd);
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
-	MOVL    8(SP), DI  // fd
+	MOVL    fd+0(FP), DI  // fd
 	MOVQ    $2, SI  // F_SETFD
 	MOVQ    $1, DX  // FD_CLOEXEC
 	MOVL	$72, AX  // fcntl
diff --git a/src/pkg/runtime/sys_linux_arm.s b/src/pkg/runtime/sys_linux_arm.s
index 770b963..3221cdf 100644
--- a/src/pkg/runtime/sys_linux_arm.s
+++ b/src/pkg/runtime/sys_linux_arm.s
@@ -51,12 +51,14 @@
 	MOVW	8(FP), R2
 	MOVW	$SYS_open, R7
 	SWI	$0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$0
 	MOVW	0(FP), R0
 	MOVW	$SYS_close, R7
 	SWI	$0
+	MOVW	R0, ret+4(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$0
@@ -65,6 +67,7 @@
 	MOVW	8(FP), R2
 	MOVW	$SYS_write, R7
 	SWI	$0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$0
@@ -73,6 +76,7 @@
 	MOVW	8(FP), R2
 	MOVW	$SYS_read, R7
 	SWI	$0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·getrlimit(SB),NOSPLIT,$0
@@ -80,6 +84,7 @@
 	MOVW	4(FP), R1
 	MOVW	$SYS_ugetrlimit, R7
 	SWI	$0
+	MOVW	R0, ret+8(FP)
 	RET
 
 TEXT runtime·exit(SB),NOSPLIT,$-4
@@ -119,6 +124,7 @@
 	MOVW	$0xfffff001, R6
 	CMP		R6, R0
 	RSB.HI	$0, R0
+	MOVW	R0, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -155,6 +161,7 @@
 	MOVW	8(FP), R2
 	MOVW	$SYS_mincore, R7
 	SWI	$0
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT time·now(SB), NOSPLIT, $32
@@ -172,8 +179,7 @@
 	MOVW	R2, 8(FP)
 	RET	
 
-// int64 nanotime(void) so really
-// void nanotime(int64 *nsec)
+// int64 nanotime(void)
 TEXT runtime·nanotime(SB),NOSPLIT,$32
 	MOVW	$1, R0  // CLOCK_MONOTONIC
 	MOVW	$8(R13), R1  // timespec
@@ -189,9 +195,8 @@
 	ADD.S	R2, R0
 	ADC	R4, R1
 
-	MOVW	0(FP), R3
-	MOVW	R0, 0(R3)
-	MOVW	R1, 4(R3)
+	MOVW	R0, ret_lo+0(FP)
+	MOVW	R1, ret_hi+4(FP)
 	RET
 
 // int32 futex(int32 *uaddr, int32 op, int32 val,
@@ -205,13 +210,14 @@
 	MOVW	24(SP), R5
 	MOVW	$SYS_futex, R7
 	SWI	$0
+	MOVW	R0, ret+24(FP)
 	RET
 
 
 // int32 clone(int32 flags, void *stack, M *mp, G *gp, void (*fn)(void));
 TEXT runtime·clone(SB),NOSPLIT,$0
 	MOVW	flags+0(FP), R0
-	MOVW	stack+4(FP), R1
+	MOVW	stk+4(FP), R1
 	MOVW	$0, R2	// parent tid ptr
 	MOVW	$0, R3	// tls_val
 	MOVW	$0, R4	// child tid ptr
@@ -234,7 +240,8 @@
 
 	// In parent, return.
 	CMP	$0, R0
-	BEQ	2(PC)
+	BEQ	3(PC)
+	MOVW	R0, ret+20(FP)
 	RET
 
 	// Paranoia: check that SP is as we expect. Use R13 to avoid linker 'fixup'
@@ -338,6 +345,7 @@
 	MOVW	12(FP), R3
 	MOVW	$SYS_rt_sigaction, R7
 	SWI	$0
+	MOVW	R0, ret+16(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$12
@@ -363,22 +371,24 @@
 	MOVW	$0xffff0fc0, PC
 
 TEXT runtime·cas(SB),NOSPLIT,$0
-	MOVW	valptr+0(FP), R2
+	MOVW	ptr+0(FP), R2
 	MOVW	old+4(FP), R0
 casagain:
 	MOVW	new+8(FP), R1
 	BL	cas<>(SB)
 	BCC	cascheck
 	MOVW	$1, R0
+	MOVB	R0, ret+12(FP)
 	RET
 cascheck:
 	// Kernel lies; double-check.
-	MOVW	valptr+0(FP), R2
+	MOVW	ptr+0(FP), R2
 	MOVW	old+4(FP), R0
 	MOVW	0(R2), R3
 	CMP	R0, R3
 	BEQ	casagain
 	MOVW	$0, R0
+	MOVB	R0, ret+12(FP)
 	RET
 
 TEXT runtime·casp(SB),NOSPLIT,$0
@@ -395,6 +405,7 @@
 	MOVW	8(FP), R2
 	MOVW	$SYS_sched_getaffinity, R7
 	SWI	$0
+	MOVW	R0, ret+12(FP)
 	RET
 
 // int32 runtime·epollcreate(int32 size)
@@ -402,6 +413,7 @@
 	MOVW	0(FP), R0
 	MOVW	$SYS_epoll_create, R7
 	SWI	$0
+	MOVW	R0, ret+4(FP)
 	RET
 
 // int32 runtime·epollcreate1(int32 flags)
@@ -409,6 +421,7 @@
 	MOVW	0(FP), R0
 	MOVW	$SYS_epoll_create1, R7
 	SWI	$0
+	MOVW	R0, ret+4(FP)
 	RET
 
 // int32 runtime·epollctl(int32 epfd, int32 op, int32 fd, EpollEvent *ev)
@@ -419,6 +432,7 @@
 	MOVW	12(FP), R3
 	MOVW	$SYS_epoll_ctl, R7
 	SWI	$0
+	MOVW	R0, ret+16(FP)
 	RET
 
 // int32 runtime·epollwait(int32 epfd, EpollEvent *ev, int32 nev, int32 timeout)
@@ -429,6 +443,7 @@
 	MOVW	12(FP), R3
 	MOVW	$SYS_epoll_wait, R7
 	SWI	$0
+	MOVW	R0, ret+16(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd)
diff --git a/src/pkg/runtime/sys_nacl_386.s b/src/pkg/runtime/sys_nacl_386.s
index 50dca31..e460e8e 100644
--- a/src/pkg/runtime/sys_nacl_386.s
+++ b/src/pkg/runtime/sys_nacl_386.s
@@ -96,22 +96,23 @@
 	NACL_SYSJMP(SYS_sched_yield)
 
 TEXT runtime·mmap(SB),NOSPLIT,$32
-	MOVL	arg1+0(FP), AX
+	MOVL	addr+0(FP), AX
 	MOVL	AX, 0(SP)
-	MOVL	arg2+4(FP), AX
+	MOVL	n+4(FP), AX
 	MOVL	AX, 4(SP)
-	MOVL	arg3+8(FP), AX
+	MOVL	prot+8(FP), AX
 	MOVL	AX, 8(SP)
-	MOVL	arg4+12(FP), AX
+	MOVL	flags+12(FP), AX
 	MOVL	AX, 12(SP)
-	MOVL	arg5+16(FP), AX
+	MOVL	fd+16(FP), AX
 	MOVL	AX, 16(SP)
-	MOVL	arg6+20(FP), AX
+	MOVL	off+20(FP), AX
 	MOVL	AX, 24(SP)
 	MOVL	$0, 28(SP)
 	LEAL	24(SP), AX
 	MOVL	AX, 20(SP)
 	NACL_SYSCALL(SYS_mmap)
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT time·now(SB),NOSPLIT,$20
@@ -150,9 +151,8 @@
 	ADDL	BX, AX
 	ADCL	$0, DX
 
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 TEXT runtime·setldt(SB),NOSPLIT,$8
diff --git a/src/pkg/runtime/sys_nacl_amd64p32.s b/src/pkg/runtime/sys_nacl_amd64p32.s
index d4e32ff..213e12d 100644
--- a/src/pkg/runtime/sys_nacl_amd64p32.s
+++ b/src/pkg/runtime/sys_nacl_amd64p32.s
@@ -17,27 +17,27 @@
 	RET
 
 TEXT runtime·exit(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL code+0(FP), DI
 	NACL_SYSJMP(SYS_exit)
 
 TEXT runtime·exit1(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL code+0(FP), DI
 	NACL_SYSJMP(SYS_thread_exit)
 
 TEXT runtime·open(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
+	MOVL name+0(FP), DI
+	MOVL mode+4(FP), SI
+	MOVL perm+8(FP), DX
 	NACL_SYSJMP(SYS_open)
 
 TEXT runtime·close(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL fd+0(FP), DI
 	NACL_SYSJMP(SYS_close)
 
 TEXT runtime·read(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
+	MOVL fd+0(FP), DI
+	MOVL p+4(FP), SI
+	MOVL n+8(FP), DX
 	NACL_SYSJMP(SYS_read)
 
 TEXT syscall·naclWrite(SB), NOSPLIT, $16-20
@@ -51,13 +51,13 @@
 	MOVL AX, ret+16(FP)
 	RET
 
-TEXT runtime·write(SB),NOSPLIT,$16-12
+TEXT runtime·write(SB),NOSPLIT,$16-20
 	// If using fake time and writing to stdout or stderr,
 	// emit playback header before actual data.
 	MOVQ runtime·timens(SB), AX
 	CMPQ AX, $0
 	JEQ write
-	MOVL arg1+0(FP), DI
+	MOVL fd+0(FP), DI
 	CMPL DI, $1
 	JEQ playback
 	CMPL DI, $2
@@ -65,10 +65,11 @@
 
 write:
 	// Ordinary write.
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
+	MOVL fd+0(FP), DI
+	MOVL p+4(FP), SI
+	MOVL n+8(FP), DX
 	NACL_SYSCALL(SYS_write)
+	MOVL	AX, ret+16(FP)
 	RET
 
 	// Write with playback header.
@@ -83,7 +84,7 @@
 	MOVL $(('B'<<24) | ('P'<<16)), 0(SP)
 	BSWAPQ AX
 	MOVQ AX, 4(SP)
-	MOVL arg3+8(FP), DX
+	MOVL n+8(FP), DX
 	BSWAPL DX
 	MOVL DX, 12(SP)
 	MOVL $1, DI // standard output
@@ -93,81 +94,82 @@
 
 	// Write actual data.
 	MOVL $1, DI // standard output
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
+	MOVL p+4(FP), SI
+	MOVL n+8(FP), DX
 	NACL_SYSCALL(SYS_write)
 
 	// Unlock.
 	MOVL	$0, runtime·writelock(SB)
 
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
+	MOVL p+0(FP), DI
+	MOVL size+4(FP), SI
 	NACL_SYSJMP(SYS_exception_stack)
 
 TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
+	MOVL fn+0(FP), DI
+	MOVL arg+4(FP), SI
 	NACL_SYSJMP(SYS_exception_handler)
 
 TEXT runtime·nacl_sem_create(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL flag+0(FP), DI
 	NACL_SYSJMP(SYS_sem_create)
 
 TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL sem+0(FP), DI
 	NACL_SYSJMP(SYS_sem_wait)
 
 TEXT runtime·nacl_sem_post(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL sem+0(FP), DI
 	NACL_SYSJMP(SYS_sem_post)
 
 TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL flag+0(FP), DI
 	NACL_SYSJMP(SYS_mutex_create)
 
 TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL mutex+0(FP), DI
 	NACL_SYSJMP(SYS_mutex_lock)
 
 TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL mutex+0(FP), DI
 	NACL_SYSJMP(SYS_mutex_trylock)
 
 TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL mutex+0(FP), DI
 	NACL_SYSJMP(SYS_mutex_unlock)
 
 TEXT runtime·nacl_cond_create(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL flag+0(FP), DI
 	NACL_SYSJMP(SYS_cond_create)
 
 TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
+	MOVL cond+0(FP), DI
+	MOVL n+4(FP), SI
 	NACL_SYSJMP(SYS_cond_wait)
 
 TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL cond+0(FP), DI
 	NACL_SYSJMP(SYS_cond_signal)
 
 TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
+	MOVL cond+0(FP), DI
 	NACL_SYSJMP(SYS_cond_broadcast)
 
 TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
+	MOVL cond+0(FP), DI
+	MOVL lock+4(FP), SI
+	MOVL ts+8(FP), DX
 	NACL_SYSJMP(SYS_cond_timed_wait_abs)
 
 TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
-	MOVL arg4+12(FP), CX
+	MOVL fn+0(FP), DI
+	MOVL stk+4(FP), SI
+	MOVL tls+8(FP), DX
+	MOVL xx+12(FP), CX
 	NACL_SYSJMP(SYS_thread_create)
 
 TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
@@ -177,26 +179,27 @@
 	JMP runtime·mstart(SB)
 
 TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
+	MOVL ts+0(FP), DI
+	MOVL extra+4(FP), SI
 	NACL_SYSJMP(SYS_nanosleep)
 
 TEXT runtime·osyield(SB),NOSPLIT,$0
 	NACL_SYSJMP(SYS_sched_yield)
 
 TEXT runtime·mmap(SB),NOSPLIT,$8
-	MOVL arg1+0(FP), DI
-	MOVL arg2+4(FP), SI
-	MOVL arg3+8(FP), DX
-	MOVL arg4+12(FP), CX
-	MOVL arg5+16(FP), R8
-	MOVL arg6+20(FP), AX
+	MOVL addr+0(FP), DI
+	MOVL n+4(FP), SI
+	MOVL prot+8(FP), DX
+	MOVL flags+12(FP), CX
+	MOVL fd+16(FP), R8
+	MOVL off+20(FP), AX
 	MOVQ AX, 0(SP)
 	MOVL SP, R9
 	NACL_SYSCALL(SYS_mmap)
 	CMPL AX, $-4095
 	JNA 2(PC)
 	NEGL AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT time·now(SB),NOSPLIT,$16
@@ -235,7 +238,8 @@
 TEXT runtime·nanotime(SB),NOSPLIT,$16
 	MOVQ runtime·timens(SB), AX
 	CMPQ AX, $0
-	JEQ 2(PC)
+	JEQ 3(PC)
+	MOVQ	AX, ret+0(FP)
 	RET
 	MOVL $0, DI // real time clock
 	LEAL 0(SP), AX
@@ -248,6 +252,7 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·sigtramp(SB),NOSPLIT,$80
diff --git a/src/pkg/runtime/sys_nacl_arm.s b/src/pkg/runtime/sys_nacl_arm.s
index 6a22368..48b9aae 100644
--- a/src/pkg/runtime/sys_nacl_arm.s
+++ b/src/pkg/runtime/sys_nacl_arm.s
@@ -13,27 +13,27 @@
 	MOVW	$(0x10000 + ((code)<<5)), R8; B (R8)
 
 TEXT runtime·exit(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	code+0(FP), R0
 	NACL_SYSJMP(SYS_exit)
 
 TEXT runtime·exit1(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	code+0(FP), R0
 	NACL_SYSJMP(SYS_thread_exit)
 
 TEXT runtime·open(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+0(FP), R1
-	MOVW	arg3+0(FP), R2
+	MOVW	name+0(FP), R0
+	MOVW	mode+4(FP), R1	// second argument lives at offset 4, not 0
+	MOVW	perm+8(FP), R2	// third argument lives at offset 8, not 0
 	NACL_SYSJMP(SYS_open)
 
 TEXT runtime·close(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	fd+0(FP), R0
 	NACL_SYSJMP(SYS_close)
 
 TEXT runtime·read(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
-	MOVW	arg3+8(FP), R2
+	MOVW	fd+0(FP), R0
+	MOVW	p+4(FP), R1
+	MOVW	n+8(FP), R2
 	NACL_SYSJMP(SYS_read)
 
 // func naclWrite(fd int, b []byte) int
@@ -46,77 +46,77 @@
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
-	MOVW	arg3+8(FP), R2
+	MOVW	fd+0(FP), R0
+	MOVW	p+4(FP), R1
+	MOVW	n+8(FP), R2
 	NACL_SYSJMP(SYS_write)
 
 TEXT runtime·nacl_exception_stack(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
+	MOVW	p+0(FP), R0
+	MOVW	size+4(FP), R1
 	NACL_SYSJMP(SYS_exception_stack)
 
 TEXT runtime·nacl_exception_handler(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
+	MOVW	fn+0(FP), R0
+	MOVW	arg+4(FP), R1
 	NACL_SYSJMP(SYS_exception_handler)
 
 TEXT runtime·nacl_sem_create(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	flag+0(FP), R0
 	NACL_SYSJMP(SYS_sem_create)
 
 TEXT runtime·nacl_sem_wait(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	sem+0(FP), R0
 	NACL_SYSJMP(SYS_sem_wait)
 
 TEXT runtime·nacl_sem_post(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	sem+0(FP), R0
 	NACL_SYSJMP(SYS_sem_post)
 
 TEXT runtime·nacl_mutex_create(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	flag+0(FP), R0
 	NACL_SYSJMP(SYS_mutex_create)
 
 TEXT runtime·nacl_mutex_lock(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	mutex+0(FP), R0
 	NACL_SYSJMP(SYS_mutex_lock)
 
 TEXT runtime·nacl_mutex_trylock(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	mutex+0(FP), R0
 	NACL_SYSJMP(SYS_mutex_trylock)
 
 TEXT runtime·nacl_mutex_unlock(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	mutex+0(FP), R0
 	NACL_SYSJMP(SYS_mutex_unlock)
 
 TEXT runtime·nacl_cond_create(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	flag+0(FP), R0
 	NACL_SYSJMP(SYS_cond_create)
 
 TEXT runtime·nacl_cond_wait(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
+	MOVW	cond+0(FP), R0
+	MOVW	n+4(FP), R1
 	NACL_SYSJMP(SYS_cond_wait)
 
 TEXT runtime·nacl_cond_signal(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	cond+0(FP), R0
 	NACL_SYSJMP(SYS_cond_signal)
 
 TEXT runtime·nacl_cond_broadcast(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
+	MOVW	cond+0(FP), R0
 	NACL_SYSJMP(SYS_cond_broadcast)
 
 TEXT runtime·nacl_cond_timed_wait_abs(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
-	MOVW	arg3+8(FP), R2
+	MOVW	cond+0(FP), R0
+	MOVW	lock+4(FP), R1
+	MOVW	ts+8(FP), R2
 	NACL_SYSJMP(SYS_cond_timed_wait_abs)
 
 TEXT runtime·nacl_thread_create(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
-	MOVW	arg3+8(FP), R2
-	MOVW	arg4+12(FP), R3
+	MOVW	fn+0(FP), R0
+	MOVW	stk+4(FP), R1
+	MOVW	tls+8(FP), R2
+	MOVW	xx+12(FP), R3
 	NACL_SYSJMP(SYS_thread_create)
 
 TEXT runtime·mstart_nacl(SB),NOSPLIT,$0
@@ -128,21 +128,21 @@
 	B runtime·mstart(SB)
 
 TEXT runtime·nacl_nanosleep(SB),NOSPLIT,$0
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
+	MOVW	ts+0(FP), R0
+	MOVW	extra+4(FP), R1
 	NACL_SYSJMP(SYS_nanosleep)
 
 TEXT runtime·osyield(SB),NOSPLIT,$0
 	NACL_SYSJMP(SYS_sched_yield)
 
 TEXT runtime·mmap(SB),NOSPLIT,$8
-	MOVW	arg1+0(FP), R0
-	MOVW	arg2+4(FP), R1
-	MOVW	arg3+8(FP), R2
-	MOVW	arg4+12(FP), R3
-	MOVW	arg5+16(FP), R4
+	MOVW	addr+0(FP), R0
+	MOVW	n+4(FP), R1
+	MOVW	prot+8(FP), R2
+	MOVW	flags+12(FP), R3
+	MOVW	fd+16(FP), R4
 	// arg6:offset should be passed as a pointer (to int64)
-	MOVW	arg6+20(FP), R5
+	MOVW	off+20(FP), R5
 	MOVW	R5, 4(R13)
 	MOVW	$0, R6
 	MOVW	R6, 8(R13)
@@ -152,6 +152,7 @@
 	MOVM.IA.W (R13), [R4, R5]
 	CMP	$-4095, R0
 	RSB.HI	$0, R0
+	MOVW	R0, ret+24(FP)
 	RET
 
 TEXT time·now(SB),NOSPLIT,$16
@@ -188,9 +189,8 @@
 	MOVW	$0, R4
 	ADD.S	R2, R0
 	ADC	R4, R1
-	MOVW	0(FP), R2
-	MOVW	R0, 0(R2)
-	MOVW	R1, 4(R2)
+	MOVW	R0, ret_lo+0(FP)
+	MOVW	R1, ret_hi+4(FP)
 	RET
 
 TEXT runtime·sigtramp(SB),NOSPLIT,$80
diff --git a/src/pkg/runtime/sys_netbsd_386.s b/src/pkg/runtime/sys_netbsd_386.s
index 4a78cb9..cecc389 100644
--- a/src/pkg/runtime/sys_netbsd_386.s
+++ b/src/pkg/runtime/sys_netbsd_386.s
@@ -26,21 +26,25 @@
 TEXT runtime·open(SB),NOSPLIT,$-4
 	MOVL	$5, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-4
 	MOVL	$6, AX
 	INT	$0x80
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-4
 	MOVL	$3, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-4
 	MOVL	$4, AX			// sys_write
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$24
@@ -74,7 +78,7 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$36
-	LEAL	arg0+0(FP), SI
+	LEAL	addr+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - addr
@@ -89,6 +93,7 @@
 	STOSL
 	MOVL	$197, AX		// sys_mmap
 	INT	$0x80
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$-4
@@ -146,9 +151,8 @@
 	ADDL	BX, AX
 	ADCL	CX, DX			// add high bits with carry
 
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 TEXT runtime·getcontext(SB),NOSPLIT,$-4
@@ -175,7 +179,7 @@
 	INT	$0x80
 
 TEXT runtime·sigaction(SB),NOSPLIT,$24
-	LEAL	arg0+0(FP), SI
+	LEAL	sig+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - sig
@@ -232,7 +236,7 @@
 // int32 lwp_create(void *context, uintptr flags, void *lwpid);
 TEXT runtime·lwp_create(SB),NOSPLIT,$16
 	MOVL	$0, 0(SP)
-	MOVL	context+0(FP), AX
+	MOVL	ctxt+0(FP), AX
 	MOVL	AX, 4(SP)		// arg 1 - context
 	MOVL	flags+4(FP), AX
 	MOVL	AX, 8(SP)		// arg 2 - flags
@@ -242,6 +246,7 @@
 	INT	$0x80
 	JCC	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
@@ -312,20 +317,23 @@
 TEXT runtime·lwp_park(SB),NOSPLIT,$-4
 	MOVL	$434, AX		// sys__lwp_park
 	INT	$0x80
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·lwp_unpark(SB),NOSPLIT,$-4
 	MOVL	$321, AX		// sys__lwp_unpark
 	INT	$0x80
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·lwp_self(SB),NOSPLIT,$-4
 	MOVL	$311, AX		// sys__lwp_self
 	INT	$0x80
+	MOVL	AX, ret+0(FP)
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$28
-	LEAL	arg0+0(FP), SI
+	LEAL	mib+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - name
@@ -350,6 +358,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
@@ -358,6 +367,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·closeonexec(int32 fd)
diff --git a/src/pkg/runtime/sys_netbsd_amd64.s b/src/pkg/runtime/sys_netbsd_amd64.s
index 13b1cdc..5b46dbe 100644
--- a/src/pkg/runtime/sys_netbsd_amd64.s
+++ b/src/pkg/runtime/sys_netbsd_amd64.s
@@ -11,13 +11,14 @@
 
 // int32 lwp_create(void *context, uintptr flags, void *lwpid)
 TEXT runtime·lwp_create(SB),NOSPLIT,$0
-	MOVQ	context+0(FP), DI
+	MOVQ	ctxt+0(FP), DI
 	MOVQ	flags+8(FP), SI
 	MOVQ	lwpid+16(FP), DX
 	MOVL	$309, AX		// sys__lwp_create
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
@@ -46,29 +47,32 @@
 	RET
 
 TEXT runtime·lwp_park(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - abstime
-	MOVL	16(SP), SI		// arg 2 - unpark
-	MOVQ	24(SP), DX		// arg 3 - hint
-	MOVQ	32(SP), R10		// arg 4 - unparkhint
+	MOVQ	abstime+0(FP), DI		// arg 1 - abstime
+	MOVL	unpark+8(FP), SI		// arg 2 - unpark
+	MOVQ	hint+16(FP), DX		// arg 3 - hint
+	MOVQ	unparkhint+24(FP), R10		// arg 4 - unparkhint
 	MOVL	$434, AX		// sys__lwp_park
 	SYSCALL
+	MOVL	AX, ret+32(FP)
 	RET
 
 TEXT runtime·lwp_unpark(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - lwp
-	MOVL	16(SP), SI		// arg 2 - hint
+	MOVL	lwp+0(FP), DI		// arg 1 - lwp
+	MOVQ	hint+8(FP), SI		// arg 2 - hint
 	MOVL	$321, AX		// sys__lwp_unpark
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·lwp_self(SB),NOSPLIT,$0
 	MOVL	$311, AX		// sys__lwp_self
 	SYSCALL
+	MOVL	AX, ret+0(FP)
 	RET
 
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - exit status
+	MOVL	code+0(FP), DI		// arg 1 - exit status
 	MOVL	$1, AX			// sys_exit
 	SYSCALL
 	MOVL	$0xf1, 0xf1		// crash
@@ -81,33 +85,37 @@
 	RET
 
 TEXT runtime·open(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 pathname
-	MOVL	16(SP), SI		// arg 2 flags
-	MOVL	20(SP), DX		// arg 3 mode
+	MOVQ	name+0(FP), DI		// arg 1 pathname
+	MOVL	mode+8(FP), SI		// arg 2 flags
+	MOVL	perm+12(FP), DX		// arg 3 mode
 	MOVL	$5, AX
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
+	MOVL	fd+0(FP), DI		// arg 1 fd
 	MOVL	$6, AX
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVL	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$3, AX
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - fd
-	MOVQ	16(SP), SI		// arg 2 - buf
-	MOVL	24(SP), DX		// arg 3 - nbyte
+	MOVQ	fd+0(FP), DI		// arg 1 - fd
+	MOVQ	p+8(FP), SI		// arg 2 - buf
+	MOVL	n+16(FP), DX		// arg 3 - nbyte
 	MOVL	$4, AX			// sys_write
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$16
@@ -136,9 +144,9 @@
 	RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - which
-	MOVQ	16(SP), SI		// arg 2 - itv
-	MOVQ	24(SP), DX		// arg 3 - oitv
+	MOVL	mode+0(FP), DI		// arg 1 - which
+	MOVQ	new+8(FP), SI		// arg 2 - itv
+	MOVQ	old+16(FP), DX		// arg 3 - oitv
 	MOVL	$425, AX		// sys_setitimer
 	SYSCALL
 	RET
@@ -169,10 +177,11 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·getcontext(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 - context
+	MOVQ	ctxt+0(FP), DI		// arg 1 - context
 	MOVL	$307, AX		// sys_getcontext
 	SYSCALL
 	JCC	2(PC)
@@ -180,9 +189,9 @@
 	RET
 
 TEXT runtime·sigprocmask(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 - how
-	MOVQ	16(SP), SI		// arg 2 - set
-	MOVQ	24(SP), DX		// arg 3 - oset
+	MOVL	mode+0(FP), DI		// arg 1 - how
+	MOVQ	new+8(FP), SI		// arg 2 - set
+	MOVQ	old+16(FP), DX		// arg 3 - oset
 	MOVL	$293, AX		// sys_sigprocmask
 	SYSCALL
 	JCC	2(PC)
@@ -198,9 +207,9 @@
 	SYSCALL
 
 TEXT runtime·sigaction(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - signum
-	MOVQ	16(SP), SI		// arg 2 - nsa
-	MOVQ	24(SP), DX		// arg 3 - osa
+	MOVL	sig+0(FP), DI		// arg 1 - signum
+	MOVQ	new+8(FP), SI		// arg 2 - nsa
+	MOVQ	old+16(FP), DX		// arg 3 - osa
 					// arg 4 - tramp
 	LEAQ	runtime·sigreturn_tramp(SB), R10
 	MOVQ	$2, R8			// arg 5 - vers
@@ -244,23 +253,24 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - addr
-	MOVQ	16(SP), SI		// arg 2 - len
-	MOVL	24(SP), DX		// arg 3 - prot
-	MOVL	28(SP), R10		// arg 4 - flags
-	MOVL	32(SP), R8		// arg 5 - fd
-	MOVQ	36(SP), R9
+	MOVQ	addr+0(FP), DI		// arg 1 - addr
+	MOVQ	n+8(FP), SI		// arg 2 - len
+	MOVL	prot+16(FP), DX		// arg 3 - prot
+	MOVL	flags+20(FP), R10		// arg 4 - flags
+	MOVL	fd+24(FP), R8		// arg 5 - fd
+	MOVL	off+28(FP), R9
 	SUBQ	$16, SP
 	MOVQ	R9, 8(SP)		// arg 7 - offset (passed on stack)
 	MOVQ	$0, R9			// arg 6 - pad
 	MOVL	$197, AX		// sys_mmap
 	SYSCALL
 	ADDQ	$16, SP
+	MOVQ	AX, ret+32(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - addr
-	MOVQ	16(SP), SI		// arg 2 - len
+	MOVQ	addr+0(FP), DI		// arg 1 - addr
+	MOVQ	n+8(FP), SI		// arg 2 - len
 	MOVL	$73, AX			// sys_munmap
 	SYSCALL
 	JCC	2(PC)
@@ -270,8 +280,8 @@
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
 	MOVQ	addr+0(FP), DI		// arg 1 - addr
-	MOVQ	len+8(FP), SI		// arg 2 - len
-	MOVQ	behav+16(FP), DX	// arg 3 - behav
+	MOVQ	n+8(FP), SI		// arg 2 - len
+	MOVL	flags+16(FP), DX	// arg 3 - behav
 	MOVQ	$75, AX			// sys_madvise
 	SYSCALL
 	// ignore failure - maybe pages are locked
@@ -297,18 +307,20 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - name
-	MOVL	16(SP), SI		// arg 2 - namelen
-	MOVQ	24(SP), DX		// arg 3 - oldp
-	MOVQ	32(SP), R10		// arg 4 - oldlenp
-	MOVQ	40(SP), R8		// arg 5 - newp
-	MOVQ	48(SP), R9		// arg 6 - newlen
+	MOVQ	mib+0(FP), DI		// arg 1 - name
+	MOVL	miblen+8(FP), SI		// arg 2 - namelen
+	MOVQ	out+16(FP), DX		// arg 3 - oldp
+	MOVQ	size+24(FP), R10		// arg 4 - oldlenp
+	MOVQ	dst+32(FP), R8		// arg 5 - newp
+	MOVQ	ndst+40(FP), R9		// arg 6 - newlen
 	MOVQ	$202, AX		// sys___sysctl
 	SYSCALL
-	JCC 3(PC)
+	JCC 4(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+48(FP)
 	RET
 
 // int32 runtime·kqueue(void)
@@ -318,25 +330,27 @@
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
 TEXT runtime·kevent(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
-	MOVQ	32(SP), R10
-	MOVL	40(SP), R8
-	MOVQ	48(SP), R9
+	MOVL	fd+0(FP), DI
+	MOVQ	ev1+8(FP), SI
+	MOVL	nev1+16(FP), DX
+	MOVQ	ev2+24(FP), R10
+	MOVL	nev2+32(FP), R8
+	MOVQ	ts+40(FP), R9
 	MOVL	$435, AX
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd)
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
-	MOVL	8(SP), DI	// fd
+	MOVL	fd+0(FP), DI	// fd
 	MOVQ	$2, SI		// F_SETFD
 	MOVQ	$1, DX		// FD_CLOEXEC
 	MOVL	$92, AX		// fcntl
diff --git a/src/pkg/runtime/sys_netbsd_arm.s b/src/pkg/runtime/sys_netbsd_arm.s
index acf01cf..bf133a2 100644
--- a/src/pkg/runtime/sys_netbsd_arm.s
+++ b/src/pkg/runtime/sys_netbsd_arm.s
@@ -28,11 +28,13 @@
 	MOVW 4(FP), R1
 	MOVW 8(FP), R2
 	SWI $0xa00005
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-8
 	MOVW 0(FP), R0
 	SWI $0xa00006
+	MOVW	R0, ret+4(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-8
@@ -40,6 +42,7 @@
 	MOVW 4(FP), R1
 	MOVW 8(FP), R2
 	SWI $0xa00003
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-4
@@ -47,14 +50,16 @@
 	MOVW	4(FP), R1	// arg 2 - buf
 	MOVW	8(FP), R2	// arg 3 - nbyte
 	SWI $0xa00004	// sys_write
+	MOVW	R0, ret+12(FP)
 	RET
 
 // int32 lwp_create(void *context, uintptr flags, void *lwpid)
 TEXT runtime·lwp_create(SB),NOSPLIT,$0
-	MOVW context+0(FP), R0
+	MOVW ctxt+0(FP), R0
 	MOVW flags+4(FP), R1
 	MOVW lwpid+8(FP), R2
 	SWI $0xa00135	// sys__lwp_create
+	MOVW	R0, ret+12(FP)
 	RET
 
 TEXT runtime·osyield(SB),NOSPLIT,$0
@@ -67,16 +72,19 @@
 	MOVW 8(FP), R2	// arg 3 - hint
 	MOVW 12(FP), R3	// arg 4 - unparkhint
 	SWI $0xa001b2	// sys__lwp_park
+	MOVW	R0, ret+16(FP)
 	RET
 
 TEXT runtime·lwp_unpark(SB),NOSPLIT,$0
 	MOVW	0(FP), R0	// arg 1 - lwp
 	MOVW	4(FP), R1	// arg 2 - hint
 	SWI $0xa00141 // sys__lwp_unpark
+	MOVW	R0, ret+8(FP)
 	RET
 
 TEXT runtime·lwp_self(SB),NOSPLIT,$0
 	SWI $0xa00137	// sys__lwp_self
+	MOVW	R0, ret+0(FP)
 	RET
 
 TEXT runtime·lwp_tramp(SB),NOSPLIT,$0
@@ -153,9 +161,8 @@
 	ADD.S R2, R0
 	ADC R4, R1
 
-	MOVW 0(FP), R3
-	MOVW R0, 0(R3)
-	MOVW R1, 4(R3)
+	MOVW R0, ret_lo+0(FP)
+	MOVW R1, ret_hi+4(FP)
 	RET
 
 TEXT runtime·getcontext(SB),NOSPLIT,$-4
@@ -249,6 +256,7 @@
 	ADD $4, R13 // pass arg 5 and arg 6 on stack
 	SWI $0xa000c5	// sys_mmap
 	SUB $4, R13
+	MOVW	R0, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
@@ -287,12 +295,14 @@
 	ADD $4, R13	// pass arg 5 and 6 on stack
 	SWI $0xa000ca	// sys___sysctl
 	SUB $4, R13
+	MOVW	R0, ret+24(FP)
 	RET
 
 // int32 runtime·kqueue(void)
 TEXT runtime·kqueue(SB),NOSPLIT,$0
 	SWI $0xa00158	// sys_kqueue
 	RSB.CS $0, R0
+	MOVW	R0, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout)
@@ -309,6 +319,7 @@
 	SWI $0xa001b3	// sys___kevent50
 	RSB.CS $0, R0
 	SUB $4, R13
+	MOVW	R0, ret+24(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd)
diff --git a/src/pkg/runtime/sys_openbsd_386.s b/src/pkg/runtime/sys_openbsd_386.s
index a94c4e4..596d45a 100644
--- a/src/pkg/runtime/sys_openbsd_386.s
+++ b/src/pkg/runtime/sys_openbsd_386.s
@@ -30,21 +30,25 @@
 TEXT runtime·open(SB),NOSPLIT,$-4
 	MOVL	$5, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-4
 	MOVL	$6, AX
 	INT	$0x80
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-4
 	MOVL	$3, AX
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-4
 	MOVL	$4, AX			// sys_write
 	INT	$0x80
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$24
@@ -78,7 +82,7 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$36
-	LEAL	arg0+0(FP), SI
+	LEAL	addr+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - addr
@@ -93,6 +97,7 @@
 	STOSL
 	MOVL	$197, AX		// sys_mmap
 	INT	$0x80
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$-4
@@ -151,9 +156,8 @@
 	ADDL	BX, AX
 	ADCL	CX, DX			// add high bits with carry
 
-	MOVL	ret+0(FP), DI
-	MOVL	AX, 0(DI)
-	MOVL	DX, 4(DI)
+	MOVL	AX, ret_lo+0(FP)
+	MOVL	DX, ret_hi+4(FP)
 	RET
 
 TEXT runtime·sigaction(SB),NOSPLIT,$-4
@@ -168,7 +172,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	MOVL	$0xf1, 0xf1		// crash
-	MOVL	AX, oset+0(FP)
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·sigtramp(SB),NOSPLIT,$44
@@ -222,22 +226,22 @@
 TEXT runtime·tfork(SB),NOSPLIT,$12
 
 	// Copy mp, gp and fn from the parent stack onto the child stack.
-	MOVL	params+4(FP), AX
+	MOVL	param+0(FP), AX		// param pointer is the first argument (psize is at +4)
 	MOVL	8(AX), CX		// tf_stack
 	SUBL	$16, CX
 	MOVL	CX, 8(AX)
-	MOVL	mm+12(FP), SI
+	MOVL	mm+8(FP), SI
 	MOVL	SI, 0(CX)
-	MOVL	gg+16(FP), SI
+	MOVL	gg+12(FP), SI
 	MOVL	SI, 4(CX)
-	MOVL	fn+20(FP), SI
+	MOVL	fn+16(FP), SI
 	MOVL	SI, 8(CX)
 	MOVL	$1234, 12(CX)
 
 	MOVL	$0, 0(SP)		// syscall gap
-	MOVL	params+4(FP), AX
+	MOVL	param+0(FP), AX
 	MOVL	AX, 4(SP)		// arg 1 - param
-	MOVL	psize+8(FP), AX
+	MOVL	psize+4(FP), AX
 	MOVL	AX, 8(SP)		// arg 2 - psize
 	MOVL	$8, AX			// sys___tfork
 	INT	$0x80
@@ -245,15 +249,15 @@
 	// Return if tfork syscall failed.
 	JCC	5(PC)
 	NEGL	AX
-	MOVL	ret+0(FP), DX
-	MOVL	AX, 0(DX)
+	MOVL	AX, ret_lo+20(FP)
+	MOVL	$-1, ret_hi+24(FP)
 	RET
 
 	// In parent, return.
 	CMPL	AX, $0
 	JEQ	4(PC)
-	MOVL	ret+0(FP), DX
-	MOVL	AX, 0(DX)
+	MOVL	AX, ret_lo+20(FP)
+	MOVL	$0, ret_hi+24(FP)
 	RET
 
 	// Paranoia: check that SP is as we expect.
@@ -333,15 +337,17 @@
 TEXT runtime·thrsleep(SB),NOSPLIT,$-4
 	MOVL	$94, AX			// sys___thrsleep
 	INT	$0x80
+	MOVL	AX, ret+20(FP)
 	RET
 
 TEXT runtime·thrwakeup(SB),NOSPLIT,$-4
 	MOVL	$301, AX		// sys___thrwakeup
 	INT	$0x80
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$28
-	LEAL	arg0+0(FP), SI
+	LEAL	mib+0(FP), SI
 	LEAL	4(SP), DI
 	CLD
 	MOVSL				// arg 1 - name
@@ -352,10 +358,12 @@
 	MOVSL				// arg 6 - newlen
 	MOVL	$202, AX		// sys___sysctl
 	INT	$0x80
-	JCC	3(PC)
+	JCC	4(PC)
 	NEGL	AX
+	MOVL	AX, ret+24(FP)
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·kqueue(void);
@@ -364,6 +372,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
@@ -372,6 +381,7 @@
 	INT	$0x80
 	JAE	2(PC)
 	NEGL	AX
+	MOVL	AX, ret+24(FP)
 	RET
 
 // int32 runtime·closeonexec(int32 fd);
diff --git a/src/pkg/runtime/sys_openbsd_amd64.s b/src/pkg/runtime/sys_openbsd_amd64.s
index dac90ea..eb50101 100644
--- a/src/pkg/runtime/sys_openbsd_amd64.s
+++ b/src/pkg/runtime/sys_openbsd_amd64.s
@@ -25,13 +25,15 @@
 	SYSCALL
 
 	// Return if tfork syscall failed.
-	JCC	3(PC)
+	JCC	4(PC)
 	NEGQ	AX
+	MOVQ	AX, ret+40(FP)
 	RET
 
 	// In parent, return.
 	CMPL	AX, $0
-	JEQ	2(PC)
+	JEQ	3(PC)
+	MOVQ	AX, ret+40(FP)
 	RET
 
 	// Set FS to point at m->tls.
@@ -59,25 +61,27 @@
 	RET
 
 TEXT runtime·thrsleep(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - ident
-	MOVL	16(SP), SI		// arg 2 - clock_id
-	MOVQ	24(SP), DX		// arg 3 - tp
-	MOVQ	32(SP), R10		// arg 4 - lock
-	MOVQ	40(SP), R8		// arg 5 - abort
+	MOVQ	ident+0(FP), DI		// arg 1 - ident
+	MOVL	clock_id+8(FP), SI		// arg 2 - clock_id
+	MOVQ	tsp+16(FP), DX		// arg 3 - tp
+	MOVQ	lock+24(FP), R10		// arg 4 - lock
+	MOVQ	abort+32(FP), R8		// arg 5 - abort
 	MOVL	$94, AX			// sys___thrsleep
 	SYSCALL
+	MOVL	AX, ret+40(FP)
 	RET
 
 TEXT runtime·thrwakeup(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - ident
-	MOVL	16(SP), SI		// arg 2 - n
+	MOVQ	ident+0(FP), DI		// arg 1 - ident
+	MOVL	n+8(FP), SI		// arg 2 - n
 	MOVL	$301, AX		// sys___thrwakeup
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 // Exit the entire program (like C exit)
 TEXT runtime·exit(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - exit status
+	MOVL	code+0(FP), DI		// arg 1 - exit status
 	MOVL	$1, AX			// sys_exit
 	SYSCALL
 	MOVL	$0xf1, 0xf1		// crash
@@ -91,33 +95,37 @@
 	RET
 
 TEXT runtime·open(SB),NOSPLIT,$-8
-	MOVQ	8(SP), DI		// arg 1 pathname
-	MOVL	16(SP), SI		// arg 2 flags
-	MOVL	20(SP), DX		// arg 3 mode
+	MOVQ	name+0(FP), DI		// arg 1 pathname
+	MOVL	mode+8(FP), SI		// arg 2 flags
+	MOVL	perm+12(FP), DX		// arg 3 mode
 	MOVL	$5, AX
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
+	MOVL	fd+0(FP), DI		// arg 1 fd
 	MOVL	$6, AX
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·read(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 fd
-	MOVQ	16(SP), SI		// arg 2 buf
-	MOVL	24(SP), DX		// arg 3 count
+	MOVL	fd+0(FP), DI		// arg 1 fd
+	MOVQ	p+8(FP), SI		// arg 2 buf
+	MOVL	n+16(FP), DX		// arg 3 count
 	MOVL	$3, AX
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·write(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - fd
-	MOVQ	16(SP), SI		// arg 2 - buf
-	MOVL	24(SP), DX		// arg 3 - nbyte
+	MOVQ	fd+0(FP), DI		// arg 1 - fd
+	MOVQ	p+8(FP), SI		// arg 2 - buf
+	MOVL	n+16(FP), DX		// arg 3 - nbyte
 	MOVL	$4, AX			// sys_write
 	SYSCALL
+	MOVL	AX, ret+24(FP)
 	RET
 
 TEXT runtime·usleep(SB),NOSPLIT,$16
@@ -146,9 +154,9 @@
 	RET
 
 TEXT runtime·setitimer(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - which
-	MOVQ	16(SP), SI		// arg 2 - itv
-	MOVQ	24(SP), DX		// arg 3 - oitv
+	MOVL	mode+0(FP), DI		// arg 1 - which
+	MOVQ	new+8(FP), SI		// arg 2 - itv
+	MOVQ	old+16(FP), DX		// arg 3 - oitv
 	MOVL	$69, AX			// sys_setitimer
 	SYSCALL
 	RET
@@ -179,12 +187,13 @@
 	// return nsec in AX
 	IMULQ	$1000000000, AX
 	ADDQ	DX, AX
+	MOVQ	AX, ret+0(FP)
 	RET
 
 TEXT runtime·sigaction(SB),NOSPLIT,$-8
-	MOVL	8(SP), DI		// arg 1 - signum
-	MOVQ	16(SP), SI		// arg 2 - nsa
-	MOVQ	24(SP), DX		// arg 3 - osa
+	MOVL	sig+0(FP), DI		// arg 1 - signum
+	MOVQ	new+8(FP), SI		// arg 2 - nsa
+	MOVQ	old+16(FP), DX		// arg 3 - osa
 	MOVL	$46, AX
 	SYSCALL
 	JCC	2(PC)
@@ -192,13 +201,13 @@
 	RET
 
 TEXT runtime·sigprocmask(SB),NOSPLIT,$0
-	MOVL	8(SP), DI		// arg 1 - how
-	MOVL	12(SP), SI		// arg 2 - set
+	MOVL	mode+0(FP), DI		// arg 1 - how
+	MOVL	new+4(FP), SI		// arg 2 - set
 	MOVL	$48, AX			// sys_sigprocmask
 	SYSCALL
 	JCC	2(PC)
 	MOVL	$0xf1, 0xf1		// crash
-	MOVL	AX, oset+0(FP)		// Return oset
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·sigtramp(SB),NOSPLIT,$64
@@ -235,23 +244,24 @@
 	RET
 
 TEXT runtime·mmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - addr
-	MOVQ	16(SP), SI		// arg 2 - len
-	MOVL	24(SP), DX		// arg 3 - prot
-	MOVL	28(SP), R10		// arg 4 - flags
-	MOVL	32(SP), R8		// arg 5 - fd
-	MOVQ	36(SP), R9
+	MOVQ	addr+0(FP), DI		// arg 1 - addr
+	MOVQ	n+8(FP), SI		// arg 2 - len
+	MOVL	prot+16(FP), DX		// arg 3 - prot
+	MOVL	flags+20(FP), R10		// arg 4 - flags
+	MOVL	fd+24(FP), R8		// arg 5 - fd
+	MOVL	off+28(FP), R9
 	SUBQ	$16, SP
 	MOVQ	R9, 8(SP)		// arg 7 - offset (passed on stack)
 	MOVQ	$0, R9			// arg 6 - pad
 	MOVL	$197, AX
 	SYSCALL
 	ADDQ	$16, SP
+	MOVQ	AX, ret+32(FP)
 	RET
 
 TEXT runtime·munmap(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - addr
-	MOVQ	16(SP), SI		// arg 2 - len
+	MOVQ	addr+0(FP), DI		// arg 1 - addr
+	MOVQ	n+8(FP), SI		// arg 2 - len
 	MOVL	$73, AX			// sys_munmap
 	SYSCALL
 	JCC	2(PC)
@@ -260,8 +270,8 @@
 
 TEXT runtime·madvise(SB),NOSPLIT,$0
 	MOVQ	addr+0(FP), DI		// arg 1 - addr
-	MOVQ	len+8(FP), SI		// arg 2 - len
-	MOVQ	behav+16(FP), DX	// arg 3 - behav
+	MOVQ	n+8(FP), SI		// arg 2 - len
+	MOVL	flags+16(FP), DX	// arg 3 - behav
 	MOVQ	$75, AX			// sys_madvise
 	SYSCALL
 	// ignore failure - maybe pages are locked
@@ -287,18 +297,20 @@
 	RET
 
 TEXT runtime·sysctl(SB),NOSPLIT,$0
-	MOVQ	8(SP), DI		// arg 1 - name
-	MOVL	16(SP), SI		// arg 2 - namelen
-	MOVQ	24(SP), DX		// arg 3 - oldp
-	MOVQ	32(SP), R10		// arg 4 - oldlenp
-	MOVQ	40(SP), R8		// arg 5 - newp
-	MOVQ	48(SP), R9		// arg 6 - newlen
+	MOVQ	mib+0(FP), DI		// arg 1 - name
+	MOVL	miblen+8(FP), SI		// arg 2 - namelen
+	MOVQ	out+16(FP), DX		// arg 3 - oldp
+	MOVQ	size+24(FP), R10		// arg 4 - oldlenp
+	MOVQ	dst+32(FP), R8		// arg 5 - newp
+	MOVQ	ndst+40(FP), R9		// arg 6 - newlen
 	MOVQ	$202, AX		// sys___sysctl
 	SYSCALL
-	JCC	3(PC)
+	JCC	4(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)
 	RET
 	MOVL	$0, AX
+	MOVL	AX, ret+48(FP)
 	RET
 
 // int32 runtime·kqueue(void);
@@ -310,25 +322,27 @@
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 // int32 runtime·kevent(int kq, Kevent *changelist, int nchanges, Kevent *eventlist, int nevents, Timespec *timeout);
 TEXT runtime·kevent(SB),NOSPLIT,$0
-	MOVL	8(SP), DI
-	MOVQ	16(SP), SI
-	MOVL	24(SP), DX
-	MOVQ	32(SP), R10
-	MOVL	40(SP), R8
-	MOVQ	48(SP), R9
+	MOVL	fd+0(FP), DI
+	MOVQ	ev1+8(FP), SI
+	MOVL	nev1+16(FP), DX
+	MOVQ	ev2+24(FP), R10
+	MOVL	nev2+32(FP), R8
+	MOVQ	ts+40(FP), R9
 	MOVL	$72, AX
 	SYSCALL
 	JCC	2(PC)
 	NEGQ	AX
+	MOVL	AX, ret+48(FP)
 	RET
 
 // void runtime·closeonexec(int32 fd);
 TEXT runtime·closeonexec(SB),NOSPLIT,$0
-	MOVL	8(SP), DI	// fd
+	MOVL	fd+0(FP), DI	// fd
 	MOVQ	$2, SI		// F_SETFD
 	MOVQ	$1, DX		// FD_CLOEXEC
 	MOVL	$92, AX		// fcntl
diff --git a/src/pkg/runtime/sys_plan9_386.s b/src/pkg/runtime/sys_plan9_386.s
index 5e8c742..08d0b32 100644
--- a/src/pkg/runtime/sys_plan9_386.s
+++ b/src/pkg/runtime/sys_plan9_386.s
@@ -12,31 +12,49 @@
 TEXT runtime·open(SB),NOSPLIT,$0
 	MOVL    $14, AX
 	INT     $64
+	MOVL	AX, ret+12(FP)
 	RET
 
 TEXT runtime·pread(SB),NOSPLIT,$0
 	MOVL    $50, AX
 	INT     $64
+	MOVL	AX, ret+20(FP)
 	RET
 
 TEXT runtime·pwrite(SB),NOSPLIT,$0
 	MOVL    $51, AX
 	INT     $64
+	MOVL	AX, ret+20(FP)
 	RET
 
-TEXT runtime·seek(SB),NOSPLIT,$0
+// int32 _seek(int64*, int32, int64, int32)
+TEXT _seek<>(SB),NOSPLIT,$0
 	MOVL	$39, AX
 	INT	$64
-	CMPL	AX, $-1
-	JNE	4(PC)
-	MOVL	a+0(FP), CX
-	MOVL	AX, 0(CX)
-	MOVL	AX, 4(CX)
+	RET
+
+TEXT runtime·seek(SB),NOSPLIT,$24
+	LEAL	ret+16(FP), AX
+	MOVL	fd+0(FP), BX
+	MOVL	offset_lo+4(FP), CX
+	MOVL	offset_hi+8(FP), DX
+	MOVL	whence+12(FP), SI
+	MOVL	AX, 0(SP)
+	MOVL	BX, 4(SP)
+	MOVL	CX, 8(SP)
+	MOVL	DX, 12(SP)
+	MOVL	SI, 16(SP)
+	CALL	_seek<>(SB)
+	CMPL	AX, $0
+	JGE	3(PC)
+	MOVL	$-1, ret_lo+16(FP)
+	MOVL	$-1, ret_hi+20(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$0
 	MOVL	$4, AX
 	INT	$64
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·exits(SB),NOSPLIT,$0
@@ -47,50 +65,62 @@
 TEXT runtime·brk_(SB),NOSPLIT,$0
 	MOVL    $24, AX
 	INT     $64
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·sleep(SB),NOSPLIT,$0
 	MOVL    $17, AX
 	INT     $64
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·plan9_semacquire(SB),NOSPLIT,$0
 	MOVL	$37, AX
 	INT	$64
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·plan9_tsemacquire(SB),NOSPLIT,$0
 	MOVL	$52, AX
 	INT	$64
+	MOVL	AX, ret+8(FP)
 	RET
 
-TEXT runtime·nsec(SB),NOSPLIT,$0
+TEXT nsec<>(SB),NOSPLIT,$0
 	MOVL	$53, AX
 	INT	$64
-	CMPL	AX, $-1
-	JNE	4(PC)
-	MOVL	a+0(FP), CX
-	MOVL	AX, 0(CX)
-	MOVL	AX, 4(CX)
+	RET
+
+TEXT runtime·nsec(SB),NOSPLIT,$8
+	LEAL	ret+4(FP), AX
+	MOVL	AX, 0(SP)
+	CALL	nsec<>(SB)
+	CMPL	AX, $0
+	JGE	3(PC)
+	MOVL	$-1, ret_lo+4(FP)
+	MOVL	$-1, ret_hi+8(FP)
 	RET
 
 TEXT runtime·notify(SB),NOSPLIT,$0
 	MOVL	$28, AX
 	INT	$64
+	MOVL	AX, ret+4(FP)
 	RET
 
 TEXT runtime·noted(SB),NOSPLIT,$0
 	MOVL	$29, AX
 	INT	$64
+	MOVL	AX, ret+4(FP)
 	RET
 	
 TEXT runtime·plan9_semrelease(SB),NOSPLIT,$0
 	MOVL	$38, AX
 	INT	$64
+	MOVL	AX, ret+8(FP)
 	RET
 	
 TEXT runtime·rfork(SB),NOSPLIT,$0
-	MOVL    $19, AX // rfork
+	MOVL	$19, AX // rfork
 	MOVL	stack+8(SP), CX
 	MOVL	mm+12(SP), BX	// m
 	MOVL	gg+16(SP), DX	// g
@@ -99,7 +129,8 @@
 
 	// In parent, return.
 	CMPL	AX, $0
-	JEQ	2(PC)
+	JEQ	3(PC)
+	MOVL	AX, ret+20(FP)
 	RET
 
 	// set SP to be on the new child stack
@@ -127,6 +158,7 @@
 	
 	CALL	SI	// fn()
 	CALL	runtime·exit(SB)
+	MOVL	AX, ret+20(FP)
 	RET
 
 // void sigtramp(void *ureg, int8 *note)
@@ -195,17 +227,17 @@
 	MOVL	g(AX), BX
 	MOVL	g_m(BX), BX
 	MOVL	m_errstr(BX), CX
-	MOVL	CX, 4(SP)
-	MOVL	$ERRMAX, 8(SP)
+	MOVL	CX, ret_base+0(FP)
+	MOVL	$ERRMAX, ret_len+4(FP)
 	MOVL	$41, AX
 	INT	$64
 
 	// syscall requires caller-save
-	MOVL	4(SP), CX
+	MOVL	ret_base+0(FP), CX
 
 	// push the argument
 	PUSHL	CX
 	CALL	runtime·findnull(SB)
 	POPL	CX
-	MOVL	AX, 8(SP)
+	MOVL	AX, ret_len+4(FP)
 	RET
diff --git a/src/pkg/runtime/sys_plan9_amd64.s b/src/pkg/runtime/sys_plan9_amd64.s
index 8f4a5c0..c8fa444 100644
--- a/src/pkg/runtime/sys_plan9_amd64.s
+++ b/src/pkg/runtime/sys_plan9_amd64.s
@@ -12,16 +12,19 @@
 TEXT runtime·open(SB),NOSPLIT,$0
 	MOVQ	$14, BP
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·pread(SB),NOSPLIT,$0
 	MOVQ	$50, BP
 	SYSCALL
+	MOVL	AX, ret+32(FP)
 	RET
 
 TEXT runtime·pwrite(SB),NOSPLIT,$0
 	MOVQ	$51, BP
 	SYSCALL
+	MOVL	AX, ret+32(FP)
 	RET
 
 // int32 _seek(int64*, int32, int64, int32)
@@ -31,25 +34,26 @@
 	RET
 
 // int64 seek(int32, int64, int32)
-TEXT runtime·seek(SB),NOSPLIT,$56
-	LEAQ	new+48(SP), CX
-	MOVQ	CX, 0(SP)
-	MOVQ	fd+0(FP), CX
-	MOVQ	CX, 8(SP)
-	MOVQ	off+8(FP), CX
+// Convenience wrapper around _seek, the actual system call.
+TEXT runtime·seek(SB),NOSPLIT,$32
+	LEAQ	ret+24(FP), AX		// address of the result slot, passed as _seek's first argument
+	MOVL	fd+0(FP), BX
+	MOVQ	offset+8(FP), CX
+	MOVL	whence+16(FP), DX
+	MOVQ	AX, 0(SP)
+	MOVL	BX, 8(SP)
 	MOVQ	CX, 16(SP)
-	MOVQ	whence+16(FP), CX
-	MOVQ	CX, 24(SP)
+	MOVL	DX, 24(SP)
 	CALL	_seek<>(SB)
 	CMPL	AX, $0
 	JGE	2(PC)
-	MOVQ	$-1, new+48(SP)
-	MOVQ	new+48(SP), AX
+	MOVQ	$-1, ret+24(FP)
 	RET
 
 TEXT runtime·close(SB),NOSPLIT,$0
 	MOVQ	$4, BP
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·exits(SB),NOSPLIT,$0
@@ -60,41 +64,49 @@
 TEXT runtime·brk_(SB),NOSPLIT,$0
 	MOVQ	$24, BP
 	SYSCALL
+	MOVQ	AX, ret+8(FP)
 	RET
 
 TEXT runtime·sleep(SB),NOSPLIT,$0
 	MOVQ	$17, BP
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·plan9_semacquire(SB),NOSPLIT,$0
 	MOVQ	$37, BP
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·plan9_tsemacquire(SB),NOSPLIT,$0
 	MOVQ	$52, BP
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·nsec(SB),NOSPLIT,$0
 	MOVQ	$53, BP
 	SYSCALL
+	MOVQ	AX, ret+8(FP)
 	RET
 
 TEXT runtime·notify(SB),NOSPLIT,$0
 	MOVQ	$28, BP
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 
 TEXT runtime·noted(SB),NOSPLIT,$0
 	MOVQ	$29, BP
 	SYSCALL
+	MOVL	AX, ret+8(FP)
 	RET
 	
 TEXT runtime·plan9_semrelease(SB),NOSPLIT,$0
 	MOVQ	$38, BP
 	SYSCALL
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT runtime·rfork(SB),NOSPLIT,$0
@@ -103,7 +115,8 @@
 
 	// In parent, return.
 	CMPQ	AX, $0
-	JEQ	2(PC)
+	JEQ	3(PC)
+	MOVL	AX, ret+40(FP)
 	RET
 
 	// In child on forked stack.
@@ -132,6 +145,7 @@
 	
 	CALL	SI	// fn()
 	CALL	runtime·exit(SB)
+	MOVL	AX, ret+40(FP)
 	RET
 
 // This is needed by asm_amd64.s
@@ -208,17 +222,17 @@
 	MOVQ	g(AX), BX
 	MOVQ	g_m(BX), BX
 	MOVQ	m_errstr(BX), CX
-	MOVQ	CX, 8(SP)
-	MOVQ	$ERRMAX, 16(SP)
+	MOVQ	CX, ret_base+0(FP)
+	MOVQ	$ERRMAX, ret_len+8(FP)
 	MOVQ	$41, BP
 	SYSCALL
 
 	// syscall requires caller-save
-	MOVQ	8(SP), CX
+	MOVQ	ret_base+0(FP), CX
 
 	// push the argument
 	PUSHQ	CX
 	CALL	runtime·findnull(SB)
 	POPQ	CX
-	MOVQ	AX, 16(SP)
+	MOVQ	AX, ret_len+8(FP)
 	RET
diff --git a/src/pkg/runtime/sys_solaris_amd64.s b/src/pkg/runtime/sys_solaris_amd64.s
index 1b18c8d..60447d3 100644
--- a/src/pkg/runtime/sys_solaris_amd64.s
+++ b/src/pkg/runtime/sys_solaris_amd64.s
@@ -44,6 +44,7 @@
 	IMULQ	$1000000000, AX	// multiply into nanoseconds
 	ADDQ	8(SP), AX	// tv_nsec, offset should be stable.
 	ADDQ	$64, SP
+	MOVQ	AX, ret+0(FP)
 	RET
 
 // pipe(3c) wrapper that returns fds in AX, DX.
@@ -137,6 +138,7 @@
 	CALL	runtime·mstart(SB)
 
 	XORL	AX, AX			// return 0 == success
+	MOVL	AX, ret+8(FP)
 	RET
 
 // Careful, this is called by __sighndlr, a libc function. We must preserve
@@ -274,7 +276,7 @@
 // Called from runtime·usleep (Go). Can be called on Go stack, on OS stack,
 // can also be called in cgo callback path without a g->m.
 TEXT runtime·usleep1(SB),NOSPLIT,$0
-	MOVL	us+0(FP), DI
+	MOVL	usec+0(FP), DI
 	MOVQ	$runtime·usleep2(SB), AX // to hide from 6l
 
 	// Execute call on m->g0.
diff --git a/src/pkg/runtime/sys_windows_386.s b/src/pkg/runtime/sys_windows_386.s
index f2c2a41..2a1f4f9 100644
--- a/src/pkg/runtime/sys_windows_386.s
+++ b/src/pkg/runtime/sys_windows_386.s
@@ -7,7 +7,7 @@
 
 // void runtime·asmstdcall(void *c);
 TEXT runtime·asmstdcall(SB),NOSPLIT,$0
-	MOVL	c+0(FP), BX
+	MOVL	fn+0(FP), BX
 
 	// SetLastError(0).
 	MOVL	$0, 0x34(FS)
@@ -29,7 +29,7 @@
 	MOVL	BP, SP
 
 	// Return result.
-	MOVL	c+0(FP), BX
+	MOVL	fn+0(FP), BX
 	MOVL	AX, libcall_r1(BX)
 	MOVL	DX, libcall_r2(BX)
 
@@ -62,6 +62,7 @@
 // faster get/set last error
 TEXT runtime·getlasterror(SB),NOSPLIT,$0
 	MOVL	0x34(FS), AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 TEXT runtime·setlasterror(SB),NOSPLIT,$0
@@ -301,7 +302,7 @@
 
 // Sleep duration is in 100ns units.
 TEXT runtime·usleep1(SB),NOSPLIT,$0
-	MOVL	duration+0(FP), BX
+	MOVL	usec+0(FP), BX
 	MOVL	$runtime·usleep2(SB), AX // to hide from 8l
 
 	// Execute call on m->g0 stack, in case we are not actually
@@ -323,7 +324,7 @@
 	MOVL	SI, m_libcallg(BP)
 	// sp must be the last, because once async cpu profiler finds
 	// all three values to be non-zero, it will use them
-	LEAL	4(SP), SI
+	LEAL	usec+0(FP), SI
 	MOVL	SI, m_libcallsp(BP)
 
 	MOVL	m_g0(BP), SI
diff --git a/src/pkg/runtime/sys_windows_amd64.s b/src/pkg/runtime/sys_windows_amd64.s
index 73b1ba6..6030262 100644
--- a/src/pkg/runtime/sys_windows_amd64.s
+++ b/src/pkg/runtime/sys_windows_amd64.s
@@ -87,6 +87,7 @@
 TEXT runtime·getlasterror(SB),NOSPLIT,$0
 	MOVQ	0x30(GS), AX
 	MOVL	0x68(AX), AX
+	MOVL	AX, ret+0(FP)
 	RET
 
 TEXT runtime·setlasterror(SB),NOSPLIT,$0
@@ -323,7 +324,7 @@
 
 // Sleep duration is in 100ns units.
 TEXT runtime·usleep1(SB),NOSPLIT,$0
-	MOVL	duration+0(FP), BX
+	MOVL	usec+0(FP), BX
 	MOVQ	$runtime·usleep2(SB), AX // to hide from 6l
 
 	// Execute call on m->g0 stack, in case we are not actually
@@ -345,7 +346,7 @@
 	MOVQ	R12, m_libcallg(R13)
 	// sp must be the last, because once async cpu profiler finds
 	// all three values to be non-zero, it will use them
-	LEAQ	8(SP), R12
+	LEAQ	usec+0(FP), R12
 	MOVQ	R12, m_libcallsp(R13)
 
 	MOVQ	m_g0(R13), R14
diff --git a/src/pkg/runtime/vlop_386.s b/src/pkg/runtime/vlop_386.s
index 9783fdc..f3d792c 100644
--- a/src/pkg/runtime/vlop_386.s
+++ b/src/pkg/runtime/vlop_386.s
@@ -29,7 +29,7 @@
  * C runtime for 64-bit divide.
  */
 
-// _mul64x32(r *uint64, a uint64, b uint32)
+// _mul64by32(r *uint64, a uint64, b uint32) uint32
 // sets *r = low 64 bits of 96-bit product a*b; returns high 32 bits.
 TEXT _mul64by32(SB), NOSPLIT, $0
 	MOVL	r+0(FP), CX
@@ -43,6 +43,7 @@
 	ADCL	$0, DX
 	MOVL	BX, 4(CX)
 	MOVL	DX, AX
+	MOVL	AX, ret+16(FP)
 	RET
 
 TEXT _div64by32(SB), NOSPLIT, $0
@@ -51,4 +52,5 @@
 	MOVL	a+4(FP), DX
 	DIVL	b+8(FP)
 	MOVL	DX, 0(CX)
+	MOVL	AX, ret+16(FP)
 	RET
diff --git a/src/pkg/runtime/vlop_arm.s b/src/pkg/runtime/vlop_arm.s
index 02bab31..3b5243c 100644
--- a/src/pkg/runtime/vlop_arm.s
+++ b/src/pkg/runtime/vlop_arm.s
@@ -31,18 +31,17 @@
 /* replaced use of R10 by R11 because the former can be the data segment base register */
 
 TEXT _mulv(SB), NOSPLIT, $0
-	MOVW	0(FP), R0
-	MOVW	4(FP), R2	/* l0 */
-	MOVW	8(FP), R11	/* h0 */
-	MOVW	12(FP), R4	/* l1 */
-	MOVW	16(FP), R5	/* h1 */
+	MOVW	l0+0(FP), R2	/* l0 */
+	MOVW	h0+4(FP), R11	/* h0 */
+	MOVW	l1+8(FP), R4	/* l1 */
+	MOVW	h1+12(FP), R5	/* h1 */
 	MULLU	R4, R2, (R7,R6)
 	MUL	R11, R4, R8
 	ADD	R8, R7
 	MUL	R2, R5, R8
 	ADD	R8, R7
-	MOVW	R6, 0(R(arg))
-	MOVW	R7, 4(R(arg))
+	MOVW	R6, ret_lo+16(FP)
+	MOVW	R7, ret_hi+20(FP)
 	RET
 
 // trampoline for _sfloat2. passes LR as arg0 and
diff --git a/src/pkg/runtime/vlrt.c b/src/pkg/runtime/vlrt.c
new file mode 100644
index 0000000..cab74c5
--- /dev/null
+++ b/src/pkg/runtime/vlrt.c
@@ -0,0 +1,906 @@
+// Inferno's libkern/vlrt-386.c
+// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-386.c
+//
+//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
+//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
+//         Portions Copyright 2009 The Go Authors. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// +build arm 386
+
+#include "../../cmd/ld/textflag.h"
+
+/*
+ * C runtime for 64-bit divide, others.
+ *
+ * TODO(rsc): The simple functions are dregs--8c knows how
+ * to generate the code directly now.  Find and remove.
+ */
+
+void	runtime·panicstring(char*);
+void	runtime·panicdivide(void);
+
+typedef	unsigned long	ulong;
+typedef	unsigned int	uint;
+typedef	unsigned short	ushort;
+typedef	unsigned char	uchar;
+typedef	signed char	schar;
+
+#define	SIGN(n)	(1UL<<(n-1))
+
+typedef	struct	Vlong	Vlong;
+struct	Vlong
+{
+	ulong	lo;
+	ulong	hi;
+};
+
+typedef	union	Vlong64	Vlong64;
+union	Vlong64
+{
+	long long	v;
+	Vlong	v2;
+};
+
+void	runtime·abort(void);
+
+#pragma textflag NOSPLIT
+Vlong
+_addv(Vlong a, Vlong b)	/* returns a+b, computed on the lo/hi halves */
+{
+	Vlong r;
+
+	r.lo = a.lo + b.lo;
+	r.hi = a.hi + b.hi;
+	if(r.lo < a.lo)	/* low-word sum wrapped around: carry into hi */
+		r.hi++;
+	return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_subv(Vlong a, Vlong b)	/* returns a-b, computed on the lo/hi halves */
+{
+	Vlong r;
+
+	r.lo = a.lo - b.lo;
+	r.hi = a.hi - b.hi;
+	if(r.lo > a.lo)	/* low-word difference wrapped around: borrow from hi */
+		r.hi--;
+	return r;
+}
+
+Vlong
+_d2v(double d)
+{
+	union { double d; Vlong vl; } x;
+	ulong xhi, xlo, ylo, yhi;
+	int sh;
+	Vlong y;
+
+	x.d = d;
+
+	xhi = (x.vl.hi & 0xfffff) | 0x100000;
+	xlo = x.vl.lo;
+	sh = 1075 - ((x.vl.hi >> 20) & 0x7ff);
+
+	ylo = 0;
+	yhi = 0;
+	if(sh >= 0) {
+		/* v = (hi||lo) >> sh */
+		if(sh < 32) {
+			if(sh == 0) {
+				ylo = xlo;
+				yhi = xhi;
+			} else {
+				ylo = (xlo >> sh) | (xhi << (32-sh));
+				yhi = xhi >> sh;
+			}
+		} else {
+			if(sh == 32) {
+				ylo = xhi;
+			} else
+			if(sh < 64) {
+				ylo = xhi >> (sh-32);
+			}
+		}
+	} else {
+		/* v = (hi||lo) << -sh */
+		sh = -sh;
+		if(sh <= 10) { /* NOTE: sh <= 11 on ARM??? */
+			ylo = xlo << sh;
+			yhi = (xhi << sh) | (xlo >> (32-sh));
+		} else {
+			/* overflow */
+			yhi = d;	/* causes something awful */
+		}
+	}
+	if(x.vl.hi & SIGN(32)) {
+		if(ylo != 0) {
+			ylo = -ylo;
+			yhi = ~yhi;
+		} else
+			yhi = -yhi;
+	}
+
+	y.hi = yhi;
+	y.lo = ylo;
+	return y;
+}
+
+Vlong
+_f2v(float f)
+{
+	return _d2v(f);
+}
+
+double
+_ul2d(ulong u)
+{
+	// compensate for bug in c
+	if(u & SIGN(32)) {
+		u ^= SIGN(32);
+		return 2147483648. + u;
+	}
+	return u;
+}
+
+double
+_v2d(Vlong x)
+{
+	if(x.hi & SIGN(32)) {
+		if(x.lo) {
+			x.lo = -x.lo;
+			x.hi = ~x.hi;
+		} else
+			x.hi = -x.hi;
+		return -(_ul2d(x.hi)*4294967296. + _ul2d(x.lo));
+	}
+	return (long)x.hi*4294967296. + x.lo;
+}
+
+float
+_v2f(Vlong x)
+{
+	return _v2d(x);
+}
+
+ulong	_div64by32(Vlong, ulong, ulong*);
+int	_mul64by32(Vlong*, Vlong, ulong);
+
+static void
+slowdodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)	/* unsigned shift-and-subtract division; q and r may each be 0 */
+{
+	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
+	int i;
+
+	numhi = num.hi;
+	numlo = num.lo;
+	denhi = den.hi;
+	denlo = den.lo;
+
+	/*
+	 * get a divide by zero
+	 */
+	if(denlo==0 && denhi==0) {
+		runtime·panicdivide();
+	}
+
+	/*
+	 * set up the divisor and find the number of iterations needed
+	 */
+	if(numhi >= SIGN(32)) {	/* top bit of num set: align den against 2^63 instead, so doubling den cannot overflow */
+		quohi = SIGN(32);
+		quolo = 0;
+	} else {
+		quohi = numhi;
+		quolo = numlo;
+	}
+	i = 0;
+	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {	/* double den until it reaches the target, counting shifts in i */
+		denhi = (denhi<<1) | (denlo>>31);
+		denlo <<= 1;
+		i++;
+	}
+
+	quohi = 0;
+	quolo = 0;
+	for(; i >= 0; i--) {	/* i+1 passes, producing one quotient bit per pass */
+		quohi = (quohi<<1) | (quolo>>31);
+		quolo <<= 1;
+		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {	/* num >= den: subtract and set this quotient bit */
+			t = numlo;
+			numlo -= denlo;
+			if(numlo > t)	/* low-word subtraction wrapped: borrow from hi */
+				numhi--;
+			numhi -= denhi;
+			quolo |= 1;
+		}
+		denlo = (denlo>>1) | (denhi<<31);	/* halve den for the next lower bit */
+		denhi >>= 1;
+	}
+
+	if(q) {
+		q->lo = quolo;
+		q->hi = quohi;
+	}
+	if(r) {	/* what is left of num is the remainder */
+		r->lo = numlo;
+		r->hi = numhi;
+	}
+}
+
+#ifdef GOARCH_arm
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
+{
+	slowdodiv(num, den, qp, rp);
+}
+#endif
+
+#ifdef GOARCH_386
+static void
+dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)	/* unsigned divide; stores quotient in *qp and remainder in *rp when non-zero */
+{
+	ulong n;
+	Vlong x, q, r;
+	
+	if(den.hi > num.hi || (den.hi == num.hi && den.lo > num.lo)){	/* den > num: quotient 0, remainder num */
+		if(qp) {
+			qp->hi = 0;
+			qp->lo = 0;
+		}
+		if(rp) {
+			rp->hi = num.hi;
+			rp->lo = num.lo;
+		}
+		return;
+	}
+
+	if(den.hi != 0){
+		q.hi = 0;
+		n = num.hi/den.hi;	/* quotient estimate from the high words */
+		if(_mul64by32(&x, den, n) || x.hi > num.hi || (x.hi == num.hi && x.lo > num.lo))	/* den*n overflowed 64 bits or exceeds num: use the slow path */
+			slowdodiv(num, den, &q, &r);
+		else {
+			q.lo = n;
+			*(long long*)&r = *(long long*)&num - *(long long*)&x;	/* r = num - den*n */
+		}
+	} else {
+		if(num.hi >= den.lo){	/* NOTE(review): presumably reduces num.hi below den.lo so the _div64by32 quotient fits in 32 bits */
+			if(den.lo == 0)
+				runtime·panicdivide();
+			q.hi = n = num.hi/den.lo;
+			num.hi -= den.lo*n;
+		} else {
+			q.hi = 0;
+		}
+		q.lo = _div64by32(num, den.lo, &r.lo);
+		r.hi = 0;
+	}
+	if(qp) {
+		qp->lo = q.lo;
+		qp->hi = q.hi;
+	}
+	if(rp) {
+		rp->lo = r.lo;
+		rp->hi = r.hi;
+	}
+}
+#endif
+
+Vlong
+_divvu(Vlong n, Vlong d)
+{
+	Vlong q;
+
+	if(n.hi == 0 && d.hi == 0) {
+		if(d.lo == 0)
+			runtime·panicdivide();
+		q.hi = 0;
+		q.lo = n.lo / d.lo;
+		return q;
+	}
+	dodiv(n, d, &q, 0);
+	return q;
+}
+
+Vlong
+runtime·uint64div(Vlong n, Vlong d)
+{
+	return _divvu(n, d);
+}
+
+Vlong
+_modvu(Vlong n, Vlong d)
+{
+	Vlong r;
+
+	if(n.hi == 0 && d.hi == 0) {
+		if(d.lo == 0)
+			runtime·panicdivide();
+		r.hi = 0;
+		r.lo = n.lo % d.lo;
+		return r;
+	}
+	dodiv(n, d, 0, &r);
+	return r;
+}
+
+Vlong
+runtime·uint64mod(Vlong n, Vlong d)
+{
+	return _modvu(n, d);
+}
+
+static void
+vneg(Vlong *v)	/* negates *v in place (two's complement across lo/hi) */
+{
+
+	if(v->lo == 0) {	/* low word stays 0, so only the high word is negated */
+		v->hi = -v->hi;
+		return;
+	}
+	v->lo = -v->lo;
+	v->hi = ~v->hi;	/* nonzero low word produces no carry, so hi is just complemented */
+}
+
+Vlong
+_divv(Vlong n, Vlong d)	/* signed divide n/d */
+{
+	long nneg, dneg;
+	Vlong q;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {	/* both operands are sign-extended 32-bit values: use a 32-bit divide */
+		if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
+			// special case: 32-bit -0x80000000 / -1 causes divide error,
+			// but it's okay in this 64-bit context.
+			q.lo = 0x80000000;
+			q.hi = 0;
+			return q;
+		}
+		if(d.lo == 0)
+			runtime·panicdivide();
+		q.lo = (long)n.lo / (long)d.lo;
+		q.hi = ((long)q.lo) >> 31;	/* sign-extend the 32-bit quotient */
+		return q;
+	}
+	nneg = n.hi >> 31;	/* nonzero when the sign bit of n is set */
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, &q, 0);	/* unsigned divide of the magnitudes; remainder not needed */
+	if(nneg != dneg)	/* operand signs differ: quotient is negative */
+		vneg(&q);
+	return q;
+}
+
+Vlong
+runtime·int64div(Vlong n, Vlong d)
+{
+	return _divv(n, d);
+}
+
+Vlong
+_modv(Vlong n, Vlong d)	/* signed remainder n%d */
+{
+	long nneg, dneg;
+	Vlong r;
+
+	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {	/* both operands are sign-extended 32-bit values: use a 32-bit modulo */
+		if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
+			// special case: 32-bit -0x80000000 % -1 causes divide error,
+			// but it's okay in this 64-bit context.
+			r.lo = 0;
+			r.hi = 0;
+			return r;
+		}
+		if(d.lo == 0)
+			runtime·panicdivide();
+		r.lo = (long)n.lo % (long)d.lo;
+		r.hi = ((long)r.lo) >> 31;	/* sign-extend the 32-bit remainder */
+		return r;
+	}
+	nneg = n.hi >> 31;	/* nonzero when the sign bit of n is set */
+	if(nneg)
+		vneg(&n);
+	dneg = d.hi >> 31;
+	if(dneg)
+		vneg(&d);
+	dodiv(n, d, 0, &r);	/* unsigned divide of the magnitudes; quotient not needed */
+	if(nneg)	/* remainder takes the sign of the dividend n */
+		vneg(&r);
+	return r;
+}
+
+Vlong
+runtime·int64mod(Vlong n, Vlong d)
+{
+	return _modv(n, d);
+}
+
+Vlong
+_rshav(Vlong a, int b)
+{
+	long t;
+	Vlong r;
+
+	t = a.hi;
+	if(b >= 32) {
+		r.hi = t>>31;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r.lo = t>>31;
+			return r;
+		}
+		r.lo = t >> (b-32);
+		return r;
+	}
+	if(b <= 0) {
+		r.hi = t;
+		r.lo = a.lo;
+		return r;
+	}
+	r.hi = t >> b;
+	r.lo = (t << (32-b)) | (a.lo >> b);
+	return r;
+}
+
+Vlong
+_rshlv(Vlong a, int b)
+{
+	ulong t;
+	Vlong r;
+
+	t = a.hi;
+	if(b >= 32) {
+		r.hi = 0;
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			r.lo = 0;
+			return r;
+		}
+		r.lo = t >> (b-32);
+		return r;
+	}
+	if(b <= 0) {
+		r.hi = t;
+		r.lo = a.lo;
+		return r;
+	}
+	r.hi = t >> b;
+	r.lo = (t << (32-b)) | (a.lo >> b);
+	return r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_lshv(Vlong a, int b)	/* returns a<<b; the initializers below are in {lo, hi} order */
+{
+	ulong t;
+
+	t = a.lo;
+	if(b >= 32) {	/* low word becomes 0; only bits of a.lo can reach the high word */
+		if(b >= 64) {
+			/* this is illegal re C standard */
+			return (Vlong){0, 0};
+		}
+		return (Vlong){0, t<<(b-32)};
+	}
+	if(b <= 0) {	/* zero or negative count: return a unchanged */
+		return (Vlong){t, a.hi};
+	}
+	return (Vlong){t<<b, (t >> (32-b)) | (a.hi << b)};	/* top b bits of lo move into the bottom of hi */
+}
+
+Vlong
+_andv(Vlong a, Vlong b)
+{
+	Vlong r;
+
+	r.hi = a.hi & b.hi;
+	r.lo = a.lo & b.lo;
+	return r;
+}
+
+Vlong
+_orv(Vlong a, Vlong b)
+{
+	Vlong r;
+
+	r.hi = a.hi | b.hi;
+	r.lo = a.lo | b.lo;
+	return r;
+}
+
+Vlong
+_xorv(Vlong a, Vlong b)
+{
+	Vlong r;
+
+	r.hi = a.hi ^ b.hi;
+	r.lo = a.lo ^ b.lo;
+	return r;
+}
+
+Vlong
+_vpp(Vlong *r)
+{
+	Vlong l;
+
+	l = *r;
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+	return l;
+}
+
+Vlong
+_vmm(Vlong *r)
+{
+	Vlong l;
+
+	l = *r;
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+	return l;
+}
+
+Vlong
+_ppv(Vlong *r)
+{
+
+	r->lo++;
+	if(r->lo == 0)
+		r->hi++;
+	return *r;
+}
+
+Vlong
+_mmv(Vlong *r)
+{
+
+	if(r->lo == 0)
+		r->hi--;
+	r->lo--;
+	return *r;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_vasop(void *lv, Vlong fn(Vlong, Vlong), int type, Vlong rv)
+{
+	Vlong t, u;
+
+	u.lo = 0;
+	u.hi = 0;
+	switch(type) {
+	default:
+		runtime·abort();
+		break;
+
+	case 1:	/* schar */
+		t.lo = *(schar*)lv;
+		t.hi = t.lo >> 31;
+		u = fn(t, rv);
+		*(schar*)lv = u.lo;
+		break;
+
+	case 2:	/* uchar */
+		t.lo = *(uchar*)lv;
+		t.hi = 0;
+		u = fn(t, rv);
+		*(uchar*)lv = u.lo;
+		break;
+
+	case 3:	/* short */
+		t.lo = *(short*)lv;
+		t.hi = t.lo >> 31;
+		u = fn(t, rv);
+		*(short*)lv = u.lo;
+		break;
+
+	case 4:	/* ushort */
+		t.lo = *(ushort*)lv;
+		t.hi = 0;
+		u = fn(t, rv);
+		*(ushort*)lv = u.lo;
+		break;
+
+	case 9:	/* int */
+		t.lo = *(int*)lv;
+		t.hi = t.lo >> 31;
+		u = fn(t, rv);
+		*(int*)lv = u.lo;
+		break;
+
+	case 10:	/* uint */
+		t.lo = *(uint*)lv;
+		t.hi = 0;
+		u = fn(t, rv);
+		*(uint*)lv = u.lo;
+		break;
+
+	case 5:	/* long */
+		t.lo = *(long*)lv;
+		t.hi = t.lo >> 31;
+		u = fn(t, rv);
+		*(long*)lv = u.lo;
+		break;
+
+	case 6:	/* ulong */
+		t.lo = *(ulong*)lv;
+		t.hi = 0;
+		u = fn(t, rv);
+		*(ulong*)lv = u.lo;
+		break;
+
+	case 7:	/* vlong */
+	case 8:	/* uvlong */
+		if((void*)fn == _lshv || (void*)fn == _rshav || (void*)fn == _rshlv)
+			u = ((Vlong(*)(Vlong,int))fn)(*(Vlong*)lv, *(int*)&rv);
+		else
+			u = fn(*(Vlong*)lv, rv);
+		*(Vlong*)lv = u;
+		break;
+	}
+	return u;
+}
+
+Vlong
+_p2v(void *p)
+{
+	long t;
+	Vlong ret;
+
+	t = (ulong)p;
+	ret.lo = t;
+	ret.hi = 0;
+	return ret;
+}
+
+Vlong
+_sl2v(long sl)
+{
+	long t;
+	Vlong ret;
+
+	t = sl;
+	ret.lo = t;
+	ret.hi = t >> 31;
+	return ret;
+}
+
+Vlong
+_ul2v(ulong ul)
+{
+	long t;
+	Vlong ret;
+
+	t = ul;
+	ret.lo = t;
+	ret.hi = 0;
+	return ret;
+}
+
+#pragma textflag NOSPLIT
+Vlong
+_si2v(int si)
+{
+	return (Vlong){si, si>>31};
+}
+
+Vlong
+_ui2v(uint ui)
+{
+	long t;
+	Vlong ret;
+
+	t = ui;
+	ret.lo = t;
+	ret.hi = 0;
+	return ret;
+}
+
+Vlong
+_sh2v(long sh)
+{
+	long t;
+	Vlong ret;
+
+	t = (sh << 16) >> 16;
+	ret.lo = t;
+	ret.hi = t >> 31;
+	return ret;
+}
+
+Vlong
+_uh2v(ulong ul)
+{
+	long t;
+	Vlong ret;
+
+	t = ul & 0xffff;
+	ret.lo = t;
+	ret.hi = 0;
+	return ret;
+}
+
+Vlong
+_sc2v(long uc)
+{
+	long t;
+	Vlong ret;
+
+	t = (uc << 24) >> 24;
+	ret.lo = t;
+	ret.hi = t >> 31;
+	return ret;
+}
+
+Vlong
+_uc2v(ulong ul)
+{
+	long t;
+	Vlong ret;
+
+	t = ul & 0xff;
+	ret.lo = t;
+	ret.hi = 0;
+	return ret;
+}
+
+long
+_v2sc(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xff;
+	return (t << 24) >> 24;
+}
+
+long
+_v2uc(Vlong rv)
+{
+
+	return rv.lo & 0xff;
+}
+
+long
+_v2sh(Vlong rv)
+{
+	long t;
+
+	t = rv.lo & 0xffff;
+	return (t << 16) >> 16;
+}
+
+long
+_v2uh(Vlong rv)
+{
+
+	return rv.lo & 0xffff;
+}
+
+long
+_v2sl(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+long
+_v2ul(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+#pragma textflag NOSPLIT
+long
+_v2si(Vlong rv)
+{
+	return rv.lo;
+}
+
+long
+_v2ui(Vlong rv)
+{
+
+	return rv.lo;
+}
+
+int
+_testv(Vlong rv)
+{
+	return rv.lo || rv.hi;
+}
+
+int
+_eqv(Vlong lv, Vlong rv)
+{
+	return lv.lo == rv.lo && lv.hi == rv.hi;
+}
+
+int
+_nev(Vlong lv, Vlong rv)
+{
+	return lv.lo != rv.lo || lv.hi != rv.hi;
+}
+
+int
+_ltv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi < (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_gtv(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+#pragma textflag NOSPLIT
+int
+_gev(Vlong lv, Vlong rv)
+{
+	return (long)lv.hi > (long)rv.hi ||
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}
+
+int
+_lov(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi ||
+		(lv.hi == rv.hi && lv.lo < rv.lo);
+}
+
+int
+_lsv(Vlong lv, Vlong rv)
+{
+	return lv.hi < rv.hi ||
+		(lv.hi == rv.hi && lv.lo <= rv.lo);
+}
+
+int
+_hiv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi ||
+		(lv.hi == rv.hi && lv.lo > rv.lo);
+}
+
+int
+_hsv(Vlong lv, Vlong rv)
+{
+	return lv.hi > rv.hi ||
+		(lv.hi == rv.hi && lv.lo >= rv.lo);
+}
diff --git a/src/pkg/runtime/vlrt_386.c b/src/pkg/runtime/vlrt_386.c
deleted file mode 100644
index bda67b1..0000000
--- a/src/pkg/runtime/vlrt_386.c
+++ /dev/null
@@ -1,819 +0,0 @@
-// Inferno's libkern/vlrt-386.c
-// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-386.c
-//
-//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
-//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
-//         Portions Copyright 2009 The Go Authors. All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-#include "../../cmd/ld/textflag.h"
-
-/*
- * C runtime for 64-bit divide, others.
- *
- * TODO(rsc): The simple functions are dregs--8c knows how
- * to generate the code directly now.  Find and remove.
- */
-
-extern void runtime·panicdivide(void);
-
-typedef	unsigned long	ulong;
-typedef	unsigned int	uint;
-typedef	unsigned short	ushort;
-typedef	unsigned char	uchar;
-typedef	signed char	schar;
-
-#define	SIGN(n)	(1UL<<(n-1))
-
-typedef	union	Vlong	Vlong;
-union	Vlong
-{
-	long long	v;
-	struct
-	{
-		ulong	lo;
-		ulong	hi;
-	}		v2;
-};
-
-void	runtime·abort(void);
-
-void
-_d2v(Vlong *y, double d)
-{
-	union { double d; Vlong vl; } x;
-	ulong xhi, xlo, ylo, yhi;
-	int sh;
-
-	x.d = d;
-
-	xhi = (x.vl.v2.hi & 0xfffff) | 0x100000;
-	xlo = x.vl.v2.lo;
-	sh = 1075 - ((x.vl.v2.hi >> 20) & 0x7ff);
-
-	ylo = 0;
-	yhi = 0;
-	if(sh >= 0) {
-		/* v = (hi||lo) >> sh */
-		if(sh < 32) {
-			if(sh == 0) {
-				ylo = xlo;
-				yhi = xhi;
-			} else {
-				ylo = (xlo >> sh) | (xhi << (32-sh));
-				yhi = xhi >> sh;
-			}
-		} else {
-			if(sh == 32) {
-				ylo = xhi;
-			} else
-			if(sh < 64) {
-				ylo = xhi >> (sh-32);
-			}
-		}
-	} else {
-		/* v = (hi||lo) << -sh */
-		sh = -sh;
-		if(sh <= 10) {
-			ylo = xlo << sh;
-			yhi = (xhi << sh) | (xlo >> (32-sh));
-		} else {
-			/* overflow */
-			yhi = d;	/* causes something awful */
-		}
-	}
-	if(x.vl.v2.hi & SIGN(32)) {
-		if(ylo != 0) {
-			ylo = -ylo;
-			yhi = ~yhi;
-		} else
-			yhi = -yhi;
-	}
-
-	y->v2.hi = yhi;
-	y->v2.lo = ylo;
-}
-
-void
-_f2v(Vlong *y, float f)
-{
-
-	_d2v(y, f);
-}
-
-double
-_v2d(Vlong x)
-{
-	if(x.v2.hi & SIGN(32)) {
-		if(x.v2.lo) {
-			x.v2.lo = -x.v2.lo;
-			x.v2.hi = ~x.v2.hi;
-		} else
-			x.v2.hi = -x.v2.hi;
-		return -((long)x.v2.hi*4294967296. + x.v2.lo);
-	}
-	return (long)x.v2.hi*4294967296. + x.v2.lo;
-}
-
-float
-_v2f(Vlong x)
-{
-	return _v2d(x);
-}
-
-ulong	_div64by32(Vlong, ulong, ulong*);
-int	_mul64by32(Vlong*, Vlong, ulong);
-
-static void
-slowdodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
-{
-	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
-	int i;
-
-	numhi = num.v2.hi;
-	numlo = num.v2.lo;
-	denhi = den.v2.hi;
-	denlo = den.v2.lo;
-
-	/*
-	 * get a divide by zero
-	 */
-	if(denlo==0 && denhi==0) {
-		numlo = numlo / denlo;
-	}
-
-	/*
-	 * set up the divisor and find the number of iterations needed
-	 */
-	if(numhi >= SIGN(32)) {
-		quohi = SIGN(32);
-		quolo = 0;
-	} else {
-		quohi = numhi;
-		quolo = numlo;
-	}
-	i = 0;
-	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
-		denhi = (denhi<<1) | (denlo>>31);
-		denlo <<= 1;
-		i++;
-	}
-
-	quohi = 0;
-	quolo = 0;
-	for(; i >= 0; i--) {
-		quohi = (quohi<<1) | (quolo>>31);
-		quolo <<= 1;
-		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
-			t = numlo;
-			numlo -= denlo;
-			if(numlo > t)
-				numhi--;
-			numhi -= denhi;
-			quolo |= 1;
-		}
-		denlo = (denlo>>1) | (denhi<<31);
-		denhi >>= 1;
-	}
-
-	if(q) {
-		q->v2.lo = quolo;
-		q->v2.hi = quohi;
-	}
-	if(r) {
-		r->v2.lo = numlo;
-		r->v2.hi = numhi;
-	}
-}
-
-static void
-dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
-{
-	ulong n;
-	Vlong x, q, r;
-
-	if(den.v2.hi > num.v2.hi || (den.v2.hi == num.v2.hi && den.v2.lo > num.v2.lo)){
-		if(qp) {
-			qp->v2.hi = 0;
-			qp->v2.lo = 0;
-		}
-		if(rp) {
-			rp->v2.hi = num.v2.hi;
-			rp->v2.lo = num.v2.lo;
-		}
-		return;
-	}
-
-	if(den.v2.hi != 0){
-		q.v2.hi = 0;
-		n = num.v2.hi/den.v2.hi;
-		if(_mul64by32(&x, den, n) || x.v2.hi > num.v2.hi || (x.v2.hi == num.v2.hi && x.v2.lo > num.v2.lo))
-			slowdodiv(num, den, &q, &r);
-		else {
-			q.v2.lo = n;
-			r.v = num.v - x.v;
-		}
-	} else {
-		if(num.v2.hi >= den.v2.lo){
-			if(den.v2.lo == 0)
-				runtime·panicdivide();
-			q.v2.hi = n = num.v2.hi/den.v2.lo;
-			num.v2.hi -= den.v2.lo*n;
-		} else {
-			q.v2.hi = 0;
-		}
-		q.v2.lo = _div64by32(num, den.v2.lo, &r.v2.lo);
-		r.v2.hi = 0;
-	}
-	if(qp) {
-		qp->v2.lo = q.v2.lo;
-		qp->v2.hi = q.v2.hi;
-	}
-	if(rp) {
-		rp->v2.lo = r.v2.lo;
-		rp->v2.hi = r.v2.hi;
-	}
-}
-
-void
-_divvu(Vlong *q, Vlong n, Vlong d)
-{
-
-	if(n.v2.hi == 0 && d.v2.hi == 0) {
-		if(d.v2.lo == 0)
-			runtime·panicdivide();
-		q->v2.hi = 0;
-		q->v2.lo = n.v2.lo / d.v2.lo;
-		return;
-	}
-	dodiv(n, d, q, 0);
-}
-
-void
-runtime·uint64div(Vlong n, Vlong d, Vlong q)
-{
-	_divvu(&q, n, d);
-}
-
-void
-_modvu(Vlong *r, Vlong n, Vlong d)
-{
-
-	if(n.v2.hi == 0 && d.v2.hi == 0) {
-		if(d.v2.lo == 0)
-			runtime·panicdivide();
-		r->v2.hi = 0;
-		r->v2.lo = n.v2.lo % d.v2.lo;
-		return;
-	}
-	dodiv(n, d, 0, r);
-}
-
-void
-runtime·uint64mod(Vlong n, Vlong d, Vlong q)
-{
-	_modvu(&q, n, d);
-}
-
-static void
-vneg(Vlong *v)
-{
-
-	if(v->v2.lo == 0) {
-		v->v2.hi = -v->v2.hi;
-		return;
-	}
-	v->v2.lo = -v->v2.lo;
-	v->v2.hi = ~v->v2.hi;
-}
-
-void
-_divv(Vlong *q, Vlong n, Vlong d)
-{
-	long nneg, dneg;
-
-	if(n.v2.hi == (((long)n.v2.lo)>>31) && d.v2.hi == (((long)d.v2.lo)>>31)) {
-		if((long)n.v2.lo == -0x80000000 && (long)d.v2.lo == -1) {
-			// special case: 32-bit -0x80000000 / -1 causes divide error,
-			// but it's okay in this 64-bit context.
-			q->v2.lo = 0x80000000;
-			q->v2.hi = 0;
-			return;
-		}
-		if(d.v2.lo == 0)
-			runtime·panicdivide();
-		q->v2.lo = (long)n.v2.lo / (long)d.v2.lo;
-		q->v2.hi = ((long)q->v2.lo) >> 31;
-		return;
-	}
-	nneg = n.v2.hi >> 31;
-	if(nneg)
-		vneg(&n);
-	dneg = d.v2.hi >> 31;
-	if(dneg)
-		vneg(&d);
-	dodiv(n, d, q, 0);
-	if(nneg != dneg)
-		vneg(q);
-}
-
-void
-runtime·int64div(Vlong n, Vlong d, Vlong q)
-{
-	_divv(&q, n, d);
-}
-
-void
-_modv(Vlong *r, Vlong n, Vlong d)
-{
-	long nneg, dneg;
-
-	if(n.v2.hi == (((long)n.v2.lo)>>31) && d.v2.hi == (((long)d.v2.lo)>>31)) {
-		if((long)n.v2.lo == -0x80000000 && (long)d.v2.lo == -1) {
-			// special case: 32-bit -0x80000000 % -1 causes divide error,
-			// but it's okay in this 64-bit context.
-			r->v2.lo = 0;
-			r->v2.hi = 0;
-			return;
-		}
-		if(d.v2.lo == 0)
-			runtime·panicdivide();
-		r->v2.lo = (long)n.v2.lo % (long)d.v2.lo;
-		r->v2.hi = ((long)r->v2.lo) >> 31;
-		return;
-	}
-	nneg = n.v2.hi >> 31;
-	if(nneg)
-		vneg(&n);
-	dneg = d.v2.hi >> 31;
-	if(dneg)
-		vneg(&d);
-	dodiv(n, d, 0, r);
-	if(nneg)
-		vneg(r);
-}
-
-void
-runtime·int64mod(Vlong n, Vlong d, Vlong q)
-{
-	_modv(&q, n, d);
-}
-
-void
-_rshav(Vlong *r, Vlong a, int b)
-{
-	long t;
-
-	t = a.v2.hi;
-	if(b >= 32) {
-		r->v2.hi = t>>31;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->v2.lo = t>>31;
-			return;
-		}
-		r->v2.lo = t >> (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->v2.hi = t;
-		r->v2.lo = a.v2.lo;
-		return;
-	}
-	r->v2.hi = t >> b;
-	r->v2.lo = (t << (32-b)) | (a.v2.lo >> b);
-}
-
-void
-_rshlv(Vlong *r, Vlong a, int b)
-{
-	ulong t;
-
-	t = a.v2.hi;
-	if(b >= 32) {
-		r->v2.hi = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->v2.lo = 0;
-			return;
-		}
-		r->v2.lo = t >> (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->v2.hi = t;
-		r->v2.lo = a.v2.lo;
-		return;
-	}
-	r->v2.hi = t >> b;
-	r->v2.lo = (t << (32-b)) | (a.v2.lo >> b);
-}
-
-#pragma textflag NOSPLIT
-void
-_lshv(Vlong *r, Vlong a, int b)
-{
-	ulong t;
-
-	t = a.v2.lo;
-	if(b >= 32) {
-		r->v2.lo = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->v2.hi = 0;
-			return;
-		}
-		r->v2.hi = t << (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->v2.lo = t;
-		r->v2.hi = a.v2.hi;
-		return;
-	}
-	r->v2.lo = t << b;
-	r->v2.hi = (t >> (32-b)) | (a.v2.hi << b);
-}
-
-void
-_andv(Vlong *r, Vlong a, Vlong b)
-{
-	r->v2.hi = a.v2.hi & b.v2.hi;
-	r->v2.lo = a.v2.lo & b.v2.lo;
-}
-
-void
-_orv(Vlong *r, Vlong a, Vlong b)
-{
-	r->v2.hi = a.v2.hi | b.v2.hi;
-	r->v2.lo = a.v2.lo | b.v2.lo;
-}
-
-void
-_xorv(Vlong *r, Vlong a, Vlong b)
-{
-	r->v2.hi = a.v2.hi ^ b.v2.hi;
-	r->v2.lo = a.v2.lo ^ b.v2.lo;
-}
-
-void
-_vpp(Vlong *l, Vlong *r)
-{
-
-	l->v2.hi = r->v2.hi;
-	l->v2.lo = r->v2.lo;
-	r->v2.lo++;
-	if(r->v2.lo == 0)
-		r->v2.hi++;
-}
-
-void
-_vmm(Vlong *l, Vlong *r)
-{
-
-	l->v2.hi = r->v2.hi;
-	l->v2.lo = r->v2.lo;
-	if(r->v2.lo == 0)
-		r->v2.hi--;
-	r->v2.lo--;
-}
-
-void
-_ppv(Vlong *l, Vlong *r)
-{
-
-	r->v2.lo++;
-	if(r->v2.lo == 0)
-		r->v2.hi++;
-	l->v2.hi = r->v2.hi;
-	l->v2.lo = r->v2.lo;
-}
-
-void
-_mmv(Vlong *l, Vlong *r)
-{
-
-	if(r->v2.lo == 0)
-		r->v2.hi--;
-	r->v2.lo--;
-	l->v2.hi = r->v2.hi;
-	l->v2.lo = r->v2.lo;
-}
-
-void
-_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
-{
-	Vlong t, u;
-
-	u.v2.lo = 0;
-	u.v2.hi = 0;
-	switch(type) {
-	default:
-		runtime·abort();
-		break;
-
-	case 1:	/* schar */
-		t.v2.lo = *(schar*)lv;
-		t.v2.hi = t.v2.lo >> 31;
-		fn(&u, t, rv);
-		*(schar*)lv = u.v2.lo;
-		break;
-
-	case 2:	/* uchar */
-		t.v2.lo = *(uchar*)lv;
-		t.v2.hi = 0;
-		fn(&u, t, rv);
-		*(uchar*)lv = u.v2.lo;
-		break;
-
-	case 3:	/* short */
-		t.v2.lo = *(short*)lv;
-		t.v2.hi = t.v2.lo >> 31;
-		fn(&u, t, rv);
-		*(short*)lv = u.v2.lo;
-		break;
-
-	case 4:	/* ushort */
-		t.v2.lo = *(ushort*)lv;
-		t.v2.hi = 0;
-		fn(&u, t, rv);
-		*(ushort*)lv = u.v2.lo;
-		break;
-
-	case 9:	/* int */
-		t.v2.lo = *(int*)lv;
-		t.v2.hi = t.v2.lo >> 31;
-		fn(&u, t, rv);
-		*(int*)lv = u.v2.lo;
-		break;
-
-	case 10:	/* uint */
-		t.v2.lo = *(uint*)lv;
-		t.v2.hi = 0;
-		fn(&u, t, rv);
-		*(uint*)lv = u.v2.lo;
-		break;
-
-	case 5:	/* long */
-		t.v2.lo = *(long*)lv;
-		t.v2.hi = t.v2.lo >> 31;
-		fn(&u, t, rv);
-		*(long*)lv = u.v2.lo;
-		break;
-
-	case 6:	/* ulong */
-		t.v2.lo = *(ulong*)lv;
-		t.v2.hi = 0;
-		fn(&u, t, rv);
-		*(ulong*)lv = u.v2.lo;
-		break;
-
-	case 7:	/* vlong */
-	case 8:	/* uvlong */
-		fn(&u, *(Vlong*)lv, rv);
-		*(Vlong*)lv = u;
-		break;
-	}
-	*ret = u;
-}
-
-void
-_p2v(Vlong *ret, void *p)
-{
-	long t;
-
-	t = (ulong)p;
-	ret->v2.lo = t;
-	ret->v2.hi = 0;
-}
-
-void
-_sl2v(Vlong *ret, long sl)
-{
-	long t;
-
-	t = sl;
-	ret->v2.lo = t;
-	ret->v2.hi = t >> 31;
-}
-
-void
-_ul2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul;
-	ret->v2.lo = t;
-	ret->v2.hi = 0;
-}
-
-void
-_si2v(Vlong *ret, int si)
-{
-	long t;
-
-	t = si;
-	ret->v2.lo = t;
-	ret->v2.hi = t >> 31;
-}
-
-void
-_ui2v(Vlong *ret, uint ui)
-{
-	long t;
-
-	t = ui;
-	ret->v2.lo = t;
-	ret->v2.hi = 0;
-}
-
-void
-_sh2v(Vlong *ret, long sh)
-{
-	long t;
-
-	t = (sh << 16) >> 16;
-	ret->v2.lo = t;
-	ret->v2.hi = t >> 31;
-}
-
-void
-_uh2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul & 0xffff;
-	ret->v2.lo = t;
-	ret->v2.hi = 0;
-}
-
-void
-_sc2v(Vlong *ret, long uc)
-{
-	long t;
-
-	t = (uc << 24) >> 24;
-	ret->v2.lo = t;
-	ret->v2.hi = t >> 31;
-}
-
-void
-_uc2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul & 0xff;
-	ret->v2.lo = t;
-	ret->v2.hi = 0;
-}
-
-long
-_v2sc(Vlong rv)
-{
-	long t;
-
-	t = rv.v2.lo & 0xff;
-	return (t << 24) >> 24;
-}
-
-long
-_v2uc(Vlong rv)
-{
-
-	return rv.v2.lo & 0xff;
-}
-
-long
-_v2sh(Vlong rv)
-{
-	long t;
-
-	t = rv.v2.lo & 0xffff;
-	return (t << 16) >> 16;
-}
-
-long
-_v2uh(Vlong rv)
-{
-
-	return rv.v2.lo & 0xffff;
-}
-
-long
-_v2sl(Vlong rv)
-{
-
-	return rv.v2.lo;
-}
-
-long
-_v2ul(Vlong rv)
-{
-
-	return rv.v2.lo;
-}
-
-long
-_v2si(Vlong rv)
-{
-
-	return rv.v2.lo;
-}
-
-long
-_v2ui(Vlong rv)
-{
-
-	return rv.v2.lo;
-}
-
-int
-_testv(Vlong rv)
-{
-	return rv.v2.lo || rv.v2.hi;
-}
-
-int
-_eqv(Vlong lv, Vlong rv)
-{
-	return lv.v2.lo == rv.v2.lo && lv.v2.hi == rv.v2.hi;
-}
-
-int
-_nev(Vlong lv, Vlong rv)
-{
-	return lv.v2.lo != rv.v2.lo || lv.v2.hi != rv.v2.hi;
-}
-
-int
-_ltv(Vlong lv, Vlong rv)
-{
-	return (long)lv.v2.hi < (long)rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo < rv.v2.lo);
-}
-
-int
-_lev(Vlong lv, Vlong rv)
-{
-	return (long)lv.v2.hi < (long)rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo <= rv.v2.lo);
-}
-
-int
-_gtv(Vlong lv, Vlong rv)
-{
-	return (long)lv.v2.hi > (long)rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo > rv.v2.lo);
-}
-
-int
-_gev(Vlong lv, Vlong rv)
-{
-	return (long)lv.v2.hi > (long)rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo >= rv.v2.lo);
-}
-
-int
-_lov(Vlong lv, Vlong rv)
-{
-	return lv.v2.hi < rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo < rv.v2.lo);
-}
-
-int
-_lsv(Vlong lv, Vlong rv)
-{
-	return lv.v2.hi < rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo <= rv.v2.lo);
-}
-
-int
-_hiv(Vlong lv, Vlong rv)
-{
-	return lv.v2.hi > rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo > rv.v2.lo);
-}
-
-int
-_hsv(Vlong lv, Vlong rv)
-{
-	return lv.v2.hi > rv.v2.hi ||
-		(lv.v2.hi == rv.v2.hi && lv.v2.lo >= rv.v2.lo);
-}
diff --git a/src/pkg/runtime/vlrt_arm.c b/src/pkg/runtime/vlrt_arm.c
deleted file mode 100644
index b342a3e..0000000
--- a/src/pkg/runtime/vlrt_arm.c
+++ /dev/null
@@ -1,769 +0,0 @@
-// Inferno's libkern/vlrt-arm.c
-// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-arm.c
-//
-//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
-//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
-//         Portions Copyright 2009 The Go Authors. All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-#include "../../cmd/ld/textflag.h"
-
-// declared here to avoid include of runtime.h
-void	runtime·panicstring(char*);
-void	runtime·panicdivide(void);
-
-typedef unsigned long   ulong;
-typedef unsigned int    uint;
-typedef unsigned short  ushort;
-typedef unsigned char   uchar;
-typedef signed char     schar;
-
-#define SIGN(n) (1UL<<(n-1))
-
-typedef struct  Vlong   Vlong;
-struct  Vlong
-{
-	ulong   lo;
-	ulong   hi;
-};
-
-void    runtime·abort(void);
-
-#pragma textflag NOSPLIT
-void
-_addv(Vlong *r, Vlong a, Vlong b)
-{
-	r->lo = a.lo + b.lo;
-	r->hi = a.hi + b.hi;
-	if(r->lo < a.lo)
-		r->hi++;
-}
-
-#pragma textflag NOSPLIT
-void
-_subv(Vlong *r, Vlong a, Vlong b)
-{
-	r->lo = a.lo - b.lo;
-	r->hi = a.hi - b.hi;
-	if(r->lo > a.lo)
-		r->hi--;
-}
-
-void
-_d2v(Vlong *y, double d)
-{
-	union { double d; Vlong vl; } x;
-	ulong xhi, xlo, ylo, yhi;
-	int sh;
-
-	x.d = d;
-
-	xhi = (x.vl.hi & 0xfffff) | 0x100000;
-	xlo = x.vl.lo;
-	sh = 1075 - ((x.vl.hi >> 20) & 0x7ff);
-
-	ylo = 0;
-	yhi = 0;
-	if(sh >= 0) {
-		/* v = (hi||lo) >> sh */
-		if(sh < 32) {
-			if(sh == 0) {
-				ylo = xlo;
-				yhi = xhi;
-			} else {
-				ylo = (xlo >> sh) | (xhi << (32-sh));
-				yhi = xhi >> sh;
-			}
-		} else {
-			if(sh == 32) {
-				ylo = xhi;
-			} else
-			if(sh < 64) {
-				ylo = xhi >> (sh-32);
-			}
-		}
-	} else {
-		/* v = (hi||lo) << -sh */
-		sh = -sh;
-		if(sh <= 11) {
-			ylo = xlo << sh;
-			yhi = (xhi << sh) | (xlo >> (32-sh));
-		} else {
-			/* overflow */
-			yhi = d;        /* causes something awful */
-		}
-	}
-	if(x.vl.hi & SIGN(32)) {
-		if(ylo != 0) {
-			ylo = -ylo;
-			yhi = ~yhi;
-		} else
-			yhi = -yhi;
-	}
-
-	y->hi = yhi;
-	y->lo = ylo;
-}
-
-void
-_f2v(Vlong *y, float f)
-{
-	_d2v(y, f);
-}
-
-double
-_ul2d(ulong u)
-{
-	// compensate for bug in c
-	if(u & SIGN(32)) {
-		u ^= SIGN(32);
-		return 2147483648. + u;
-	}
-	return u;
-}
-
-double
-_v2d(Vlong x)
-{
-	if(x.hi & SIGN(32)) {
-		if(x.lo) {
-			x.lo = -x.lo;
-			x.hi = ~x.hi;
-		} else
-			x.hi = -x.hi;
-		return -(_ul2d(x.hi)*4294967296. + _ul2d(x.lo));
-	}
-	return x.hi*4294967296. + _ul2d(x.lo);
-}
-
-float
-_v2f(Vlong x)
-{
-	return _v2d(x);
-}
-
-static void
-dodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
-{
-	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
-	int i;
-
-	numhi = num.hi;
-	numlo = num.lo;
-	denhi = den.hi;
-	denlo = den.lo;
-
-	/*
-	 * get a divide by zero
-	 */
-	if(denlo==0 && denhi==0) {
-		runtime·panicdivide();
-	}
-
-	/*
-	 * set up the divisor and find the number of iterations needed
-	 */
-	if(numhi >= SIGN(32)) {
-		quohi = SIGN(32);
-		quolo = 0;
-	} else {
-		quohi = numhi;
-		quolo = numlo;
-	}
-	i = 0;
-	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
-		denhi = (denhi<<1) | (denlo>>31);
-		denlo <<= 1;
-		i++;
-	}
-
-	quohi = 0;
-	quolo = 0;
-	for(; i >= 0; i--) {
-		quohi = (quohi<<1) | (quolo>>31);
-		quolo <<= 1;
-		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
-			t = numlo;
-			numlo -= denlo;
-			if(numlo > t)
-				numhi--;
-			numhi -= denhi;
-			quolo |= 1;
-		}
-		denlo = (denlo>>1) | (denhi<<31);
-		denhi >>= 1;
-	}
-
-	if(q) {
-		q->lo = quolo;
-		q->hi = quohi;
-	}
-	if(r) {
-		r->lo = numlo;
-		r->hi = numhi;
-	}
-}
-
-void
-_divvu(Vlong *q, Vlong n, Vlong d)
-{
-
-	if(n.hi == 0 && d.hi == 0) {
-		q->hi = 0;
-		q->lo = n.lo / d.lo;
-		return;
-	}
-	dodiv(n, d, q, 0);
-}
-
-void
-runtime·uint64div(Vlong n, Vlong d, Vlong q)
-{
-	_divvu(&q, n, d);
-}
-
-void
-_modvu(Vlong *r, Vlong n, Vlong d)
-{
-
-	if(n.hi == 0 && d.hi == 0) {
-		r->hi = 0;
-		r->lo = n.lo % d.lo;
-		return;
-	}
-	dodiv(n, d, 0, r);
-}
-
-void
-runtime·uint64mod(Vlong n, Vlong d, Vlong q)
-{
-	_modvu(&q, n, d);
-}
-
-static void
-vneg(Vlong *v)
-{
-
-	if(v->lo == 0) {
-		v->hi = -v->hi;
-		return;
-	}
-	v->lo = -v->lo;
-	v->hi = ~v->hi;
-}
-
-void
-_divv(Vlong *q, Vlong n, Vlong d)
-{
-	long nneg, dneg;
-
-	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
-		if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
-			// special case: 32-bit -0x80000000 / -1 causes wrong sign
-			q->lo = 0x80000000;
-			q->hi = 0;
-			return;
-		}
-		q->lo = (long)n.lo / (long)d.lo;
-		q->hi = ((long)q->lo) >> 31;
-		return;
-	}
-	nneg = n.hi >> 31;
-	if(nneg)
-		vneg(&n);
-	dneg = d.hi >> 31;
-	if(dneg)
-		vneg(&d);
-	dodiv(n, d, q, 0);
-	if(nneg != dneg)
-		vneg(q);
-}
-
-void
-runtime·int64div(Vlong n, Vlong d, Vlong q)
-{
-	_divv(&q, n, d);
-}
-
-void
-_modv(Vlong *r, Vlong n, Vlong d)
-{
-	long nneg, dneg;
-
-	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
-		r->lo = (long)n.lo % (long)d.lo;
-		r->hi = ((long)r->lo) >> 31;
-		return;
-	}
-	nneg = n.hi >> 31;
-	if(nneg)
-		vneg(&n);
-	dneg = d.hi >> 31;
-	if(dneg)
-		vneg(&d);
-	dodiv(n, d, 0, r);
-	if(nneg)
-		vneg(r);
-}
-
-void
-runtime·int64mod(Vlong n, Vlong d, Vlong q)
-{
-	_modv(&q, n, d);
-}
-
-void
-_rshav(Vlong *r, Vlong a, int b)
-{
-	long t;
-
-	t = a.hi;
-	if(b >= 32) {
-		r->hi = t>>31;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->lo = t>>31;
-			return;
-		}
-		r->lo = t >> (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->hi = t;
-		r->lo = a.lo;
-		return;
-	}
-	r->hi = t >> b;
-	r->lo = (t << (32-b)) | (a.lo >> b);
-}
-
-void
-_rshlv(Vlong *r, Vlong a, int b)
-{
-	ulong t;
-
-	t = a.hi;
-	if(b >= 32) {
-		r->hi = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->lo = 0;
-			return;
-		}
-		r->lo = t >> (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->hi = t;
-		r->lo = a.lo;
-		return;
-	}
-	r->hi = t >> b;
-	r->lo = (t << (32-b)) | (a.lo >> b);
-}
-
-#pragma textflag NOSPLIT
-void
-_lshv(Vlong *r, Vlong a, int b)
-{
-	if(b >= 32) {
-		r->lo = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r->hi = 0;
-			return;
-		}
-		r->hi = a.lo << (b-32);
-		return;
-	}
-	if(b <= 0) {
-		r->lo = a.lo;
-		r->hi = a.hi;
-		return;
-	}
-	r->lo = a.lo << b;
-	r->hi = (a.lo >> (32-b)) | (a.hi << b);
-}
-
-void
-_andv(Vlong *r, Vlong a, Vlong b)
-{
-	r->hi = a.hi & b.hi;
-	r->lo = a.lo & b.lo;
-}
-
-void
-_orv(Vlong *r, Vlong a, Vlong b)
-{
-	r->hi = a.hi | b.hi;
-	r->lo = a.lo | b.lo;
-}
-
-void
-_xorv(Vlong *r, Vlong a, Vlong b)
-{
-	r->hi = a.hi ^ b.hi;
-	r->lo = a.lo ^ b.lo;
-}
-
-void
-_vpp(Vlong *l, Vlong *r)
-{
-
-	l->hi = r->hi;
-	l->lo = r->lo;
-	r->lo++;
-	if(r->lo == 0)
-		r->hi++;
-}
-
-void
-_vmm(Vlong *l, Vlong *r)
-{
-
-	l->hi = r->hi;
-	l->lo = r->lo;
-	if(r->lo == 0)
-		r->hi--;
-	r->lo--;
-}
-
-void
-_ppv(Vlong *l, Vlong *r)
-{
-
-	r->lo++;
-	if(r->lo == 0)
-		r->hi++;
-	l->hi = r->hi;
-	l->lo = r->lo;
-}
-
-void
-_mmv(Vlong *l, Vlong *r)
-{
-
-	if(r->lo == 0)
-		r->hi--;
-	r->lo--;
-	l->hi = r->hi;
-	l->lo = r->lo;
-}
-
-#pragma textflag NOSPLIT
-void
-_vasop(Vlong *ret, void *lv, void fn(Vlong*, Vlong, Vlong), int type, Vlong rv)
-{
-	Vlong t, u;
-
-	u = *ret;
-	switch(type) {
-	default:
-		runtime·abort();
-		break;
-
-	case 1: /* schar */
-		t.lo = *(schar*)lv;
-		t.hi = t.lo >> 31;
-		fn(&u, t, rv);
-		*(schar*)lv = u.lo;
-		break;
-
-	case 2: /* uchar */
-		t.lo = *(uchar*)lv;
-		t.hi = 0;
-		fn(&u, t, rv);
-		*(uchar*)lv = u.lo;
-		break;
-
-	case 3: /* short */
-		t.lo = *(short*)lv;
-		t.hi = t.lo >> 31;
-		fn(&u, t, rv);
-		*(short*)lv = u.lo;
-		break;
-
-	case 4: /* ushort */
-		t.lo = *(ushort*)lv;
-		t.hi = 0;
-		fn(&u, t, rv);
-		*(ushort*)lv = u.lo;
-		break;
-
-	case 9: /* int */
-		t.lo = *(int*)lv;
-		t.hi = t.lo >> 31;
-		fn(&u, t, rv);
-		*(int*)lv = u.lo;
-		break;
-
-	case 10:        /* uint */
-		t.lo = *(uint*)lv;
-		t.hi = 0;
-		fn(&u, t, rv);
-		*(uint*)lv = u.lo;
-		break;
-
-	case 5: /* long */
-		t.lo = *(long*)lv;
-		t.hi = t.lo >> 31;
-		fn(&u, t, rv);
-		*(long*)lv = u.lo;
-		break;
-
-	case 6: /* ulong */
-		t.lo = *(ulong*)lv;
-		t.hi = 0;
-		fn(&u, t, rv);
-		*(ulong*)lv = u.lo;
-		break;
-
-	case 7: /* vlong */
-	case 8: /* uvlong */
-		fn(&u, *(Vlong*)lv, rv);
-		*(Vlong*)lv = u;
-		break;
-	}
-	*ret = u;
-}
-
-void
-_p2v(Vlong *ret, void *p)
-{
-	long t;
-
-	t = (ulong)p;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_sl2v(Vlong *ret, long sl)
-{
-	long t;
-
-	t = sl;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_ul2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-#pragma textflag NOSPLIT
-void
-_si2v(Vlong *ret, int si)
-{
-	ret->lo = (long)si;
-	ret->hi = (long)si >> 31;
-}
-
-void
-_ui2v(Vlong *ret, uint ui)
-{
-	long t;
-
-	t = ui;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_sh2v(Vlong *ret, long sh)
-{
-	long t;
-
-	t = (sh << 16) >> 16;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_uh2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul & 0xffff;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-void
-_sc2v(Vlong *ret, long uc)
-{
-	long t;
-
-	t = (uc << 24) >> 24;
-	ret->lo = t;
-	ret->hi = t >> 31;
-}
-
-void
-_uc2v(Vlong *ret, ulong ul)
-{
-	long t;
-
-	t = ul & 0xff;
-	ret->lo = t;
-	ret->hi = 0;
-}
-
-long
-_v2sc(Vlong rv)
-{
-	long t;
-
-	t = rv.lo & 0xff;
-	return (t << 24) >> 24;
-}
-
-long
-_v2uc(Vlong rv)
-{
-
-	return rv.lo & 0xff;
-}
-
-long
-_v2sh(Vlong rv)
-{
-	long t;
-
-	t = rv.lo & 0xffff;
-	return (t << 16) >> 16;
-}
-
-long
-_v2uh(Vlong rv)
-{
-
-	return rv.lo & 0xffff;
-}
-
-long
-_v2sl(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-long
-_v2ul(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2si(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-long
-_v2ui(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-int
-_testv(Vlong rv)
-{
-	return rv.lo || rv.hi;
-}
-
-int
-_eqv(Vlong lv, Vlong rv)
-{
-	return lv.lo == rv.lo && lv.hi == rv.hi;
-}
-
-int
-_nev(Vlong lv, Vlong rv)
-{
-	return lv.lo != rv.lo || lv.hi != rv.hi;
-}
-
-int
-_ltv(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi < (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo < rv.lo);
-}
-
-int
-_lev(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi < (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo <= rv.lo);
-}
-
-int
-_gtv(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi > (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo > rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_gev(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi > (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo >= rv.lo);
-}
-
-int
-_lov(Vlong lv, Vlong rv)
-{
-	return lv.hi < rv.hi ||
-		(lv.hi == rv.hi && lv.lo < rv.lo);
-}
-
-int
-_lsv(Vlong lv, Vlong rv)
-{
-	return lv.hi < rv.hi ||
-		(lv.hi == rv.hi && lv.lo <= rv.lo);
-}
-
-int
-_hiv(Vlong lv, Vlong rv)
-{
-	return lv.hi > rv.hi ||
-		(lv.hi == rv.hi && lv.lo > rv.lo);
-}
-
-int
-_hsv(Vlong lv, Vlong rv)
-{
-	return lv.hi > rv.hi ||
-		(lv.hi == rv.hi && lv.lo >= rv.lo);
-}
diff --git a/src/pkg/sync/atomic/asm_linux_arm.s b/src/pkg/sync/atomic/asm_linux_arm.s
index bfcfd79..63f1f9e 100644
--- a/src/pkg/sync/atomic/asm_linux_arm.s
+++ b/src/pkg/sync/atomic/asm_linux_arm.s
@@ -121,28 +121,8 @@
 	MOVW	R0, 20(FP)
 	RET
 
-TEXT ·generalCAS64(SB),NOSPLIT,$20-21
-	// bool runtime·cas64(uint64 volatile *addr, uint64 old, uint64 new)
-	MOVW	addr+0(FP), R0
-	// trigger potential paging fault here,
-	// because a fault in runtime.cas64 will hang.
-	MOVW	(R0), R2
-	// make unaligned atomic access panic
-	AND.S	$7, R0, R1
-	BEQ 	2(PC)
-	MOVW	R1, (R1)
-	MOVW	R0, 4(R13)
-	MOVW	old_lo+4(FP), R1
-	MOVW	R1, 8(R13)
-	MOVW	old_hi+8(FP), R1
-	MOVW	R1, 12(R13)
-	MOVW	new_lo+12(FP), R2
-	MOVW	R2, 16(R13)
-	MOVW	new_hi+16(FP), R3
-	MOVW	R3, 20(R13)
-	BL  	runtime·cas64(SB)
-	MOVB	R0, ret+20(FP)
-	RET
+TEXT ·generalCAS64(SB),NOSPLIT,$0-21
+	B  	runtime·cas64(SB)	// branch (not BL) with a $0 frame: runtime·cas64 presumably reads addr/old/new and stores ret in this function's argument area -- confirm
 
 GLOBL armCAS64(SB), $4