runtime: implement string ops in Go

Also implement go:nosplit annotation.  Not really needed
for now, but we'll definitely need it for other conversions.

benchmark                 old ns/op     new ns/op     delta
BenchmarkRuneIterate      534           474           -11.24%
BenchmarkRuneIterate2     535           470           -12.15%

LGTM=bradfitz
R=golang-codereviews, dave, bradfitz, minux
CC=golang-codereviews
https://golang.org/cl/93380044
diff --git a/src/cmd/gc/fmt.c b/src/cmd/gc/fmt.c
index b5f8a83..8c2f8b9 100644
--- a/src/cmd/gc/fmt.c
+++ b/src/cmd/gc/fmt.c
@@ -649,7 +649,7 @@
 
 		if(t->funarg) {
 			fmtstrcpy(fp, "(");
-			if(fmtmode == FTypeId || fmtmode == FErr) {	// no argument names on function signature, and no "noescape" tags
+			if(fmtmode == FTypeId || fmtmode == FErr) {	// no argument names on function signature, and no "noescape"/"nosplit" tags
 				for(t1=t->type; t1!=T; t1=t1->down)
 					if(t1->down)
 						fmtprint(fp, "%hT, ", t1);
diff --git a/src/cmd/gc/go.h b/src/cmd/gc/go.h
index 413e710..a5a464e 100644
--- a/src/cmd/gc/go.h
+++ b/src/cmd/gc/go.h
@@ -269,6 +269,7 @@
 	uchar	colas;		// OAS resulting from :=
 	uchar	diag;		// already printed error about this
 	uchar	noescape;	// func arguments do not escape
+	uchar	nosplit;	// func is marked go:nosplit: emit it with the NOSPLIT text flag (no stack-split check)
 	uchar	builtin;	// built-in name, like len or close
 	uchar	walkdef;
 	uchar	typecheck;
@@ -980,6 +981,7 @@
 EXTERN	int	flag_race;
 EXTERN	int	flag_largemodel;
 EXTERN	int	noescape;
+EXTERN	int	nosplit;
 EXTERN	int	debuglive;
 EXTERN	Link*	ctxt;
 
diff --git a/src/cmd/gc/go.y b/src/cmd/gc/go.y
index 2f354f72..e351fa7 100644
--- a/src/cmd/gc/go.y
+++ b/src/cmd/gc/go.y
@@ -1311,6 +1311,7 @@
 		$$->nbody = $3;
 		$$->endlineno = lineno;
 		$$->noescape = noescape;
+		$$->nosplit = nosplit;
 		funcbody($$);
 	}
 
@@ -1495,6 +1496,7 @@
 			testdclstack();
 		nointerface = 0;
 		noescape = 0;
+		nosplit = 0;
 	}
 
 vardcl_list:
diff --git a/src/cmd/gc/lex.c b/src/cmd/gc/lex.c
index a50101c..c90cbef 100644
--- a/src/cmd/gc/lex.c
+++ b/src/cmd/gc/lex.c
@@ -1592,6 +1592,10 @@
 		noescape = 1;
 		goto out;
 	}
+	if(strcmp(lexbuf, "go:nosplit") == 0) {
+		nosplit = 1;
+		goto out;
+	}
 	
 out:
 	return c;
diff --git a/src/cmd/gc/pgen.c b/src/cmd/gc/pgen.c
index 40620c3..cabba68 100644
--- a/src/cmd/gc/pgen.c
+++ b/src/cmd/gc/pgen.c
@@ -229,6 +229,8 @@
 		ptxt->TEXTFLAG |= WRAPPER;
 	if(fn->needctxt)
 		ptxt->TEXTFLAG |= NEEDCTXT;
+	if(fn->nosplit)
+		ptxt->TEXTFLAG |= NOSPLIT;
 
 	// Clumsy but important.
 	// See test/recover.go for test cases and src/pkg/reflect/value.go
diff --git a/src/cmd/gc/y.tab.c b/src/cmd/gc/y.tab.c
index 08d8ecf..c389f23 100644
--- a/src/cmd/gc/y.tab.c
+++ b/src/cmd/gc/y.tab.c
@@ -3828,6 +3828,7 @@
 		(yyval.node)->nbody = (yyvsp[(3) - (3)].list);
 		(yyval.node)->endlineno = lineno;
 		(yyval.node)->noescape = noescape;
+		(yyval.node)->nosplit = nosplit;
 		funcbody((yyval.node));
 	}
     break;
@@ -4037,6 +4038,7 @@
 			testdclstack();
 		nointerface = 0;
 		noescape = 0;
+		nosplit = 0;
 	}
     break;
 
diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s
index 088eecf..2009a60 100644
--- a/src/pkg/runtime/asm_386.s
+++ b/src/pkg/runtime/asm_386.s
@@ -781,6 +781,12 @@
 	MOVL	-4(AX),AX		// get calling pc
 	RET
 
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
+	MOVL	x+0(FP),AX		// addr of first arg
+	MOVL	-4(AX),AX		// get calling pc
+	MOVL	AX, r+4(FP)
+	RET
+
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-8
 	MOVL	x+0(FP),AX		// addr of first arg
 	MOVL	x+4(FP), BX
diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s
index b352a50..d15b372 100644
--- a/src/pkg/runtime/asm_amd64.s
+++ b/src/pkg/runtime/asm_amd64.s
@@ -858,6 +858,12 @@
 	MOVQ	-8(AX),AX		// get calling pc
 	RET
 
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-16	// func gogetcallerpc(p unsafe.Pointer) uintptr
+	MOVQ	x+0(FP),AX		// addr of first arg
+	MOVQ	-8(AX),AX		// get calling pc
+	MOVQ	AX,r+8(FP)		// result slot follows the 8-byte pointer argument
+	RET
+
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
 	MOVQ	x+0(FP),AX		// addr of first arg
 	MOVQ	x+8(FP), BX
diff --git a/src/pkg/runtime/asm_amd64p32.s b/src/pkg/runtime/asm_amd64p32.s
index afe5734..4e1fb0a 100644
--- a/src/pkg/runtime/asm_amd64p32.s
+++ b/src/pkg/runtime/asm_amd64p32.s
@@ -663,6 +663,12 @@
 	MOVL	-8(AX),AX		// get calling pc
 	RET
 
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$0-8
+	MOVL	x+0(FP),AX		// addr of first arg
+	MOVL	-8(AX),AX		// get calling pc
+	MOVL	AX, r+4(FP)		// NOTE(review): confirm result offset; amd64p32 may align results to 8 bytes (r+8, $0-12)
+	RET
+
 TEXT runtime·setcallerpc(SB),NOSPLIT,$0-16
 	MOVL	x+0(FP),AX		// addr of first arg
 	MOVL	pc+4(FP), BX		// pc to set
diff --git a/src/pkg/runtime/asm_arm.s b/src/pkg/runtime/asm_arm.s
index 2b43ac4..5af5895 100644
--- a/src/pkg/runtime/asm_arm.s
+++ b/src/pkg/runtime/asm_arm.s
@@ -560,6 +560,10 @@
 	MOVW	0(SP), R0
 	RET
 
+TEXT runtime·gogetcallerpc(SB),NOSPLIT,$-4-8
+	MOVW	R14, 4(FP)
+	RET
+
 TEXT runtime·setcallerpc(SB),NOSPLIT,$-4-8
 	MOVW	x+4(FP), R0
 	MOVW	R0, 0(SP)
diff --git a/src/pkg/runtime/error.go b/src/pkg/runtime/error.go
index e704ff8..12fd09e 100644
--- a/src/pkg/runtime/error.go
+++ b/src/pkg/runtime/error.go
@@ -80,8 +80,6 @@
 
 func (e errorCString) RuntimeError() {}
 
-func cstringToGo(uintptr) string
-
 func (e errorCString) Error() string {
 	return "runtime error: " + cstringToGo(e.cstr)
 }
diff --git a/src/pkg/runtime/race.go b/src/pkg/runtime/race.go
index 2a9124d..2fe5240 100644
--- a/src/pkg/runtime/race.go
+++ b/src/pkg/runtime/race.go
@@ -29,3 +29,6 @@
 
 func RaceSemacquire(s *uint32)
 func RaceSemrelease(s *uint32)
+
+// private interface for the runtime
+const raceenabled = true
diff --git a/src/pkg/runtime/race0.go b/src/pkg/runtime/race0.go
new file mode 100644
index 0000000..e9f72a4
--- /dev/null
+++ b/src/pkg/runtime/race0.go
@@ -0,0 +1,11 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !race
+
+// Dummy race detection API, used when not built with -race.
+
+package runtime
+
+const raceenabled = false
diff --git a/src/pkg/runtime/rune.c b/src/pkg/runtime/rune.c
deleted file mode 100644
index ed86726..0000000
--- a/src/pkg/runtime/rune.c
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- * The authors of this software are Rob Pike and Ken Thompson.
- *              Copyright (c) 2002 by Lucent Technologies.
- *              Portions Copyright 2009 The Go Authors. All rights reserved.
- * Permission to use, copy, modify, and distribute this software for any
- * purpose without fee is hereby granted, provided that this entire notice
- * is included in all copies of any software which is or includes a copy
- * or modification of this software and in all copies of the supporting
- * documentation for such software.
- * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
- * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
- * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
- */
-
-/*
- * This code is copied, with slight editing due to type differences,
- * from a subset of ../lib9/utf/rune.c
- */
-
-#include "runtime.h"
-
-enum
-{
-	Bit1	= 7,
-	Bitx	= 6,
-	Bit2	= 5,
-	Bit3	= 4,
-	Bit4	= 3,
-	Bit5	= 2,
-
-	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
-	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
-	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
-	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
-	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
-	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
-
-	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
-	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
-	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
-	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0001 1111 1111 1111 1111 1111 */
-
-	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
-	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
-
-	Runeerror	= 0xFFFD,
-	Runeself	= 0x80,
-
-	SurrogateMin = 0xD800,
-	SurrogateMax = 0xDFFF,
-
-	Bad	= Runeerror,
-
-	Runemax	= 0x10FFFF,	/* maximum rune value */
-};
-
-/*
- * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
- * This is a slower but "safe" version of the old chartorune
- * that works on strings that are not necessarily null-terminated.
- *
- * If you know for sure that your string is null-terminated,
- * chartorune will be a bit faster.
- *
- * It is guaranteed not to attempt to access "length"
- * past the incoming pointer.  This is to avoid
- * possible access violations.  If the string appears to be
- * well-formed but incomplete (i.e., to get the whole Rune
- * we'd need to read past str+length) then we'll set the Rune
- * to Bad and return 0.
- *
- * Note that if we have decoding problems for other
- * reasons, we return 1 instead of 0.
- */
-int32
-runtime·charntorune(int32 *rune, uint8 *str, int32 length)
-{
-	int32 c, c1, c2, c3, l;
-
-	/* When we're not allowed to read anything */
-	if(length <= 0) {
-		goto badlen;
-	}
-
-	/*
-	 * one character sequence (7-bit value)
-	 *	00000-0007F => T1
-	 */
-	c = *(uint8*)str;
-	if(c < Tx) {
-		*rune = c;
-		return 1;
-	}
-
-	// If we can't read more than one character we must stop
-	if(length <= 1) {
-		goto badlen;
-	}
-
-	/*
-	 * two character sequence (11-bit value)
-	 *	0080-07FF => T2 Tx
-	 */
-	c1 = *(uint8*)(str+1) ^ Tx;
-	if(c1 & Testx)
-		goto bad;
-	if(c < T3) {
-		if(c < T2)
-			goto bad;
-		l = ((c << Bitx) | c1) & Rune2;
-		if(l <= Rune1)
-			goto bad;
-		*rune = l;
-		return 2;
-	}
-
-	// If we can't read more than two characters we must stop
-	if(length <= 2) {
-		goto badlen;
-	}
-
-	/*
-	 * three character sequence (16-bit value)
-	 *	0800-FFFF => T3 Tx Tx
-	 */
-	c2 = *(uint8*)(str+2) ^ Tx;
-	if(c2 & Testx)
-		goto bad;
-	if(c < T4) {
-		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
-		if(l <= Rune2)
-			goto bad;
-		if (SurrogateMin <= l && l <= SurrogateMax)
-			goto bad;
-		*rune = l;
-		return 3;
-	}
-
-	if (length <= 3)
-		goto badlen;
-
-	/*
-	 * four character sequence (21-bit value)
-	 *	10000-1FFFFF => T4 Tx Tx Tx
-	 */
-	c3 = *(uint8*)(str+3) ^ Tx;
-	if (c3 & Testx)
-		goto bad;
-	if (c < T5) {
-		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-		if (l <= Rune3 || l > Runemax)
-			goto bad;
-		*rune = l;
-		return 4;
-	}
-
-	// Support for 5-byte or longer UTF-8 would go here, but
-	// since we don't have that, we'll just fall through to bad.
-
-	/*
-	 * bad decoding
-	 */
-bad:
-	*rune = Bad;
-	return 1;
-badlen:
-	// was return 0, but return 1 is more convenient for the runtime.
-	*rune = Bad;
-	return 1;
-
-}
-
-int32
-runtime·runetochar(byte *str, int32 rune)  /* note: in original, arg2 was pointer */
-{
-	/* Runes are signed, so convert to unsigned for range check. */
-	uint32 c;
-
-	/*
-	 * one character sequence
-	 *	00000-0007F => 00-7F
-	 */
-	c = rune;
-	if(c <= Rune1) {
-		str[0] = c;
-		return 1;
-	}
-
-	/*
-	 * two character sequence
-	 *	0080-07FF => T2 Tx
-	 */
-	if(c <= Rune2) {
-		str[0] = T2 | (c >> 1*Bitx);
-		str[1] = Tx | (c & Maskx);
-		return 2;
-	}
-
-	/*
-	 * If the Rune is out of range or a surrogate half, convert it to the error rune.
-	 * Do this test here because the error rune encodes to three bytes.
-	 * Doing it earlier would duplicate work, since an out of range
-	 * Rune wouldn't have fit in one or two bytes.
-	 */
-	if (c > Runemax)
-		c = Runeerror;
-	if (SurrogateMin <= c && c <= SurrogateMax)
-		c = Runeerror;
-
-	/*
-	 * three character sequence
-	 *	0800-FFFF => T3 Tx Tx
-	 */
-	if (c <= Rune3) {
-		str[0] = T3 |  (c >> 2*Bitx);
-		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
-		str[2] = Tx |  (c & Maskx);
-		return 3;
-	}
-
-	/*
-	 * four character sequence (21-bit value)
-	 *     10000-1FFFFF => T4 Tx Tx Tx
-	 */
-	str[0] = T4 | (c >> 3*Bitx);
-	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
-	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
-	str[3] = Tx | (c & Maskx);
-	return 4;
-}
diff --git a/src/pkg/runtime/rune.go b/src/pkg/runtime/rune.go
new file mode 100644
index 0000000..a9f6835
--- /dev/null
+++ b/src/pkg/runtime/rune.go
@@ -0,0 +1,219 @@
+/*
+ * The authors of this software are Rob Pike and Ken Thompson.
+ *              Copyright (c) 2002 by Lucent Technologies.
+ *              Portions Copyright 2009 The Go Authors. All rights reserved.
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose without fee is hereby granted, provided that this entire notice
+ * is included in all copies of any software which is or includes a copy
+ * or modification of this software and in all copies of the supporting
+ * documentation for such software.
+ * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
+ */
+
+/*
+ * This code is copied, with slight editing due to type differences,
+ * from a subset of ../lib9/utf/rune.c
+ */
+
+package runtime
+
+const (
+	bit1 = 7
+	bitx = 6
+	bit2 = 5
+	bit3 = 4
+	bit4 = 3
+	bit5 = 2
+
+	t1 = ((1 << (bit1 + 1)) - 1) ^ 0xFF /* 0000 0000 */
+	tx = ((1 << (bitx + 1)) - 1) ^ 0xFF /* 1000 0000 */
+	t2 = ((1 << (bit2 + 1)) - 1) ^ 0xFF /* 1100 0000 */
+	t3 = ((1 << (bit3 + 1)) - 1) ^ 0xFF /* 1110 0000 */
+	t4 = ((1 << (bit4 + 1)) - 1) ^ 0xFF /* 1111 0000 */
+	t5 = ((1 << (bit5 + 1)) - 1) ^ 0xFF /* 1111 1000 */
+
+	rune1 = (1 << (bit1 + 0*bitx)) - 1 /* 0000 0000 0111 1111 */
+	rune2 = (1 << (bit2 + 1*bitx)) - 1 /* 0000 0111 1111 1111 */
+	rune3 = (1 << (bit3 + 2*bitx)) - 1 /* 1111 1111 1111 1111 */
+	rune4 = (1 << (bit4 + 3*bitx)) - 1 /* 0001 1111 1111 1111 1111 1111 */
+
+	maskx = (1 << bitx) - 1 /* 0011 1111 */
+	testx = maskx ^ 0xFF    /* 1100 0000 */
+
+	runeerror = 0xFFFD
+	runeself  = 0x80
+
+	surrogateMin = 0xD800
+	surrogateMax = 0xDFFF
+
+	bad = runeerror
+
+	runemax = 0x10FFFF /* maximum rune value */
+)
+
+/*
+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
+ * This is a slower but "safe" version of the old chartorune
+ * that works on strings that are not necessarily null-terminated.
+ *
+ * If you know for sure that your string is null-terminated,
+ * chartorune will be a bit faster.
+ *
+ * It is guaranteed not to attempt to access "length"
+ * past the incoming pointer.  This is to avoid
+ * possible access violations.  If the string appears to be
+ * well-formed but incomplete (i.e., to get the whole rune
+ * we'd need to read past the end of s) then we return bad
+ * and a width of 1.  (The C original once returned 0 here;
+ * a width of 1 is more convenient for the runtime.)
+ * Any other decoding problem likewise returns bad and a
+ * width of 1.
+ */
+func charntorune(s string) (rune, int) {
+	/* When we're not allowed to read anything */
+	if len(s) <= 0 {
+		return bad, 1
+	}
+
+	/*
+	 * one character sequence (7-bit value)
+	 *	00000-0007F => T1
+	 */
+	c := s[0]
+	if c < tx {
+		return rune(c), 1
+	}
+
+	// If we can't read more than one character we must stop
+	if len(s) <= 1 {
+		return bad, 1
+	}
+
+	/*
+	 * two character sequence (11-bit value)
+	 *	0080-07FF => t2 tx
+	 */
+	c1 := s[1] ^ tx
+	if (c1 & testx) != 0 {
+		return bad, 1
+	}
+	if c < t3 {
+		if c < t2 {
+			return bad, 1
+		}
+		l := ((rune(c) << bitx) | rune(c1)) & rune2
+		if l <= rune1 {
+			return bad, 1
+		}
+		return l, 2
+	}
+
+	// If we can't read more than two characters we must stop
+	if len(s) <= 2 {
+		return bad, 1
+	}
+
+	/*
+	 * three character sequence (16-bit value)
+	 *	0800-FFFF => t3 tx tx
+	 */
+	c2 := s[2] ^ tx
+	if (c2 & testx) != 0 {
+		return bad, 1
+	}
+	if c < t4 {
+		l := ((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) & rune3
+		if l <= rune2 {
+			return bad, 1
+		}
+		if surrogateMin <= l && l <= surrogateMax {
+			return bad, 1
+		}
+		return l, 3
+	}
+
+	if len(s) <= 3 {
+		return bad, 1
+	}
+
+	/*
+	 * four character sequence (21-bit value)
+	 *	10000-1FFFFF => t4 tx tx tx
+	 */
+	c3 := s[3] ^ tx
+	if (c3 & testx) != 0 {
+		return bad, 1
+	}
+	if c < t5 {
+		l := ((((((rune(c) << bitx) | rune(c1)) << bitx) | rune(c2)) << bitx) | rune(c3)) & rune4
+		if l <= rune3 || l > runemax {
+			return bad, 1
+		}
+		return l, 4
+	}
+
+	// Support for 5-byte or longer UTF-8 would go here, but
+	// since we don't have that, we'll just return bad.
+	return bad, 1
+}
+
+// runetochar encodes r as UTF-8 and writes the bytes to the start of str.
+// It returns the number of bytes written (1 to 4).
+func runetochar(str []byte, r rune) int {
+	/* runes are signed, so convert to unsigned for range check. */
+	c := uint32(r)
+	/*
+	 * one character sequence
+	 *	00000-0007F => 00-7F
+	 */
+	if c <= rune1 {
+		str[0] = byte(c)
+		return 1
+	}
+	/*
+	 * two character sequence
+	 *	0080-07FF => t2 tx
+	 */
+	if c <= rune2 {
+		str[0] = byte(t2 | (c >> (1 * bitx)))
+		str[1] = byte(tx | (c & maskx))
+		return 2
+	}
+
+	/*
+	 * If the rune is out of range or a surrogate half, convert it to the error rune.
+	 * Do this test here because the error rune encodes to three bytes.
+	 * Doing it earlier would duplicate work, since an out of range
+	 * rune wouldn't have fit in one or two bytes.
+	 */
+	if c > runemax {
+		c = runeerror
+	}
+	if surrogateMin <= c && c <= surrogateMax {
+		c = runeerror
+	}
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => t3 tx tx
+	 */
+	if c <= rune3 {
+		str[0] = byte(t3 | (c >> (2 * bitx)))
+		str[1] = byte(tx | ((c >> (1 * bitx)) & maskx))
+		str[2] = byte(tx | (c & maskx))
+		return 3
+	}
+
+	/*
+	 * four character sequence (21-bit value)
+	 *     10000-1FFFFF => t4 tx tx tx
+	 */
+	str[0] = byte(t4 | (c >> (3 * bitx)))
+	str[1] = byte(tx | ((c >> (2 * bitx)) & maskx))
+	str[2] = byte(tx | ((c >> (1 * bitx)) & maskx))
+	str[3] = byte(tx | (c & maskx))
+	return 4
+}
diff --git a/src/pkg/runtime/string.c b/src/pkg/runtime/string.c
new file mode 100644
index 0000000..d5b668b
--- /dev/null
+++ b/src/pkg/runtime/string.c
@@ -0,0 +1,199 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "race.h"
+#include "../../cmd/ld/textflag.h"
+
+String	runtime·emptystring;
+
+#pragma textflag NOSPLIT
+intgo
+runtime·findnull(byte *s)
+{
+	intgo l;
+
+	if(s == nil)
+		return 0;
+	for(l=0; s[l]!=0; l++)
+		;
+	return l;
+}
+
+intgo
+runtime·findnullw(uint16 *s)
+{
+	intgo l;
+
+	if(s == nil)
+		return 0;
+	for(l=0; s[l]!=0; l++)
+		;
+	return l;
+}
+
+uintptr runtime·maxstring = 256; // a hint for print
+
+static String
+gostringsize(intgo l)
+{
+	String s;
+	uintptr ms;
+
+	if(l == 0)
+		return runtime·emptystring;
+	s.str = runtime·mallocgc(l, 0, FlagNoScan|FlagNoZero);
+	s.len = l;
+	for(;;) {
+		ms = runtime·maxstring;
+		if((uintptr)l <= ms || runtime·casp((void**)&runtime·maxstring, (void*)ms, (void*)l))
+			break;
+	}
+	return s;
+}
+
+String
+runtime·gostring(byte *str)
+{
+	intgo l;
+	String s;
+
+	l = runtime·findnull(str);
+	s = gostringsize(l);
+	runtime·memmove(s.str, str, l);
+	return s;
+}
+
+String
+runtime·gostringn(byte *str, intgo l)
+{
+	String s;
+
+	s = gostringsize(l);
+	runtime·memmove(s.str, str, l);
+	return s;
+}
+
+// used by cmd/cgo
+Slice
+runtime·gobytes(byte *p, intgo n)
+{
+	Slice sl;
+
+	sl.array = runtime·mallocgc(n, 0, FlagNoScan|FlagNoZero);
+	sl.len = n;
+	sl.cap = n;
+	runtime·memmove(sl.array, p, n);
+	return sl;
+}
+
+String
+runtime·gostringnocopy(byte *str)
+{
+	String s;
+	
+	s.str = str;
+	s.len = runtime·findnull(str);
+	return s;
+}
+
+String
+runtime·gostringw(uint16 *str)
+{
+	intgo n1, n2, i;
+	byte buf[8];
+	String s;
+
+	n1 = 0;
+	for(i=0; str[i]; i++)
+		n1 += runtime·runetochar(buf, str[i]);
+	s = gostringsize(n1+4);
+	n2 = 0;
+	for(i=0; str[i]; i++) {
+		// check for race
+		if(n2 >= n1)
+			break;
+		n2 += runtime·runetochar(s.str+n2, str[i]);
+	}
+	s.len = n2;
+	s.str[s.len] = 0;
+	return s;
+}
+
+String
+runtime·catstring(String s1, String s2)
+{
+	String s3;
+
+	if(s1.len == 0)
+		return s2;
+	if(s2.len == 0)
+		return s1;
+
+	s3 = gostringsize(s1.len + s2.len);
+	runtime·memmove(s3.str, s1.str, s1.len);
+	runtime·memmove(s3.str+s1.len, s2.str, s2.len);
+	return s3;
+}
+
+int32
+runtime·strcmp(byte *s1, byte *s2)
+{
+	uintptr i;
+	byte c1, c2;
+
+	for(i=0;; i++) {
+		c1 = s1[i];
+		c2 = s2[i];
+		if(c1 < c2)
+			return -1;
+		if(c1 > c2)
+			return +1;
+		if(c1 == 0)
+			return 0;
+	}
+}
+
+int32
+runtime·strncmp(byte *s1, byte *s2, uintptr n)
+{
+	uintptr i;
+	byte c1, c2;
+
+	for(i=0; i<n; i++) {
+		c1 = s1[i];
+		c2 = s2[i];
+		if(c1 < c2)
+			return -1;
+		if(c1 > c2)
+			return +1;
+		if(c1 == 0)
+			break;
+	}
+	return 0;
+}
+
+byte*
+runtime·strstr(byte *s1, byte *s2)
+{
+	byte *sp1, *sp2;
+
+	if(*s2 == 0)
+		return s1;
+	for(; *s1; s1++) {
+		if(*s1 != *s2)
+			continue;
+		sp1 = s1;
+		sp2 = s2;
+		for(;;) {
+			if(*sp2 == 0)
+				return s1;
+			if(*sp1++ != *sp2++)
+				break;
+		}
+	}
+	return nil;
+}
diff --git a/src/pkg/runtime/string.go b/src/pkg/runtime/string.go
new file mode 100644
index 0000000..9805dd5
--- /dev/null
+++ b/src/pkg/runtime/string.go
@@ -0,0 +1,200 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"unsafe"
+)
+
+func concatstrings(a []string) string {
+	idx := 0
+	l := 0
+	count := 0
+	for i, x := range a {
+		n := len(x)
+		if n == 0 {
+			continue
+		}
+		if l+n < l {
+			panic("string concatenation too long")
+		}
+		l += n
+		count++
+		idx = i
+	}
+	if count == 0 {
+		return ""
+	}
+	if count == 1 {
+		return a[idx]
+	}
+	s, b := rawstring(l)
+	l = 0
+	for _, x := range a {
+		copy(b[l:], x)
+		l += len(x)
+	}
+	return s
+}
+
+//go:nosplit
+func concatstring2(a [2]string) string {
+	return concatstrings(a[:])
+}
+
+//go:nosplit
+func concatstring3(a [3]string) string {
+	return concatstrings(a[:])
+}
+
+//go:nosplit
+func concatstring4(a [4]string) string {
+	return concatstrings(a[:])
+}
+
+//go:nosplit
+func concatstring5(a [5]string) string {
+	return concatstrings(a[:])
+}
+
+func slicebytetostring(b []byte) string {
+	if raceenabled && len(b) > 0 {
+		fn := slicebytetostring
+		racereadrangepc(unsafe.Pointer(&b[0]),
+			len(b),
+			gogetcallerpc(unsafe.Pointer(&b)),
+			**(**uintptr)(unsafe.Pointer(&fn)))
+	}
+	s, c := rawstring(len(b))
+	copy(c, b)
+	return s
+}
+
+func slicebytetostringtmp(b []byte) string {
+	// Return a "string" referring to the actual []byte bytes.
+	// This is only for use by internal compiler optimizations
+	// that know that the string form will be discarded before
+	// the calling goroutine could possibly modify the original
+	// slice or synchronize with another goroutine.
+	// Today, the only such case is a m[string(k)] lookup where
+	// m is a string-keyed map and k is a []byte.
+
+	if raceenabled && len(b) > 0 {
+		fn := slicebytetostringtmp
+		racereadrangepc(unsafe.Pointer(&b[0]),
+			len(b),
+			gogetcallerpc(unsafe.Pointer(&b)),
+			**(**uintptr)(unsafe.Pointer(&fn)))
+	}
+	return *(*string)(unsafe.Pointer(&b))
+}
+
+func stringtoslicebyte(s string) []byte {
+	b := rawbyteslice(len(s))
+	copy(b, s)
+	return b
+}
+
+func stringtoslicerune(s string) []rune {
+	// two passes.
+	// unlike slicerunetostring, no race because strings are immutable.
+	n := 0
+	t := s
+	for len(s) > 0 {
+		_, k := charntorune(s)
+		s = s[k:]
+		n++
+	}
+	a := rawruneslice(n)
+	n = 0
+	for len(t) > 0 {
+		r, k := charntorune(t)
+		t = t[k:]
+		a[n] = r
+		n++
+	}
+	return a
+}
+
+func slicerunetostring(a []rune) string {
+	if raceenabled && len(a) > 0 {
+		fn := slicerunetostring // func value; dereferenced twice below to obtain the pc argument
+		racereadrangepc(unsafe.Pointer(&a[0]),
+			len(a)*int(unsafe.Sizeof(a[0])),
+			gogetcallerpc(unsafe.Pointer(&a)),
+			**(**uintptr)(unsafe.Pointer(&fn)))
+	}
+	var dum [4]byte // scratch buffer: runetochar writes at most 4 bytes
+	size1 := 0
+	for _, r := range a {
+		size1 += runetochar(dum[:], r) // first pass: measure the encoded length
+	}
+	s, b := rawstring(size1 + 3) // 3 spare bytes: the last rune written may start at size1-1 and take 4 bytes
+	size2 := 0
+	for _, r := range a {
+		// check for race: a may have changed since the first pass
+		if size2 >= size1 {
+			break
+		}
+		size2 += runetochar(b[size2:], r) // second pass: encode into the new storage
+	}
+	return s[:size2] // trim to the bytes actually written
+}
+
+func cstringToGo(str uintptr) (s string) {
+	// Note: we need i to be the same type as _string.len and to start at 0.
+	i := _string{}.len
+	for ; ; i++ {
+		if *(*byte)(unsafe.Pointer(str + uintptr(i))) == 0 {
+			break
+		}
+	}
+	t := (*_string)(unsafe.Pointer(&s))
+	t.str = (*byte)(unsafe.Pointer(str))
+	t.len = i
+	return
+}
+
+func intstring(v int64) string {
+	s, b := rawstring(4) // 4 bytes is the longest encoding runetochar produces
+	n := runetochar(b, rune(v)) // NOTE(review): rune(v) keeps only the low 32 bits, so e.g. 1<<32+'A' encodes as "A" rather than runeerror
+	return s[:n]
+}
+
+// stringiter returns the index of the next
+// rune after the rune that starts at s[k].
+func stringiter(s string, k int) int {
+	if k >= len(s) {
+		// 0 is end of iteration
+		return 0
+	}
+
+	c := s[k]
+	if c < runeself {
+		return k + 1
+	}
+
+	// multi-char rune
+	_, n := charntorune(s[k:])
+	return k + n
+}
+
+// stringiter2 returns the rune that starts at s[k]
+// and the index where the next rune starts.
+func stringiter2(s string, k int) (int, rune) {
+	if k >= len(s) {
+		// 0 is end of iteration
+		return 0, 0
+	}
+
+	c := s[k]
+	if c < runeself {
+		return k + 1, rune(c)
+	}
+
+	// multi-char rune
+	r, n := charntorune(s[k:])
+	return k + n, r
+}
diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc
deleted file mode 100644
index 64a1d90..0000000
--- a/src/pkg/runtime/string.goc
+++ /dev/null
@@ -1,418 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "race.h"
-#include "../../cmd/ld/textflag.h"
-
-String	runtime·emptystring;
-
-#pragma textflag NOSPLIT
-intgo
-runtime·findnull(byte *s)
-{
-	intgo l;
-
-	if(s == nil)
-		return 0;
-	for(l=0; s[l]!=0; l++)
-		;
-	return l;
-}
-
-intgo
-runtime·findnullw(uint16 *s)
-{
-	intgo l;
-
-	if(s == nil)
-		return 0;
-	for(l=0; s[l]!=0; l++)
-		;
-	return l;
-}
-
-uintptr runtime·maxstring = 256; // a hint for print
-
-static String
-gostringsize(intgo l)
-{
-	String s;
-	uintptr ms;
-
-	if(l == 0)
-		return runtime·emptystring;
-	s.str = runtime·mallocgc(l, 0, FlagNoScan|FlagNoZero);
-	s.len = l;
-	for(;;) {
-		ms = runtime·maxstring;
-		if((uintptr)l <= ms || runtime·casp((void**)&runtime·maxstring, (void*)ms, (void*)l))
-			break;
-	}
-	return s;
-}
-
-String
-runtime·gostring(byte *str)
-{
-	intgo l;
-	String s;
-
-	l = runtime·findnull(str);
-	s = gostringsize(l);
-	runtime·memmove(s.str, str, l);
-	return s;
-}
-
-String
-runtime·gostringn(byte *str, intgo l)
-{
-	String s;
-
-	s = gostringsize(l);
-	runtime·memmove(s.str, str, l);
-	return s;
-}
-
-// used by cmd/cgo
-Slice
-runtime·gobytes(byte *p, intgo n)
-{
-	Slice sl;
-
-	sl.array = runtime·mallocgc(n, 0, FlagNoScan|FlagNoZero);
-	sl.len = n;
-	sl.cap = n;
-	runtime·memmove(sl.array, p, n);
-	return sl;
-}
-
-String
-runtime·gostringnocopy(byte *str)
-{
-	String s;
-	
-	s.str = str;
-	s.len = runtime·findnull(str);
-	return s;
-}
-
-func cstringToGo(str *byte) (s String) {
-	s = runtime·gostringnocopy(str);
-}
-
-String
-runtime·gostringw(uint16 *str)
-{
-	intgo n1, n2, i;
-	byte buf[8];
-	String s;
-
-	n1 = 0;
-	for(i=0; str[i]; i++)
-		n1 += runtime·runetochar(buf, str[i]);
-	s = gostringsize(n1+4);
-	n2 = 0;
-	for(i=0; str[i]; i++) {
-		// check for race
-		if(n2 >= n1)
-			break;
-		n2 += runtime·runetochar(s.str+n2, str[i]);
-	}
-	s.len = n2;
-	s.str[s.len] = 0;
-	return s;
-}
-
-String
-runtime·catstring(String s1, String s2)
-{
-	String s3;
-
-	if(s1.len == 0)
-		return s2;
-	if(s2.len == 0)
-		return s1;
-
-	s3 = gostringsize(s1.len + s2.len);
-	runtime·memmove(s3.str, s1.str, s1.len);
-	runtime·memmove(s3.str+s1.len, s2.str, s2.len);
-	return s3;
-}
-
-static String
-concatstring(intgo n, String *s)
-{
-	intgo i, l, count;
-	String out;
-
-	l = 0;
-	count = 0;
-	for(i=0; i<n; i++) {
-		if(l + s[i].len < l)
-			runtime·throw("string concatenation too long");
-		l += s[i].len;
-		if(s[i].len > 0) {
-			count++;
-			out = s[i];
-		}
-	}
-	if(count == 0)
-		return runtime·emptystring;
-	if(count == 1) // zero or one non-empty string in concatenation
-		return out;
-	
-	out = gostringsize(l);
-	l = 0;
-	for(i=0; i<n; i++) {
-		runtime·memmove(out.str+l, s[i].str, s[i].len);
-		l += s[i].len;
-	}
-	return out;
-}
-
-#pragma textflag NOSPLIT
-func concatstring2(s1 String, s2 String) (res String) {
-	USED(&s2);
-	res = concatstring(2, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstring3(s1 String, s2 String, s3 String) (res String) {
-	USED(&s2);
-	USED(&s3);
-	res = concatstring(3, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstring4(s1 String, s2 String, s3 String, s4 String) (res String) {
-	USED(&s2);
-	USED(&s3);
-	USED(&s4);
-	res = concatstring(4, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstring5(s1 String, s2 String, s3 String, s4 String, s5 String) (res String) {
-	USED(&s2);
-	USED(&s3);
-	USED(&s4);
-	USED(&s5);
-	res = concatstring(5, &s1);
-}
-#pragma textflag NOSPLIT
-func concatstrings(s Slice) (res String) {
-	res = concatstring(s.len, (String*)s.array);
-}
-
-int32
-runtime·strcmp(byte *s1, byte *s2)
-{
-	uintptr i;
-	byte c1, c2;
-
-	for(i=0;; i++) {
-		c1 = s1[i];
-		c2 = s2[i];
-		if(c1 < c2)
-			return -1;
-		if(c1 > c2)
-			return +1;
-		if(c1 == 0)
-			return 0;
-	}
-}
-
-int32
-runtime·strncmp(byte *s1, byte *s2, uintptr n)
-{
-	uintptr i;
-	byte c1, c2;
-
-	for(i=0; i<n; i++) {
-		c1 = s1[i];
-		c2 = s2[i];
-		if(c1 < c2)
-			return -1;
-		if(c1 > c2)
-			return +1;
-		if(c1 == 0)
-			break;
-	}
-	return 0;
-}
-
-byte*
-runtime·strstr(byte *s1, byte *s2)
-{
-	byte *sp1, *sp2;
-
-	if(*s2 == 0)
-		return s1;
-	for(; *s1; s1++) {
-		if(*s1 != *s2)
-			continue;
-		sp1 = s1;
-		sp2 = s2;
-		for(;;) {
-			if(*sp2 == 0)
-				return s1;
-			if(*sp1++ != *sp2++)
-				break;
-		}
-	}
-	return nil;
-}
-
-func intstring(v int64) (s String) {
-	s = gostringsize(8);
-	s.len = runtime·runetochar(s.str, v);
-	s.str[s.len] = 0;
-}
-
-func slicebytetostring(b Slice) (s String) {
-	void *pc;
-
-	if(raceenabled) {
-		pc = runtime·getcallerpc(&b);
-		runtime·racereadrangepc(b.array, b.len, pc, runtime·slicebytetostring);
-	}
-	s = gostringsize(b.len);
-	runtime·memmove(s.str, b.array, s.len);
-}
-
-func slicebytetostringtmp(b Slice) (s String) {
-	void *pc;
-
-	if(raceenabled) {
-		pc = runtime·getcallerpc(&b);
-		runtime·racereadrangepc(b.array, b.len, pc, runtime·slicebytetostringtmp);
-	}
-	
-	// Return a "string" referring to the actual []byte bytes.
-	// This is only for use by internal compiler optimizations
-	// that know that the string form will be discarded before
-	// the calling goroutine could possibly modify the original
-	// slice or synchronize with another goroutine.
-	// Today, the only such case is a m[string(k)] lookup where
-	// m is a string-keyed map and k is a []byte.
-	s.str = b.array;
-	s.len = b.len;
-}
-
-func stringtoslicebyte(s String) (b Slice) {
-	uintptr cap;
-
-	cap = runtime·roundupsize(s.len);
-	b.array = runtime·mallocgc(cap, 0, FlagNoScan|FlagNoZero);
-	b.len = s.len;
-	b.cap = cap;
-	runtime·memmove(b.array, s.str, s.len);
-	if(cap != b.len)
-		runtime·memclr(b.array+b.len, cap-b.len);
-}
-
-func slicerunetostring(b Slice) (s String) {
-	intgo siz1, siz2, i;
-	int32 *a;
-	byte dum[8];
-	void *pc;
-
-	if(raceenabled) {
-		pc = runtime·getcallerpc(&b);
-		runtime·racereadrangepc(b.array, b.len*sizeof(*a), pc, runtime·slicerunetostring);
-	}
-	a = (int32*)b.array;
-	siz1 = 0;
-	for(i=0; i<b.len; i++) {
-		siz1 += runtime·runetochar(dum, a[i]);
-	}
-
-	s = gostringsize(siz1+4);
-	siz2 = 0;
-	for(i=0; i<b.len; i++) {
-		// check for race
-		if(siz2 >= siz1)
-			break;
-		siz2 += runtime·runetochar(s.str+siz2, a[i]);
-	}
-	s.len = siz2;
-	s.str[s.len] = 0;
-}
-
-func stringtoslicerune(s String) (b Slice) {
-	intgo n;
-	int32 dum, *r;
-	uint8 *p, *ep;
-	uintptr mem;
-
-	// two passes.
-	// unlike slicerunetostring, no race because strings are immutable.
-	p = s.str;
-	ep = s.str+s.len;
-	n = 0;
-	while(p < ep) {
-		p += runtime·charntorune(&dum, p, ep-p);
-		n++;
-	}
-
-	if(n > MaxMem/sizeof(r[0]))
-		runtime·throw("out of memory");
-	mem = runtime·roundupsize(n*sizeof(r[0]));
-	b.array = runtime·mallocgc(mem, 0, FlagNoScan|FlagNoZero);
-	b.len = n;
-	b.cap = mem/sizeof(r[0]);
-	p = s.str;
-	r = (int32*)b.array;
-	while(p < ep)
-		p += runtime·charntorune(r++, p, ep-p);
-	if(b.cap > b.len)
-		runtime·memclr(b.array+b.len*sizeof(r[0]), (b.cap-b.len)*sizeof(r[0]));
-}
-
-enum
-{
-	Runeself	= 0x80,
-};
-
-func stringiter(s String, k int) (retk int) {
-	int32 l;
-
-	if(k >= s.len) {
-		// retk=0 is end of iteration
-		retk = 0;
-		goto out;
-	}
-
-	l = s.str[k];
-	if(l < Runeself) {
-		retk = k+1;
-		goto out;
-	}
-
-	// multi-char rune
-	retk = k + runtime·charntorune(&l, s.str+k, s.len-k);
-
-out:
-}
-
-func stringiter2(s String, k int) (retk int, retv int32) {
-	if(k >= s.len) {
-		// retk=0 is end of iteration
-		retk = 0;
-		retv = 0;
-		goto out;
-	}
-
-	retv = s.str[k];
-	if(retv < Runeself) {
-		retk = k+1;
-		goto out;
-	}
-
-	// multi-char rune
-	retk = k + runtime·charntorune(&retv, s.str+k, s.len-k);
-
-out:
-}
diff --git a/src/pkg/runtime/string_test.go b/src/pkg/runtime/string_test.go
index df3ff06..dbccc24 100644
--- a/src/pkg/runtime/string_test.go
+++ b/src/pkg/runtime/string_test.go
@@ -75,3 +75,27 @@
 	}
 	b.SetBytes(int64(len(s1)))
 }
+
+func BenchmarkRuneIterate(b *testing.B) {
+	bytes := make([]byte, 100)
+	for i := range bytes {
+		bytes[i] = byte('A') // ASCII filler, so s is 100 single-byte characters
+	}
+	s := string(bytes)
+	for i := 0; i < b.N; i++ {
+		for _ = range s { // key-only form of range over a string; body intentionally empty
+		}
+	}
+}
+
+func BenchmarkRuneIterate2(b *testing.B) {
+	bytes := make([]byte, 100)
+	for i := range bytes {
+		bytes[i] = byte('A') // ASCII filler, so s is 100 single-byte characters
+	}
+	s := string(bytes)
+	for i := 0; i < b.N; i++ {
+		for _, _ = range s { // key-and-value form of range over a string; body intentionally empty
+		}
+	}
+}
diff --git a/src/pkg/runtime/stubs.go b/src/pkg/runtime/stubs.go
new file mode 100644
index 0000000..b19b0e0
--- /dev/null
+++ b/src/pkg/runtime/stubs.go
@@ -0,0 +1,30 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Declarations for runtime services implemented in C or assembly.
+// C implementations of these functions are in stubs.goc.
+// Assembly implementations are in various files; see the comments
+// with each function.
+
+// rawstring allocates storage for a new string of size bytes.
+// The returned string and byte slice both refer to the same
+// storage, which is not zeroed. Callers should fill in the string
+// contents through the byte slice and then drop the slice.
+func rawstring(size int) (string, []byte)
+
+// rawbyteslice allocates a new byte slice of length size; those bytes are not zeroed, but any spare capacity is.
+func rawbyteslice(size int) []byte
+
+// rawruneslice allocates a new rune slice of length size; those runes are not zeroed, but any spare capacity is.
+func rawruneslice(size int) []rune
+
+//go:noescape
+func gogetcallerpc(p unsafe.Pointer) uintptr // no Go body; go:noescape marks p as not escaping
+
+//go:noescape
+func racereadrangepc(addr unsafe.Pointer, len int, callpc, pc uintptr) // no Go body; go:noescape marks addr as not escaping
diff --git a/src/pkg/runtime/stubs.goc b/src/pkg/runtime/stubs.goc
new file mode 100644
index 0000000..bd493d4
--- /dev/null
+++ b/src/pkg/runtime/stubs.goc
@@ -0,0 +1,66 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+#include "runtime.h"
+#include "arch_GOARCH.h"
+#include "malloc.h"
+#include "../../cmd/ld/textflag.h"
+
+// This file contains functions called by Go but written
+// in C.  These functions are problematic for the garbage
+// collector and stack copier because we don't have
+// stack maps for them.  We must therefore ensure that the
+// garbage collector and stack copier cannot see these
+// frames, so we impose the following invariants:
+
+// 1) Functions should be marked NOSPLIT and call
+//    out to only NOSPLIT functions (recursively).
+// 2) Functions should not block.
+
+// While the runtime is being converted from C to Go, these
+// functions can still call out to splittable functions.  But
+// once the conversion is done, the invariants above should hold.
+
+#pragma textflag NOSPLIT
+func rawstring(size intgo) (s String, b Slice) {
+	byte *p;
+
+	p = runtime·mallocgc(size, 0, FlagNoScan|FlagNoZero);	// one allocation of size bytes backs both results
+	s.str = p;
+	s.len = size;
+	b.array = p;	// b aliases the string's storage
+	b.len = size;
+	b.cap = size;
+}
+
+#pragma textflag NOSPLIT
+func rawbyteslice(size intgo) (b Slice) {
+	uintptr cap;
+	byte *p;
+
+	cap = runtime·roundupsize(size);	// capacity is whatever roundupsize returns for size
+	p = runtime·mallocgc(cap, 0, FlagNoScan|FlagNoZero);
+	if(cap != size)
+		runtime·memclr(p + size, cap - size);	// clear only the bytes past len; the first size bytes are left as allocated
+	b.array = p;
+	b.len = size;
+	b.cap = cap;
+}
+
+#pragma textflag NOSPLIT
+func rawruneslice(size intgo) (b Slice) {
+	uintptr mem;
+	byte *p;
+
+	if(size > MaxMem/sizeof(int32))	// rune count whose byte size would exceed MaxMem
+		runtime·throw("out of memory");
+	mem = runtime·roundupsize(size*sizeof(int32));	// byte size of the allocation after roundupsize
+	p = runtime·mallocgc(mem, 0, FlagNoScan|FlagNoZero);
+	if(mem != size*sizeof(int32))
+		runtime·memclr(p + size*sizeof(int32), mem - size*sizeof(int32));	// clear only the runes past len
+	b.array = p;
+	b.len = size;
+	b.cap = mem/sizeof(int32);	// capacity is counted in runes, not bytes
+}