[dev.garbage] all: merge dev.cc into dev.garbage

The garbage collector is now written in Go.
There is plenty to clean up, just as there was on dev.cc.

all.bash passes on darwin/amd64, darwin/386, linux/amd64, linux/386.

TBR=rlh
R=austin, rlh, bradfitz
CC=golang-codereviews
https://golang.org/cl/173250043
diff --git a/src/runtime/alg.go b/src/runtime/alg.go
index e9ed595..e367bc5 100644
--- a/src/runtime/alg.go
+++ b/src/runtime/alg.go
@@ -314,9 +314,6 @@
 
 var aeskeysched [hashRandomBytes]byte
 
-//go:noescape
-func get_random_data(rnd *unsafe.Pointer, n *int32)
-
 func init() {
 	if theGoos == "nacl" {
 		return
diff --git a/src/runtime/arch1_386.go b/src/runtime/arch1_386.go
new file mode 100644
index 0000000..7746dfb
--- /dev/null
+++ b/src/runtime/arch1_386.go
@@ -0,0 +1,15 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	thechar           = '8'
+	_BigEndian        = 0
+	_CacheLineSize    = 64
+	_RuntimeGogoBytes = 64
+	_PhysPageSize     = _NaCl*65536 + (1-_NaCl)*4096 // 4k normally; 64k on NaCl
+	_PCQuantum        = 1
+	_Int64Align       = 4
+)
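
The new arch1_*.go files replace the per-arch C headers. Go constants cannot be
wrapped in #ifdef, so build-dependent values are computed arithmetically from 0/1
flag constants: _PhysPageSize above selects 64k or 4k depending on _NaCl, and
arch1_amd64.go below ORs OS flags for _RuntimeGogoBytes. A minimal standalone
sketch of the idiom, using illustrative names rather than the runtime's:

	package main

	// isNaCl would be 1 when targeting NaCl and 0 otherwise (illustrative flag,
	// standing in for the runtime's generated _NaCl constant).
	const isNaCl = 0

	// 64k pages on NaCl, 4k everywhere else, selected without any preprocessor.
	const physPageSize = isNaCl*65536 + (1-isNaCl)*4096

	func main() { println(physPageSize) }
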
diff --git a/src/runtime/arch1_amd64.go b/src/runtime/arch1_amd64.go
new file mode 100644
index 0000000..83c9c2d
--- /dev/null
+++ b/src/runtime/arch1_amd64.go
@@ -0,0 +1,15 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	thechar           = '6'
+	_BigEndian        = 0
+	_CacheLineSize    = 64
+	_RuntimeGogoBytes = 64 + (_Plan9|_Solaris|_Windows)*16
+	_PhysPageSize     = 4096
+	_PCQuantum        = 1
+	_Int64Align       = 8
+)
diff --git a/src/runtime/arch1_arm.go b/src/runtime/arch1_arm.go
new file mode 100644
index 0000000..5cb79fd
--- /dev/null
+++ b/src/runtime/arch1_arm.go
@@ -0,0 +1,15 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	thechar           = '5'
+	_BigEndian        = 0
+	_CacheLineSize    = 32
+	_RuntimeGogoBytes = 60
+	_PhysPageSize     = 65536*_NaCl + 4096*(1-_NaCl)
+	_PCQuantum        = 4
+	_Int64Align       = 4
+)
diff --git a/src/runtime/arch_386.h b/src/runtime/arch_386.h
deleted file mode 100644
index 75a5ba7..0000000
--- a/src/runtime/arch_386.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-enum {
-	thechar = '8',
-	BigEndian = 0,
-	CacheLineSize = 64,
-	RuntimeGogoBytes = 64,
-#ifdef GOOS_nacl
-	PhysPageSize = 65536,
-#else
-	PhysPageSize = 4096,
-#endif
-	PCQuantum = 1,
-	Int64Align = 4
-};
diff --git a/src/runtime/arch_amd64.h b/src/runtime/arch_amd64.h
deleted file mode 100644
index d7b81ee..0000000
--- a/src/runtime/arch_amd64.h
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-enum {
-	thechar = '6',
-	BigEndian = 0,
-	CacheLineSize = 64,
-#ifdef GOOS_solaris
-	RuntimeGogoBytes = 80,
-#else
-#ifdef GOOS_windows
-	RuntimeGogoBytes = 80,
-#else
-#ifdef GOOS_plan9
-	RuntimeGogoBytes = 80,
-#else
-	RuntimeGogoBytes = 64,
-#endif	// Plan 9
-#endif	// Windows
-#endif	// Solaris
-	PhysPageSize = 4096,
-	PCQuantum = 1,
-	Int64Align = 8
-};
diff --git a/src/runtime/arch_arm.h b/src/runtime/arch_arm.h
deleted file mode 100644
index 637a334..0000000
--- a/src/runtime/arch_arm.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-enum {
-	thechar = '5',
-	BigEndian = 0,
-	CacheLineSize = 32,
-	RuntimeGogoBytes = 60,
-#ifdef GOOS_nacl
-	PhysPageSize = 65536,
-#else
-	PhysPageSize = 4096,
-#endif
-	PCQuantum = 4,
-	Int64Align = 4
-};
diff --git a/src/runtime/asm.s b/src/runtime/asm.s
index e6d782f..f1c812b 100644
--- a/src/runtime/asm.s
+++ b/src/runtime/asm.s
@@ -12,3 +12,8 @@
 DATA runtime·no_pointers_stackmap+0x04(SB)/4, $0
 GLOBL runtime·no_pointers_stackmap(SB),RODATA, $8
 
+TEXT runtime·nop(SB),NOSPLIT,$0-0
+	RET
+
+GLOBL runtime·mheap_(SB), NOPTR, $0
+GLOBL runtime·memstats(SB), NOPTR, $0
diff --git a/src/runtime/asm_386.s b/src/runtime/asm_386.s
index 501e64b..a02bb55 100644
--- a/src/runtime/asm_386.s
+++ b/src/runtime/asm_386.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
 
@@ -49,7 +50,7 @@
 	// update stackguard after _cgo_init
 	MOVL	$runtime·g0(SB), CX
 	MOVL	(g_stack+stack_lo)(CX), AX
-	ADDL	$const_StackGuard, AX
+	ADDL	$const__StackGuard, AX
 	MOVL	AX, g_stackguard0(CX)
 	MOVL	AX, g_stackguard1(CX)
 
@@ -199,62 +200,49 @@
 	JMP	AX
 	RET
 
-// switchtoM is a dummy routine that onM leaves at the bottom
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
 // of the G stack.  We need to distinguish the routine that
 // lives at the bottom of the G stack from the one that lives
-// at the top of the M stack because the one at the top of
-// the M stack terminates the stack walk (see topofstack()).
-TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
+// at the top of the system stack because the one at the top of
+// the system stack terminates the stack walk (see topofstack()).
+TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
 	RET
 
-// func onM_signalok(fn func())
-TEXT runtime·onM_signalok(SB), NOSPLIT, $0-4
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB), NOSPLIT, $0-4
+	MOVL	fn+0(FP), DI	// DI = fn
 	get_tls(CX)
 	MOVL	g(CX), AX	// AX = g
 	MOVL	g_m(AX), BX	// BX = m
+
 	MOVL	m_gsignal(BX), DX	// DX = gsignal
 	CMPL	AX, DX
-	JEQ	ongsignal
-	JMP	runtime·onM(SB)
-
-ongsignal:
-	MOVL	fn+0(FP), DI	// DI = fn
-	MOVL	DI, DX
-	MOVL	0(DI), DI
-	CALL	DI
-	RET
-
-// func onM(fn func())
-TEXT runtime·onM(SB), NOSPLIT, $0-4
-	MOVL	fn+0(FP), DI	// DI = fn
-	get_tls(CX)
-	MOVL	g(CX), AX	// AX = g
-	MOVL	g_m(AX), BX	// BX = m
+	JEQ	noswitch
 
 	MOVL	m_g0(BX), DX	// DX = g0
 	CMPL	AX, DX
-	JEQ	onm
+	JEQ	noswitch
 
 	MOVL	m_curg(BX), BP
 	CMPL	AX, BP
-	JEQ	oncurg
+	JEQ	switch
 	
-	// Not g0, not curg. Must be gsignal, but that's not allowed.
+	// Bad: g is not gsignal, not g0, not curg. What is it?
 	// Hide call from linker nosplit analysis.
-	MOVL	$runtime·badonm(SB), AX
+	MOVL	$runtime·badsystemstack(SB), AX
 	CALL	AX
 
-oncurg:
+switch:
 	// save our state in g->sched.  Pretend to
-	// be switchtoM if the G stack is scanned.
-	MOVL	$runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX)
+	// be systemstack_switch if the G stack is scanned.
+	MOVL	$runtime·systemstack_switch(SB), (g_sched+gobuf_pc)(AX)
 	MOVL	SP, (g_sched+gobuf_sp)(AX)
 	MOVL	AX, (g_sched+gobuf_g)(AX)
 
 	// switch to g0
 	MOVL	DX, g(CX)
 	MOVL	(g_sched+gobuf_sp)(DX), BX
-	// make it look like mstart called onM on g0, to stop traceback
+	// make it look like mstart called systemstack on g0, to stop traceback
 	SUBL	$4, BX
 	MOVL	$runtime·mstart(SB), DX
 	MOVL	DX, 0(BX)
@@ -275,8 +263,8 @@
 	MOVL	$0, (g_sched+gobuf_sp)(AX)
 	RET
 
-onm:
-	// already on m stack, just call directly
+noswitch:
+	// already on system stack, just call directly
 	MOVL	DI, DX
 	MOVL	0(DI), DI
 	CALL	DI
@@ -740,7 +728,7 @@
 	// the same SP back to m->sched.sp. That seems redundant,
 	// but if an unrecovered panic happens, unwindm will
 	// restore the g->sched.sp from the stack location
-	// and then onM will try to use it. If we don't set it here,
+	// and then systemstack will try to use it. If we don't set it here,
 	// that restored SP will be uninitialized (typically 0) and
 	// will not be usable.
 	MOVL	m_g0(BP), SI
@@ -2290,3 +2278,10 @@
 TEXT runtime·goexit(SB),NOSPLIT,$0-0
 	BYTE	$0x90	// NOP
 	CALL	runtime·goexit1(SB)	// does not return
+
+TEXT runtime·getg(SB),NOSPLIT,$0-4
+	get_tls(CX)
+	MOVL	g(CX), AX
+	MOVL	AX, ret+0(FP)
+	RET
+
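
The onM/onM_signalok pair is folded into a single systemstack entry point that
tolerates being called from the signal stack, g0, or the system stack (the
noswitch path). On the Go side, callers now pass an ordinary closure instead of
loading arguments into m.scalararg before calling onM, as the cpuprof.go and
debug.go hunks later in this CL show. A sketch of the new call shape inside
runtime code (not standalone; setcpuprofilerate_m and hz are from the cpuprof.go
hunk):

	// Run the body on the g0 stack; the argument rides in the closure,
	// replacing the old g.m.scalararg[0] = uintptr(hz); onM(...) dance.
	systemstack(func() {
		setcpuprofilerate_m(hz)
	})
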
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index 1aa2d71..6e3f5ff 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
 
@@ -47,7 +48,7 @@
 	// update stackguard after _cgo_init
 	MOVQ	$runtime·g0(SB), CX
 	MOVQ	(g_stack+stack_lo)(CX), AX
-	ADDQ	$const_StackGuard, AX
+	ADDQ	$const__StackGuard, AX
 	MOVQ	AX, g_stackguard0(CX)
 	MOVQ	AX, g_stackguard1(CX)
 
@@ -189,55 +190,41 @@
 	JMP	AX
 	RET
 
-// switchtoM is a dummy routine that onM leaves at the bottom
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
 // of the G stack.  We need to distinguish the routine that
 // lives at the bottom of the G stack from the one that lives
-// at the top of the M stack because the one at the top of
-// the M stack terminates the stack walk (see topofstack()).
-TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
+// at the top of the system stack because the one at the top of
+// the system stack terminates the stack walk (see topofstack()).
+TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
 	RET
 
-// func onM_signalok(fn func())
-TEXT runtime·onM_signalok(SB), NOSPLIT, $0-8
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB), NOSPLIT, $0-8
+	MOVQ	fn+0(FP), DI	// DI = fn
 	get_tls(CX)
 	MOVQ	g(CX), AX	// AX = g
 	MOVQ	g_m(AX), BX	// BX = m
+
 	MOVQ	m_gsignal(BX), DX	// DX = gsignal
 	CMPQ	AX, DX
-	JEQ	ongsignal
-	JMP	runtime·onM(SB)
-
-ongsignal:
-	MOVQ	fn+0(FP), DI	// DI = fn
-	MOVQ	DI, DX
-	MOVQ	0(DI), DI
-	CALL	DI
-	RET
-
-// func onM(fn func())
-TEXT runtime·onM(SB), NOSPLIT, $0-8
-	MOVQ	fn+0(FP), DI	// DI = fn
-	get_tls(CX)
-	MOVQ	g(CX), AX	// AX = g
-	MOVQ	g_m(AX), BX	// BX = m
+	JEQ	noswitch
 
 	MOVQ	m_g0(BX), DX	// DX = g0
 	CMPQ	AX, DX
-	JEQ	onm
+	JEQ	noswitch
 
 	MOVQ	m_curg(BX), BP
 	CMPQ	AX, BP
-	JEQ	oncurg
+	JEQ	switch
 	
-	// Not g0, not curg. Must be gsignal, but that's not allowed.
-	// Hide call from linker nosplit analysis.
-	MOVQ	$runtime·badonm(SB), AX
+	// Bad: g is not gsignal, not g0, not curg. What is it?
+	MOVQ	$runtime·badsystemstack(SB), AX
 	CALL	AX
 
-oncurg:
+switch:
 	// save our state in g->sched.  Pretend to
-	// be switchtoM if the G stack is scanned.
-	MOVQ	$runtime·switchtoM(SB), BP
+	// be systemstack_switch if the G stack is scanned.
+	MOVQ	$runtime·systemstack_switch(SB), BP
 	MOVQ	BP, (g_sched+gobuf_pc)(AX)
 	MOVQ	SP, (g_sched+gobuf_sp)(AX)
 	MOVQ	AX, (g_sched+gobuf_g)(AX)
@@ -245,7 +232,7 @@
 	// switch to g0
 	MOVQ	DX, g(CX)
 	MOVQ	(g_sched+gobuf_sp)(DX), BX
-	// make it look like mstart called onM on g0, to stop traceback
+	// make it look like mstart called systemstack on g0, to stop traceback
 	SUBQ	$8, BX
 	MOVQ	$runtime·mstart(SB), DX
 	MOVQ	DX, 0(BX)
@@ -266,7 +253,7 @@
 	MOVQ	$0, (g_sched+gobuf_sp)(AX)
 	RET
 
-onm:
+noswitch:
 	// already on m stack, just call directly
 	MOVQ	DI, DX
 	MOVQ	0(DI), DI
@@ -726,7 +713,7 @@
 	// the same SP back to m->sched.sp. That seems redundant,
 	// but if an unrecovered panic happens, unwindm will
 	// restore the g->sched.sp from the stack location
-	// and then onM will try to use it. If we don't set it here,
+	// and then systemstack will try to use it. If we don't set it here,
 	// that restored SP will be uninitialized (typically 0) and
 	// will not be usable.
 	MOVQ	m_g0(BP), SI
@@ -2235,3 +2222,9 @@
 TEXT runtime·goexit(SB),NOSPLIT,$0-0
 	BYTE	$0x90	// NOP
 	CALL	runtime·goexit1(SB)	// does not return
+
+TEXT runtime·getg(SB),NOSPLIT,$0-8
+	get_tls(CX)
+	MOVQ	g(CX), AX
+	MOVQ	AX, ret+0(FP)
+	RET
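
Each port also gains a small runtime·getg stub that loads the current g out of
thread-local storage and returns it, so runtime Go code can fetch g directly. The
matching Go-side declaration is presumably just a bodyless prototype along these
lines (a sketch; the actual declaration lives elsewhere in the runtime sources):

	// getg returns the pointer to the current goroutine's g structure;
	// the body is the per-arch assembly added in this CL.
	func getg() *g
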
diff --git a/src/runtime/asm_amd64p32.s b/src/runtime/asm_amd64p32.s
index 153564b..cead3cd 100644
--- a/src/runtime/asm_amd64p32.s
+++ b/src/runtime/asm_amd64p32.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
 
@@ -164,55 +165,42 @@
 	JMP	AX
 	RET
 
-// switchtoM is a dummy routine that onM leaves at the bottom
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
 // of the G stack.  We need to distinguish the routine that
 // lives at the bottom of the G stack from the one that lives
-// at the top of the M stack because the one at the top of
+// at the top of the system stack because the one at the top of
 // the M stack terminates the stack walk (see topofstack()).
-TEXT runtime·switchtoM(SB), NOSPLIT, $0-0
+TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
 	RET
 
-// func onM_signalok(fn func())
-TEXT runtime·onM_signalok(SB), NOSPLIT, $0-4
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB), NOSPLIT, $0-4
+	MOVL	fn+0(FP), DI	// DI = fn
 	get_tls(CX)
 	MOVL	g(CX), AX	// AX = g
 	MOVL	g_m(AX), BX	// BX = m
+
 	MOVL	m_gsignal(BX), DX	// DX = gsignal
 	CMPL	AX, DX
-	JEQ	ongsignal
-	JMP	runtime·onM(SB)
-
-ongsignal:
-	MOVL	fn+0(FP), DI	// DI = fn
-	MOVL	DI, DX
-	MOVL	0(DI), DI
-	CALL	DI
-	RET
-
-// func onM(fn func())
-TEXT runtime·onM(SB), NOSPLIT, $0-4
-	MOVL	fn+0(FP), DI	// DI = fn
-	get_tls(CX)
-	MOVL	g(CX), AX	// AX = g
-	MOVL	g_m(AX), BX	// BX = m
+	JEQ	noswitch
 
 	MOVL	m_g0(BX), DX	// DX = g0
 	CMPL	AX, DX
-	JEQ	onm
+	JEQ	noswitch
 
 	MOVL	m_curg(BX), R8
 	CMPL	AX, R8
-	JEQ	oncurg
+	JEQ	switch
 	
 	// Not g0, not curg. Must be gsignal, but that's not allowed.
 	// Hide call from linker nosplit analysis.
-	MOVL	$runtime·badonm(SB), AX
+	MOVL	$runtime·badsystemstack(SB), AX
 	CALL	AX
 
-oncurg:
+switch:
 	// save our state in g->sched.  Pretend to
-	// be switchtoM if the G stack is scanned.
-	MOVL	$runtime·switchtoM(SB), SI
+	// be systemstack_switch if the G stack is scanned.
+	MOVL	$runtime·systemstack_switch(SB), SI
 	MOVL	SI, (g_sched+gobuf_pc)(AX)
 	MOVL	SP, (g_sched+gobuf_sp)(AX)
 	MOVL	AX, (g_sched+gobuf_g)(AX)
@@ -236,7 +224,7 @@
 	MOVL	$0, (g_sched+gobuf_sp)(AX)
 	RET
 
-onm:
+noswitch:
 	// already on m stack, just call directly
 	MOVL	DI, DX
 	MOVL	0(DI), DI
@@ -1085,3 +1073,9 @@
 TEXT runtime·goexit(SB),NOSPLIT,$0-0
 	BYTE	$0x90	// NOP
 	CALL	runtime·goexit1(SB)	// does not return
+
+TEXT runtime·getg(SB),NOSPLIT,$0-4
+	get_tls(CX)
+	MOVL	g(CX), AX
+	MOVL	AX, ret+0(FP)
+	RET
diff --git a/src/runtime/asm_arm.s b/src/runtime/asm_arm.s
index 58aebf3..583c7ba 100644
--- a/src/runtime/asm_arm.s
+++ b/src/runtime/asm_arm.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
 
@@ -54,7 +55,7 @@
 nocgo:
 	// update stackguard after _cgo_init
 	MOVW	(g_stack+stack_lo)(g), R0
-	ADD	$const_StackGuard, R0
+	ADD	$const__StackGuard, R0
 	MOVW	R0, g_stackguard0(g)
 	MOVW	R0, g_stackguard1(g)
 
@@ -190,53 +191,42 @@
 	B	runtime·badmcall2(SB)
 	RET
 
-// switchtoM is a dummy routine that onM leaves at the bottom
+// systemstack_switch is a dummy routine that systemstack leaves at the bottom
 // of the G stack.  We need to distinguish the routine that
 // lives at the bottom of the G stack from the one that lives
-// at the top of the M stack because the one at the top of
-// the M stack terminates the stack walk (see topofstack()).
-TEXT runtime·switchtoM(SB),NOSPLIT,$0-0
+// at the top of the system stack because the one at the top of
+// the system stack terminates the stack walk (see topofstack()).
+TEXT runtime·systemstack_switch(SB),NOSPLIT,$0-0
 	MOVW	$0, R0
 	BL	(R0) // clobber lr to ensure push {lr} is kept
 	RET
 
-// func onM_signalok(fn func())
-TEXT runtime·onM_signalok(SB), NOSPLIT, $-4-4
-	MOVW	g_m(g), R1
-	MOVW	m_gsignal(R1), R2
-	CMP	g, R2
-	B.EQ	ongsignal
-	B	runtime·onM(SB)
-
-ongsignal:
-	MOVW	fn+0(FP), R0
-	MOVW	R0, R7
-	MOVW	0(R0), R0
-	BL	(R0)
-	RET
-
-// func onM(fn func())
-TEXT runtime·onM(SB),NOSPLIT,$0-4
+// func systemstack(fn func())
+TEXT runtime·systemstack(SB),NOSPLIT,$0-4
 	MOVW	fn+0(FP), R0	// R0 = fn
 	MOVW	g_m(g), R1	// R1 = m
 
+	MOVW	m_gsignal(R1), R2	// R2 = gsignal
+	CMP	g, R2
+	B.EQ	noswitch
+
 	MOVW	m_g0(R1), R2	// R2 = g0
 	CMP	g, R2
-	B.EQ	onm
+	B.EQ	noswitch
 
 	MOVW	m_curg(R1), R3
 	CMP	g, R3
-	B.EQ	oncurg
+	B.EQ	switch
 
-	// Not g0, not curg. Must be gsignal, but that's not allowed.
+	// Bad: g is not gsignal, not g0, not curg. What is it?
 	// Hide call from linker nosplit analysis.
-	MOVW	$runtime·badonm(SB), R0
+	MOVW	$runtime·badsystemstack(SB), R0
 	BL	(R0)
 
-oncurg:
+switch:
 	// save our state in g->sched.  Pretend to
-	// be switchtoM if the G stack is scanned.
-	MOVW	$runtime·switchtoM(SB), R3
+	// be systemstack_switch if the G stack is scanned.
+	MOVW	$runtime·systemstack_switch(SB), R3
 	ADD	$4, R3, R3 // get past push {lr}
 	MOVW	R3, (g_sched+gobuf_pc)(g)
 	MOVW	SP, (g_sched+gobuf_sp)(g)
@@ -249,7 +239,7 @@
 	BL	setg<>(SB)
 	MOVW	R5, R0
 	MOVW	(g_sched+gobuf_sp)(R2), R3
-	// make it look like mstart called onM on g0, to stop traceback
+	// make it look like mstart called systemstack on g0, to stop traceback
 	SUB	$4, R3, R3
 	MOVW	$runtime·mstart(SB), R4
 	MOVW	R4, 0(R3)
@@ -269,7 +259,7 @@
 	MOVW	R3, (g_sched+gobuf_sp)(g)
 	RET
 
-onm:
+noswitch:
 	MOVW	R0, R7
 	MOVW	0(R0), R0
 	BL	(R0)
@@ -564,7 +554,7 @@
 	// the same SP back to m->sched.sp. That seems redundant,
 	// but if an unrecovered panic happens, unwindm will
 	// restore the g->sched.sp from the stack location
-	// and then onM will try to use it. If we don't set it here,
+	// and then systemstack will try to use it. If we don't set it here,
 	// that restored SP will be uninitialized (typically 0) and
 	// will not be usable.
 	MOVW	g_m(g), R8
@@ -1326,3 +1316,7 @@
 TEXT runtime·goexit(SB),NOSPLIT,$-4-0
 	MOVW	R0, R0	// NOP
 	BL	runtime·goexit1(SB)	// does not return
+
+TEXT runtime·getg(SB),NOSPLIT,$-4-4
+	MOVW	g, ret+0(FP)
+	RET
diff --git a/src/runtime/atomic.go b/src/runtime/atomic.go
deleted file mode 100644
index a0e4d84..0000000
--- a/src/runtime/atomic.go
+++ /dev/null
@@ -1,81 +0,0 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !arm
-
-package runtime
-
-import "unsafe"
-
-//go:noescape
-func xadd(ptr *uint32, delta int32) uint32
-
-//go:noescape
-func xadd64(ptr *uint64, delta int64) uint64
-
-//go:noescape
-func xchg(ptr *uint32, new uint32) uint32
-
-//go:noescape
-func xchg64(ptr *uint64, new uint64) uint64
-
-// Cannot use noescape here: ptr does not but new does escape.
-// Instead use noescape(ptr) in wrapper below.
-func xchgp1(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
-
-//go:nosplit
-func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
-	old := xchgp1(noescape(ptr), new)
-	writebarrierptr_nostore((*uintptr)(ptr), uintptr(new))
-	return old
-}
-
-//go:noescape
-func xchguintptr(ptr *uintptr, new uintptr) uintptr
-
-//go:noescape
-func atomicload(ptr *uint32) uint32
-
-//go:noescape
-func atomicload64(ptr *uint64) uint64
-
-//go:noescape
-func atomicloadp(ptr unsafe.Pointer) unsafe.Pointer
-
-//go:noescape
-func atomicor8(ptr *uint8, val uint8)
-
-//go:noescape
-func cas64(ptr *uint64, old, new uint64) bool
-
-//go:noescape
-func atomicstore(ptr *uint32, val uint32)
-
-//go:noescape
-func atomicstore64(ptr *uint64, val uint64)
-
-// Cannot use noescape here: ptr does not but val does escape.
-// Instead use noescape(ptr) in wrapper below.
-func atomicstorep1(ptr unsafe.Pointer, val unsafe.Pointer)
-
-//go:nosplit
-func atomicstorep(ptr unsafe.Pointer, val unsafe.Pointer) {
-	atomicstorep1(noescape(ptr), val)
-	// TODO(rsc): Why does the compiler think writebarrierptr_nostore's dst argument escapes?
-	writebarrierptr_nostore((*uintptr)(noescape(ptr)), uintptr(val))
-}
-
-// Cannot use noescape here: ptr does not but new does escape.
-// Instead use noescape(ptr) in wrapper below.
-func casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
-
-//go:nosplit
-func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
-	ok := casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), old, new)
-	if !ok {
-		return false
-	}
-	writebarrierptr_nostore((*uintptr)(unsafe.Pointer(ptr)), uintptr(new))
-	return true
-}
diff --git a/src/runtime/atomic_386.c b/src/runtime/atomic_386.c
deleted file mode 100644
index 82d36f2..0000000
--- a/src/runtime/atomic_386.c
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "textflag.h"
-
-#pragma textflag NOSPLIT
-uint32
-runtime·atomicload(uint32 volatile* addr)
-{
-	return *addr;
-}
-
-#pragma textflag NOSPLIT
-void*
-runtime·atomicloadp(void* volatile* addr)
-{
-	return *addr;
-}
-
-#pragma textflag NOSPLIT
-uint64
-runtime·xadd64(uint64 volatile* addr, int64 v)
-{
-	uint64 old;
-
-	do
-		old = *addr;
-	while(!runtime·cas64(addr, old, old+v));
-
-	return old+v;
-}
-
-#pragma textflag NOSPLIT
-uint64
-runtime·xchg64(uint64 volatile* addr, uint64 v)
-{
-	uint64 old;
-
-	do
-		old = *addr;
-	while(!runtime·cas64(addr, old, v));
-
-	return old;
-}
diff --git a/src/runtime/atomic_386.go b/src/runtime/atomic_386.go
new file mode 100644
index 0000000..5563432
--- /dev/null
+++ b/src/runtime/atomic_386.go
@@ -0,0 +1,91 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// The calls to nop are to keep these functions from being inlined.
+// If they are inlined we have no guarantee that later rewrites of the
+// code by optimizers will preserve the relative order of memory accesses.
+
+//go:nosplit
+func atomicload(ptr *uint32) uint32 {
+	nop()
+	return *ptr
+}
+
+//go:nosplit
+func atomicloadp(ptr unsafe.Pointer) unsafe.Pointer {
+	nop()
+	return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+func xadd64(ptr *uint64, delta int64) uint64 {
+	for {
+		old := *ptr
+		if cas64(ptr, old, old+uint64(delta)) {
+			return old + uint64(delta)
+		}
+	}
+}
+
+//go:nosplit
+func xchg64(ptr *uint64, new uint64) uint64 {
+	for {
+		old := *ptr
+		if cas64(ptr, old, new) {
+			return old
+		}
+	}
+}
+
+//go:noescape
+func xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func xchg(ptr *uint32, new uint32) uint32
+
+// xchgp cannot have a go:noescape annotation, because
+// while ptr does not escape, new does. If new is marked as
+// not escaping, the compiler will make incorrect escape analysis
+// decisions about the value being xchg'ed.
+// Instead, make xchgp a wrapper around the actual atomic.
+// When calling the wrapper we mark ptr as noescape explicitly.
+
+//go:nosplit
+func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
+	return xchgp1(noescape(ptr), new)
+}
+
+func xchgp1(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func atomicload64(ptr *uint64) uint64
+
+//go:noescape
+func atomicor8(ptr *uint8, val uint8)
+
+//go:noescape
+func cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func atomicstore(ptr *uint32, val uint32)
+
+//go:noescape
+func atomicstore64(ptr *uint64, val uint64)
+
+// atomicstorep cannot have a go:noescape annotation.
+// See comment above for xchgp.
+
+//go:nosplit
+func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
+	atomicstorep1(noescape(ptr), new)
+}
+
+func atomicstorep1(ptr unsafe.Pointer, val unsafe.Pointer)
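
The comment above xchgp spells out why it cannot carry //go:noescape: that
annotation covers every parameter, and new genuinely escapes. The workaround is a
//go:nosplit wrapper that launders only ptr through a noescape helper before
calling the raw atomic. The helper itself is not part of this file; in the
runtime it is conventionally an identity function written so that escape
analysis loses track of the pointer, roughly like this sketch (hedged; the exact
definition may differ):

	// noescape hides p from escape analysis. The xor with 0 is an identity
	// operation at run time, but the compiler does not see through it, so p
	// is not reported as escaping.
	//go:nosplit
	func noescape(p unsafe.Pointer) unsafe.Pointer {
		x := uintptr(p)
		return unsafe.Pointer(x ^ 0)
	}
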
diff --git a/src/runtime/atomic_amd64x.c b/src/runtime/atomic_amd64x.c
deleted file mode 100644
index 7be57ac..0000000
--- a/src/runtime/atomic_amd64x.c
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64 amd64p32
-
-#include "runtime.h"
-#include "textflag.h"
-
-#pragma textflag NOSPLIT
-uint32
-runtime·atomicload(uint32 volatile* addr)
-{
-	return *addr;
-}
-
-#pragma textflag NOSPLIT
-uint64
-runtime·atomicload64(uint64 volatile* addr)
-{
-	return *addr;
-}
-
-#pragma textflag NOSPLIT
-void*
-runtime·atomicloadp(void* volatile* addr)
-{
-	return *addr;
-}
diff --git a/src/runtime/atomic_amd64x.go b/src/runtime/atomic_amd64x.go
new file mode 100644
index 0000000..f2dd584
--- /dev/null
+++ b/src/runtime/atomic_amd64x.go
@@ -0,0 +1,82 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+
+package runtime
+
+import "unsafe"
+
+// The calls to nop are to keep these functions from being inlined.
+// If they are inlined we have no guarantee that later rewrites of the
+// code by optimizers will preserve the relative order of memory accesses.
+
+//go:nosplit
+func atomicload(ptr *uint32) uint32 {
+	nop()
+	return *ptr
+}
+
+//go:nosplit
+func atomicloadp(ptr unsafe.Pointer) unsafe.Pointer {
+	nop()
+	return *(*unsafe.Pointer)(ptr)
+}
+
+//go:nosplit
+func atomicload64(ptr *uint64) uint64 {
+	nop()
+	return *ptr
+}
+
+//go:noescape
+func xadd(ptr *uint32, delta int32) uint32
+
+//go:noescape
+func xadd64(ptr *uint64, delta int64) uint64
+
+//go:noescape
+func xchg(ptr *uint32, new uint32) uint32
+
+//go:noescape
+func xchg64(ptr *uint64, new uint64) uint64
+
+// xchgp cannot have a go:noescape annotation, because
+// while ptr does not escape, new does. If new is marked as
+// not escaping, the compiler will make incorrect escape analysis
+// decisions about the value being xchg'ed.
+// Instead, make xchgp a wrapper around the actual atomic.
+// When calling the wrapper we mark ptr as noescape explicitly.
+
+//go:nosplit
+func xchgp(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer {
+	return xchgp1(noescape(ptr), new)
+}
+
+func xchgp1(ptr unsafe.Pointer, new unsafe.Pointer) unsafe.Pointer
+
+//go:noescape
+func xchguintptr(ptr *uintptr, new uintptr) uintptr
+
+//go:noescape
+func atomicor8(ptr *uint8, val uint8)
+
+//go:noescape
+func cas64(ptr *uint64, old, new uint64) bool
+
+//go:noescape
+func atomicstore(ptr *uint32, val uint32)
+
+//go:noescape
+func atomicstore64(ptr *uint64, val uint64)
+
+// atomicstorep cannot have a go:noescape annotation.
+// See comment above for xchgp.
+
+//go:nosplit
+func atomicstorep(ptr unsafe.Pointer, new unsafe.Pointer) {
+	atomicstorep1(noescape(ptr), new)
+}
+
+func atomicstorep1(ptr unsafe.Pointer, val unsafe.Pointer)
diff --git a/src/runtime/atomic_arm.go b/src/runtime/atomic_arm.go
index b1632cd..fd55a0a 100644
--- a/src/runtime/atomic_arm.go
+++ b/src/runtime/atomic_arm.go
@@ -85,7 +85,7 @@
 //go:nosplit
 func cas64(addr *uint64, old, new uint64) bool {
 	var ok bool
-	onM(func() {
+	systemstack(func() {
 		lock(addrLock(addr))
 		if *addr == old {
 			*addr = new
@@ -99,7 +99,7 @@
 //go:nosplit
 func xadd64(addr *uint64, delta int64) uint64 {
 	var r uint64
-	onM(func() {
+	systemstack(func() {
 		lock(addrLock(addr))
 		r = *addr + uint64(delta)
 		*addr = r
@@ -111,7 +111,7 @@
 //go:nosplit
 func xchg64(addr *uint64, v uint64) uint64 {
 	var r uint64
-	onM(func() {
+	systemstack(func() {
 		lock(addrLock(addr))
 		r = *addr
 		*addr = v
@@ -123,7 +123,7 @@
 //go:nosplit
 func atomicload64(addr *uint64) uint64 {
 	var r uint64
-	onM(func() {
+	systemstack(func() {
 		lock(addrLock(addr))
 		r = *addr
 		unlock(addrLock(addr))
@@ -133,7 +133,7 @@
 
 //go:nosplit
 func atomicstore64(addr *uint64, v uint64) {
-	onM(func() {
+	systemstack(func() {
 		lock(addrLock(addr))
 		*addr = v
 		unlock(addrLock(addr))
diff --git a/src/runtime/cgo.go b/src/runtime/cgo.go
new file mode 100644
index 0000000..7e6b253
--- /dev/null
+++ b/src/runtime/cgo.go
@@ -0,0 +1,23 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//go:cgo_export_static main
+
+// Filled in by runtime/cgo when linked into binary.
+
+//go:linkname _cgo_init _cgo_init
+//go:linkname _cgo_malloc _cgo_malloc
+//go:linkname _cgo_free _cgo_free
+//go:linkname _cgo_thread_start _cgo_thread_start
+
+var (
+	_cgo_init         unsafe.Pointer
+	_cgo_malloc       unsafe.Pointer
+	_cgo_free         unsafe.Pointer
+	_cgo_thread_start unsafe.Pointer
+)
diff --git a/src/runtime/cgo/callbacks.c b/src/runtime/cgo/callbacks.c
deleted file mode 100644
index 282beee..0000000
--- a/src/runtime/cgo/callbacks.c
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright 2011 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "../runtime.h"
-#include "../cgocall.h"
-#include "textflag.h"
-
-// These utility functions are available to be called from code
-// compiled with gcc via crosscall2.
-
-// The declaration of crosscall2 is:
-//   void crosscall2(void (*fn)(void *, int), void *, int);
-// 
-// We need to export the symbol crosscall2 in order to support
-// callbacks from shared libraries. This applies regardless of
-// linking mode.
-#pragma cgo_export_static crosscall2
-#pragma cgo_export_dynamic crosscall2
-
-// Allocate memory.  This allocates the requested number of bytes in
-// memory controlled by the Go runtime.  The allocated memory will be
-// zeroed.  You are responsible for ensuring that the Go garbage
-// collector can see a pointer to the allocated memory for as long as
-// it is valid, e.g., by storing a pointer in a local variable in your
-// C function, or in memory allocated by the Go runtime.  If the only
-// pointers are in a C global variable or in memory allocated via
-// malloc, then the Go garbage collector may collect the memory.
-
-// Call like this in code compiled with gcc:
-//   struct { size_t len; void *ret; } a;
-//   a.len = /* number of bytes to allocate */;
-//   crosscall2(_cgo_allocate, &a, sizeof a);
-//   /* Here a.ret is a pointer to the allocated memory.  */
-
-void runtime·_cgo_allocate_internal(void);
-
-#pragma cgo_export_static _cgo_allocate
-#pragma cgo_export_dynamic _cgo_allocate
-#pragma textflag NOSPLIT
-void
-_cgo_allocate(void *a, int32 n)
-{
-	runtime·cgocallback((void(*)(void))runtime·_cgo_allocate_internal, a, n);
-}
-
-// Panic.  The argument is converted into a Go string.
-
-// Call like this in code compiled with gcc:
-//   struct { const char *p; } a;
-//   a.p = /* string to pass to panic */;
-//   crosscall2(_cgo_panic, &a, sizeof a);
-//   /* The function call will not return.  */
-
-void runtime·_cgo_panic_internal(void);
-
-#pragma cgo_export_static _cgo_panic
-#pragma cgo_export_dynamic _cgo_panic
-#pragma textflag NOSPLIT
-void
-_cgo_panic(void *a, int32 n)
-{
-	runtime·cgocallback((void(*)(void))runtime·_cgo_panic_internal, a, n);
-}
-
-#pragma cgo_import_static x_cgo_init
-extern void x_cgo_init(G*);
-void (*_cgo_init)(G*) = x_cgo_init;
-
-#pragma cgo_import_static x_cgo_malloc
-extern void x_cgo_malloc(void*);
-void (*_cgo_malloc)(void*) = x_cgo_malloc;
-
-#pragma cgo_import_static x_cgo_free
-extern void x_cgo_free(void*);
-void (*_cgo_free)(void*) = x_cgo_free;
-
-#pragma cgo_import_static x_cgo_thread_start
-extern void x_cgo_thread_start(void*);
-void (*_cgo_thread_start)(void*) = x_cgo_thread_start;
-
-#pragma cgo_export_static _cgo_topofstack
-#pragma cgo_export_dynamic _cgo_topofstack
diff --git a/src/runtime/cgo/callbacks.go b/src/runtime/cgo/callbacks.go
new file mode 100644
index 0000000..1e8b590
--- /dev/null
+++ b/src/runtime/cgo/callbacks.go
@@ -0,0 +1,95 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package cgo
+
+import "unsafe"
+
+// These utility functions are available to be called from code
+// compiled with gcc via crosscall2.
+
+// cgocallback is defined in runtime
+//go:linkname _runtime_cgocallback runtime.cgocallback
+func _runtime_cgocallback(unsafe.Pointer, unsafe.Pointer, uintptr)
+
+// The declaration of crosscall2 is:
+//   void crosscall2(void (*fn)(void *, int), void *, int);
+//
+// We need to export the symbol crosscall2 in order to support
+// callbacks from shared libraries. This applies regardless of
+// linking mode.
+//go:cgo_export_static crosscall2
+//go:cgo_export_dynamic crosscall2
+
+// Allocate memory.  This allocates the requested number of bytes in
+// memory controlled by the Go runtime.  The allocated memory will be
+// zeroed.  You are responsible for ensuring that the Go garbage
+// collector can see a pointer to the allocated memory for as long as
+// it is valid, e.g., by storing a pointer in a local variable in your
+// C function, or in memory allocated by the Go runtime.  If the only
+// pointers are in a C global variable or in memory allocated via
+// malloc, then the Go garbage collector may collect the memory.
+
+// Call like this in code compiled with gcc:
+//   struct { size_t len; void *ret; } a;
+//   a.len = /* number of bytes to allocate */;
+//   crosscall2(_cgo_allocate, &a, sizeof a);
+//   /* Here a.ret is a pointer to the allocated memory.  */
+
+//go:linkname _runtime_cgo_allocate_internal runtime._cgo_allocate_internal
+var _runtime_cgo_allocate_internal byte
+
+//go:linkname _cgo_allocate _cgo_allocate
+//go:cgo_export_static _cgo_allocate
+//go:cgo_export_dynamic _cgo_allocate
+//go:nosplit
+func _cgo_allocate(a unsafe.Pointer, n int32) {
+	_runtime_cgocallback(unsafe.Pointer(&_runtime_cgo_allocate_internal), a, uintptr(n))
+}
+
+// Panic.  The argument is converted into a Go string.
+
+// Call like this in code compiled with gcc:
+//   struct { const char *p; } a;
+//   a.p = /* string to pass to panic */;
+//   crosscall2(_cgo_panic, &a, sizeof a);
+//   /* The function call will not return.  */
+
+//go:linkname _runtime_cgo_panic_internal runtime._cgo_panic_internal
+var _runtime_cgo_panic_internal byte
+
+//go:linkname _cgo_panic _cgo_panic
+//go:cgo_export_static _cgo_panic
+//go:cgo_export_dynamic _cgo_panic
+//go:nosplit
+func _cgo_panic(a unsafe.Pointer, n int32) {
+	_runtime_cgocallback(unsafe.Pointer(&_runtime_cgo_panic_internal), a, uintptr(n))
+}
+
+//go:cgo_import_static x_cgo_init
+//go:linkname x_cgo_init x_cgo_init
+//go:linkname _cgo_init _cgo_init
+var x_cgo_init byte
+var _cgo_init = &x_cgo_init
+
+//go:cgo_import_static x_cgo_malloc
+//go:linkname x_cgo_malloc x_cgo_malloc
+//go:linkname _cgo_malloc _cgo_malloc
+var x_cgo_malloc byte
+var _cgo_malloc = &x_cgo_malloc
+
+//go:cgo_import_static x_cgo_free
+//go:linkname x_cgo_free x_cgo_free
+//go:linkname _cgo_free _cgo_free
+var x_cgo_free byte
+var _cgo_free = &x_cgo_free
+
+//go:cgo_import_static x_cgo_thread_start
+//go:linkname x_cgo_thread_start x_cgo_thread_start
+//go:linkname _cgo_thread_start _cgo_thread_start
+var x_cgo_thread_start byte
+var _cgo_thread_start = &x_cgo_thread_start
+
+//go:cgo_export_static _cgo_topofstack
+//go:cgo_export_dynamic _cgo_topofstack
diff --git a/src/runtime/cgo/dragonfly.c b/src/runtime/cgo/dragonfly.go
similarity index 64%
rename from src/runtime/cgo/dragonfly.c
rename to src/runtime/cgo/dragonfly.go
index c233c8b..69d52b5 100644
--- a/src/runtime/cgo/dragonfly.c
+++ b/src/runtime/cgo/dragonfly.go
@@ -4,16 +4,16 @@
 
 // +build dragonfly
 
-#include "textflag.h"
+package cgo
+
+import _ "unsafe" // for go:linkname
 
 // Supply environ and __progname, because we don't
 // link against the standard DragonFly crt0.o and the
 // libc dynamic library needs them.
 
-#pragma dataflag NOPTR
-char *environ[1];
-#pragma dataflag NOPTR
-char *__progname;
+//go:linkname _environ environ
+//go:linkname _progname __progname
 
-#pragma dynexport environ environ
-#pragma dynexport __progname __progname
+var _environ uintptr
+var _progname uintptr
diff --git a/src/runtime/cgo/freebsd.c b/src/runtime/cgo/freebsd.c
deleted file mode 100644
index 4876b2a..0000000
--- a/src/runtime/cgo/freebsd.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build freebsd
-
-#include "textflag.h"
-
-// Supply environ and __progname, because we don't
-// link against the standard FreeBSD crt0.o and the
-// libc dynamic library needs them.
-
-#pragma dataflag NOPTR
-char *environ[1];
-#pragma dataflag NOPTR
-char *__progname;
-
-#pragma dynexport environ environ
-#pragma dynexport __progname __progname
diff --git a/src/runtime/cgo/freebsd.go b/src/runtime/cgo/freebsd.go
new file mode 100644
index 0000000..99cf3fb
--- /dev/null
+++ b/src/runtime/cgo/freebsd.go
@@ -0,0 +1,22 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build freebsd
+
+package cgo
+
+import _ "unsafe" // for go:linkname
+
+// Supply environ and __progname, because we don't
+// link against the standard FreeBSD crt0.o and the
+// libc dynamic library needs them.
+
+//go:linkname _environ environ
+//go:linkname _progname __progname
+
+//go:cgo_export_dynamic environ
+//go:cgo_export_dynamic __progname
+
+var _environ uintptr
+var _progname uintptr
diff --git a/src/runtime/cgo/iscgo.c b/src/runtime/cgo/iscgo.go
similarity index 70%
rename from src/runtime/cgo/iscgo.c
rename to src/runtime/cgo/iscgo.go
index 0907a19..61cba73 100644
--- a/src/runtime/cgo/iscgo.c
+++ b/src/runtime/cgo/iscgo.go
@@ -9,7 +9,12 @@
 // correctly, and sometimes they break.  This variable is a
 // backup: it depends only on old C style static linking rules.
 
-#include "../runtime.h"
+package cgo
 
-bool runtime·iscgo = 1;
-uint32 runtime·needextram = 1;  // create an extra M on first cgo call
+import _ "unsafe" // for go:linkname
+
+//go:linkname _iscgo runtime.iscgo
+var _iscgo bool = true
+
+//go:linkname _needextram runtime.needextram
+var _needextram uint32 = 1 // create an extra M on first cgo call
diff --git a/src/runtime/cgo/netbsd.c b/src/runtime/cgo/netbsd.go
similarity index 64%
rename from src/runtime/cgo/netbsd.c
rename to src/runtime/cgo/netbsd.go
index 076cc87..ac6b18a 100644
--- a/src/runtime/cgo/netbsd.c
+++ b/src/runtime/cgo/netbsd.go
@@ -4,16 +4,16 @@
 
 // +build netbsd
 
-#include "textflag.h"
+package cgo
+
+import _ "unsafe" // for go:linkname
 
 // Supply environ and __progname, because we don't
 // link against the standard NetBSD crt0.o and the
 // libc dynamic library needs them.
 
-#pragma dataflag NOPTR
-char *environ[1];
-#pragma dataflag NOPTR
-char *__progname;
+//go:linkname _environ environ
+//go:linkname _progname __progname
 
-#pragma dynexport environ environ
-#pragma dynexport __progname __progname
+var _environ uintptr
+var _progname uintptr
diff --git a/src/runtime/cgo/openbsd.c b/src/runtime/cgo/openbsd.c
deleted file mode 100644
index 4766495..0000000
--- a/src/runtime/cgo/openbsd.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build openbsd
-
-#include "textflag.h"
-
-// Supply environ, __progname and __guard_local, because
-// we don't link against the standard OpenBSD crt0.o and
-// the libc dynamic library needs them.
-
-#pragma dataflag NOPTR
-char *environ[1];
-#pragma dataflag NOPTR
-char *__progname;
-long __guard_local;
-
-#pragma dynexport environ environ
-#pragma dynexport __progname __progname
-
-// This is normally marked as hidden and placed in the
-// .openbsd.randomdata section.
-#pragma dynexport __guard_local __guard_local
-
-// We override pthread_create to support PT_TLS.
-#pragma dynexport pthread_create pthread_create
diff --git a/src/runtime/cgo/openbsd.go b/src/runtime/cgo/openbsd.go
new file mode 100644
index 0000000..61af3a8
--- /dev/null
+++ b/src/runtime/cgo/openbsd.go
@@ -0,0 +1,31 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build openbsd
+
+package cgo
+
+import _ "unsafe" // for go:linkname
+
+// Supply environ, __progname and __guard_local, because
+// we don't link against the standard OpenBSD crt0.o and
+// the libc dynamic library needs them.
+
+//go:linkname _environ environ
+//go:linkname _progname __progname
+//go:linkname _guard_local __guard_local
+
+var _environ uintptr
+var _progname uintptr
+var _guard_local uintptr
+
+//go:cgo_export_dynamic environ environ
+//go:cgo_export_dynamic __progname __progname
+
+// This is normally marked as hidden and placed in the
+// .openbsd.randomdata section.
+//go:cgo_export_dynamic __guard_local __guard_local
+
+// We override pthread_create to support PT_TLS.
+//go:cgo_export_dynamic pthread_create pthread_create
diff --git a/src/runtime/cgo/setenv.c b/src/runtime/cgo/setenv.c
deleted file mode 100644
index 76d88cb..0000000
--- a/src/runtime/cgo/setenv.c
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2011 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin dragonfly freebsd linux netbsd openbsd
-
-#pragma cgo_import_static x_cgo_setenv
-#pragma cgo_import_static x_cgo_unsetenv
-
-void x_cgo_setenv(char**);
-void (*runtime·_cgo_setenv)(char**) = x_cgo_setenv;
-void x_cgo_unsetenv(char**);
-void (*runtime·_cgo_unsetenv)(char**) = x_cgo_unsetenv;
diff --git a/src/runtime/cgo/setenv.go b/src/runtime/cgo/setenv.go
new file mode 100644
index 0000000..97c8c6a
--- /dev/null
+++ b/src/runtime/cgo/setenv.go
@@ -0,0 +1,21 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd
+
+package cgo
+
+import _ "unsafe" // for go:linkname
+
+//go:cgo_import_static x_cgo_setenv
+//go:linkname x_cgo_setenv x_cgo_setenv
+//go:linkname _cgo_setenv runtime._cgo_setenv
+var x_cgo_setenv byte
+var _cgo_setenv = &x_cgo_setenv
+
+//go:cgo_import_static x_cgo_unsetenv
+//go:linkname x_cgo_unsetenv x_cgo_unsetenv
+//go:linkname _cgo_unsetenv runtime._cgo_unsetenv
+var x_cgo_unsetenv byte
+var _cgo_unsetenv = &x_cgo_unsetenv
diff --git a/src/runtime/cgocall.go b/src/runtime/cgocall.go
index 7fd9146..258cabf 100644
--- a/src/runtime/cgocall.go
+++ b/src/runtime/cgocall.go
@@ -103,7 +103,7 @@
 
 	// Create an extra M for callbacks on threads not created by Go on first cgo call.
 	if needextram == 1 && cas(&needextram, 1, 0) {
-		onM(newextram)
+		systemstack(newextram)
 	}
 
 	/*
@@ -127,9 +127,9 @@
 	 * so it is safe to call while "in a system call", outside
 	 * the $GOMAXPROCS accounting.
 	 */
-	entersyscall()
+	entersyscall(0)
 	errno := asmcgocall_errno(fn, arg)
-	exitsyscall()
+	exitsyscall(0)
 
 	return errno
 }
@@ -153,17 +153,13 @@
 
 // Helper functions for cgo code.
 
-// Filled by schedinit from corresponding C variables,
-// which are in turn filled in by dynamic linker when Cgo is available.
-var cgoMalloc, cgoFree unsafe.Pointer
-
 func cmalloc(n uintptr) unsafe.Pointer {
 	var args struct {
 		n   uint64
 		ret unsafe.Pointer
 	}
 	args.n = uint64(n)
-	cgocall(cgoMalloc, unsafe.Pointer(&args))
+	cgocall(_cgo_malloc, unsafe.Pointer(&args))
 	if args.ret == nil {
 		gothrow("C malloc failed")
 	}
@@ -171,7 +167,7 @@
 }
 
 func cfree(p unsafe.Pointer) {
-	cgocall(cgoFree, p)
+	cgocall(_cgo_free, p)
 }
 
 // Call from C back to Go.
@@ -189,17 +185,17 @@
 	// save syscall* and let reentersyscall restore them.
 	savedsp := unsafe.Pointer(gp.syscallsp)
 	savedpc := gp.syscallpc
-	exitsyscall() // coming out of cgo call
+	exitsyscall(0) // coming out of cgo call
 	cgocallbackg1()
 	// going back to cgo call
-	reentersyscall(savedpc, savedsp)
+	reentersyscall(savedpc, uintptr(savedsp))
 }
 
 func cgocallbackg1() {
 	gp := getg()
 	if gp.m.needextram {
 		gp.m.needextram = false
-		onM(newextram)
+		systemstack(newextram)
 	}
 
 	// Add entry to defer stack in case of panic.
diff --git a/src/runtime/chan.go b/src/runtime/chan.go
index 0049701..bb0110f 100644
--- a/src/runtime/chan.go
+++ b/src/runtime/chan.go
@@ -26,7 +26,7 @@
 	if hchanSize%maxAlign != 0 || elem.align > maxAlign {
 		gothrow("makechan: bad alignment")
 	}
-	if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (maxmem-hchanSize)/uintptr(elem.size)) {
+	if size < 0 || int64(uintptr(size)) != size || (elem.size > 0 && uintptr(size) > (_MaxMem-hchanSize)/uintptr(elem.size)) {
 		panic("makechan: size out of range")
 	}
 
diff --git a/src/runtime/chan.h b/src/runtime/chan.h
deleted file mode 100644
index c34ff15..0000000
--- a/src/runtime/chan.h
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define	MAXALIGN	8
-
-typedef	struct	WaitQ	WaitQ;
-typedef	struct	Select	Select;
-typedef	struct	Scase	Scase;
-
-struct	WaitQ
-{
-	SudoG*	first;
-	SudoG*	last;
-};
-
-struct	Hchan
-{
-	uintgo	qcount;			// total data in the q
-	uintgo	dataqsiz;		// size of the circular q
-	byte*	buf;
-	uint16	elemsize;
-	uint32	closed;
-	Type*	elemtype;		// element type
-	uintgo	sendx;			// send index
-	uintgo	recvx;			// receive index
-	WaitQ	recvq;			// list of recv waiters
-	WaitQ	sendq;			// list of send waiters
-	Mutex	lock;
-};
-
-// Buffer follows Hchan immediately in memory.
-// chanbuf(c, i) is pointer to the i'th slot in the buffer.
-#define chanbuf(c, i) ((byte*)((c)->buf)+(uintptr)(c)->elemsize*(i))
-
-enum
-{
-	debug = 0,
-
-	// Scase.kind
-	CaseRecv,
-	CaseSend,
-	CaseDefault,
-};
-
-// Known to compiler.
-// Changes here must also be made in src/cmd/gc/select.c's selecttype.
-struct	Scase
-{
-	void*	elem;			// data element
-	Hchan*	chan;			// chan
-	uintptr	pc;			// return pc
-	uint16	kind;
-	uint16	so;			// vararg of selected bool
-	bool*	receivedp;		// pointer to received bool (recv2)
-	int64	releasetime;
-};
-
-// Known to compiler.
-// Changes here must also be made in src/cmd/gc/select.c's selecttype.
-struct	Select
-{
-	uint16	tcase;			// total count of scase[]
-	uint16	ncase;			// currently filled scase[]
-	uint16*	pollorder;		// case poll order
-	Hchan**	lockorder;		// channel lock order
-	Scase	scase[1];		// one per case (in order of appearance)
-};
diff --git a/src/runtime/chan1.go b/src/runtime/chan1.go
new file mode 100644
index 0000000..000775b
--- /dev/null
+++ b/src/runtime/chan1.go
@@ -0,0 +1,61 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//#define	MAXALIGN	8
+
+type waitq struct {
+	first *sudog
+	last  *sudog
+}
+
+type hchan struct {
+	qcount   uint // total data in the q
+	dataqsiz uint // size of the circular q
+	buf      *byte
+	elemsize uint16
+	closed   uint32
+	elemtype *_type // element type
+	sendx    uint   // send index
+	recvx    uint   // receive index
+	recvq    waitq  // list of recv waiters
+	sendq    waitq  // list of send waiters
+	lock     mutex
+}
+
+// Buffer follows Hchan immediately in memory.
+// chanbuf(c, i) is pointer to the i'th slot in the buffer.
+// #define chanbuf(c, i) ((byte*)((c)->buf)+(uintptr)(c)->elemsize*(i))
+
+const (
+	// scase.kind
+	_CaseRecv = iota
+	_CaseSend
+	_CaseDefault
+)
+
+// Known to compiler.
+// Changes here must also be made in src/cmd/gc/select.c's selecttype.
+type scase struct {
+	elem        unsafe.Pointer // data element
+	_chan       *hchan         // chan
+	pc          uintptr        // return pc
+	kind        uint16
+	so          uint16 // vararg of selected bool
+	receivedp   *bool  // pointer to received bool (recv2)
+	releasetime int64
+}
+
+// Known to compiler.
+// Changes here must also be made in src/cmd/gc/select.c's selecttype.
+type _select struct {
+	tcase     uint16   // total count of scase[]
+	ncase     uint16   // currently filled scase[]
+	pollorder *uint16  // case poll order
+	lockorder **hchan  // channel lock order
+	scase     [1]scase // one per case (in order of appearance)
+}
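
The old chanbuf macro is carried over only as a comment. Its Go equivalent, given
the hchan layout above, is a one-line pointer computation; a hedged sketch, not
part of this CL:

	// chanbuf returns a pointer to the i'th slot of the buffer that sits
	// immediately after the hchan header in memory.
	func chanbuf(c *hchan, i uint) unsafe.Pointer {
		return unsafe.Pointer(uintptr(unsafe.Pointer(c.buf)) + uintptr(i)*uintptr(c.elemsize))
	}
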
diff --git a/src/runtime/complex.go b/src/runtime/complex.go
index ec50f89..73f1161 100644
--- a/src/runtime/complex.go
+++ b/src/runtime/complex.go
@@ -4,28 +4,47 @@
 
 package runtime
 
+func isposinf(f float64) bool { return f > maxFloat64 }
+func isneginf(f float64) bool { return f < -maxFloat64 }
+func isnan(f float64) bool    { return f != f }
+
+func nan() float64 {
+	var f float64 = 0
+	return f / f
+}
+
+func posinf() float64 {
+	var f float64 = maxFloat64
+	return f * f
+}
+
+func neginf() float64 {
+	var f float64 = maxFloat64
+	return -f * f
+}
+
 func complex128div(n complex128, d complex128) complex128 {
 	// Special cases as in C99.
-	ninf := real(n) == posinf || real(n) == neginf ||
-		imag(n) == posinf || imag(n) == neginf
-	dinf := real(d) == posinf || real(d) == neginf ||
-		imag(d) == posinf || imag(d) == neginf
+	ninf := isposinf(real(n)) || isneginf(real(n)) ||
+		isposinf(imag(n)) || isneginf(imag(n))
+	dinf := isposinf(real(d)) || isneginf(real(d)) ||
+		isposinf(imag(d)) || isneginf(imag(d))
 
-	nnan := !ninf && (real(n) != real(n) || imag(n) != imag(n))
-	dnan := !dinf && (real(d) != real(d) || imag(d) != imag(d))
+	nnan := !ninf && (isnan(real(n)) || isnan(imag(n)))
+	dnan := !dinf && (isnan(real(d)) || isnan(imag(d)))
 
 	switch {
 	case nnan || dnan:
-		return complex(nan, nan)
+		return complex(nan(), nan())
 	case ninf && !dinf:
-		return complex(posinf, posinf)
+		return complex(posinf(), posinf())
 	case !ninf && dinf:
 		return complex(0, 0)
 	case real(d) == 0 && imag(d) == 0:
 		if real(n) == 0 && imag(n) == 0 {
-			return complex(nan, nan)
+			return complex(nan(), nan())
 		} else {
-			return complex(posinf, posinf)
+			return complex(posinf(), posinf())
 		}
 	default:
 		// Standard complex arithmetic, factored to avoid unnecessary overflow.
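
With posinf/neginf/nan now computed by functions instead of read from C globals,
the C99 special cases in complex128div behave as before; ordinary complex128
division lowers to this helper, so the cases can be observed from user code. A
small sketch of the expected results (comments follow the switch above):

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		inf := complex(math.Inf(1), 0)
		fmt.Println(complex(1, 1) / complex(0, 0))          // (+Inf+Infi): nonzero over zero
		fmt.Println(inf / complex(1, 1))                    // (+Inf+Infi): infinite numerator, finite denominator
		fmt.Println(complex(1, 1) / inf)                    // (0+0i): finite numerator, infinite denominator
		fmt.Println(complex(math.NaN(), 0) / complex(1, 1)) // (NaN+NaNi): NaN operand
	}
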
diff --git a/src/runtime/cpuprof.go b/src/runtime/cpuprof.go
index 8b1c1c6..d56678e 100644
--- a/src/runtime/cpuprof.go
+++ b/src/runtime/cpuprof.go
@@ -101,12 +101,10 @@
 	eod = [3]uintptr{0, 1, 0}
 )
 
-func setcpuprofilerate_m() // proc.c
-
 func setcpuprofilerate(hz int32) {
-	g := getg()
-	g.m.scalararg[0] = uintptr(hz)
-	onM(setcpuprofilerate_m)
+	systemstack(func() {
+		setcpuprofilerate_m(hz)
+	})
 }
 
 // lostProfileData is a no-op function used in profiles
diff --git a/src/runtime/cputicks.go b/src/runtime/cputicks.go
new file mode 100644
index 0000000..e0593d5
--- /dev/null
+++ b/src/runtime/cputicks.go
@@ -0,0 +1,11 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !arm
+
+package runtime
+
+// careful: cputicks is not guaranteed to be monotonic!  In particular, we have
+// noticed drift between cpus on certain os/arch combinations.  See issue 8976.
+func cputicks() int64
diff --git a/src/runtime/debug.go b/src/runtime/debug.go
index 4414dd5..105b79c 100644
--- a/src/runtime/debug.go
+++ b/src/runtime/debug.go
@@ -6,18 +6,6 @@
 
 import "unsafe"
 
-// Breakpoint executes a breakpoint trap.
-func Breakpoint()
-
-// LockOSThread wires the calling goroutine to its current operating system thread.
-// Until the calling goroutine exits or calls UnlockOSThread, it will always
-// execute in that thread, and no other goroutine can.
-func LockOSThread()
-
-// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
-// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
-func UnlockOSThread()
-
 // GOMAXPROCS sets the maximum number of CPUs that can be executing
 // simultaneously and returns the previous setting.  If n < 1, it does not
 // change the current setting.
@@ -37,14 +25,14 @@
 	semacquire(&worldsema, false)
 	gp := getg()
 	gp.m.gcing = 1
-	onM(stoptheworld)
+	systemstack(stoptheworld)
 
 	// newprocs will be processed by starttheworld
 	newprocs = int32(n)
 
 	gp.m.gcing = 0
 	semrelease(&worldsema)
-	onM(starttheworld)
+	systemstack(starttheworld)
 	return ret
 }
 
@@ -66,5 +54,3 @@
 func NumGoroutine() int {
 	return int(gcount())
 }
-
-func gcount() int32
diff --git a/src/runtime/defs.c b/src/runtime/defs.c
deleted file mode 100644
index b0a9b20..0000000
--- a/src/runtime/defs.c
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file is compiled by cmd/dist to obtain debug information
-// about the given header files.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "type.h"
-#include "race.h"
-#include "chan.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
diff --git a/src/runtime/defs1_netbsd_386.go b/src/runtime/defs1_netbsd_386.go
new file mode 100644
index 0000000..e39fd04
--- /dev/null
+++ b/src/runtime/defs1_netbsd_386.go
@@ -0,0 +1,171 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_netbsd.go defs_netbsd_386.go
+
+package runtime
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x6
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = 0x0
+	_EVFILT_WRITE = 0x1
+)
+
+type sigaltstackt struct {
+	ss_sp    *byte
+	ss_size  uint32
+	ss_flags int32
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type siginfo struct {
+	_signo  int32
+	_code   int32
+	_errno  int32
+	_reason [20]byte
+}
+
+type stackt struct {
+	ss_sp    *byte
+	ss_size  uint32
+	ss_flags int32
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int32
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int32
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type mcontextt struct {
+	__gregs     [19]int32
+	__fpregs    [644]byte
+	_mc_tlsbase int32
+}
+
+type ucontextt struct {
+	uc_flags    uint32
+	uc_link     *ucontextt
+	uc_sigmask  sigset
+	uc_stack    stackt
+	uc_mcontext mcontextt
+	__uc_pad    [4]int32
+}
+
+type keventt struct {
+	ident  uint32
+	filter uint32
+	flags  uint32
+	fflags uint32
+	data   int64
+	udata  *byte
+}
+
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_netbsd.go defs_netbsd_386.go
+
+const (
+	_REG_GS     = 0x0
+	_REG_FS     = 0x1
+	_REG_ES     = 0x2
+	_REG_DS     = 0x3
+	_REG_EDI    = 0x4
+	_REG_ESI    = 0x5
+	_REG_EBP    = 0x6
+	_REG_ESP    = 0x7
+	_REG_EBX    = 0x8
+	_REG_EDX    = 0x9
+	_REG_ECX    = 0xa
+	_REG_EAX    = 0xb
+	_REG_TRAPNO = 0xc
+	_REG_ERR    = 0xd
+	_REG_EIP    = 0xe
+	_REG_CS     = 0xf
+	_REG_EFL    = 0x10
+	_REG_UESP   = 0x11
+	_REG_SS     = 0x12
+)
diff --git a/src/runtime/defs1_netbsd_amd64.go b/src/runtime/defs1_netbsd_amd64.go
new file mode 100644
index 0000000..cca701e
--- /dev/null
+++ b/src/runtime/defs1_netbsd_amd64.go
@@ -0,0 +1,183 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go
+
+package runtime
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x6
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = 0x0
+	_EVFILT_WRITE = 0x1
+)
+
+type sigaltstackt struct {
+	ss_sp     *byte
+	ss_size   uint64
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type siginfo struct {
+	_signo  int32
+	_code   int32
+	_errno  int32
+	_pad    int32
+	_reason [24]byte
+}
+
+type stackt struct {
+	ss_sp     *byte
+	ss_size   uint64
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+type timeval struct {
+	tv_sec    int64
+	tv_usec   int32
+	pad_cgo_0 [4]byte
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type mcontextt struct {
+	__gregs     [26]uint64
+	_mc_tlsbase uint64
+	__fpregs    [512]int8
+}
+
+type ucontextt struct {
+	uc_flags    uint32
+	pad_cgo_0   [4]byte
+	uc_link     *ucontextt
+	uc_sigmask  sigset
+	uc_stack    stackt
+	uc_mcontext mcontextt
+}
+
+type keventt struct {
+	ident     uint64
+	filter    uint32
+	flags     uint32
+	fflags    uint32
+	pad_cgo_0 [4]byte
+	data      int64
+	udata     *byte
+}
+
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go
+
+const (
+	_REG_RDI    = 0x0
+	_REG_RSI    = 0x1
+	_REG_RDX    = 0x2
+	_REG_RCX    = 0x3
+	_REG_R8     = 0x4
+	_REG_R9     = 0x5
+	_REG_R10    = 0x6
+	_REG_R11    = 0x7
+	_REG_R12    = 0x8
+	_REG_R13    = 0x9
+	_REG_R14    = 0xa
+	_REG_R15    = 0xb
+	_REG_RBP    = 0xc
+	_REG_RBX    = 0xd
+	_REG_RAX    = 0xe
+	_REG_GS     = 0xf
+	_REG_FS     = 0x10
+	_REG_ES     = 0x11
+	_REG_DS     = 0x12
+	_REG_TRAPNO = 0x13
+	_REG_ERR    = 0x14
+	_REG_RIP    = 0x15
+	_REG_CS     = 0x16
+	_REG_RFLAGS = 0x17
+	_REG_RSP    = 0x18
+	_REG_SS     = 0x19
+)
diff --git a/src/runtime/defs1_netbsd_arm.go b/src/runtime/defs1_netbsd_arm.go
new file mode 100644
index 0000000..54ddf38
--- /dev/null
+++ b/src/runtime/defs1_netbsd_arm.go
@@ -0,0 +1,170 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_netbsd.go defs_netbsd_arm.go
+
+package runtime
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x6
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = 0x0
+	_EVFILT_WRITE = 0x1
+)
+
+type sigaltstackt struct {
+	ss_sp    *byte
+	ss_size  uint32
+	ss_flags int32
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type siginfo struct {
+	_signo  int32
+	_code   int32
+	_errno  int32
+	_reason [20]byte
+}
+
+type stackt struct {
+	ss_sp    *byte
+	ss_size  uint32
+	ss_flags int32
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int32
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int32
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type mcontextt struct {
+	__gregs [17]uint32
+	__fpu   [4 + 8*32 + 4]byte // EABI
+	// __fpu [4+4*33+4]byte // not EABI
+	_mc_tlsbase uint32
+}
+
+type ucontextt struct {
+	uc_flags    uint32
+	uc_link     *ucontextt
+	uc_sigmask  sigset
+	uc_stack    stackt
+	uc_mcontext mcontextt
+	__uc_pad    [2]int32
+}
+
+type keventt struct {
+	ident  uint32
+	filter uint32
+	flags  uint32
+	fflags uint32
+	data   int64
+	udata  *byte
+}
+
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_netbsd.go defs_netbsd_arm.go
+
+const (
+	_REG_R0   = 0x0
+	_REG_R1   = 0x1
+	_REG_R2   = 0x2
+	_REG_R3   = 0x3
+	_REG_R4   = 0x4
+	_REG_R5   = 0x5
+	_REG_R6   = 0x6
+	_REG_R7   = 0x7
+	_REG_R8   = 0x8
+	_REG_R9   = 0x9
+	_REG_R10  = 0xa
+	_REG_R11  = 0xb
+	_REG_R12  = 0xc
+	_REG_R13  = 0xd
+	_REG_R14  = 0xe
+	_REG_R15  = 0xf
+	_REG_CPSR = 0x10
+)
diff --git a/src/runtime/defs1_solaris_amd64.go b/src/runtime/defs1_solaris_amd64.go
new file mode 100644
index 0000000..3bb6f69
--- /dev/null
+++ b/src/runtime/defs1_solaris_amd64.go
@@ -0,0 +1,245 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_solaris.go defs_solaris_amd64.go
+
+package runtime
+
+const (
+	_EINTR       = 0x4
+	_EBADF       = 0x9
+	_EFAULT      = 0xe
+	_EAGAIN      = 0xb
+	_ETIMEDOUT   = 0x91
+	_EWOULDBLOCK = 0xb
+	_EINPROGRESS = 0x96
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x100
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x5
+
+	_SA_SIGINFO = 0x8
+	_SA_RESTART = 0x4
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x15
+	_SIGSTOP   = 0x17
+	_SIGTSTP   = 0x18
+	_SIGCONT   = 0x19
+	_SIGCHLD   = 0x12
+	_SIGTTIN   = 0x1a
+	_SIGTTOU   = 0x1b
+	_SIGIO     = 0x16
+	_SIGXCPU   = 0x1e
+	_SIGXFSZ   = 0x1f
+	_SIGVTALRM = 0x1c
+	_SIGPROF   = 0x1d
+	_SIGWINCH  = 0x14
+	_SIGUSR1   = 0x10
+	_SIGUSR2   = 0x11
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	__SC_NPROCESSORS_ONLN = 0xf
+
+	_PTHREAD_CREATE_DETACHED = 0x40
+
+	_FORK_NOSIGCHLD = 0x1
+	_FORK_WAITPID   = 0x2
+
+	_MAXHOSTNAMELEN = 0x100
+
+	_O_NONBLOCK = 0x80
+	_FD_CLOEXEC = 0x1
+	_F_GETFL    = 0x3
+	_F_SETFL    = 0x4
+	_F_SETFD    = 0x2
+
+	_POLLIN  = 0x1
+	_POLLOUT = 0x4
+	_POLLHUP = 0x10
+	_POLLERR = 0x8
+
+	_PORT_SOURCE_FD = 0x4
+)
+
+type semt struct {
+	sem_count uint32
+	sem_type  uint16
+	sem_magic uint16
+	sem_pad1  [3]uint64
+	sem_pad2  [2]uint64
+}
+
+type sigaltstackt struct {
+	ss_sp     *byte
+	ss_size   uint64
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type sigset struct {
+	__sigbits [4]uint32
+}
+
+type stackt struct {
+	ss_sp     *byte
+	ss_size   uint64
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type siginfo struct {
+	si_signo int32
+	si_code  int32
+	si_errno int32
+	si_pad   int32
+	__data   [240]byte
+}
+
+type sigactiont struct {
+	sa_flags  int32
+	pad_cgo_0 [4]byte
+	_funcptr  [8]byte
+	sa_mask   sigset
+}
+
+type fpregset struct {
+	fp_reg_set [528]byte
+}
+
+type mcontext struct {
+	gregs  [28]int64
+	fpregs fpregset
+}
+
+type ucontext struct {
+	uc_flags    uint64
+	uc_link     *ucontext
+	uc_sigmask  sigset
+	uc_stack    stackt
+	pad_cgo_0   [8]byte
+	uc_mcontext mcontext
+	uc_filler   [5]int64
+	pad_cgo_1   [8]byte
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = int64(x)
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type portevent struct {
+	portev_events int32
+	portev_source uint16
+	portev_pad    uint16
+	portev_object uint64
+	portev_user   *byte
+}
+
+type pthread uint32
+type pthreadattr struct {
+	__pthread_attrp *byte
+}
+
+type stat struct {
+	st_dev     uint64
+	st_ino     uint64
+	st_mode    uint32
+	st_nlink   uint32
+	st_uid     uint32
+	st_gid     uint32
+	st_rdev    uint64
+	st_size    int64
+	st_atim    timespec
+	st_mtim    timespec
+	st_ctim    timespec
+	st_blksize int32
+	pad_cgo_0  [4]byte
+	st_blocks  int64
+	st_fstype  [16]int8
+}
+
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_solaris.go defs_solaris_amd64.go
+
+const (
+	_REG_RDI    = 0x8
+	_REG_RSI    = 0x9
+	_REG_RDX    = 0xc
+	_REG_RCX    = 0xd
+	_REG_R8     = 0x7
+	_REG_R9     = 0x6
+	_REG_R10    = 0x5
+	_REG_R11    = 0x4
+	_REG_R12    = 0x3
+	_REG_R13    = 0x2
+	_REG_R14    = 0x1
+	_REG_R15    = 0x0
+	_REG_RBP    = 0xa
+	_REG_RBX    = 0xb
+	_REG_RAX    = 0xe
+	_REG_GS     = 0x17
+	_REG_FS     = 0x16
+	_REG_ES     = 0x18
+	_REG_DS     = 0x19
+	_REG_TRAPNO = 0xf
+	_REG_ERR    = 0x10
+	_REG_RIP    = 0x11
+	_REG_CS     = 0x12
+	_REG_RFLAGS = 0x13
+	_REG_RSP    = 0x14
+	_REG_SS     = 0x15
+)
diff --git a/src/runtime/defs_android_arm.h b/src/runtime/defs_android_arm.h
deleted file mode 100644
index 3611b3a..0000000
--- a/src/runtime/defs_android_arm.h
+++ /dev/null
@@ -1,3 +0,0 @@
-// TODO: Generate using cgo like defs_linux_{386,amd64}.h
-
-#include "defs_linux_arm.h"
diff --git a/src/runtime/defs_darwin_386.go b/src/runtime/defs_darwin_386.go
new file mode 100644
index 0000000..cf4812f
--- /dev/null
+++ b/src/runtime/defs_darwin_386.go
@@ -0,0 +1,382 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_darwin.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_DONTNEED = 0x4
+	_MADV_FREE     = 0x5
+
+	_MACH_MSG_TYPE_MOVE_RECEIVE   = 0x10
+	_MACH_MSG_TYPE_MOVE_SEND      = 0x11
+	_MACH_MSG_TYPE_MOVE_SEND_ONCE = 0x12
+	_MACH_MSG_TYPE_COPY_SEND      = 0x13
+	_MACH_MSG_TYPE_MAKE_SEND      = 0x14
+	_MACH_MSG_TYPE_MAKE_SEND_ONCE = 0x15
+	_MACH_MSG_TYPE_COPY_RECEIVE   = 0x16
+
+	_MACH_MSG_PORT_DESCRIPTOR         = 0x0
+	_MACH_MSG_OOL_DESCRIPTOR          = 0x1
+	_MACH_MSG_OOL_PORTS_DESCRIPTOR    = 0x2
+	_MACH_MSG_OOL_VOLATILE_DESCRIPTOR = 0x3
+
+	_MACH_MSGH_BITS_COMPLEX = 0x80000000
+
+	_MACH_SEND_MSG  = 0x1
+	_MACH_RCV_MSG   = 0x2
+	_MACH_RCV_LARGE = 0x4
+
+	_MACH_SEND_TIMEOUT   = 0x10
+	_MACH_SEND_INTERRUPT = 0x40
+	_MACH_SEND_ALWAYS    = 0x10000
+	_MACH_SEND_TRAILER   = 0x20000
+	_MACH_RCV_TIMEOUT    = 0x100
+	_MACH_RCV_NOTIFY     = 0x200
+	_MACH_RCV_INTERRUPT  = 0x400
+	_MACH_RCV_OVERWRITE  = 0x1000
+
+	_NDR_PROTOCOL_2_0      = 0x0
+	_NDR_INT_BIG_ENDIAN    = 0x0
+	_NDR_INT_LITTLE_ENDIAN = 0x1
+	_NDR_FLOAT_IEEE        = 0x0
+	_NDR_CHAR_ASCII        = 0x0
+
+	_SA_SIGINFO   = 0x40
+	_SA_RESTART   = 0x2
+	_SA_ONSTACK   = 0x1
+	_SA_USERTRAMP = 0x100
+	_SA_64REGSET  = 0x200
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x7
+	_FPE_INTOVF = 0x8
+	_FPE_FLTDIV = 0x1
+	_FPE_FLTOVF = 0x2
+	_FPE_FLTUND = 0x3
+	_FPE_FLTRES = 0x4
+	_FPE_FLTINV = 0x5
+	_FPE_FLTSUB = 0x6
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0x40
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type machbody struct {
+	msgh_descriptor_count uint32
+}
+
+type machheader struct {
+	msgh_bits        uint32
+	msgh_size        uint32
+	msgh_remote_port uint32
+	msgh_local_port  uint32
+	msgh_reserved    uint32
+	msgh_id          int32
+}
+
+type machndr struct {
+	mig_vers     uint8
+	if_vers      uint8
+	reserved1    uint8
+	mig_encoding uint8
+	int_rep      uint8
+	char_rep     uint8
+	float_rep    uint8
+	reserved2    uint8
+}
+
+type machport struct {
+	name        uint32
+	pad1        uint32
+	pad2        uint16
+	disposition uint8
+	_type       uint8
+}
+
+type stackt struct {
+	ss_sp    *byte
+	ss_size  uintptr
+	ss_flags int32
+}
+
+type sigactiont struct {
+	__sigaction_u [4]byte
+	sa_tramp      unsafe.Pointer
+	sa_mask       uint32
+	sa_flags      int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   *byte
+	si_value  [4]byte
+	si_band   int32
+	__pad     [7]uint32
+}
+
+type timeval struct {
+	tv_sec  int32
+	tv_usec int32
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type timespec struct {
+	tv_sec  int32
+	tv_nsec int32
+}
+
+type fpcontrol struct {
+	pad_cgo_0 [2]byte
+}
+
+type fpstatus struct {
+	pad_cgo_0 [2]byte
+}
+
+type regmmst struct {
+	mmst_reg  [10]int8
+	mmst_rsrv [6]int8
+}
+
+type regxmm struct {
+	xmm_reg [16]int8
+}
+
+type regs64 struct {
+	rax    uint64
+	rbx    uint64
+	rcx    uint64
+	rdx    uint64
+	rdi    uint64
+	rsi    uint64
+	rbp    uint64
+	rsp    uint64
+	r8     uint64
+	r9     uint64
+	r10    uint64
+	r11    uint64
+	r12    uint64
+	r13    uint64
+	r14    uint64
+	r15    uint64
+	rip    uint64
+	rflags uint64
+	cs     uint64
+	fs     uint64
+	gs     uint64
+}
+
+type floatstate64 struct {
+	fpu_reserved  [2]int32
+	fpu_fcw       fpcontrol
+	fpu_fsw       fpstatus
+	fpu_ftw       uint8
+	fpu_rsrv1     uint8
+	fpu_fop       uint16
+	fpu_ip        uint32
+	fpu_cs        uint16
+	fpu_rsrv2     uint16
+	fpu_dp        uint32
+	fpu_ds        uint16
+	fpu_rsrv3     uint16
+	fpu_mxcsr     uint32
+	fpu_mxcsrmask uint32
+	fpu_stmm0     regmmst
+	fpu_stmm1     regmmst
+	fpu_stmm2     regmmst
+	fpu_stmm3     regmmst
+	fpu_stmm4     regmmst
+	fpu_stmm5     regmmst
+	fpu_stmm6     regmmst
+	fpu_stmm7     regmmst
+	fpu_xmm0      regxmm
+	fpu_xmm1      regxmm
+	fpu_xmm2      regxmm
+	fpu_xmm3      regxmm
+	fpu_xmm4      regxmm
+	fpu_xmm5      regxmm
+	fpu_xmm6      regxmm
+	fpu_xmm7      regxmm
+	fpu_xmm8      regxmm
+	fpu_xmm9      regxmm
+	fpu_xmm10     regxmm
+	fpu_xmm11     regxmm
+	fpu_xmm12     regxmm
+	fpu_xmm13     regxmm
+	fpu_xmm14     regxmm
+	fpu_xmm15     regxmm
+	fpu_rsrv4     [96]int8
+	fpu_reserved1 int32
+}
+
+type exceptionstate64 struct {
+	trapno     uint16
+	cpu        uint16
+	err        uint32
+	faultvaddr uint64
+}
+
+type mcontext64 struct {
+	es exceptionstate64
+	ss regs64
+	fs floatstate64
+}
+
+type regs32 struct {
+	eax    uint32
+	ebx    uint32
+	ecx    uint32
+	edx    uint32
+	edi    uint32
+	esi    uint32
+	ebp    uint32
+	esp    uint32
+	ss     uint32
+	eflags uint32
+	eip    uint32
+	cs     uint32
+	ds     uint32
+	es     uint32
+	fs     uint32
+	gs     uint32
+}
+
+type floatstate32 struct {
+	fpu_reserved  [2]int32
+	fpu_fcw       fpcontrol
+	fpu_fsw       fpstatus
+	fpu_ftw       uint8
+	fpu_rsrv1     uint8
+	fpu_fop       uint16
+	fpu_ip        uint32
+	fpu_cs        uint16
+	fpu_rsrv2     uint16
+	fpu_dp        uint32
+	fpu_ds        uint16
+	fpu_rsrv3     uint16
+	fpu_mxcsr     uint32
+	fpu_mxcsrmask uint32
+	fpu_stmm0     regmmst
+	fpu_stmm1     regmmst
+	fpu_stmm2     regmmst
+	fpu_stmm3     regmmst
+	fpu_stmm4     regmmst
+	fpu_stmm5     regmmst
+	fpu_stmm6     regmmst
+	fpu_stmm7     regmmst
+	fpu_xmm0      regxmm
+	fpu_xmm1      regxmm
+	fpu_xmm2      regxmm
+	fpu_xmm3      regxmm
+	fpu_xmm4      regxmm
+	fpu_xmm5      regxmm
+	fpu_xmm6      regxmm
+	fpu_xmm7      regxmm
+	fpu_rsrv4     [224]int8
+	fpu_reserved1 int32
+}
+
+type exceptionstate32 struct {
+	trapno     uint16
+	cpu        uint16
+	err        uint32
+	faultvaddr uint32
+}
+
+type mcontext32 struct {
+	es exceptionstate32
+	ss regs32
+	fs floatstate32
+}
+
+type ucontext struct {
+	uc_onstack  int32
+	uc_sigmask  uint32
+	uc_stack    stackt
+	uc_link     *ucontext
+	uc_mcsize   uint32
+	uc_mcontext *mcontext32
+}
+
+type keventt struct {
+	ident  uint32
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int32
+	udata  *byte
+}
diff --git a/src/runtime/defs_darwin_386.h b/src/runtime/defs_darwin_386.h
deleted file mode 100644
index 0e0b4fb..0000000
--- a/src/runtime/defs_darwin_386.h
+++ /dev/null
@@ -1,392 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_darwin.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_DONTNEED	= 0x4,
-	MADV_FREE	= 0x5,
-
-	MACH_MSG_TYPE_MOVE_RECEIVE	= 0x10,
-	MACH_MSG_TYPE_MOVE_SEND		= 0x11,
-	MACH_MSG_TYPE_MOVE_SEND_ONCE	= 0x12,
-	MACH_MSG_TYPE_COPY_SEND		= 0x13,
-	MACH_MSG_TYPE_MAKE_SEND		= 0x14,
-	MACH_MSG_TYPE_MAKE_SEND_ONCE	= 0x15,
-	MACH_MSG_TYPE_COPY_RECEIVE	= 0x16,
-
-	MACH_MSG_PORT_DESCRIPTOR		= 0x0,
-	MACH_MSG_OOL_DESCRIPTOR			= 0x1,
-	MACH_MSG_OOL_PORTS_DESCRIPTOR		= 0x2,
-	MACH_MSG_OOL_VOLATILE_DESCRIPTOR	= 0x3,
-
-	MACH_MSGH_BITS_COMPLEX	= 0x80000000,
-
-	MACH_SEND_MSG	= 0x1,
-	MACH_RCV_MSG	= 0x2,
-	MACH_RCV_LARGE	= 0x4,
-
-	MACH_SEND_TIMEOUT	= 0x10,
-	MACH_SEND_INTERRUPT	= 0x40,
-	MACH_SEND_ALWAYS	= 0x10000,
-	MACH_SEND_TRAILER	= 0x20000,
-	MACH_RCV_TIMEOUT	= 0x100,
-	MACH_RCV_NOTIFY		= 0x200,
-	MACH_RCV_INTERRUPT	= 0x400,
-	MACH_RCV_OVERWRITE	= 0x1000,
-
-	NDR_PROTOCOL_2_0	= 0x0,
-	NDR_INT_BIG_ENDIAN	= 0x0,
-	NDR_INT_LITTLE_ENDIAN	= 0x1,
-	NDR_FLOAT_IEEE		= 0x0,
-	NDR_CHAR_ASCII		= 0x0,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-	SA_USERTRAMP	= 0x100,
-	SA_64REGSET	= 0x200,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x7,
-	FPE_INTOVF	= 0x8,
-	FPE_FLTDIV	= 0x1,
-	FPE_FLTOVF	= 0x2,
-	FPE_FLTUND	= 0x3,
-	FPE_FLTRES	= 0x4,
-	FPE_FLTINV	= 0x5,
-	FPE_FLTSUB	= 0x6,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0x40,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct MachBody MachBody;
-typedef struct MachHeader MachHeader;
-typedef struct MachNDR MachNDR;
-typedef struct MachPort MachPort;
-typedef struct StackT StackT;
-typedef struct SigactionT SigactionT;
-typedef struct Siginfo Siginfo;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct Timespec Timespec;
-typedef struct FPControl FPControl;
-typedef struct FPStatus FPStatus;
-typedef struct RegMMST RegMMST;
-typedef struct RegXMM RegXMM;
-typedef struct Regs64 Regs64;
-typedef struct FloatState64 FloatState64;
-typedef struct ExceptionState64 ExceptionState64;
-typedef struct Mcontext64 Mcontext64;
-typedef struct Regs32 Regs32;
-typedef struct FloatState32 FloatState32;
-typedef struct ExceptionState32 ExceptionState32;
-typedef struct Mcontext32 Mcontext32;
-typedef struct Ucontext Ucontext;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct MachBody {
-	uint32	msgh_descriptor_count;
-};
-struct MachHeader {
-	uint32	msgh_bits;
-	uint32	msgh_size;
-	uint32	msgh_remote_port;
-	uint32	msgh_local_port;
-	uint32	msgh_reserved;
-	int32	msgh_id;
-};
-struct MachNDR {
-	uint8	mig_vers;
-	uint8	if_vers;
-	uint8	reserved1;
-	uint8	mig_encoding;
-	uint8	int_rep;
-	uint8	char_rep;
-	uint8	float_rep;
-	uint8	reserved2;
-};
-struct MachPort {
-	uint32	name;
-	uint32	pad1;
-	uint16	pad2;
-	uint8	disposition;
-	uint8	type;
-};
-
-struct StackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-typedef	byte	Sighandler[4];
-
-struct SigactionT {
-	byte	__sigaction_u[4];
-	void	*sa_tramp;
-	uint32	sa_mask;
-	int32	sa_flags;
-};
-
-typedef	byte	Sigval[4];
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[4];
-	int32	si_band;
-	uint32	__pad[7];
-};
-struct Timeval {
-	int32	tv_sec;
-	int32	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-struct Timespec {
-	int32	tv_sec;
-	int32	tv_nsec;
-};
-
-struct FPControl {
-	byte	Pad_cgo_0[2];
-};
-struct FPStatus {
-	byte	Pad_cgo_0[2];
-};
-struct RegMMST {
-	int8	mmst_reg[10];
-	int8	mmst_rsrv[6];
-};
-struct RegXMM {
-	int8	xmm_reg[16];
-};
-
-struct Regs64 {
-	uint64	rax;
-	uint64	rbx;
-	uint64	rcx;
-	uint64	rdx;
-	uint64	rdi;
-	uint64	rsi;
-	uint64	rbp;
-	uint64	rsp;
-	uint64	r8;
-	uint64	r9;
-	uint64	r10;
-	uint64	r11;
-	uint64	r12;
-	uint64	r13;
-	uint64	r14;
-	uint64	r15;
-	uint64	rip;
-	uint64	rflags;
-	uint64	cs;
-	uint64	fs;
-	uint64	gs;
-};
-struct FloatState64 {
-	int32	fpu_reserved[2];
-	FPControl	fpu_fcw;
-	FPStatus	fpu_fsw;
-	uint8	fpu_ftw;
-	uint8	fpu_rsrv1;
-	uint16	fpu_fop;
-	uint32	fpu_ip;
-	uint16	fpu_cs;
-	uint16	fpu_rsrv2;
-	uint32	fpu_dp;
-	uint16	fpu_ds;
-	uint16	fpu_rsrv3;
-	uint32	fpu_mxcsr;
-	uint32	fpu_mxcsrmask;
-	RegMMST	fpu_stmm0;
-	RegMMST	fpu_stmm1;
-	RegMMST	fpu_stmm2;
-	RegMMST	fpu_stmm3;
-	RegMMST	fpu_stmm4;
-	RegMMST	fpu_stmm5;
-	RegMMST	fpu_stmm6;
-	RegMMST	fpu_stmm7;
-	RegXMM	fpu_xmm0;
-	RegXMM	fpu_xmm1;
-	RegXMM	fpu_xmm2;
-	RegXMM	fpu_xmm3;
-	RegXMM	fpu_xmm4;
-	RegXMM	fpu_xmm5;
-	RegXMM	fpu_xmm6;
-	RegXMM	fpu_xmm7;
-	RegXMM	fpu_xmm8;
-	RegXMM	fpu_xmm9;
-	RegXMM	fpu_xmm10;
-	RegXMM	fpu_xmm11;
-	RegXMM	fpu_xmm12;
-	RegXMM	fpu_xmm13;
-	RegXMM	fpu_xmm14;
-	RegXMM	fpu_xmm15;
-	int8	fpu_rsrv4[96];
-	int32	fpu_reserved1;
-};
-struct ExceptionState64 {
-	uint16	trapno;
-	uint16	cpu;
-	uint32	err;
-	uint64	faultvaddr;
-};
-struct Mcontext64 {
-	ExceptionState64	es;
-	Regs64	ss;
-	FloatState64	fs;
-};
-
-struct Regs32 {
-	uint32	eax;
-	uint32	ebx;
-	uint32	ecx;
-	uint32	edx;
-	uint32	edi;
-	uint32	esi;
-	uint32	ebp;
-	uint32	esp;
-	uint32	ss;
-	uint32	eflags;
-	uint32	eip;
-	uint32	cs;
-	uint32	ds;
-	uint32	es;
-	uint32	fs;
-	uint32	gs;
-};
-struct FloatState32 {
-	int32	fpu_reserved[2];
-	FPControl	fpu_fcw;
-	FPStatus	fpu_fsw;
-	uint8	fpu_ftw;
-	uint8	fpu_rsrv1;
-	uint16	fpu_fop;
-	uint32	fpu_ip;
-	uint16	fpu_cs;
-	uint16	fpu_rsrv2;
-	uint32	fpu_dp;
-	uint16	fpu_ds;
-	uint16	fpu_rsrv3;
-	uint32	fpu_mxcsr;
-	uint32	fpu_mxcsrmask;
-	RegMMST	fpu_stmm0;
-	RegMMST	fpu_stmm1;
-	RegMMST	fpu_stmm2;
-	RegMMST	fpu_stmm3;
-	RegMMST	fpu_stmm4;
-	RegMMST	fpu_stmm5;
-	RegMMST	fpu_stmm6;
-	RegMMST	fpu_stmm7;
-	RegXMM	fpu_xmm0;
-	RegXMM	fpu_xmm1;
-	RegXMM	fpu_xmm2;
-	RegXMM	fpu_xmm3;
-	RegXMM	fpu_xmm4;
-	RegXMM	fpu_xmm5;
-	RegXMM	fpu_xmm6;
-	RegXMM	fpu_xmm7;
-	int8	fpu_rsrv4[224];
-	int32	fpu_reserved1;
-};
-struct ExceptionState32 {
-	uint16	trapno;
-	uint16	cpu;
-	uint32	err;
-	uint32	faultvaddr;
-};
-struct Mcontext32 {
-	ExceptionState32	es;
-	Regs32	ss;
-	FloatState32	fs;
-};
-
-struct Ucontext {
-	int32	uc_onstack;
-	uint32	uc_sigmask;
-	StackT	uc_stack;
-	Ucontext	*uc_link;
-	uint32	uc_mcsize;
-	Mcontext32	*uc_mcontext;
-};
-
-struct KeventT {
-	uint32	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int32	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_darwin_amd64.go b/src/runtime/defs_darwin_amd64.go
new file mode 100644
index 0000000..2cd4c0c
--- /dev/null
+++ b/src/runtime/defs_darwin_amd64.go
@@ -0,0 +1,385 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_darwin.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_DONTNEED = 0x4
+	_MADV_FREE     = 0x5
+
+	_MACH_MSG_TYPE_MOVE_RECEIVE   = 0x10
+	_MACH_MSG_TYPE_MOVE_SEND      = 0x11
+	_MACH_MSG_TYPE_MOVE_SEND_ONCE = 0x12
+	_MACH_MSG_TYPE_COPY_SEND      = 0x13
+	_MACH_MSG_TYPE_MAKE_SEND      = 0x14
+	_MACH_MSG_TYPE_MAKE_SEND_ONCE = 0x15
+	_MACH_MSG_TYPE_COPY_RECEIVE   = 0x16
+
+	_MACH_MSG_PORT_DESCRIPTOR         = 0x0
+	_MACH_MSG_OOL_DESCRIPTOR          = 0x1
+	_MACH_MSG_OOL_PORTS_DESCRIPTOR    = 0x2
+	_MACH_MSG_OOL_VOLATILE_DESCRIPTOR = 0x3
+
+	_MACH_MSGH_BITS_COMPLEX = 0x80000000
+
+	_MACH_SEND_MSG  = 0x1
+	_MACH_RCV_MSG   = 0x2
+	_MACH_RCV_LARGE = 0x4
+
+	_MACH_SEND_TIMEOUT   = 0x10
+	_MACH_SEND_INTERRUPT = 0x40
+	_MACH_SEND_ALWAYS    = 0x10000
+	_MACH_SEND_TRAILER   = 0x20000
+	_MACH_RCV_TIMEOUT    = 0x100
+	_MACH_RCV_NOTIFY     = 0x200
+	_MACH_RCV_INTERRUPT  = 0x400
+	_MACH_RCV_OVERWRITE  = 0x1000
+
+	_NDR_PROTOCOL_2_0      = 0x0
+	_NDR_INT_BIG_ENDIAN    = 0x0
+	_NDR_INT_LITTLE_ENDIAN = 0x1
+	_NDR_FLOAT_IEEE        = 0x0
+	_NDR_CHAR_ASCII        = 0x0
+
+	_SA_SIGINFO   = 0x40
+	_SA_RESTART   = 0x2
+	_SA_ONSTACK   = 0x1
+	_SA_USERTRAMP = 0x100
+	_SA_64REGSET  = 0x200
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x7
+	_FPE_INTOVF = 0x8
+	_FPE_FLTDIV = 0x1
+	_FPE_FLTOVF = 0x2
+	_FPE_FLTUND = 0x3
+	_FPE_FLTRES = 0x4
+	_FPE_FLTINV = 0x5
+	_FPE_FLTSUB = 0x6
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0x40
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type machbody struct {
+	msgh_descriptor_count uint32
+}
+
+type machheader struct {
+	msgh_bits        uint32
+	msgh_size        uint32
+	msgh_remote_port uint32
+	msgh_local_port  uint32
+	msgh_reserved    uint32
+	msgh_id          int32
+}
+
+type machndr struct {
+	mig_vers     uint8
+	if_vers      uint8
+	reserved1    uint8
+	mig_encoding uint8
+	int_rep      uint8
+	char_rep     uint8
+	float_rep    uint8
+	reserved2    uint8
+}
+
+type machport struct {
+	name        uint32
+	pad1        uint32
+	pad2        uint16
+	disposition uint8
+	_type       uint8
+}
+
+type stackt struct {
+	ss_sp     *byte
+	ss_size   uintptr
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type sigactiont struct {
+	__sigaction_u [8]byte
+	sa_tramp      unsafe.Pointer
+	sa_mask       uint32
+	sa_flags      int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   *byte
+	si_value  [8]byte
+	si_band   int64
+	__pad     [7]uint64
+}
+
+type timeval struct {
+	tv_sec    int64
+	tv_usec   int32
+	pad_cgo_0 [4]byte
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+type fpcontrol struct {
+	pad_cgo_0 [2]byte
+}
+
+type fpstatus struct {
+	pad_cgo_0 [2]byte
+}
+
+type regmmst struct {
+	mmst_reg  [10]int8
+	mmst_rsrv [6]int8
+}
+
+type regxmm struct {
+	xmm_reg [16]int8
+}
+
+type regs64 struct {
+	rax    uint64
+	rbx    uint64
+	rcx    uint64
+	rdx    uint64
+	rdi    uint64
+	rsi    uint64
+	rbp    uint64
+	rsp    uint64
+	r8     uint64
+	r9     uint64
+	r10    uint64
+	r11    uint64
+	r12    uint64
+	r13    uint64
+	r14    uint64
+	r15    uint64
+	rip    uint64
+	rflags uint64
+	cs     uint64
+	fs     uint64
+	gs     uint64
+}
+
+type floatstate64 struct {
+	fpu_reserved  [2]int32
+	fpu_fcw       fpcontrol
+	fpu_fsw       fpstatus
+	fpu_ftw       uint8
+	fpu_rsrv1     uint8
+	fpu_fop       uint16
+	fpu_ip        uint32
+	fpu_cs        uint16
+	fpu_rsrv2     uint16
+	fpu_dp        uint32
+	fpu_ds        uint16
+	fpu_rsrv3     uint16
+	fpu_mxcsr     uint32
+	fpu_mxcsrmask uint32
+	fpu_stmm0     regmmst
+	fpu_stmm1     regmmst
+	fpu_stmm2     regmmst
+	fpu_stmm3     regmmst
+	fpu_stmm4     regmmst
+	fpu_stmm5     regmmst
+	fpu_stmm6     regmmst
+	fpu_stmm7     regmmst
+	fpu_xmm0      regxmm
+	fpu_xmm1      regxmm
+	fpu_xmm2      regxmm
+	fpu_xmm3      regxmm
+	fpu_xmm4      regxmm
+	fpu_xmm5      regxmm
+	fpu_xmm6      regxmm
+	fpu_xmm7      regxmm
+	fpu_xmm8      regxmm
+	fpu_xmm9      regxmm
+	fpu_xmm10     regxmm
+	fpu_xmm11     regxmm
+	fpu_xmm12     regxmm
+	fpu_xmm13     regxmm
+	fpu_xmm14     regxmm
+	fpu_xmm15     regxmm
+	fpu_rsrv4     [96]int8
+	fpu_reserved1 int32
+}
+
+type exceptionstate64 struct {
+	trapno     uint16
+	cpu        uint16
+	err        uint32
+	faultvaddr uint64
+}
+
+type mcontext64 struct {
+	es        exceptionstate64
+	ss        regs64
+	fs        floatstate64
+	pad_cgo_0 [4]byte
+}
+
+type regs32 struct {
+	eax    uint32
+	ebx    uint32
+	ecx    uint32
+	edx    uint32
+	edi    uint32
+	esi    uint32
+	ebp    uint32
+	esp    uint32
+	ss     uint32
+	eflags uint32
+	eip    uint32
+	cs     uint32
+	ds     uint32
+	es     uint32
+	fs     uint32
+	gs     uint32
+}
+
+type floatstate32 struct {
+	fpu_reserved  [2]int32
+	fpu_fcw       fpcontrol
+	fpu_fsw       fpstatus
+	fpu_ftw       uint8
+	fpu_rsrv1     uint8
+	fpu_fop       uint16
+	fpu_ip        uint32
+	fpu_cs        uint16
+	fpu_rsrv2     uint16
+	fpu_dp        uint32
+	fpu_ds        uint16
+	fpu_rsrv3     uint16
+	fpu_mxcsr     uint32
+	fpu_mxcsrmask uint32
+	fpu_stmm0     regmmst
+	fpu_stmm1     regmmst
+	fpu_stmm2     regmmst
+	fpu_stmm3     regmmst
+	fpu_stmm4     regmmst
+	fpu_stmm5     regmmst
+	fpu_stmm6     regmmst
+	fpu_stmm7     regmmst
+	fpu_xmm0      regxmm
+	fpu_xmm1      regxmm
+	fpu_xmm2      regxmm
+	fpu_xmm3      regxmm
+	fpu_xmm4      regxmm
+	fpu_xmm5      regxmm
+	fpu_xmm6      regxmm
+	fpu_xmm7      regxmm
+	fpu_rsrv4     [224]int8
+	fpu_reserved1 int32
+}
+
+type exceptionstate32 struct {
+	trapno     uint16
+	cpu        uint16
+	err        uint32
+	faultvaddr uint32
+}
+
+type mcontext32 struct {
+	es exceptionstate32
+	ss regs32
+	fs floatstate32
+}
+
+type ucontext struct {
+	uc_onstack  int32
+	uc_sigmask  uint32
+	uc_stack    stackt
+	uc_link     *ucontext
+	uc_mcsize   uint64
+	uc_mcontext *mcontext64
+}
+
+type keventt struct {
+	ident  uint64
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int64
+	udata  *byte
+}
diff --git a/src/runtime/defs_darwin_amd64.h b/src/runtime/defs_darwin_amd64.h
deleted file mode 100644
index 4bf83c1..0000000
--- a/src/runtime/defs_darwin_amd64.h
+++ /dev/null
@@ -1,395 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_darwin.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_DONTNEED	= 0x4,
-	MADV_FREE	= 0x5,
-
-	MACH_MSG_TYPE_MOVE_RECEIVE	= 0x10,
-	MACH_MSG_TYPE_MOVE_SEND		= 0x11,
-	MACH_MSG_TYPE_MOVE_SEND_ONCE	= 0x12,
-	MACH_MSG_TYPE_COPY_SEND		= 0x13,
-	MACH_MSG_TYPE_MAKE_SEND		= 0x14,
-	MACH_MSG_TYPE_MAKE_SEND_ONCE	= 0x15,
-	MACH_MSG_TYPE_COPY_RECEIVE	= 0x16,
-
-	MACH_MSG_PORT_DESCRIPTOR		= 0x0,
-	MACH_MSG_OOL_DESCRIPTOR			= 0x1,
-	MACH_MSG_OOL_PORTS_DESCRIPTOR		= 0x2,
-	MACH_MSG_OOL_VOLATILE_DESCRIPTOR	= 0x3,
-
-	MACH_MSGH_BITS_COMPLEX	= 0x80000000,
-
-	MACH_SEND_MSG	= 0x1,
-	MACH_RCV_MSG	= 0x2,
-	MACH_RCV_LARGE	= 0x4,
-
-	MACH_SEND_TIMEOUT	= 0x10,
-	MACH_SEND_INTERRUPT	= 0x40,
-	MACH_SEND_ALWAYS	= 0x10000,
-	MACH_SEND_TRAILER	= 0x20000,
-	MACH_RCV_TIMEOUT	= 0x100,
-	MACH_RCV_NOTIFY		= 0x200,
-	MACH_RCV_INTERRUPT	= 0x400,
-	MACH_RCV_OVERWRITE	= 0x1000,
-
-	NDR_PROTOCOL_2_0	= 0x0,
-	NDR_INT_BIG_ENDIAN	= 0x0,
-	NDR_INT_LITTLE_ENDIAN	= 0x1,
-	NDR_FLOAT_IEEE		= 0x0,
-	NDR_CHAR_ASCII		= 0x0,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-	SA_USERTRAMP	= 0x100,
-	SA_64REGSET	= 0x200,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x7,
-	FPE_INTOVF	= 0x8,
-	FPE_FLTDIV	= 0x1,
-	FPE_FLTOVF	= 0x2,
-	FPE_FLTUND	= 0x3,
-	FPE_FLTRES	= 0x4,
-	FPE_FLTINV	= 0x5,
-	FPE_FLTSUB	= 0x6,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0x40,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct MachBody MachBody;
-typedef struct MachHeader MachHeader;
-typedef struct MachNDR MachNDR;
-typedef struct MachPort MachPort;
-typedef struct StackT StackT;
-typedef struct SigactionT SigactionT;
-typedef struct Siginfo Siginfo;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct Timespec Timespec;
-typedef struct FPControl FPControl;
-typedef struct FPStatus FPStatus;
-typedef struct RegMMST RegMMST;
-typedef struct RegXMM RegXMM;
-typedef struct Regs64 Regs64;
-typedef struct FloatState64 FloatState64;
-typedef struct ExceptionState64 ExceptionState64;
-typedef struct Mcontext64 Mcontext64;
-typedef struct Regs32 Regs32;
-typedef struct FloatState32 FloatState32;
-typedef struct ExceptionState32 ExceptionState32;
-typedef struct Mcontext32 Mcontext32;
-typedef struct Ucontext Ucontext;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct MachBody {
-	uint32	msgh_descriptor_count;
-};
-struct MachHeader {
-	uint32	msgh_bits;
-	uint32	msgh_size;
-	uint32	msgh_remote_port;
-	uint32	msgh_local_port;
-	uint32	msgh_reserved;
-	int32	msgh_id;
-};
-struct MachNDR {
-	uint8	mig_vers;
-	uint8	if_vers;
-	uint8	reserved1;
-	uint8	mig_encoding;
-	uint8	int_rep;
-	uint8	char_rep;
-	uint8	float_rep;
-	uint8	reserved2;
-};
-struct MachPort {
-	uint32	name;
-	uint32	pad1;
-	uint16	pad2;
-	uint8	disposition;
-	uint8	type;
-};
-
-struct StackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-typedef	byte	Sighandler[8];
-
-struct SigactionT {
-	byte	__sigaction_u[8];
-	void	*sa_tramp;
-	uint32	sa_mask;
-	int32	sa_flags;
-};
-
-typedef	byte	Sigval[8];
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[8];
-	int64	si_band;
-	uint64	__pad[7];
-};
-struct Timeval {
-	int64	tv_sec;
-	int32	tv_usec;
-	byte	Pad_cgo_0[4];
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-
-struct FPControl {
-	byte	Pad_cgo_0[2];
-};
-struct FPStatus {
-	byte	Pad_cgo_0[2];
-};
-struct RegMMST {
-	int8	mmst_reg[10];
-	int8	mmst_rsrv[6];
-};
-struct RegXMM {
-	int8	xmm_reg[16];
-};
-
-struct Regs64 {
-	uint64	rax;
-	uint64	rbx;
-	uint64	rcx;
-	uint64	rdx;
-	uint64	rdi;
-	uint64	rsi;
-	uint64	rbp;
-	uint64	rsp;
-	uint64	r8;
-	uint64	r9;
-	uint64	r10;
-	uint64	r11;
-	uint64	r12;
-	uint64	r13;
-	uint64	r14;
-	uint64	r15;
-	uint64	rip;
-	uint64	rflags;
-	uint64	cs;
-	uint64	fs;
-	uint64	gs;
-};
-struct FloatState64 {
-	int32	fpu_reserved[2];
-	FPControl	fpu_fcw;
-	FPStatus	fpu_fsw;
-	uint8	fpu_ftw;
-	uint8	fpu_rsrv1;
-	uint16	fpu_fop;
-	uint32	fpu_ip;
-	uint16	fpu_cs;
-	uint16	fpu_rsrv2;
-	uint32	fpu_dp;
-	uint16	fpu_ds;
-	uint16	fpu_rsrv3;
-	uint32	fpu_mxcsr;
-	uint32	fpu_mxcsrmask;
-	RegMMST	fpu_stmm0;
-	RegMMST	fpu_stmm1;
-	RegMMST	fpu_stmm2;
-	RegMMST	fpu_stmm3;
-	RegMMST	fpu_stmm4;
-	RegMMST	fpu_stmm5;
-	RegMMST	fpu_stmm6;
-	RegMMST	fpu_stmm7;
-	RegXMM	fpu_xmm0;
-	RegXMM	fpu_xmm1;
-	RegXMM	fpu_xmm2;
-	RegXMM	fpu_xmm3;
-	RegXMM	fpu_xmm4;
-	RegXMM	fpu_xmm5;
-	RegXMM	fpu_xmm6;
-	RegXMM	fpu_xmm7;
-	RegXMM	fpu_xmm8;
-	RegXMM	fpu_xmm9;
-	RegXMM	fpu_xmm10;
-	RegXMM	fpu_xmm11;
-	RegXMM	fpu_xmm12;
-	RegXMM	fpu_xmm13;
-	RegXMM	fpu_xmm14;
-	RegXMM	fpu_xmm15;
-	int8	fpu_rsrv4[96];
-	int32	fpu_reserved1;
-};
-struct ExceptionState64 {
-	uint16	trapno;
-	uint16	cpu;
-	uint32	err;
-	uint64	faultvaddr;
-};
-struct Mcontext64 {
-	ExceptionState64	es;
-	Regs64	ss;
-	FloatState64	fs;
-	byte	Pad_cgo_0[4];
-};
-
-struct Regs32 {
-	uint32	eax;
-	uint32	ebx;
-	uint32	ecx;
-	uint32	edx;
-	uint32	edi;
-	uint32	esi;
-	uint32	ebp;
-	uint32	esp;
-	uint32	ss;
-	uint32	eflags;
-	uint32	eip;
-	uint32	cs;
-	uint32	ds;
-	uint32	es;
-	uint32	fs;
-	uint32	gs;
-};
-struct FloatState32 {
-	int32	fpu_reserved[2];
-	FPControl	fpu_fcw;
-	FPStatus	fpu_fsw;
-	uint8	fpu_ftw;
-	uint8	fpu_rsrv1;
-	uint16	fpu_fop;
-	uint32	fpu_ip;
-	uint16	fpu_cs;
-	uint16	fpu_rsrv2;
-	uint32	fpu_dp;
-	uint16	fpu_ds;
-	uint16	fpu_rsrv3;
-	uint32	fpu_mxcsr;
-	uint32	fpu_mxcsrmask;
-	RegMMST	fpu_stmm0;
-	RegMMST	fpu_stmm1;
-	RegMMST	fpu_stmm2;
-	RegMMST	fpu_stmm3;
-	RegMMST	fpu_stmm4;
-	RegMMST	fpu_stmm5;
-	RegMMST	fpu_stmm6;
-	RegMMST	fpu_stmm7;
-	RegXMM	fpu_xmm0;
-	RegXMM	fpu_xmm1;
-	RegXMM	fpu_xmm2;
-	RegXMM	fpu_xmm3;
-	RegXMM	fpu_xmm4;
-	RegXMM	fpu_xmm5;
-	RegXMM	fpu_xmm6;
-	RegXMM	fpu_xmm7;
-	int8	fpu_rsrv4[224];
-	int32	fpu_reserved1;
-};
-struct ExceptionState32 {
-	uint16	trapno;
-	uint16	cpu;
-	uint32	err;
-	uint32	faultvaddr;
-};
-struct Mcontext32 {
-	ExceptionState32	es;
-	Regs32	ss;
-	FloatState32	fs;
-};
-
-struct Ucontext {
-	int32	uc_onstack;
-	uint32	uc_sigmask;
-	StackT	uc_stack;
-	Ucontext	*uc_link;
-	uint64	uc_mcsize;
-	Mcontext64	*uc_mcontext;
-};
-
-struct KeventT {
-	uint64	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_dragonfly_386.go b/src/runtime/defs_dragonfly_386.go
new file mode 100644
index 0000000..1768dba
--- /dev/null
+++ b/src/runtime/defs_dragonfly_386.go
@@ -0,0 +1,190 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_dragonfly.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+	_EBUSY  = 0x10
+	_EAGAIN = 0x23
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x5
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x2
+	_FPE_INTOVF = 0x1
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+	_type uint16
+	prio  uint16
+}
+
+type lwpparams struct {
+	_type unsafe.Pointer
+	arg   *byte
+	stack *byte
+	tid1  *int32
+	tid2  *int32
+}
+
+type sigaltstackt struct {
+	ss_sp    *int8
+	ss_size  uint32
+	ss_flags int32
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type stackt struct {
+	ss_sp    *int8
+	ss_size  uint32
+	ss_flags int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   *byte
+	si_value  [4]byte
+	si_band   int32
+	__spare__ [7]int32
+}
+
+type mcontext struct {
+	mc_onstack  int32
+	mc_gs       int32
+	mc_fs       int32
+	mc_es       int32
+	mc_ds       int32
+	mc_edi      int32
+	mc_esi      int32
+	mc_ebp      int32
+	mc_isp      int32
+	mc_ebx      int32
+	mc_edx      int32
+	mc_ecx      int32
+	mc_eax      int32
+	mc_xflags   int32
+	mc_trapno   int32
+	mc_err      int32
+	mc_eip      int32
+	mc_cs       int32
+	mc_eflags   int32
+	mc_esp      int32
+	mc_ss       int32
+	mc_len      int32
+	mc_fpformat int32
+	mc_ownedfp  int32
+	mc_fpregs   [128]int32
+	__spare__   [16]int32
+}
+
+type ucontext struct {
+	uc_sigmask  sigset
+	uc_mcontext mcontext
+	uc_link     *ucontext
+	uc_stack    stackt
+	__spare__   [8]int32
+}
+
+type timespec struct {
+	tv_sec  int32
+	tv_nsec int32
+}
+
+type timeval struct {
+	tv_sec  int32
+	tv_usec int32
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint32
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int32
+	udata  *byte
+}
diff --git a/src/runtime/defs_dragonfly_386.h b/src/runtime/defs_dragonfly_386.h
deleted file mode 100644
index f86b9c6..0000000
--- a/src/runtime/defs_dragonfly_386.h
+++ /dev/null
@@ -1,198 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_dragonfly.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-	EBUSY	= 0x10,
-	EAGAIN	= 0x23,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x5,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x2,
-	FPE_INTOVF	= 0x1,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct Rtprio Rtprio;
-typedef struct Lwpparams Lwpparams;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct StackT StackT;
-typedef struct Siginfo Siginfo;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct Rtprio {
-	uint16	type;
-	uint16	prio;
-};
-struct Lwpparams {
-	void	*func;
-	byte	*arg;
-	byte	*stack;
-	int32	*tid1;
-	int32	*tid2;
-};
-struct SigaltstackT {
-	int8	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct StackT {
-	int8	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[4];
-	int32	si_band;
-	int32	__spare__[7];
-};
-
-struct Mcontext {
-	int32	mc_onstack;
-	int32	mc_gs;
-	int32	mc_fs;
-	int32	mc_es;
-	int32	mc_ds;
-	int32	mc_edi;
-	int32	mc_esi;
-	int32	mc_ebp;
-	int32	mc_isp;
-	int32	mc_ebx;
-	int32	mc_edx;
-	int32	mc_ecx;
-	int32	mc_eax;
-	int32	mc_xflags;
-	int32	mc_trapno;
-	int32	mc_err;
-	int32	mc_eip;
-	int32	mc_cs;
-	int32	mc_eflags;
-	int32	mc_esp;
-	int32	mc_ss;
-	int32	mc_len;
-	int32	mc_fpformat;
-	int32	mc_ownedfp;
-	int32	mc_fpregs[128];
-	int32	__spare__[16];
-};
-struct Ucontext {
-	Sigset	uc_sigmask;
-	Mcontext	uc_mcontext;
-	Ucontext	*uc_link;
-	StackT	uc_stack;
-	int32	__spare__[8];
-};
-
-struct Timespec {
-	int32	tv_sec;
-	int32	tv_nsec;
-};
-struct Timeval {
-	int32	tv_sec;
-	int32	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint32	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int32	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_dragonfly_amd64.go b/src/runtime/defs_dragonfly_amd64.go
new file mode 100644
index 0000000..7e959770
--- /dev/null
+++ b/src/runtime/defs_dragonfly_amd64.go
@@ -0,0 +1,208 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_dragonfly.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+	_EBUSY  = 0x10
+	_EAGAIN = 0x23
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x5
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x2
+	_FPE_INTOVF = 0x1
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+	_type uint16
+	prio  uint16
+}
+
+type lwpparams struct {
+	start_func uintptr
+	arg        unsafe.Pointer
+	stack      uintptr
+	tid1       unsafe.Pointer // *int32
+	tid2       unsafe.Pointer // *int32
+}
+
+type sigaltstackt struct {
+	ss_sp     uintptr
+	ss_size   uintptr
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type stackt struct {
+	ss_sp     uintptr
+	ss_size   uintptr
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   uint64
+	si_value  [8]byte
+	si_band   int64
+	__spare__ [7]int32
+	pad_cgo_0 [4]byte
+}
+
+type mcontext struct {
+	mc_onstack  uint64
+	mc_rdi      uint64
+	mc_rsi      uint64
+	mc_rdx      uint64
+	mc_rcx      uint64
+	mc_r8       uint64
+	mc_r9       uint64
+	mc_rax      uint64
+	mc_rbx      uint64
+	mc_rbp      uint64
+	mc_r10      uint64
+	mc_r11      uint64
+	mc_r12      uint64
+	mc_r13      uint64
+	mc_r14      uint64
+	mc_r15      uint64
+	mc_xflags   uint64
+	mc_trapno   uint64
+	mc_addr     uint64
+	mc_flags    uint64
+	mc_err      uint64
+	mc_rip      uint64
+	mc_cs       uint64
+	mc_rflags   uint64
+	mc_rsp      uint64
+	mc_ss       uint64
+	mc_len      uint32
+	mc_fpformat uint32
+	mc_ownedfp  uint32
+	mc_reserved uint32
+	mc_unused   [8]uint32
+	mc_fpregs   [256]int32
+}
+
+type ucontext struct {
+	uc_sigmask  sigset
+	pad_cgo_0   [48]byte
+	uc_mcontext mcontext
+	uc_link     *ucontext
+	uc_stack    stackt
+	__spare__   [8]int32
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = int64(x)
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = int64(x)
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint64
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int64
+	udata  *byte
+}
diff --git a/src/runtime/defs_dragonfly_amd64.h b/src/runtime/defs_dragonfly_amd64.h
deleted file mode 100644
index 6715552..0000000
--- a/src/runtime/defs_dragonfly_amd64.h
+++ /dev/null
@@ -1,208 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_dragonfly.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-	EBUSY	= 0x10,
-	EAGAIN	= 0x23,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x5,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x2,
-	FPE_INTOVF	= 0x1,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct Rtprio Rtprio;
-typedef struct Lwpparams Lwpparams;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct StackT StackT;
-typedef struct Siginfo Siginfo;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct Rtprio {
-	uint16	type;
-	uint16	prio;
-};
-struct Lwpparams {
-	void	*func;
-	byte	*arg;
-	byte	*stack;
-	int32	*tid1;
-	int32	*tid2;
-};
-struct SigaltstackT {
-	int8	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct StackT {
-	int8	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[8];
-	int64	si_band;
-	int32	__spare__[7];
-	byte	Pad_cgo_0[4];
-};
-
-struct Mcontext {
-	int64	mc_onstack;
-	int64	mc_rdi;
-	int64	mc_rsi;
-	int64	mc_rdx;
-	int64	mc_rcx;
-	int64	mc_r8;
-	int64	mc_r9;
-	int64	mc_rax;
-	int64	mc_rbx;
-	int64	mc_rbp;
-	int64	mc_r10;
-	int64	mc_r11;
-	int64	mc_r12;
-	int64	mc_r13;
-	int64	mc_r14;
-	int64	mc_r15;
-	int64	mc_xflags;
-	int64	mc_trapno;
-	int64	mc_addr;
-	int64	mc_flags;
-	int64	mc_err;
-	int64	mc_rip;
-	int64	mc_cs;
-	int64	mc_rflags;
-	int64	mc_rsp;
-	int64	mc_ss;
-	uint32	mc_len;
-	uint32	mc_fpformat;
-	uint32	mc_ownedfp;
-	uint32	mc_reserved;
-	uint32	mc_unused[8];
-	int32	mc_fpregs[256];
-};
-struct Ucontext {
-	Sigset	uc_sigmask;
-	byte	Pad_cgo_0[48];
-	Mcontext	uc_mcontext;
-	Ucontext	*uc_link;
-	StackT	uc_stack;
-	int32	__spare__[8];
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int64	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint64	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_freebsd_386.go b/src/runtime/defs_freebsd_386.go
new file mode 100644
index 0000000..2cb3a8f
--- /dev/null
+++ b/src/runtime/defs_freebsd_386.go
@@ -0,0 +1,213 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_freebsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x5
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_UMTX_OP_WAIT_UINT         = 0xb
+	_UMTX_OP_WAIT_UINT_PRIVATE = 0xf
+	_UMTX_OP_WAKE              = 0x3
+	_UMTX_OP_WAKE_PRIVATE      = 0x10
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x2
+	_FPE_INTOVF = 0x1
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0x40
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+	_type uint16
+	prio  uint16
+}
+
+type thrparam struct {
+	start_func uintptr
+	arg        unsafe.Pointer
+	stack_base uintptr
+	stack_size uintptr
+	tls_base   unsafe.Pointer
+	tls_size   uintptr
+	child_tid  unsafe.Pointer // *int32
+	parent_tid *int32
+	flags      int32
+	rtp        *rtprio
+	spare      [3]uintptr
+}
+
+type sigaltstackt struct {
+	ss_sp    *int8
+	ss_size  uint32
+	ss_flags int32
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type stackt struct {
+	ss_sp    uintptr
+	ss_size  uintptr
+	ss_flags int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   uintptr
+	si_value  [4]byte
+	_reason   [32]byte
+}
+
+type mcontext struct {
+	mc_onstack       uint32
+	mc_gs            uint32
+	mc_fs            uint32
+	mc_es            uint32
+	mc_ds            uint32
+	mc_edi           uint32
+	mc_esi           uint32
+	mc_ebp           uint32
+	mc_isp           uint32
+	mc_ebx           uint32
+	mc_edx           uint32
+	mc_ecx           uint32
+	mc_eax           uint32
+	mc_trapno        uint32
+	mc_err           uint32
+	mc_eip           uint32
+	mc_cs            uint32
+	mc_eflags        uint32
+	mc_esp           uint32
+	mc_ss            uint32
+	mc_len           uint32
+	mc_fpformat      uint32
+	mc_ownedfp       uint32
+	mc_flags         uint32
+	mc_fpstate       [128]uint32
+	mc_fsbase        uint32
+	mc_gsbase        uint32
+	mc_xfpustate     uint32
+	mc_xfpustate_len uint32
+	mc_spare2        [4]uint32
+}
+
+type ucontext struct {
+	uc_sigmask  sigset
+	uc_mcontext mcontext
+	uc_link     *ucontext
+	uc_stack    stackt
+	uc_flags    int32
+	__spare__   [4]int32
+	pad_cgo_0   [12]byte
+}
+
+type timespec struct {
+	tv_sec  int32
+	tv_nsec int32
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = x
+}
+
+type timeval struct {
+	tv_sec  int32
+	tv_usec int32
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint32
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int32
+	udata  *byte
+}
diff --git a/src/runtime/defs_freebsd_386.h b/src/runtime/defs_freebsd_386.h
deleted file mode 100644
index 156dccb..0000000
--- a/src/runtime/defs_freebsd_386.h
+++ /dev/null
@@ -1,213 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_freebsd.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x5,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	UMTX_OP_WAIT_UINT		= 0xb,
-	UMTX_OP_WAIT_UINT_PRIVATE	= 0xf,
-	UMTX_OP_WAKE			= 0x3,
-	UMTX_OP_WAKE_PRIVATE		= 0x10,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x2,
-	FPE_INTOVF	= 0x1,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0x40,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct Rtprio Rtprio;
-typedef struct ThrParam ThrParam;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct StackT StackT;
-typedef struct Siginfo Siginfo;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct Rtprio {
-	uint16	type;
-	uint16	prio;
-};
-struct ThrParam {
-	void	*start_func;
-	byte	*arg;
-	int8	*stack_base;
-	uint32	stack_size;
-	int8	*tls_base;
-	uint32	tls_size;
-	int32	*child_tid;
-	int32	*parent_tid;
-	int32	flags;
-	Rtprio	*rtp;
-	void	*spare[3];
-};
-struct SigaltstackT {
-	int8	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct StackT {
-	int8	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[4];
-	byte	_reason[32];
-};
-
-struct Mcontext {
-	int32	mc_onstack;
-	int32	mc_gs;
-	int32	mc_fs;
-	int32	mc_es;
-	int32	mc_ds;
-	int32	mc_edi;
-	int32	mc_esi;
-	int32	mc_ebp;
-	int32	mc_isp;
-	int32	mc_ebx;
-	int32	mc_edx;
-	int32	mc_ecx;
-	int32	mc_eax;
-	int32	mc_trapno;
-	int32	mc_err;
-	int32	mc_eip;
-	int32	mc_cs;
-	int32	mc_eflags;
-	int32	mc_esp;
-	int32	mc_ss;
-	int32	mc_len;
-	int32	mc_fpformat;
-	int32	mc_ownedfp;
-	int32	mc_flags;
-	int32	mc_fpstate[128];
-	int32	mc_fsbase;
-	int32	mc_gsbase;
-	int32	mc_xfpustate;
-	int32	mc_xfpustate_len;
-	int32	mc_spare2[4];
-};
-struct Ucontext {
-	Sigset	uc_sigmask;
-	Mcontext	uc_mcontext;
-	Ucontext	*uc_link;
-	StackT	uc_stack;
-	int32	uc_flags;
-	int32	__spare__[4];
-	byte	Pad_cgo_0[12];
-};
-
-struct Timespec {
-	int32	tv_sec;
-	int32	tv_nsec;
-};
-struct Timeval {
-	int32	tv_sec;
-	int32	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint32	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int32	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_freebsd_amd64.go b/src/runtime/defs_freebsd_amd64.go
new file mode 100644
index 0000000..a2646fb
--- /dev/null
+++ b/src/runtime/defs_freebsd_amd64.go
@@ -0,0 +1,224 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_freebsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x5
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_UMTX_OP_WAIT_UINT         = 0xb
+	_UMTX_OP_WAIT_UINT_PRIVATE = 0xf
+	_UMTX_OP_WAKE              = 0x3
+	_UMTX_OP_WAKE_PRIVATE      = 0x10
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x2
+	_FPE_INTOVF = 0x1
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0x40
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+	_type uint16
+	prio  uint16
+}
+
+type thrparam struct {
+	start_func uintptr
+	arg        unsafe.Pointer
+	stack_base uintptr
+	stack_size uintptr
+	tls_base   unsafe.Pointer
+	tls_size   uintptr
+	child_tid  unsafe.Pointer // *int64
+	parent_tid *int64
+	flags      int32
+	pad_cgo_0  [4]byte
+	rtp        *rtprio
+	spare      [3]uintptr
+}
+
+type sigaltstackt struct {
+	ss_sp     *int8
+	ss_size   uint64
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type stackt struct {
+	ss_sp     uintptr
+	ss_size   uintptr
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   uint64
+	si_value  [8]byte
+	_reason   [40]byte
+}
+
+type mcontext struct {
+	mc_onstack       uint64
+	mc_rdi           uint64
+	mc_rsi           uint64
+	mc_rdx           uint64
+	mc_rcx           uint64
+	mc_r8            uint64
+	mc_r9            uint64
+	mc_rax           uint64
+	mc_rbx           uint64
+	mc_rbp           uint64
+	mc_r10           uint64
+	mc_r11           uint64
+	mc_r12           uint64
+	mc_r13           uint64
+	mc_r14           uint64
+	mc_r15           uint64
+	mc_trapno        uint32
+	mc_fs            uint16
+	mc_gs            uint16
+	mc_addr          uint64
+	mc_flags         uint32
+	mc_es            uint16
+	mc_ds            uint16
+	mc_err           uint64
+	mc_rip           uint64
+	mc_cs            uint64
+	mc_rflags        uint64
+	mc_rsp           uint64
+	mc_ss            uint64
+	mc_len           uint64
+	mc_fpformat      uint64
+	mc_ownedfp       uint64
+	mc_fpstate       [64]uint64
+	mc_fsbase        uint64
+	mc_gsbase        uint64
+	mc_xfpustate     uint64
+	mc_xfpustate_len uint64
+	mc_spare         [4]uint64
+}
+
+type ucontext struct {
+	uc_sigmask  sigset
+	uc_mcontext mcontext
+	uc_link     *ucontext
+	uc_stack    stackt
+	uc_flags    int32
+	__spare__   [4]int32
+	pad_cgo_0   [12]byte
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = int64(x)
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = int64(x)
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint64
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int64
+	udata  *byte
+}
diff --git a/src/runtime/defs_freebsd_amd64.h b/src/runtime/defs_freebsd_amd64.h
deleted file mode 100644
index 4ba8956..0000000
--- a/src/runtime/defs_freebsd_amd64.h
+++ /dev/null
@@ -1,224 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_freebsd.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x5,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	UMTX_OP_WAIT_UINT		= 0xb,
-	UMTX_OP_WAIT_UINT_PRIVATE	= 0xf,
-	UMTX_OP_WAKE			= 0x3,
-	UMTX_OP_WAKE_PRIVATE		= 0x10,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x2,
-	FPE_INTOVF	= 0x1,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0x40,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct Rtprio Rtprio;
-typedef struct ThrParam ThrParam;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct StackT StackT;
-typedef struct Siginfo Siginfo;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct Rtprio {
-	uint16	type;
-	uint16	prio;
-};
-struct ThrParam {
-	void	*start_func;
-	byte	*arg;
-	int8	*stack_base;
-	uint64	stack_size;
-	int8	*tls_base;
-	uint64	tls_size;
-	int64	*child_tid;
-	int64	*parent_tid;
-	int32	flags;
-	byte	Pad_cgo_0[4];
-	Rtprio	*rtp;
-	void	*spare[3];
-};
-struct SigaltstackT {
-	int8	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct StackT {
-	int8	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[8];
-	byte	_reason[40];
-};
-
-struct Mcontext {
-	int64	mc_onstack;
-	int64	mc_rdi;
-	int64	mc_rsi;
-	int64	mc_rdx;
-	int64	mc_rcx;
-	int64	mc_r8;
-	int64	mc_r9;
-	int64	mc_rax;
-	int64	mc_rbx;
-	int64	mc_rbp;
-	int64	mc_r10;
-	int64	mc_r11;
-	int64	mc_r12;
-	int64	mc_r13;
-	int64	mc_r14;
-	int64	mc_r15;
-	uint32	mc_trapno;
-	uint16	mc_fs;
-	uint16	mc_gs;
-	int64	mc_addr;
-	uint32	mc_flags;
-	uint16	mc_es;
-	uint16	mc_ds;
-	int64	mc_err;
-	int64	mc_rip;
-	int64	mc_cs;
-	int64	mc_rflags;
-	int64	mc_rsp;
-	int64	mc_ss;
-	int64	mc_len;
-	int64	mc_fpformat;
-	int64	mc_ownedfp;
-	int64	mc_fpstate[64];
-	int64	mc_fsbase;
-	int64	mc_gsbase;
-	int64	mc_xfpustate;
-	int64	mc_xfpustate_len;
-	int64	mc_spare[4];
-};
-struct Ucontext {
-	Sigset	uc_sigmask;
-	Mcontext	uc_mcontext;
-	Ucontext	*uc_link;
-	StackT	uc_stack;
-	int32	uc_flags;
-	int32	__spare__[4];
-	byte	Pad_cgo_0[12];
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int64	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint64	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_freebsd_arm.go b/src/runtime/defs_freebsd_arm.go
new file mode 100644
index 0000000..e86ce45
--- /dev/null
+++ b/src/runtime/defs_freebsd_arm.go
@@ -0,0 +1,186 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_freebsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x5
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_UMTX_OP_WAIT_UINT         = 0xb
+	_UMTX_OP_WAIT_UINT_PRIVATE = 0xf
+	_UMTX_OP_WAKE              = 0x3
+	_UMTX_OP_WAKE_PRIVATE      = 0x10
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x2
+	_FPE_INTOVF = 0x1
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_RECEIPT   = 0x40
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type rtprio struct {
+	_type uint16
+	prio  uint16
+}
+
+type thrparam struct {
+	start_func uintptr
+	arg        unsafe.Pointer
+	stack_base uintptr
+	stack_size uintptr
+	tls_base   unsafe.Pointer
+	tls_size   uintptr
+	child_tid  unsafe.Pointer // *int32
+	parent_tid *int32
+	flags      int32
+	rtp        *rtprio
+	spare      [3]uintptr
+}
+
+type sigaltstackt struct {
+	ss_sp    *uint8
+	ss_size  uint32
+	ss_flags int32
+}
+
+type sigset struct {
+	__bits [4]uint32
+}
+
+type stackt struct {
+	ss_sp    uintptr
+	ss_size  uintptr
+	ss_flags int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	si_pid    int32
+	si_uid    uint32
+	si_status int32
+	si_addr   uintptr
+	si_value  [4]byte
+	_reason   [32]byte
+}
+
+type mcontext struct {
+	__gregs [17]uint32
+	__fpu   [140]byte
+}
+
+type ucontext struct {
+	uc_sigmask  sigset
+	uc_mcontext mcontext
+	uc_link     *ucontext
+	uc_stack    stackt
+	uc_flags    int32
+	__spare__   [4]int32
+}
+
+type timespec struct {
+	tv_sec    int64
+	tv_nsec   int32
+	pad_cgo_0 [4]byte
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = int64(x)
+}
+
+type timeval struct {
+	tv_sec    int64
+	tv_usec   int32
+	pad_cgo_0 [4]byte
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint32
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int32
+	udata  *byte
+}
diff --git a/src/runtime/defs_freebsd_arm.h b/src/runtime/defs_freebsd_arm.h
deleted file mode 100644
index 17deba6..0000000
--- a/src/runtime/defs_freebsd_arm.h
+++ /dev/null
@@ -1,186 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_freebsd.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x5,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	UMTX_OP_WAIT_UINT		= 0xb,
-	UMTX_OP_WAIT_UINT_PRIVATE	= 0xf,
-	UMTX_OP_WAKE			= 0x3,
-	UMTX_OP_WAKE_PRIVATE		= 0x10,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x2,
-	FPE_INTOVF	= 0x1,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0x40,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct Rtprio Rtprio;
-typedef struct ThrParam ThrParam;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct StackT StackT;
-typedef struct Siginfo Siginfo;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct Rtprio {
-	uint16	type;
-	uint16	prio;
-};
-struct ThrParam {
-	void	*start_func;
-	byte	*arg;
-	uint8	*stack_base;
-	uint32	stack_size;
-	uint8	*tls_base;
-	uint32	tls_size;
-	int32	*child_tid;
-	int32	*parent_tid;
-	int32	flags;
-	Rtprio	*rtp;
-	void	*spare[3];
-};
-struct SigaltstackT {
-	uint8	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct StackT {
-	uint8	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	int32	si_pid;
-	uint32	si_uid;
-	int32	si_status;
-	byte	*si_addr;
-	byte	si_value[4];
-	byte	_reason[32];
-};
-
-struct Mcontext {
-	uint32	__gregs[17];
-	byte	__fpu[140];
-};
-struct Ucontext {
-	Sigset	uc_sigmask;
-	Mcontext	uc_mcontext;
-	Ucontext	*uc_link;
-	StackT	uc_stack;
-	int32	uc_flags;
-	int32	__spare__[4];
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int32	tv_nsec;
-	byte	Pad_cgo_0[4];
-};
-struct Timeval {
-	int64	tv_sec;
-	int32	tv_usec;
-	byte	Pad_cgo_0[4];
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint32	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int32	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_linux_386.go b/src/runtime/defs_linux_386.go
new file mode 100644
index 0000000..a468f60
--- /dev/null
+++ b/src/runtime/defs_linux_386.go
@@ -0,0 +1,217 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs2_linux.go
+
+package runtime
+
+const (
+	_EINTR  = 0x4
+	_EAGAIN = 0xb
+	_ENOMEM = 0xc
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x20
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_DONTNEED = 0x4
+
+	_SA_RESTART  = 0x10000000
+	_SA_ONSTACK  = 0x8000000
+	_SA_RESTORER = 0x4000000
+	_SA_SIGINFO  = 0x4
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGBUS    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGUSR1   = 0xa
+	_SIGSEGV   = 0xb
+	_SIGUSR2   = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGSTKFLT = 0x10
+	_SIGCHLD   = 0x11
+	_SIGCONT   = 0x12
+	_SIGSTOP   = 0x13
+	_SIGTSTP   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGURG    = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGIO     = 0x1d
+	_SIGPWR    = 0x1e
+	_SIGSYS    = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_O_RDONLY  = 0x0
+	_O_CLOEXEC = 0x80000
+
+	_EPOLLIN       = 0x1
+	_EPOLLOUT      = 0x4
+	_EPOLLERR      = 0x8
+	_EPOLLHUP      = 0x10
+	_EPOLLRDHUP    = 0x2000
+	_EPOLLET       = 0x80000000
+	_EPOLL_CLOEXEC = 0x80000
+	_EPOLL_CTL_ADD = 0x1
+	_EPOLL_CTL_DEL = 0x2
+	_EPOLL_CTL_MOD = 0x3
+)
+
+type fpreg struct {
+	significand [4]uint16
+	exponent    uint16
+}
+
+type fpxreg struct {
+	significand [4]uint16
+	exponent    uint16
+	padding     [3]uint16
+}
+
+type xmmreg struct {
+	element [4]uint32
+}
+
+type fpstate struct {
+	cw        uint32
+	sw        uint32
+	tag       uint32
+	ipoff     uint32
+	cssel     uint32
+	dataoff   uint32
+	datasel   uint32
+	_st       [8]fpreg
+	status    uint16
+	magic     uint16
+	_fxsr_env [6]uint32
+	mxcsr     uint32
+	reserved  uint32
+	_fxsr_st  [8]fpxreg
+	_xmm      [8]xmmreg
+	padding1  [44]uint32
+	anon0     [48]byte
+}
+
+type timespec struct {
+	tv_sec  int32
+	tv_nsec int32
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = x
+}
+
+func (ts *timespec) set_nsec(x int32) {
+	ts.tv_nsec = x
+}
+
+type timeval struct {
+	tv_sec  int32
+	tv_usec int32
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type sigactiont struct {
+	sa_handler  uintptr
+	sa_flags    uint32
+	sa_restorer uintptr
+	sa_mask     uint64
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	_sifields [116]byte
+}
+
+type sigaltstackt struct {
+	ss_sp    *byte
+	ss_flags int32
+	ss_size  uintptr
+}
+
+type sigcontext struct {
+	gs            uint16
+	__gsh         uint16
+	fs            uint16
+	__fsh         uint16
+	es            uint16
+	__esh         uint16
+	ds            uint16
+	__dsh         uint16
+	edi           uint32
+	esi           uint32
+	ebp           uint32
+	esp           uint32
+	ebx           uint32
+	edx           uint32
+	ecx           uint32
+	eax           uint32
+	trapno        uint32
+	err           uint32
+	eip           uint32
+	cs            uint16
+	__csh         uint16
+	eflags        uint32
+	esp_at_signal uint32
+	ss            uint16
+	__ssh         uint16
+	fpstate       *fpstate
+	oldmask       uint32
+	cr2           uint32
+}
+
+type ucontext struct {
+	uc_flags    uint32
+	uc_link     *ucontext
+	uc_stack    sigaltstackt
+	uc_mcontext sigcontext
+	uc_sigmask  uint32
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type epollevent struct {
+	events uint32
+	data   [8]byte // to match amd64
+}
diff --git a/src/runtime/defs_linux_386.h b/src/runtime/defs_linux_386.h
deleted file mode 100644
index 24a05d8..0000000
--- a/src/runtime/defs_linux_386.h
+++ /dev/null
@@ -1,211 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs2_linux.go
-
-
-enum {
-	EINTR	= 0x4,
-	EAGAIN	= 0xb,
-	ENOMEM	= 0xc,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x20,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_DONTNEED	= 0x4,
-
-	SA_RESTART	= 0x10000000,
-	SA_ONSTACK	= 0x8000000,
-	SA_RESTORER	= 0x4000000,
-	SA_SIGINFO	= 0x4,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGBUS		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGUSR1		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGUSR2		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGSTKFLT	= 0x10,
-	SIGCHLD		= 0x11,
-	SIGCONT		= 0x12,
-	SIGSTOP		= 0x13,
-	SIGTSTP		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGURG		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGIO		= 0x1d,
-	SIGPWR		= 0x1e,
-	SIGSYS		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	O_RDONLY	= 0x0,
-	O_CLOEXEC	= 0x80000,
-
-	EPOLLIN		= 0x1,
-	EPOLLOUT	= 0x4,
-	EPOLLERR	= 0x8,
-	EPOLLHUP	= 0x10,
-	EPOLLRDHUP	= 0x2000,
-	EPOLLET		= -0x80000000,
-	EPOLL_CLOEXEC	= 0x80000,
-	EPOLL_CTL_ADD	= 0x1,
-	EPOLL_CTL_DEL	= 0x2,
-	EPOLL_CTL_MOD	= 0x3,
-};
-
-typedef struct Fpreg Fpreg;
-typedef struct Fpxreg Fpxreg;
-typedef struct Xmmreg Xmmreg;
-typedef struct Fpstate Fpstate;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct SigactionT SigactionT;
-typedef struct Siginfo Siginfo;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigcontext Sigcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Itimerval Itimerval;
-typedef struct EpollEvent EpollEvent;
-
-#pragma pack on
-
-struct Fpreg {
-	uint16	significand[4];
-	uint16	exponent;
-};
-struct Fpxreg {
-	uint16	significand[4];
-	uint16	exponent;
-	uint16	padding[3];
-};
-struct Xmmreg {
-	uint32	element[4];
-};
-struct Fpstate {
-	uint32	cw;
-	uint32	sw;
-	uint32	tag;
-	uint32	ipoff;
-	uint32	cssel;
-	uint32	dataoff;
-	uint32	datasel;
-	Fpreg	_st[8];
-	uint16	status;
-	uint16	magic;
-	uint32	_fxsr_env[6];
-	uint32	mxcsr;
-	uint32	reserved;
-	Fpxreg	_fxsr_st[8];
-	Xmmreg	_xmm[8];
-	uint32	padding1[44];
-	byte	anon0[48];
-};
-struct Timespec {
-	int32	tv_sec;
-	int32	tv_nsec;
-};
-struct Timeval {
-	int32	tv_sec;
-	int32	tv_usec;
-};
-struct SigactionT {
-	void	*k_sa_handler;
-	uint32	sa_flags;
-	void	*sa_restorer;
-	uint64	sa_mask;
-};
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	byte	_sifields[116];
-};
-struct SigaltstackT {
-	byte	*ss_sp;
-	int32	ss_flags;
-	uint32	ss_size;
-};
-struct Sigcontext {
-	uint16	gs;
-	uint16	__gsh;
-	uint16	fs;
-	uint16	__fsh;
-	uint16	es;
-	uint16	__esh;
-	uint16	ds;
-	uint16	__dsh;
-	uint32	edi;
-	uint32	esi;
-	uint32	ebp;
-	uint32	esp;
-	uint32	ebx;
-	uint32	edx;
-	uint32	ecx;
-	uint32	eax;
-	uint32	trapno;
-	uint32	err;
-	uint32	eip;
-	uint16	cs;
-	uint16	__csh;
-	uint32	eflags;
-	uint32	esp_at_signal;
-	uint16	ss;
-	uint16	__ssh;
-	Fpstate	*fpstate;
-	uint32	oldmask;
-	uint32	cr2;
-};
-struct Ucontext {
-	uint32	uc_flags;
-	Ucontext	*uc_link;
-	SigaltstackT	uc_stack;
-	Sigcontext	uc_mcontext;
-	uint32	uc_sigmask;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-struct EpollEvent {
-	uint32	events;
-	byte	data[8]; // to match amd64
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_linux_amd64.go b/src/runtime/defs_linux_amd64.go
new file mode 100644
index 0000000..7a1caea
--- /dev/null
+++ b/src/runtime/defs_linux_amd64.go
@@ -0,0 +1,253 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_linux.go defs1_linux.go
+
+package runtime
+
+const (
+	_EINTR  = 0x4
+	_EAGAIN = 0xb
+	_ENOMEM = 0xc
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x20
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_DONTNEED = 0x4
+
+	_SA_RESTART  = 0x10000000
+	_SA_ONSTACK  = 0x8000000
+	_SA_RESTORER = 0x4000000
+	_SA_SIGINFO  = 0x4
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGBUS    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGUSR1   = 0xa
+	_SIGSEGV   = 0xb
+	_SIGUSR2   = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGSTKFLT = 0x10
+	_SIGCHLD   = 0x11
+	_SIGCONT   = 0x12
+	_SIGSTOP   = 0x13
+	_SIGTSTP   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGURG    = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGIO     = 0x1d
+	_SIGPWR    = 0x1e
+	_SIGSYS    = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EPOLLIN       = 0x1
+	_EPOLLOUT      = 0x4
+	_EPOLLERR      = 0x8
+	_EPOLLHUP      = 0x10
+	_EPOLLRDHUP    = 0x2000
+	_EPOLLET       = 0x80000000
+	_EPOLL_CLOEXEC = 0x80000
+	_EPOLL_CTL_ADD = 0x1
+	_EPOLL_CTL_DEL = 0x2
+	_EPOLL_CTL_MOD = 0x3
+)
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = int64(x)
+}
+
+func (ts *timespec) set_nsec(x int32) {
+	ts.tv_nsec = int64(x)
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = int64(x)
+}
+
+type sigactiont struct {
+	sa_handler  uintptr
+	sa_flags    uint64
+	sa_restorer uintptr
+	sa_mask     uint64
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	pad_cgo_0 [4]byte
+	_sifields [112]byte
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type epollevent struct {
+	events uint32
+	data   [8]byte // unaligned uintptr
+}
+
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_linux.go defs1_linux.go
+
+const (
+	_O_RDONLY  = 0x0
+	_O_CLOEXEC = 0x80000
+)
+
+type usigset struct {
+	__val [16]uint64
+}
+
+type fpxreg struct {
+	significand [4]uint16
+	exponent    uint16
+	padding     [3]uint16
+}
+
+type xmmreg struct {
+	element [4]uint32
+}
+
+type fpstate struct {
+	cwd       uint16
+	swd       uint16
+	ftw       uint16
+	fop       uint16
+	rip       uint64
+	rdp       uint64
+	mxcsr     uint32
+	mxcr_mask uint32
+	_st       [8]fpxreg
+	_xmm      [16]xmmreg
+	padding   [24]uint32
+}
+
+type fpxreg1 struct {
+	significand [4]uint16
+	exponent    uint16
+	padding     [3]uint16
+}
+
+type xmmreg1 struct {
+	element [4]uint32
+}
+
+type fpstate1 struct {
+	cwd       uint16
+	swd       uint16
+	ftw       uint16
+	fop       uint16
+	rip       uint64
+	rdp       uint64
+	mxcsr     uint32
+	mxcr_mask uint32
+	_st       [8]fpxreg1
+	_xmm      [16]xmmreg1
+	padding   [24]uint32
+}
+
+type fpreg1 struct {
+	significand [4]uint16
+	exponent    uint16
+}
+
+type sigaltstackt struct {
+	ss_sp     *byte
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+	ss_size   uintptr
+}
+
+type mcontext struct {
+	gregs       [23]uint64
+	fpregs      *fpstate
+	__reserved1 [8]uint64
+}
+
+type ucontext struct {
+	uc_flags     uint64
+	uc_link      *ucontext
+	uc_stack     sigaltstackt
+	uc_mcontext  mcontext
+	uc_sigmask   usigset
+	__fpregs_mem fpstate
+}
+
+type sigcontext struct {
+	r8          uint64
+	r9          uint64
+	r10         uint64
+	r11         uint64
+	r12         uint64
+	r13         uint64
+	r14         uint64
+	r15         uint64
+	rdi         uint64
+	rsi         uint64
+	rbp         uint64
+	rbx         uint64
+	rdx         uint64
+	rax         uint64
+	rcx         uint64
+	rsp         uint64
+	rip         uint64
+	eflags      uint64
+	cs          uint16
+	gs          uint16
+	fs          uint16
+	__pad0      uint16
+	err         uint64
+	trapno      uint64
+	oldmask     uint64
+	cr2         uint64
+	fpstate     *fpstate1
+	__reserved1 [8]uint64
+}
diff --git a/src/runtime/defs_linux_amd64.h b/src/runtime/defs_linux_amd64.h
deleted file mode 100644
index 14616df..0000000
--- a/src/runtime/defs_linux_amd64.h
+++ /dev/null
@@ -1,254 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_linux.go defs1_linux.go
-
-
-enum {
-	EINTR	= 0x4,
-	EAGAIN	= 0xb,
-	ENOMEM	= 0xc,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x20,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_DONTNEED	= 0x4,
-
-	SA_RESTART	= 0x10000000,
-	SA_ONSTACK	= 0x8000000,
-	SA_RESTORER	= 0x4000000,
-	SA_SIGINFO	= 0x4,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGBUS		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGUSR1		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGUSR2		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGSTKFLT	= 0x10,
-	SIGCHLD		= 0x11,
-	SIGCONT		= 0x12,
-	SIGSTOP		= 0x13,
-	SIGTSTP		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGURG		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGIO		= 0x1d,
-	SIGPWR		= 0x1e,
-	SIGSYS		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EPOLLIN		= 0x1,
-	EPOLLOUT	= 0x4,
-	EPOLLERR	= 0x8,
-	EPOLLHUP	= 0x10,
-	EPOLLRDHUP	= 0x2000,
-	EPOLLET		= -0x80000000,
-	EPOLL_CLOEXEC	= 0x80000,
-	EPOLL_CTL_ADD	= 0x1,
-	EPOLL_CTL_DEL	= 0x2,
-	EPOLL_CTL_MOD	= 0x3,
-};
-
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct SigactionT SigactionT;
-typedef struct Siginfo Siginfo;
-typedef struct Itimerval Itimerval;
-typedef struct EpollEvent EpollEvent;
-
-#pragma pack on
-
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int64	tv_usec;
-};
-struct SigactionT {
-	void	*sa_handler;
-	uint64	sa_flags;
-	void	*sa_restorer;
-	uint64	sa_mask;
-};
-struct Siginfo {
-	int32	si_signo;
-	int32	si_errno;
-	int32	si_code;
-	byte	Pad_cgo_0[4];
-	byte	_sifields[112];
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-struct EpollEvent {
-	uint32	events;
-	byte	data[8]; // unaligned uintptr
-};
-
-
-#pragma pack off
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_linux.go defs1_linux.go
-
-
-enum {
-	O_RDONLY	= 0x0,
-	O_CLOEXEC	= 0x80000,
-};
-
-typedef struct Usigset Usigset;
-typedef struct Fpxreg Fpxreg;
-typedef struct Xmmreg Xmmreg;
-typedef struct Fpstate Fpstate;
-typedef struct Fpxreg1 Fpxreg1;
-typedef struct Xmmreg1 Xmmreg1;
-typedef struct Fpstate1 Fpstate1;
-typedef struct Fpreg1 Fpreg1;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Sigcontext Sigcontext;
-
-#pragma pack on
-
-struct Usigset {
-	uint64	__val[16];
-};
-struct Fpxreg {
-	uint16	significand[4];
-	uint16	exponent;
-	uint16	padding[3];
-};
-struct Xmmreg {
-	uint32	element[4];
-};
-struct Fpstate {
-	uint16	cwd;
-	uint16	swd;
-	uint16	ftw;
-	uint16	fop;
-	uint64	rip;
-	uint64	rdp;
-	uint32	mxcsr;
-	uint32	mxcr_mask;
-	Fpxreg	_st[8];
-	Xmmreg	_xmm[16];
-	uint32	padding[24];
-};
-struct Fpxreg1 {
-	uint16	significand[4];
-	uint16	exponent;
-	uint16	padding[3];
-};
-struct Xmmreg1 {
-	uint32	element[4];
-};
-struct Fpstate1 {
-	uint16	cwd;
-	uint16	swd;
-	uint16	ftw;
-	uint16	fop;
-	uint64	rip;
-	uint64	rdp;
-	uint32	mxcsr;
-	uint32	mxcr_mask;
-	Fpxreg1	_st[8];
-	Xmmreg1	_xmm[16];
-	uint32	padding[24];
-};
-struct Fpreg1 {
-	uint16	significand[4];
-	uint16	exponent;
-};
-struct SigaltstackT {
-	byte	*ss_sp;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-	uint64	ss_size;
-};
-struct Mcontext {
-	int64	gregs[23];
-	Fpstate	*fpregs;
-	uint64	__reserved1[8];
-};
-struct Ucontext {
-	uint64	uc_flags;
-	Ucontext	*uc_link;
-	SigaltstackT	uc_stack;
-	Mcontext	uc_mcontext;
-	Usigset	uc_sigmask;
-	Fpstate	__fpregs_mem;
-};
-struct Sigcontext {
-	uint64	r8;
-	uint64	r9;
-	uint64	r10;
-	uint64	r11;
-	uint64	r12;
-	uint64	r13;
-	uint64	r14;
-	uint64	r15;
-	uint64	rdi;
-	uint64	rsi;
-	uint64	rbp;
-	uint64	rbx;
-	uint64	rdx;
-	uint64	rax;
-	uint64	rcx;
-	uint64	rsp;
-	uint64	rip;
-	uint64	eflags;
-	uint16	cs;
-	uint16	gs;
-	uint16	fs;
-	uint16	__pad0;
-	uint64	err;
-	uint64	trapno;
-	uint64	oldmask;
-	uint64	cr2;
-	Fpstate1	*fpstate;
-	uint64	__reserved1[8];
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_linux_arm.go b/src/runtime/defs_linux_arm.go
new file mode 100644
index 0000000..7f83002
--- /dev/null
+++ b/src/runtime/defs_linux_arm.go
@@ -0,0 +1,167 @@
+package runtime
+
+// Constants
+const (
+	_EINTR  = 0x4
+	_ENOMEM = 0xc
+	_EAGAIN = 0xb
+
+	_PROT_NONE      = 0
+	_PROT_READ      = 0x1
+	_PROT_WRITE     = 0x2
+	_PROT_EXEC      = 0x4
+	_MAP_ANON       = 0x20
+	_MAP_PRIVATE    = 0x2
+	_MAP_FIXED      = 0x10
+	_MADV_DONTNEED  = 0x4
+	_SA_RESTART     = 0x10000000
+	_SA_ONSTACK     = 0x8000000
+	_SA_RESTORER    = 0 // unused on ARM
+	_SA_SIGINFO     = 0x4
+	_SIGHUP         = 0x1
+	_SIGINT         = 0x2
+	_SIGQUIT        = 0x3
+	_SIGILL         = 0x4
+	_SIGTRAP        = 0x5
+	_SIGABRT        = 0x6
+	_SIGBUS         = 0x7
+	_SIGFPE         = 0x8
+	_SIGKILL        = 0x9
+	_SIGUSR1        = 0xa
+	_SIGSEGV        = 0xb
+	_SIGUSR2        = 0xc
+	_SIGPIPE        = 0xd
+	_SIGALRM        = 0xe
+	_SIGSTKFLT      = 0x10
+	_SIGCHLD        = 0x11
+	_SIGCONT        = 0x12
+	_SIGSTOP        = 0x13
+	_SIGTSTP        = 0x14
+	_SIGTTIN        = 0x15
+	_SIGTTOU        = 0x16
+	_SIGURG         = 0x17
+	_SIGXCPU        = 0x18
+	_SIGXFSZ        = 0x19
+	_SIGVTALRM      = 0x1a
+	_SIGPROF        = 0x1b
+	_SIGWINCH       = 0x1c
+	_SIGIO          = 0x1d
+	_SIGPWR         = 0x1e
+	_SIGSYS         = 0x1f
+	_FPE_INTDIV     = 0x1
+	_FPE_INTOVF     = 0x2
+	_FPE_FLTDIV     = 0x3
+	_FPE_FLTOVF     = 0x4
+	_FPE_FLTUND     = 0x5
+	_FPE_FLTRES     = 0x6
+	_FPE_FLTINV     = 0x7
+	_FPE_FLTSUB     = 0x8
+	_BUS_ADRALN     = 0x1
+	_BUS_ADRERR     = 0x2
+	_BUS_OBJERR     = 0x3
+	_SEGV_MAPERR    = 0x1
+	_SEGV_ACCERR    = 0x2
+	_ITIMER_REAL    = 0
+	_ITIMER_PROF    = 0x2
+	_ITIMER_VIRTUAL = 0x1
+	_O_RDONLY       = 0
+	_O_CLOEXEC      = 02000000
+
+	_EPOLLIN       = 0x1
+	_EPOLLOUT      = 0x4
+	_EPOLLERR      = 0x8
+	_EPOLLHUP      = 0x10
+	_EPOLLRDHUP    = 0x2000
+	_EPOLLET       = 0x80000000
+	_EPOLL_CLOEXEC = 0x80000
+	_EPOLL_CTL_ADD = 0x1
+	_EPOLL_CTL_DEL = 0x2
+	_EPOLL_CTL_MOD = 0x3
+)
+
+type timespec struct {
+	tv_sec  int32
+	tv_nsec int32
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = x
+}
+
+func (ts *timespec) set_nsec(x int32) {
+	ts.tv_nsec = x
+}
+
+type sigaltstackt struct {
+	ss_sp    *byte
+	ss_flags int32
+	ss_size  uintptr
+}
+
+type sigcontext struct {
+	trap_no       uint32
+	error_code    uint32
+	oldmask       uint32
+	r0            uint32
+	r1            uint32
+	r2            uint32
+	r3            uint32
+	r4            uint32
+	r5            uint32
+	r6            uint32
+	r7            uint32
+	r8            uint32
+	r9            uint32
+	r10           uint32
+	fp            uint32
+	ip            uint32
+	sp            uint32
+	lr            uint32
+	pc            uint32
+	cpsr          uint32
+	fault_address uint32
+}
+
+type ucontext struct {
+	uc_flags    uint32
+	uc_link     *ucontext
+	uc_stack    sigaltstackt
+	uc_mcontext sigcontext
+	uc_sigmask  uint32
+	__unused    [31]int32
+	uc_regspace [128]uint32
+}
+
+type timeval struct {
+	tv_sec  int32
+	tv_usec int32
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_errno  int32
+	si_code   int32
+	_sifields [4]uint8
+}
+
+type sigactiont struct {
+	sa_handler  uintptr
+	sa_flags    uint32
+	sa_restorer uintptr
+	sa_mask     uint64
+}
+
+type epollevent struct {
+	events uint32
+	_pad   uint32
+	data   [8]byte // to match amd64
+}
diff --git a/src/runtime/defs_linux_arm.h b/src/runtime/defs_linux_arm.h
deleted file mode 100644
index 50b3c91..0000000
--- a/src/runtime/defs_linux_arm.h
+++ /dev/null
@@ -1,168 +0,0 @@
-// TODO: Generate using cgo like defs_linux_{386,amd64}.h
-
-// Constants
-enum {
-	EINTR  = 0x4,
-	ENOMEM = 0xc,
-	EAGAIN = 0xb,
-
-	PROT_NONE = 0,
-	PROT_READ = 0x1,
-	PROT_WRITE = 0x2,
-	PROT_EXEC = 0x4,
-	MAP_ANON = 0x20,
-	MAP_PRIVATE = 0x2,
-	MAP_FIXED = 0x10,
-	MADV_DONTNEED = 0x4,
-	SA_RESTART = 0x10000000,
-	SA_ONSTACK = 0x8000000,
-	SA_RESTORER = 0, // unused on ARM
-	SA_SIGINFO = 0x4,
-	SIGHUP = 0x1,
-	SIGINT = 0x2,
-	SIGQUIT = 0x3,
-	SIGILL = 0x4,
-	SIGTRAP = 0x5,
-	SIGABRT = 0x6,
-	SIGBUS = 0x7,
-	SIGFPE = 0x8,
-	SIGKILL = 0x9,
-	SIGUSR1 = 0xa,
-	SIGSEGV = 0xb,
-	SIGUSR2 = 0xc,
-	SIGPIPE = 0xd,
-	SIGALRM = 0xe,
-	SIGSTKFLT = 0x10,
-	SIGCHLD = 0x11,
-	SIGCONT = 0x12,
-	SIGSTOP = 0x13,
-	SIGTSTP = 0x14,
-	SIGTTIN = 0x15,
-	SIGTTOU = 0x16,
-	SIGURG = 0x17,
-	SIGXCPU = 0x18,
-	SIGXFSZ = 0x19,
-	SIGVTALRM = 0x1a,
-	SIGPROF = 0x1b,
-	SIGWINCH = 0x1c,
-	SIGIO = 0x1d,
-	SIGPWR = 0x1e,
-	SIGSYS = 0x1f,
-	FPE_INTDIV = 0x1,
-	FPE_INTOVF = 0x2,
-	FPE_FLTDIV = 0x3,
-	FPE_FLTOVF = 0x4,
-	FPE_FLTUND = 0x5,
-	FPE_FLTRES = 0x6,
-	FPE_FLTINV = 0x7,
-	FPE_FLTSUB = 0x8,
-	BUS_ADRALN = 0x1,
-	BUS_ADRERR = 0x2,
-	BUS_OBJERR = 0x3,
-	SEGV_MAPERR = 0x1,
-	SEGV_ACCERR = 0x2,
-	ITIMER_REAL = 0,
-	ITIMER_PROF = 0x2,
-	ITIMER_VIRTUAL = 0x1,
-	O_RDONLY = 0,
-	O_CLOEXEC = 02000000,
-
-	EPOLLIN		= 0x1,
-	EPOLLOUT	= 0x4,
-	EPOLLERR	= 0x8,
-	EPOLLHUP	= 0x10,
-	EPOLLRDHUP	= 0x2000,
-	EPOLLET		= -0x80000000,
-	EPOLL_CLOEXEC	= 0x80000,
-	EPOLL_CTL_ADD	= 0x1,
-	EPOLL_CTL_DEL	= 0x2,
-	EPOLL_CTL_MOD	= 0x3,
-};
-
-// Types
-#pragma pack on
-
-typedef struct Timespec Timespec;
-struct Timespec {
-	int32 tv_sec;
-	int32 tv_nsec;
-};
-
-typedef struct SigaltstackT SigaltstackT;
-struct SigaltstackT {
-	void *ss_sp;
-	int32 ss_flags;
-	uint32 ss_size;
-};
-
-typedef struct Sigcontext Sigcontext;
-struct Sigcontext {
-	uint32 trap_no;
-	uint32 error_code;
-	uint32 oldmask;
-	uint32 arm_r0;
-	uint32 arm_r1;
-	uint32 arm_r2;
-	uint32 arm_r3;
-	uint32 arm_r4;
-	uint32 arm_r5;
-	uint32 arm_r6;
-	uint32 arm_r7;
-	uint32 arm_r8;
-	uint32 arm_r9;
-	uint32 arm_r10;
-	uint32 arm_fp;
-	uint32 arm_ip;
-	uint32 arm_sp;
-	uint32 arm_lr;
-	uint32 arm_pc;
-	uint32 arm_cpsr;
-	uint32 fault_address;
-};
-
-typedef struct Ucontext Ucontext;
-struct Ucontext {
-	uint32 uc_flags;
-	Ucontext *uc_link;
-	SigaltstackT uc_stack;
-	Sigcontext uc_mcontext;
-	uint32 uc_sigmask;
-	int32 __unused[31];
-	uint32 uc_regspace[128];
-};
-
-typedef struct Timeval Timeval;
-struct Timeval {
-	int32 tv_sec;
-	int32 tv_usec;
-};
-
-typedef struct Itimerval Itimerval;
-struct Itimerval {
-	Timeval it_interval;
-	Timeval it_value;
-};
-
-typedef struct Siginfo Siginfo;
-struct Siginfo {
-	int32 si_signo;
-	int32 si_errno;
-	int32 si_code;
-	uint8 _sifields[4];
-};
-
-typedef struct SigactionT SigactionT;
-struct SigactionT {
-	void *sa_handler;
-	uint32 sa_flags;
-	void *sa_restorer;
-	uint64 sa_mask;
-};
-
-typedef struct EpollEvent EpollEvent;
-struct EpollEvent {
-	uint32	events;
-	uint32	_pad;
-	byte	data[8]; // to match amd64
-};
-#pragma pack off
diff --git a/src/runtime/defs_nacl_386.go b/src/runtime/defs_nacl_386.go
new file mode 100644
index 0000000..4988829
--- /dev/null
+++ b/src/runtime/defs_nacl_386.go
@@ -0,0 +1,42 @@
+package runtime
+
+const (
+	// These values are referred to in the source code
+	// but really don't matter. Even so, use the standard numbers.
+	_SIGSEGV = 11
+	_SIGPROF = 27
+)
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int32
+}
+
+type excregs386 struct {
+	eax    uint32
+	ecx    uint32
+	edx    uint32
+	ebx    uint32
+	esp    uint32
+	ebp    uint32
+	esi    uint32
+	edi    uint32
+	eip    uint32
+	eflags uint32
+}
+
+type exccontext struct {
+	size                    uint32
+	portable_context_offset uint32
+	portable_context_size   uint32
+	arch                    uint32
+	regs_size               uint32
+	reserved                [11]uint32
+	regs                    excregs386
+}
+
+type excportablecontext struct {
+	pc uint32
+	sp uint32
+	fp uint32
+}
diff --git a/src/runtime/defs_nacl_386.h b/src/runtime/defs_nacl_386.h
deleted file mode 100644
index e8fbb38..0000000
--- a/src/runtime/defs_nacl_386.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Created by hand, not machine generated.
-
-enum
-{
-	// These values are referred to in the source code
-	// but really don't matter. Even so, use the standard numbers.
-	SIGSEGV = 11,
-	SIGPROF = 27,
-};
-
-typedef struct Siginfo Siginfo;
-
-// native_client/src/trusted/service_runtime/include/machine/_types.h
-typedef struct Timespec Timespec;
-
-struct Timespec
-{
-	int64 tv_sec;
-	int32 tv_nsec;
-};
-
-// native_client/src/trusted/service_runtime/nacl_exception.h
-// native_client/src/include/nacl/nacl_exception.h
-
-typedef struct ExcContext ExcContext;
-typedef struct ExcPortable ExcPortable;
-typedef struct ExcRegs386 ExcRegs386;
-
-struct ExcRegs386
-{
-	uint32	eax;
-	uint32	ecx;
-	uint32	edx;
-	uint32	ebx;
-	uint32	esp;
-	uint32	ebp;
-	uint32	esi;
-	uint32	edi;
-	uint32	eip;
-	uint32	eflags;
-};
-
-struct ExcContext
-{
-	uint32	size;
-	uint32	portable_context_offset;
-	uint32	portable_context_size;
-	uint32	arch;
-	uint32	regs_size;
-	uint32	reserved[11];
-	ExcRegs386	regs;
-};
-
-struct ExcPortableContext
-{
-	uint32	pc;
-	uint32	sp;
-	uint32	fp;
-};
diff --git a/src/runtime/defs_nacl_amd64p32.go b/src/runtime/defs_nacl_amd64p32.go
new file mode 100644
index 0000000..add11fe
--- /dev/null
+++ b/src/runtime/defs_nacl_amd64p32.go
@@ -0,0 +1,63 @@
+package runtime
+
+const (
+	// These values are referred to in the source code
+	// but really don't matter. Even so, use the standard numbers.
+	_SIGSEGV = 11
+	_SIGPROF = 27
+)
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int32
+}
+
+type excregs386 struct {
+	eax    uint32
+	ecx    uint32
+	edx    uint32
+	ebx    uint32
+	esp    uint32
+	ebp    uint32
+	esi    uint32
+	edi    uint32
+	eip    uint32
+	eflags uint32
+}
+
+type excregsamd64 struct {
+	rax    uint64
+	rcx    uint64
+	rdx    uint64
+	rbx    uint64
+	rsp    uint64
+	rbp    uint64
+	rsi    uint64
+	rdi    uint64
+	r8     uint64
+	r9     uint64
+	r10    uint64
+	r11    uint64
+	r12    uint64
+	r13    uint64
+	r14    uint64
+	r15    uint64
+	rip    uint64
+	rflags uint32
+}
+
+type exccontext struct {
+	size                    uint32
+	portable_context_offset uint32
+	portable_context_size   uint32
+	arch                    uint32
+	regs_size               uint32
+	reserved                [11]uint32
+	regs                    excregsamd64
+}
+
+type excportablecontext struct {
+	pc uint32
+	sp uint32
+	fp uint32
+}
diff --git a/src/runtime/defs_nacl_amd64p32.h b/src/runtime/defs_nacl_amd64p32.h
deleted file mode 100644
index 45663d40..0000000
--- a/src/runtime/defs_nacl_amd64p32.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Created by hand, not machine generated.
-
-enum
-{
-	// These values are referred to in the source code
-	// but really don't matter. Even so, use the standard numbers.
-	SIGSEGV = 11,
-	SIGPROF = 27,
-};
-
-typedef struct Siginfo Siginfo;
-
-
-// native_client/src/trusted/service_runtime/include/machine/_types.h
-typedef struct Timespec Timespec;
-
-struct Timespec
-{
-	int64 tv_sec;
-	int32 tv_nsec;
-};
-
-// native_client/src/trusted/service_runtime/nacl_exception.h
-// native_client/src/include/nacl/nacl_exception.h
-
-typedef struct ExcContext ExcContext;
-typedef struct ExcPortable ExcPortable;
-typedef struct ExcRegs386 ExcRegs386;
-typedef struct ExcRegsAmd64 ExcRegsAmd64;
-
-struct ExcRegs386
-{
-	uint32	eax;
-	uint32	ecx;
-	uint32	edx;
-	uint32	ebx;
-	uint32	esp;
-	uint32	ebp;
-	uint32	esi;
-	uint32	edi;
-	uint32	eip;
-	uint32	eflags;
-};
-
-struct ExcRegsAmd64
-{
-	uint64	rax;
-	uint64	rcx;
-	uint64	rdx;
-	uint64	rbx;
-	uint64	rsp;
-	uint64	rbp;
-	uint64	rsi;
-	uint64	rdi;
-	uint64	r8;
-	uint64	r9;
-	uint64	r10;
-	uint64	r11;
-	uint64	r12;
-	uint64	r13;
-	uint64	r14;
-	uint64	r15;
-	uint64	rip;
-	uint32	rflags;
-};
-
-struct ExcContext
-{
-	uint32	size;
-	uint32	portable_context_offset;
-	uint32	portable_context_size;
-	uint32	arch;
-	uint32	regs_size;
-	uint32	reserved[11];
-	union {
-		ExcRegs386	regs;
-		ExcRegsAmd64	regs64;
-	} regs;
-};
-
-struct ExcPortableContext
-{
-	uint32	pc;
-	uint32	sp;
-	uint32	fp;
-};
diff --git a/src/runtime/defs_nacl_arm.go b/src/runtime/defs_nacl_arm.go
new file mode 100644
index 0000000..c983cff
--- /dev/null
+++ b/src/runtime/defs_nacl_arm.go
@@ -0,0 +1,49 @@
+package runtime
+
+const (
+	// These values are referred to in the source code
+	// but really don't matter. Even so, use the standard numbers.
+	_SIGSEGV = 11
+	_SIGPROF = 27
+)
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int32
+}
+
+type excregsarm struct {
+	r0   uint32
+	r1   uint32
+	r2   uint32
+	r3   uint32
+	r4   uint32
+	r5   uint32
+	r6   uint32
+	r7   uint32
+	r8   uint32
+	r9   uint32 // the value reported here is undefined.
+	r10  uint32
+	r11  uint32
+	r12  uint32
+	sp   uint32 /* r13 */
+	lr   uint32 /* r14 */
+	pc   uint32 /* r15 */
+	cpsr uint32
+}
+
+type exccontext struct {
+	size                    uint32
+	portable_context_offset uint32
+	portable_context_size   uint32
+	arch                    uint32
+	regs_size               uint32
+	reserved                [11]uint32
+	regs                    excregsarm
+}
+
+type excportablecontext struct {
+	pc uint32
+	sp uint32
+	fp uint32
+}
diff --git a/src/runtime/defs_nacl_arm.h b/src/runtime/defs_nacl_arm.h
deleted file mode 100644
index 9ce07cc..0000000
--- a/src/runtime/defs_nacl_arm.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Created by hand, not machine generated.
-
-enum
-{
-	// These values are referred to in the source code
-	// but really don't matter. Even so, use the standard numbers.
-	SIGSEGV = 11,
-	SIGPROF = 27,
-};
-
-typedef struct Siginfo Siginfo;
-
-// native_client/src/trusted/service_runtime/include/machine/_types.h
-typedef struct Timespec Timespec;
-
-struct Timespec
-{
-	int64 tv_sec;
-	int32 tv_nsec;
-};
-
-// native_client/src/trusted/service_runtime/nacl_exception.h
-// native_client/src/include/nacl/nacl_exception.h
-
-typedef struct ExcContext ExcContext;
-typedef struct ExcPortable ExcPortable;
-typedef struct ExcRegsARM ExcRegsARM;
-
-struct ExcRegsARM
-{
-	uint32	r0;
-	uint32	r1;
-	uint32	r2;
-	uint32	r3;
-	uint32	r4;
-	uint32	r5;
-	uint32	r6;
-	uint32	r7;
-	uint32	r8;
-	uint32	r9;	// the value reported here is undefined.
-	uint32	r10;
-	uint32	r11;
-	uint32	r12;
-	uint32	sp;	/* r13 */
-	uint32	lr;	/* r14 */
-	uint32	pc;	/* r15 */
-	uint32	cpsr;
-};
-
-struct ExcContext
-{
-	uint32	size;
-	uint32	portable_context_offset;
-	uint32	portable_context_size;
-	uint32	arch;
-	uint32	regs_size;
-	uint32	reserved[11];
-	ExcRegsARM	regs;
-};
-
-struct ExcPortableContext
-{
-	uint32	pc;
-	uint32	sp;
-	uint32	fp;
-};
diff --git a/src/runtime/defs_netbsd_386.h b/src/runtime/defs_netbsd_386.h
deleted file mode 100644
index fd87804..0000000
--- a/src/runtime/defs_netbsd_386.h
+++ /dev/null
@@ -1,182 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_netbsd.go defs_netbsd_386.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x6,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= 0x0,
-	EVFILT_WRITE	= 0x1,
-};
-
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct Siginfo Siginfo;
-typedef struct StackT StackT;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct McontextT McontextT;
-typedef struct UcontextT UcontextT;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct SigaltstackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct Siginfo {
-	int32	_signo;
-	int32	_code;
-	int32	_errno;
-	byte	_reason[20];
-};
-
-struct StackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int32	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int32	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct McontextT {
-	int32	__gregs[19];
-	byte	__fpregs[644];
-	int32	_mc_tlsbase;
-};
-struct UcontextT {
-	uint32	uc_flags;
-	UcontextT	*uc_link;
-	Sigset	uc_sigmask;
-	StackT	uc_stack;
-	McontextT	uc_mcontext;
-	int32	__uc_pad[4];
-};
-
-struct KeventT {
-	uint32	ident;
-	uint32	filter;
-	uint32	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_netbsd.go defs_netbsd_386.go
-
-
-enum {
-	REG_GS		= 0x0,
-	REG_FS		= 0x1,
-	REG_ES		= 0x2,
-	REG_DS		= 0x3,
-	REG_EDI		= 0x4,
-	REG_ESI		= 0x5,
-	REG_EBP		= 0x6,
-	REG_ESP		= 0x7,
-	REG_EBX		= 0x8,
-	REG_EDX		= 0x9,
-	REG_ECX		= 0xa,
-	REG_EAX		= 0xb,
-	REG_TRAPNO	= 0xc,
-	REG_ERR		= 0xd,
-	REG_EIP		= 0xe,
-	REG_CS		= 0xf,
-	REG_EFL		= 0x10,
-	REG_UESP	= 0x11,
-	REG_SS		= 0x12,
-};
-
diff --git a/src/runtime/defs_netbsd_amd64.h b/src/runtime/defs_netbsd_amd64.h
deleted file mode 100644
index dac94b1..0000000
--- a/src/runtime/defs_netbsd_amd64.h
+++ /dev/null
@@ -1,194 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x6,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= 0x0,
-	EVFILT_WRITE	= 0x1,
-};
-
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct Siginfo Siginfo;
-typedef struct StackT StackT;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct McontextT McontextT;
-typedef struct UcontextT UcontextT;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct SigaltstackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct Siginfo {
-	int32	_signo;
-	int32	_code;
-	int32	_errno;
-	int32	_pad;
-	byte	_reason[24];
-};
-
-struct StackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int32	tv_usec;
-	byte	Pad_cgo_0[4];
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct McontextT {
-	uint64	__gregs[26];
-	uint64	_mc_tlsbase;
-	int8	__fpregs[512];
-};
-struct UcontextT {
-	uint32	uc_flags;
-	byte	Pad_cgo_0[4];
-	UcontextT	*uc_link;
-	Sigset	uc_sigmask;
-	StackT	uc_stack;
-	McontextT	uc_mcontext;
-};
-
-struct KeventT {
-	uint64	ident;
-	uint32	filter;
-	uint32	flags;
-	uint32	fflags;
-	byte	Pad_cgo_0[4];
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_netbsd.go defs_netbsd_amd64.go
-
-
-enum {
-	REG_RDI		= 0x0,
-	REG_RSI		= 0x1,
-	REG_RDX		= 0x2,
-	REG_RCX		= 0x3,
-	REG_R8		= 0x4,
-	REG_R9		= 0x5,
-	REG_R10		= 0x6,
-	REG_R11		= 0x7,
-	REG_R12		= 0x8,
-	REG_R13		= 0x9,
-	REG_R14		= 0xa,
-	REG_R15		= 0xb,
-	REG_RBP		= 0xc,
-	REG_RBX		= 0xd,
-	REG_RAX		= 0xe,
-	REG_GS		= 0xf,
-	REG_FS		= 0x10,
-	REG_ES		= 0x11,
-	REG_DS		= 0x12,
-	REG_TRAPNO	= 0x13,
-	REG_ERR		= 0x14,
-	REG_RIP		= 0x15,
-	REG_CS		= 0x16,
-	REG_RFLAGS	= 0x17,
-	REG_RSP		= 0x18,
-	REG_SS		= 0x19,
-};
-
diff --git a/src/runtime/defs_netbsd_arm.h b/src/runtime/defs_netbsd_arm.h
deleted file mode 100644
index 70f34af..0000000
--- a/src/runtime/defs_netbsd_arm.h
+++ /dev/null
@@ -1,184 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_netbsd.go defs_netbsd_arm.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x6,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_RECEIPT	= 0,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= 0x0,
-	EVFILT_WRITE	= 0x1,
-};
-
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct Siginfo Siginfo;
-typedef struct StackT StackT;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct McontextT McontextT;
-typedef struct UcontextT UcontextT;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct SigaltstackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-struct Sigset {
-	uint32	__bits[4];
-};
-struct Siginfo {
-	int32	_signo;
-	int32	_code;
-	int32	_errno;
-	byte	_reason[20];
-};
-
-struct StackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int32	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int32	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct McontextT {
-	uint32	__gregs[17];
-#ifdef __ARM_EABI__
-	byte	__fpu[4+8*32+4];
-#else
-	byte	__fpu[4+4*33+4];
-#endif
-	uint32	_mc_tlsbase;
-};
-struct UcontextT {
-	uint32	uc_flags;
-	UcontextT	*uc_link;
-	Sigset	uc_sigmask;
-	StackT	uc_stack;
-	McontextT	uc_mcontext;
-	int32	__uc_pad[2];
-};
-
-struct KeventT {
-	uint32	ident;
-	uint32	filter;
-	uint32	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_netbsd.go defs_netbsd_arm.go
-
-
-enum {
-	REG_R0		= 0x0,
-	REG_R1		= 0x1,
-	REG_R2		= 0x2,
-	REG_R3		= 0x3,
-	REG_R4		= 0x4,
-	REG_R5		= 0x5,
-	REG_R6		= 0x6,
-	REG_R7		= 0x7,
-	REG_R8		= 0x8,
-	REG_R9		= 0x9,
-	REG_R10		= 0xa,
-	REG_R11		= 0xb,
-	REG_R12		= 0xc,
-	REG_R13		= 0xd,
-	REG_R14		= 0xe,
-	REG_R15		= 0xf,
-	REG_CPSR	= 0x10,
-};
-
diff --git a/src/runtime/defs_openbsd_386.go b/src/runtime/defs_openbsd_386.go
new file mode 100644
index 0000000..d7cdbd2
--- /dev/null
+++ b/src/runtime/defs_openbsd_386.go
@@ -0,0 +1,170 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_openbsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x6
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type tforkt struct {
+	tf_tcb   unsafe.Pointer
+	tf_tid   *int32
+	tf_stack uintptr
+}
+
+type sigaltstackt struct {
+	ss_sp    uintptr
+	ss_size  uintptr
+	ss_flags int32
+}
+
+type sigcontext struct {
+	sc_gs       uint32
+	sc_fs       uint32
+	sc_es       uint32
+	sc_ds       uint32
+	sc_edi      uint32
+	sc_esi      uint32
+	sc_ebp      uint32
+	sc_ebx      uint32
+	sc_edx      uint32
+	sc_ecx      uint32
+	sc_eax      uint32
+	sc_eip      uint32
+	sc_cs       uint32
+	sc_eflags   uint32
+	sc_esp      uint32
+	sc_ss       uint32
+	__sc_unused uint32
+	sc_mask     uint32
+	sc_trapno   uint32
+	sc_err      uint32
+	sc_fpstate  unsafe.Pointer
+}
+
+type siginfo struct {
+	si_signo int32
+	si_code  int32
+	si_errno int32
+	_data    [116]byte
+}
+
+type stackt struct {
+	ss_sp    uintptr
+	ss_size  uintptr
+	ss_flags int32
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int32
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = int64(x)
+}
+
+func (ts *timespec) set_nsec(x int32) {
+	ts.tv_nsec = x
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int32
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = x
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint32
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int64
+	udata  *byte
+}
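The converted defs files keep the cgo-derived layout but add small setter methods (set_sec, set_nsec, set_usec) so callers need not care about per-arch field widths (tv_nsec is int32 on openbsd/386 but int64 on openbsd/amd64). A minimal sketch of how such a setter might be used inside the runtime; the makeTimespec name is hypothetical and not part of this change:

	// Hypothetical helper: build a timespec from a nanosecond count using
	// the setters defined above. On openbsd/386 the setter takes int32; the
	// amd64 variant accepts int32 as well and widens it internally.
	func makeTimespec(ns int64) (ts timespec) {
		ts.set_sec(int32(ns / 1000000000))
		ts.set_nsec(int32(ns % 1000000000))
		return ts
	}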
diff --git a/src/runtime/defs_openbsd_386.h b/src/runtime/defs_openbsd_386.h
deleted file mode 100644
index 6b77e00..0000000
--- a/src/runtime/defs_openbsd_386.h
+++ /dev/null
@@ -1,168 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_openbsd.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x6,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct TforkT TforkT;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigcontext Sigcontext;
-typedef struct Siginfo Siginfo;
-typedef struct StackT StackT;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct TforkT {
-	byte	*tf_tcb;
-	int32	*tf_tid;
-	byte	*tf_stack;
-};
-
-struct SigaltstackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-struct Sigcontext {
-	int32	sc_gs;
-	int32	sc_fs;
-	int32	sc_es;
-	int32	sc_ds;
-	int32	sc_edi;
-	int32	sc_esi;
-	int32	sc_ebp;
-	int32	sc_ebx;
-	int32	sc_edx;
-	int32	sc_ecx;
-	int32	sc_eax;
-	int32	sc_eip;
-	int32	sc_cs;
-	int32	sc_eflags;
-	int32	sc_esp;
-	int32	sc_ss;
-	int32	__sc_unused;
-	int32	sc_mask;
-	int32	sc_trapno;
-	int32	sc_err;
-	void	*sc_fpstate;
-};
-struct Siginfo {
-	int32	si_signo;
-	int32	si_code;
-	int32	si_errno;
-	byte	_data[116];
-};
-typedef	uint32	Sigset;
-typedef	byte	Sigval[4];
-
-struct StackT {
-	byte	*ss_sp;
-	uint32	ss_size;
-	int32	ss_flags;
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int32	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int32	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint32	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_openbsd_amd64.go b/src/runtime/defs_openbsd_amd64.go
new file mode 100644
index 0000000..122f46c
--- /dev/null
+++ b/src/runtime/defs_openbsd_amd64.go
@@ -0,0 +1,181 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_openbsd.go
+
+package runtime
+
+import "unsafe"
+
+const (
+	_EINTR  = 0x4
+	_EFAULT = 0xe
+
+	_PROT_NONE  = 0x0
+	_PROT_READ  = 0x1
+	_PROT_WRITE = 0x2
+	_PROT_EXEC  = 0x4
+
+	_MAP_ANON    = 0x1000
+	_MAP_PRIVATE = 0x2
+	_MAP_FIXED   = 0x10
+
+	_MADV_FREE = 0x6
+
+	_SA_SIGINFO = 0x40
+	_SA_RESTART = 0x2
+	_SA_ONSTACK = 0x1
+
+	_SIGHUP    = 0x1
+	_SIGINT    = 0x2
+	_SIGQUIT   = 0x3
+	_SIGILL    = 0x4
+	_SIGTRAP   = 0x5
+	_SIGABRT   = 0x6
+	_SIGEMT    = 0x7
+	_SIGFPE    = 0x8
+	_SIGKILL   = 0x9
+	_SIGBUS    = 0xa
+	_SIGSEGV   = 0xb
+	_SIGSYS    = 0xc
+	_SIGPIPE   = 0xd
+	_SIGALRM   = 0xe
+	_SIGTERM   = 0xf
+	_SIGURG    = 0x10
+	_SIGSTOP   = 0x11
+	_SIGTSTP   = 0x12
+	_SIGCONT   = 0x13
+	_SIGCHLD   = 0x14
+	_SIGTTIN   = 0x15
+	_SIGTTOU   = 0x16
+	_SIGIO     = 0x17
+	_SIGXCPU   = 0x18
+	_SIGXFSZ   = 0x19
+	_SIGVTALRM = 0x1a
+	_SIGPROF   = 0x1b
+	_SIGWINCH  = 0x1c
+	_SIGINFO   = 0x1d
+	_SIGUSR1   = 0x1e
+	_SIGUSR2   = 0x1f
+
+	_FPE_INTDIV = 0x1
+	_FPE_INTOVF = 0x2
+	_FPE_FLTDIV = 0x3
+	_FPE_FLTOVF = 0x4
+	_FPE_FLTUND = 0x5
+	_FPE_FLTRES = 0x6
+	_FPE_FLTINV = 0x7
+	_FPE_FLTSUB = 0x8
+
+	_BUS_ADRALN = 0x1
+	_BUS_ADRERR = 0x2
+	_BUS_OBJERR = 0x3
+
+	_SEGV_MAPERR = 0x1
+	_SEGV_ACCERR = 0x2
+
+	_ITIMER_REAL    = 0x0
+	_ITIMER_VIRTUAL = 0x1
+	_ITIMER_PROF    = 0x2
+
+	_EV_ADD       = 0x1
+	_EV_DELETE    = 0x2
+	_EV_CLEAR     = 0x20
+	_EV_ERROR     = 0x4000
+	_EVFILT_READ  = -0x1
+	_EVFILT_WRITE = -0x2
+)
+
+type tforkt struct {
+	tf_tcb   unsafe.Pointer
+	tf_tid   *int32
+	tf_stack uintptr
+}
+
+type sigaltstackt struct {
+	ss_sp     uintptr
+	ss_size   uintptr
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type sigcontext struct {
+	sc_rdi      uint64
+	sc_rsi      uint64
+	sc_rdx      uint64
+	sc_rcx      uint64
+	sc_r8       uint64
+	sc_r9       uint64
+	sc_r10      uint64
+	sc_r11      uint64
+	sc_r12      uint64
+	sc_r13      uint64
+	sc_r14      uint64
+	sc_r15      uint64
+	sc_rbp      uint64
+	sc_rbx      uint64
+	sc_rax      uint64
+	sc_gs       uint64
+	sc_fs       uint64
+	sc_es       uint64
+	sc_ds       uint64
+	sc_trapno   uint64
+	sc_err      uint64
+	sc_rip      uint64
+	sc_cs       uint64
+	sc_rflags   uint64
+	sc_rsp      uint64
+	sc_ss       uint64
+	sc_fpstate  unsafe.Pointer
+	__sc_unused int32
+	sc_mask     int32
+}
+
+type siginfo struct {
+	si_signo  int32
+	si_code   int32
+	si_errno  int32
+	pad_cgo_0 [4]byte
+	_data     [120]byte
+}
+
+type stackt struct {
+	ss_sp     uintptr
+	ss_size   uintptr
+	ss_flags  int32
+	pad_cgo_0 [4]byte
+}
+
+type timespec struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+func (ts *timespec) set_sec(x int32) {
+	ts.tv_sec = int64(x)
+}
+
+func (ts *timespec) set_nsec(x int32) {
+	ts.tv_nsec = int64(x)
+}
+
+type timeval struct {
+	tv_sec  int64
+	tv_usec int64
+}
+
+func (tv *timeval) set_usec(x int32) {
+	tv.tv_usec = int64(x)
+}
+
+type itimerval struct {
+	it_interval timeval
+	it_value    timeval
+}
+
+type keventt struct {
+	ident  uint64
+	filter int16
+	flags  uint16
+	fflags uint32
+	data   int64
+	udata  *byte
+}
diff --git a/src/runtime/defs_openbsd_amd64.h b/src/runtime/defs_openbsd_amd64.h
deleted file mode 100644
index 761e8e4..0000000
--- a/src/runtime/defs_openbsd_amd64.h
+++ /dev/null
@@ -1,179 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_openbsd.go
-
-
-enum {
-	EINTR	= 0x4,
-	EFAULT	= 0xe,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x1000,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x6,
-
-	SA_SIGINFO	= 0x40,
-	SA_RESTART	= 0x2,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x10,
-	SIGSTOP		= 0x11,
-	SIGTSTP		= 0x12,
-	SIGCONT		= 0x13,
-	SIGCHLD		= 0x14,
-	SIGTTIN		= 0x15,
-	SIGTTOU		= 0x16,
-	SIGIO		= 0x17,
-	SIGXCPU		= 0x18,
-	SIGXFSZ		= 0x19,
-	SIGVTALRM	= 0x1a,
-	SIGPROF		= 0x1b,
-	SIGWINCH	= 0x1c,
-	SIGINFO		= 0x1d,
-	SIGUSR1		= 0x1e,
-	SIGUSR2		= 0x1f,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	EV_ADD		= 0x1,
-	EV_DELETE	= 0x2,
-	EV_CLEAR	= 0x20,
-	EV_ERROR	= 0x4000,
-	EVFILT_READ	= -0x1,
-	EVFILT_WRITE	= -0x2,
-};
-
-typedef struct TforkT TforkT;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigcontext Sigcontext;
-typedef struct Siginfo Siginfo;
-typedef struct StackT StackT;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct KeventT KeventT;
-
-#pragma pack on
-
-struct TforkT {
-	byte	*tf_tcb;
-	int32	*tf_tid;
-	byte	*tf_stack;
-};
-
-struct SigaltstackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-struct Sigcontext {
-	int64	sc_rdi;
-	int64	sc_rsi;
-	int64	sc_rdx;
-	int64	sc_rcx;
-	int64	sc_r8;
-	int64	sc_r9;
-	int64	sc_r10;
-	int64	sc_r11;
-	int64	sc_r12;
-	int64	sc_r13;
-	int64	sc_r14;
-	int64	sc_r15;
-	int64	sc_rbp;
-	int64	sc_rbx;
-	int64	sc_rax;
-	int64	sc_gs;
-	int64	sc_fs;
-	int64	sc_es;
-	int64	sc_ds;
-	int64	sc_trapno;
-	int64	sc_err;
-	int64	sc_rip;
-	int64	sc_cs;
-	int64	sc_rflags;
-	int64	sc_rsp;
-	int64	sc_ss;
-	void	*sc_fpstate;
-	int32	__sc_unused;
-	int32	sc_mask;
-};
-struct Siginfo {
-	int32	si_signo;
-	int32	si_code;
-	int32	si_errno;
-	byte	Pad_cgo_0[4];
-	byte	_data[120];
-};
-typedef	uint32	Sigset;
-typedef	byte	Sigval[8];
-
-struct StackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int64	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct KeventT {
-	uint64	ident;
-	int16	filter;
-	uint16	flags;
-	uint32	fflags;
-	int64	data;
-	byte	*udata;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_plan9_386.go b/src/runtime/defs_plan9_386.go
new file mode 100644
index 0000000..170506b
--- /dev/null
+++ b/src/runtime/defs_plan9_386.go
@@ -0,0 +1,23 @@
+package runtime
+
+type ureg struct {
+	di    uint32 /* general registers */
+	si    uint32 /* ... */
+	bp    uint32 /* ... */
+	nsp   uint32
+	bx    uint32 /* ... */
+	dx    uint32 /* ... */
+	cx    uint32 /* ... */
+	ax    uint32 /* ... */
+	gs    uint32 /* data segments */
+	fs    uint32 /* ... */
+	es    uint32 /* ... */
+	ds    uint32 /* ... */
+	trap  uint32 /* trap _type */
+	ecode uint32 /* error code (or zero) */
+	pc    uint32 /* pc */
+	cs    uint32 /* old context */
+	flags uint32 /* old flags */
+	sp    uint32
+	ss    uint32 /* old stack segment */
+}
diff --git a/src/runtime/defs_plan9_386.h b/src/runtime/defs_plan9_386.h
deleted file mode 100644
index a762b85..0000000
--- a/src/runtime/defs_plan9_386.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#define PAGESIZE 0x1000
-
-typedef struct Ureg Ureg;
-
-struct Ureg
-{
-	uint32	di;		/* general registers */
-	uint32	si;		/* ... */
-	uint32	bp;		/* ... */
-	uint32	nsp;
-	uint32	bx;		/* ... */
-	uint32	dx;		/* ... */
-	uint32	cx;		/* ... */
-	uint32	ax;		/* ... */
-	uint32	gs;		/* data segments */
-	uint32	fs;		/* ... */
-	uint32	es;		/* ... */
-	uint32	ds;		/* ... */
-	uint32	trap;		/* trap type */
-	uint32	ecode;		/* error code (or zero) */
-	uint32	pc;		/* pc */
-	uint32	cs;		/* old context */
-	uint32	flags;		/* old flags */
-	uint32	sp;
-	uint32	ss;		/* old stack segment */
-};
diff --git a/src/runtime/defs_plan9_amd64.go b/src/runtime/defs_plan9_amd64.go
new file mode 100644
index 0000000..17becfb
--- /dev/null
+++ b/src/runtime/defs_plan9_amd64.go
@@ -0,0 +1,32 @@
+package runtime
+
+type ureg struct {
+	ax  uint64
+	bx  uint64
+	cx  uint64
+	dx  uint64
+	si  uint64
+	di  uint64
+	bp  uint64
+	r8  uint64
+	r9  uint64
+	r10 uint64
+	r11 uint64
+	r12 uint64
+	r13 uint64
+	r14 uint64
+	r15 uint64
+
+	ds uint16
+	es uint16
+	fs uint16
+	gs uint16
+
+	_type uint64
+	error uint64 /* error code (or zero) */
+	ip    uint64 /* pc */
+	cs    uint64 /* old context */
+	flags uint64 /* old flags */
+	sp    uint64 /* sp */
+	ss    uint64 /* old stack segment */
+}
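Note that the C field "type" in Ureg becomes "_type" in the Go struct, since "type" is a reserved word in Go; the same mechanical rename shows up in the 386 file's "trap _type" comment above. A hedged sketch of an accessor (the trapType name is hypothetical, not part of this change):

	// Hypothetical accessor: read the trap type from a ureg. The field is
	// spelled _type because "type" cannot be used as an identifier in Go.
	func trapType(u *ureg) uint64 {
		return u._type
	}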
diff --git a/src/runtime/defs_plan9_amd64.h b/src/runtime/defs_plan9_amd64.h
deleted file mode 100644
index 20bca47..0000000
--- a/src/runtime/defs_plan9_amd64.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#define PAGESIZE 0x1000
-
-typedef struct Ureg Ureg;
-
-struct Ureg {
-	uint64	ax;
-	uint64	bx;
-	uint64	cx;
-	uint64	dx;
-	uint64	si;
-	uint64	di;
-	uint64	bp;
-	uint64	r8;
-	uint64	r9;
-	uint64	r10;
-	uint64	r11;
-	uint64	r12;
-	uint64	r13;
-	uint64	r14;
-	uint64	r15;
-
-	uint16	ds;
-	uint16	es;
-	uint16	fs;
-	uint16	gs;
-
-	uint64	type;
-	uint64	error;				/* error code (or zero) */
-	uint64	ip;				/* pc */
-	uint64	cs;				/* old context */
-	uint64	flags;				/* old flags */
-	uint64	sp;				/* sp */
-	uint64	ss;				/* old stack segment */
-};
diff --git a/src/runtime/defs_solaris_amd64.h b/src/runtime/defs_solaris_amd64.h
deleted file mode 100644
index cb1cfea..0000000
--- a/src/runtime/defs_solaris_amd64.h
+++ /dev/null
@@ -1,254 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_solaris.go defs_solaris_amd64.go
-
-
-enum {
-	EINTR		= 0x4,
-	EBADF		= 0x9,
-	EFAULT		= 0xe,
-	EAGAIN		= 0xb,
-	ETIMEDOUT	= 0x91,
-	EWOULDBLOCK	= 0xb,
-	EINPROGRESS	= 0x96,
-
-	PROT_NONE	= 0x0,
-	PROT_READ	= 0x1,
-	PROT_WRITE	= 0x2,
-	PROT_EXEC	= 0x4,
-
-	MAP_ANON	= 0x100,
-	MAP_PRIVATE	= 0x2,
-	MAP_FIXED	= 0x10,
-
-	MADV_FREE	= 0x5,
-
-	SA_SIGINFO	= 0x8,
-	SA_RESTART	= 0x4,
-	SA_ONSTACK	= 0x1,
-
-	SIGHUP		= 0x1,
-	SIGINT		= 0x2,
-	SIGQUIT		= 0x3,
-	SIGILL		= 0x4,
-	SIGTRAP		= 0x5,
-	SIGABRT		= 0x6,
-	SIGEMT		= 0x7,
-	SIGFPE		= 0x8,
-	SIGKILL		= 0x9,
-	SIGBUS		= 0xa,
-	SIGSEGV		= 0xb,
-	SIGSYS		= 0xc,
-	SIGPIPE		= 0xd,
-	SIGALRM		= 0xe,
-	SIGTERM		= 0xf,
-	SIGURG		= 0x15,
-	SIGSTOP		= 0x17,
-	SIGTSTP		= 0x18,
-	SIGCONT		= 0x19,
-	SIGCHLD		= 0x12,
-	SIGTTIN		= 0x1a,
-	SIGTTOU		= 0x1b,
-	SIGIO		= 0x16,
-	SIGXCPU		= 0x1e,
-	SIGXFSZ		= 0x1f,
-	SIGVTALRM	= 0x1c,
-	SIGPROF		= 0x1d,
-	SIGWINCH	= 0x14,
-	SIGUSR1		= 0x10,
-	SIGUSR2		= 0x11,
-
-	FPE_INTDIV	= 0x1,
-	FPE_INTOVF	= 0x2,
-	FPE_FLTDIV	= 0x3,
-	FPE_FLTOVF	= 0x4,
-	FPE_FLTUND	= 0x5,
-	FPE_FLTRES	= 0x6,
-	FPE_FLTINV	= 0x7,
-	FPE_FLTSUB	= 0x8,
-
-	BUS_ADRALN	= 0x1,
-	BUS_ADRERR	= 0x2,
-	BUS_OBJERR	= 0x3,
-
-	SEGV_MAPERR	= 0x1,
-	SEGV_ACCERR	= 0x2,
-
-	ITIMER_REAL	= 0x0,
-	ITIMER_VIRTUAL	= 0x1,
-	ITIMER_PROF	= 0x2,
-
-	_SC_NPROCESSORS_ONLN	= 0xf,
-
-	PTHREAD_CREATE_DETACHED	= 0x40,
-
-	FORK_NOSIGCHLD	= 0x1,
-	FORK_WAITPID	= 0x2,
-
-	MAXHOSTNAMELEN	= 0x100,
-
-	O_NONBLOCK	= 0x80,
-	FD_CLOEXEC	= 0x1,
-	F_GETFL		= 0x3,
-	F_SETFL		= 0x4,
-	F_SETFD		= 0x2,
-
-	POLLIN	= 0x1,
-	POLLOUT	= 0x4,
-	POLLHUP	= 0x10,
-	POLLERR	= 0x8,
-
-	PORT_SOURCE_FD	= 0x4,
-};
-
-typedef struct SemT SemT;
-typedef struct SigaltstackT SigaltstackT;
-typedef struct Sigset Sigset;
-typedef struct StackT StackT;
-typedef struct Siginfo Siginfo;
-typedef struct SigactionT SigactionT;
-typedef struct Fpregset Fpregset;
-typedef struct Mcontext Mcontext;
-typedef struct Ucontext Ucontext;
-typedef struct Timespec Timespec;
-typedef struct Timeval Timeval;
-typedef struct Itimerval Itimerval;
-typedef struct PortEvent PortEvent;
-typedef struct PthreadAttr PthreadAttr;
-typedef struct Stat Stat;
-
-#pragma pack on
-
-struct SemT {
-	uint32	sem_count;
-	uint16	sem_type;
-	uint16	sem_magic;
-	uint64	sem_pad1[3];
-	uint64	sem_pad2[2];
-};
-
-struct SigaltstackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-struct Sigset {
-	uint32	__sigbits[4];
-};
-struct StackT {
-	byte	*ss_sp;
-	uint64	ss_size;
-	int32	ss_flags;
-	byte	Pad_cgo_0[4];
-};
-
-struct Siginfo {
-	int32	si_signo;
-	int32	si_code;
-	int32	si_errno;
-	int32	si_pad;
-	byte	__data[240];
-};
-struct SigactionT {
-	int32	sa_flags;
-	byte	Pad_cgo_0[4];
-	byte	_funcptr[8];
-	Sigset	sa_mask;
-};
-
-struct Fpregset {
-	byte	fp_reg_set[528];
-};
-struct Mcontext {
-	int64	gregs[28];
-	Fpregset	fpregs;
-};
-struct Ucontext {
-	uint64	uc_flags;
-	Ucontext	*uc_link;
-	Sigset	uc_sigmask;
-	StackT	uc_stack;
-	byte	Pad_cgo_0[8];
-	Mcontext	uc_mcontext;
-	int64	uc_filler[5];
-	byte	Pad_cgo_1[8];
-};
-
-struct Timespec {
-	int64	tv_sec;
-	int64	tv_nsec;
-};
-struct Timeval {
-	int64	tv_sec;
-	int64	tv_usec;
-};
-struct Itimerval {
-	Timeval	it_interval;
-	Timeval	it_value;
-};
-
-struct PortEvent {
-	int32	portev_events;
-	uint16	portev_source;
-	uint16	portev_pad;
-	uint64	portev_object;
-	byte	*portev_user;
-};
-typedef	uint32	Pthread;
-struct PthreadAttr {
-	byte	*__pthread_attrp;
-};
-
-struct Stat {
-	uint64	st_dev;
-	uint64	st_ino;
-	uint32	st_mode;
-	uint32	st_nlink;
-	uint32	st_uid;
-	uint32	st_gid;
-	uint64	st_rdev;
-	int64	st_size;
-	Timespec	st_atim;
-	Timespec	st_mtim;
-	Timespec	st_ctim;
-	int32	st_blksize;
-	byte	Pad_cgo_0[4];
-	int64	st_blocks;
-	int8	st_fstype[16];
-};
-
-
-#pragma pack off
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_solaris.go defs_solaris_amd64.go
-
-
-enum {
-	REG_RDI		= 0x8,
-	REG_RSI		= 0x9,
-	REG_RDX		= 0xc,
-	REG_RCX		= 0xd,
-	REG_R8		= 0x7,
-	REG_R9		= 0x6,
-	REG_R10		= 0x5,
-	REG_R11		= 0x4,
-	REG_R12		= 0x3,
-	REG_R13		= 0x2,
-	REG_R14		= 0x1,
-	REG_R15		= 0x0,
-	REG_RBP		= 0xa,
-	REG_RBX		= 0xb,
-	REG_RAX		= 0xe,
-	REG_GS		= 0x17,
-	REG_FS		= 0x16,
-	REG_ES		= 0x18,
-	REG_DS		= 0x19,
-	REG_TRAPNO	= 0xf,
-	REG_ERR		= 0x10,
-	REG_RIP		= 0x11,
-	REG_CS		= 0x12,
-	REG_RFLAGS	= 0x13,
-	REG_RSP		= 0x14,
-	REG_SS		= 0x15,
-};
-
diff --git a/src/runtime/defs_windows_386.go b/src/runtime/defs_windows_386.go
new file mode 100644
index 0000000..abec2d8
--- /dev/null
+++ b/src/runtime/defs_windows_386.go
@@ -0,0 +1,109 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_windows.go
+
+package runtime
+
+const (
+	_PROT_NONE  = 0
+	_PROT_READ  = 1
+	_PROT_WRITE = 2
+	_PROT_EXEC  = 4
+
+	_MAP_ANON    = 1
+	_MAP_PRIVATE = 2
+
+	_DUPLICATE_SAME_ACCESS   = 0x2
+	_THREAD_PRIORITY_HIGHEST = 0x2
+
+	_SIGINT           = 0x2
+	_CTRL_C_EVENT     = 0x0
+	_CTRL_BREAK_EVENT = 0x1
+
+	_CONTEXT_CONTROL = 0x10001
+	_CONTEXT_FULL    = 0x10007
+
+	_EXCEPTION_ACCESS_VIOLATION     = 0xc0000005
+	_EXCEPTION_BREAKPOINT           = 0x80000003
+	_EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d
+	_EXCEPTION_FLT_DIVIDE_BY_ZERO   = 0xc000008e
+	_EXCEPTION_FLT_INEXACT_RESULT   = 0xc000008f
+	_EXCEPTION_FLT_OVERFLOW         = 0xc0000091
+	_EXCEPTION_FLT_UNDERFLOW        = 0xc0000093
+	_EXCEPTION_INT_DIVIDE_BY_ZERO   = 0xc0000094
+	_EXCEPTION_INT_OVERFLOW         = 0xc0000095
+
+	_INFINITE     = 0xffffffff
+	_WAIT_TIMEOUT = 0x102
+
+	_EXCEPTION_CONTINUE_EXECUTION = -0x1
+	_EXCEPTION_CONTINUE_SEARCH    = 0x0
+)
+
+type systeminfo struct {
+	anon0                       [4]byte
+	dwpagesize                  uint32
+	lpminimumapplicationaddress *byte
+	lpmaximumapplicationaddress *byte
+	dwactiveprocessormask       uint32
+	dwnumberofprocessors        uint32
+	dwprocessortype             uint32
+	dwallocationgranularity     uint32
+	wprocessorlevel             uint16
+	wprocessorrevision          uint16
+}
+
+type exceptionrecord struct {
+	exceptioncode        uint32
+	exceptionflags       uint32
+	exceptionrecord      *exceptionrecord
+	exceptionaddress     *byte
+	numberparameters     uint32
+	exceptioninformation [15]uint32
+}
+
+type floatingsavearea struct {
+	controlword   uint32
+	statusword    uint32
+	tagword       uint32
+	erroroffset   uint32
+	errorselector uint32
+	dataoffset    uint32
+	dataselector  uint32
+	registerarea  [80]uint8
+	cr0npxstate   uint32
+}
+
+type context struct {
+	contextflags      uint32
+	dr0               uint32
+	dr1               uint32
+	dr2               uint32
+	dr3               uint32
+	dr6               uint32
+	dr7               uint32
+	floatsave         floatingsavearea
+	seggs             uint32
+	segfs             uint32
+	seges             uint32
+	segds             uint32
+	edi               uint32
+	esi               uint32
+	ebx               uint32
+	edx               uint32
+	ecx               uint32
+	eax               uint32
+	ebp               uint32
+	eip               uint32
+	segcs             uint32
+	eflags            uint32
+	esp               uint32
+	segss             uint32
+	extendedregisters [512]uint8
+}
+
+type overlapped struct {
+	internal     uint32
+	internalhigh uint32
+	anon0        [8]byte
+	hevent       *byte
+}
diff --git a/src/runtime/defs_windows_386.h b/src/runtime/defs_windows_386.h
deleted file mode 100644
index 2317c04..0000000
--- a/src/runtime/defs_windows_386.h
+++ /dev/null
@@ -1,116 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_windows.go
-
-
-enum {
-	PROT_NONE	= 0,
-	PROT_READ	= 1,
-	PROT_WRITE	= 2,
-	PROT_EXEC	= 4,
-
-	MAP_ANON	= 1,
-	MAP_PRIVATE	= 2,
-
-	DUPLICATE_SAME_ACCESS	= 0x2,
-	THREAD_PRIORITY_HIGHEST	= 0x2,
-
-	SIGINT			= 0x2,
-	CTRL_C_EVENT		= 0x0,
-	CTRL_BREAK_EVENT	= 0x1,
-
-	CONTEXT_CONTROL	= 0x10001,
-	CONTEXT_FULL	= 0x10007,
-
-	EXCEPTION_ACCESS_VIOLATION	= 0xc0000005,
-	EXCEPTION_BREAKPOINT		= 0x80000003,
-	EXCEPTION_FLT_DENORMAL_OPERAND	= 0xc000008d,
-	EXCEPTION_FLT_DIVIDE_BY_ZERO	= 0xc000008e,
-	EXCEPTION_FLT_INEXACT_RESULT	= 0xc000008f,
-	EXCEPTION_FLT_OVERFLOW		= 0xc0000091,
-	EXCEPTION_FLT_UNDERFLOW		= 0xc0000093,
-	EXCEPTION_INT_DIVIDE_BY_ZERO	= 0xc0000094,
-	EXCEPTION_INT_OVERFLOW		= 0xc0000095,
-
-	INFINITE	= 0xffffffff,
-	WAIT_TIMEOUT	= 0x102,
-
-	EXCEPTION_CONTINUE_EXECUTION	= -0x1,
-	EXCEPTION_CONTINUE_SEARCH	= 0x0,
-};
-
-typedef struct SystemInfo SystemInfo;
-typedef struct ExceptionRecord ExceptionRecord;
-typedef struct FloatingSaveArea FloatingSaveArea;
-typedef struct M128a M128a;
-typedef struct Context Context;
-typedef struct Overlapped Overlapped;
-
-#pragma pack on
-
-struct SystemInfo {
-	byte	anon0[4];
-	uint32	dwPageSize;
-	byte	*lpMinimumApplicationAddress;
-	byte	*lpMaximumApplicationAddress;
-	uint32	dwActiveProcessorMask;
-	uint32	dwNumberOfProcessors;
-	uint32	dwProcessorType;
-	uint32	dwAllocationGranularity;
-	uint16	wProcessorLevel;
-	uint16	wProcessorRevision;
-};
-struct ExceptionRecord {
-	uint32	ExceptionCode;
-	uint32	ExceptionFlags;
-	ExceptionRecord	*ExceptionRecord;
-	byte	*ExceptionAddress;
-	uint32	NumberParameters;
-	uint32	ExceptionInformation[15];
-};
-struct FloatingSaveArea {
-	uint32	ControlWord;
-	uint32	StatusWord;
-	uint32	TagWord;
-	uint32	ErrorOffset;
-	uint32	ErrorSelector;
-	uint32	DataOffset;
-	uint32	DataSelector;
-	uint8	RegisterArea[80];
-	uint32	Cr0NpxState;
-};
-struct Context {
-	uint32	ContextFlags;
-	uint32	Dr0;
-	uint32	Dr1;
-	uint32	Dr2;
-	uint32	Dr3;
-	uint32	Dr6;
-	uint32	Dr7;
-	FloatingSaveArea	FloatSave;
-	uint32	SegGs;
-	uint32	SegFs;
-	uint32	SegEs;
-	uint32	SegDs;
-	uint32	Edi;
-	uint32	Esi;
-	uint32	Ebx;
-	uint32	Edx;
-	uint32	Ecx;
-	uint32	Eax;
-	uint32	Ebp;
-	uint32	Eip;
-	uint32	SegCs;
-	uint32	EFlags;
-	uint32	Esp;
-	uint32	SegSs;
-	uint8	ExtendedRegisters[512];
-};
-struct Overlapped {
-	uint32	Internal;
-	uint32	InternalHigh;
-	byte	anon0[8];
-	byte	*hEvent;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/defs_windows_amd64.go b/src/runtime/defs_windows_amd64.go
new file mode 100644
index 0000000..81b1359
--- /dev/null
+++ b/src/runtime/defs_windows_amd64.go
@@ -0,0 +1,124 @@
+// created by cgo -cdefs and then converted to Go
+// cgo -cdefs defs_windows.go
+
+package runtime
+
+const (
+	_PROT_NONE  = 0
+	_PROT_READ  = 1
+	_PROT_WRITE = 2
+	_PROT_EXEC  = 4
+
+	_MAP_ANON    = 1
+	_MAP_PRIVATE = 2
+
+	_DUPLICATE_SAME_ACCESS   = 0x2
+	_THREAD_PRIORITY_HIGHEST = 0x2
+
+	_SIGINT           = 0x2
+	_CTRL_C_EVENT     = 0x0
+	_CTRL_BREAK_EVENT = 0x1
+
+	_CONTEXT_CONTROL = 0x100001
+	_CONTEXT_FULL    = 0x10000b
+
+	_EXCEPTION_ACCESS_VIOLATION     = 0xc0000005
+	_EXCEPTION_BREAKPOINT           = 0x80000003
+	_EXCEPTION_FLT_DENORMAL_OPERAND = 0xc000008d
+	_EXCEPTION_FLT_DIVIDE_BY_ZERO   = 0xc000008e
+	_EXCEPTION_FLT_INEXACT_RESULT   = 0xc000008f
+	_EXCEPTION_FLT_OVERFLOW         = 0xc0000091
+	_EXCEPTION_FLT_UNDERFLOW        = 0xc0000093
+	_EXCEPTION_INT_DIVIDE_BY_ZERO   = 0xc0000094
+	_EXCEPTION_INT_OVERFLOW         = 0xc0000095
+
+	_INFINITE     = 0xffffffff
+	_WAIT_TIMEOUT = 0x102
+
+	_EXCEPTION_CONTINUE_EXECUTION = -0x1
+	_EXCEPTION_CONTINUE_SEARCH    = 0x0
+)
+
+type systeminfo struct {
+	anon0                       [4]byte
+	dwpagesize                  uint32
+	lpminimumapplicationaddress *byte
+	lpmaximumapplicationaddress *byte
+	dwactiveprocessormask       uint64
+	dwnumberofprocessors        uint32
+	dwprocessortype             uint32
+	dwallocationgranularity     uint32
+	wprocessorlevel             uint16
+	wprocessorrevision          uint16
+}
+
+type exceptionrecord struct {
+	exceptioncode        uint32
+	exceptionflags       uint32
+	exceptionrecord      *exceptionrecord
+	exceptionaddress     *byte
+	numberparameters     uint32
+	pad_cgo_0            [4]byte
+	exceptioninformation [15]uint64
+}
+
+type m128a struct {
+	low  uint64
+	high int64
+}
+
+type context struct {
+	p1home               uint64
+	p2home               uint64
+	p3home               uint64
+	p4home               uint64
+	p5home               uint64
+	p6home               uint64
+	contextflags         uint32
+	mxcsr                uint32
+	segcs                uint16
+	segds                uint16
+	seges                uint16
+	segfs                uint16
+	seggs                uint16
+	segss                uint16
+	eflags               uint32
+	dr0                  uint64
+	dr1                  uint64
+	dr2                  uint64
+	dr3                  uint64
+	dr6                  uint64
+	dr7                  uint64
+	rax                  uint64
+	rcx                  uint64
+	rdx                  uint64
+	rbx                  uint64
+	rsp                  uint64
+	rbp                  uint64
+	rsi                  uint64
+	rdi                  uint64
+	r8                   uint64
+	r9                   uint64
+	r10                  uint64
+	r11                  uint64
+	r12                  uint64
+	r13                  uint64
+	r14                  uint64
+	r15                  uint64
+	rip                  uint64
+	anon0                [512]byte
+	vectorregister       [26]m128a
+	vectorcontrol        uint64
+	debugcontrol         uint64
+	lastbranchtorip      uint64
+	lastbranchfromrip    uint64
+	lastexceptiontorip   uint64
+	lastexceptionfromrip uint64
+}
+
+type overlapped struct {
+	internal     uint64
+	internalhigh uint64
+	anon0        [8]byte
+	hevent       *byte
+}
diff --git a/src/runtime/defs_windows_amd64.h b/src/runtime/defs_windows_amd64.h
deleted file mode 100644
index 7f37a7a..0000000
--- a/src/runtime/defs_windows_amd64.h
+++ /dev/null
@@ -1,131 +0,0 @@
-// Created by cgo -cdefs - DO NOT EDIT
-// cgo -cdefs defs_windows.go
-
-
-enum {
-	PROT_NONE	= 0,
-	PROT_READ	= 1,
-	PROT_WRITE	= 2,
-	PROT_EXEC	= 4,
-
-	MAP_ANON	= 1,
-	MAP_PRIVATE	= 2,
-
-	DUPLICATE_SAME_ACCESS	= 0x2,
-	THREAD_PRIORITY_HIGHEST	= 0x2,
-
-	SIGINT			= 0x2,
-	CTRL_C_EVENT		= 0x0,
-	CTRL_BREAK_EVENT	= 0x1,
-
-	CONTEXT_CONTROL	= 0x100001,
-	CONTEXT_FULL	= 0x10000b,
-
-	EXCEPTION_ACCESS_VIOLATION	= 0xc0000005,
-	EXCEPTION_BREAKPOINT		= 0x80000003,
-	EXCEPTION_FLT_DENORMAL_OPERAND	= 0xc000008d,
-	EXCEPTION_FLT_DIVIDE_BY_ZERO	= 0xc000008e,
-	EXCEPTION_FLT_INEXACT_RESULT	= 0xc000008f,
-	EXCEPTION_FLT_OVERFLOW		= 0xc0000091,
-	EXCEPTION_FLT_UNDERFLOW		= 0xc0000093,
-	EXCEPTION_INT_DIVIDE_BY_ZERO	= 0xc0000094,
-	EXCEPTION_INT_OVERFLOW		= 0xc0000095,
-
-	INFINITE	= 0xffffffff,
-	WAIT_TIMEOUT	= 0x102,
-
-	EXCEPTION_CONTINUE_EXECUTION	= -0x1,
-	EXCEPTION_CONTINUE_SEARCH	= 0x0,
-};
-
-typedef struct SystemInfo SystemInfo;
-typedef struct ExceptionRecord ExceptionRecord;
-typedef struct FloatingSaveArea FloatingSaveArea;
-typedef struct M128a M128a;
-typedef struct Context Context;
-typedef struct Overlapped Overlapped;
-
-#pragma pack on
-
-struct SystemInfo {
-	byte	anon0[4];
-	uint32	dwPageSize;
-	byte	*lpMinimumApplicationAddress;
-	byte	*lpMaximumApplicationAddress;
-	uint64	dwActiveProcessorMask;
-	uint32	dwNumberOfProcessors;
-	uint32	dwProcessorType;
-	uint32	dwAllocationGranularity;
-	uint16	wProcessorLevel;
-	uint16	wProcessorRevision;
-};
-struct ExceptionRecord {
-	uint32	ExceptionCode;
-	uint32	ExceptionFlags;
-	ExceptionRecord	*ExceptionRecord;
-	byte	*ExceptionAddress;
-	uint32	NumberParameters;
-	byte	Pad_cgo_0[4];
-	uint64	ExceptionInformation[15];
-};
-struct M128a {
-	uint64	Low;
-	int64	High;
-};
-struct Context {
-	uint64	P1Home;
-	uint64	P2Home;
-	uint64	P3Home;
-	uint64	P4Home;
-	uint64	P5Home;
-	uint64	P6Home;
-	uint32	ContextFlags;
-	uint32	MxCsr;
-	uint16	SegCs;
-	uint16	SegDs;
-	uint16	SegEs;
-	uint16	SegFs;
-	uint16	SegGs;
-	uint16	SegSs;
-	uint32	EFlags;
-	uint64	Dr0;
-	uint64	Dr1;
-	uint64	Dr2;
-	uint64	Dr3;
-	uint64	Dr6;
-	uint64	Dr7;
-	uint64	Rax;
-	uint64	Rcx;
-	uint64	Rdx;
-	uint64	Rbx;
-	uint64	Rsp;
-	uint64	Rbp;
-	uint64	Rsi;
-	uint64	Rdi;
-	uint64	R8;
-	uint64	R9;
-	uint64	R10;
-	uint64	R11;
-	uint64	R12;
-	uint64	R13;
-	uint64	R14;
-	uint64	R15;
-	uint64	Rip;
-	byte	anon0[512];
-	M128a	VectorRegister[26];
-	uint64	VectorControl;
-	uint64	DebugControl;
-	uint64	LastBranchToRip;
-	uint64	LastBranchFromRip;
-	uint64	LastExceptionToRip;
-	uint64	LastExceptionFromRip;
-};
-struct Overlapped {
-	uint64	Internal;
-	uint64	InternalHigh;
-	byte	anon0[8];
-	byte	*hEvent;
-};
-
-
-#pragma pack off
diff --git a/src/runtime/env_posix.go b/src/runtime/env_posix.go
index dd57872..03c7a5a 100644
--- a/src/runtime/env_posix.go
+++ b/src/runtime/env_posix.go
@@ -8,8 +8,6 @@
 
 import "unsafe"
 
-func environ() []string
-
 func getenv(s *byte) *byte {
 	val := gogetenv(gostringnocopy(s))
 	if val == "" {
@@ -32,13 +30,13 @@
 	return ""
 }
 
-var _cgo_setenv uintptr   // pointer to C function
-var _cgo_unsetenv uintptr // pointer to C function
+var _cgo_setenv unsafe.Pointer   // pointer to C function
+var _cgo_unsetenv unsafe.Pointer // pointer to C function
 
 // Update the C environment if cgo is loaded.
 // Called from syscall.Setenv.
 func syscall_setenv_c(k string, v string) {
-	if _cgo_setenv == 0 {
+	if _cgo_setenv == nil {
 		return
 	}
 	arg := [2]unsafe.Pointer{cstring(k), cstring(v)}
@@ -48,7 +46,7 @@
 // Update the C environment if cgo is loaded.
 // Called from syscall.unsetenv.
 func syscall_unsetenv_c(k string) {
-	if _cgo_unsetenv == 0 {
+	if _cgo_unsetenv == nil {
 		return
 	}
 	arg := [1]unsafe.Pointer{cstring(k)}
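With the cgo hooks typed as unsafe.Pointer, the "is cgo linked in" test becomes a nil comparison rather than a compare against 0. A rough sketch of the resulting call shape; the asmcgocall use is assumed from the surrounding runtime and is not part of this hunk:

	// Sketch only: assumed shape of the cgo setenv path after this change.
	func setenvViaCgo(k, v string) {
		if _cgo_setenv == nil {
			return // cgo not linked in; nothing to update
		}
		arg := [2]unsafe.Pointer{cstring(k), cstring(v)}
		asmcgocall(_cgo_setenv, unsafe.Pointer(&arg))
	}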
diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go
index 65e918e..5ed2550 100644
--- a/src/runtime/export_test.go
+++ b/src/runtime/export_test.go
@@ -34,21 +34,11 @@
 func lfstackpop_m()
 
 func LFStackPush(head *uint64, node *LFNode) {
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(head)
-	mp.ptrarg[1] = unsafe.Pointer(node)
-	onM(lfstackpush_m)
-	releasem(mp)
+	lfstackpush(head, (*lfnode)(unsafe.Pointer(node)))
 }
 
 func LFStackPop(head *uint64) *LFNode {
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(head)
-	onM(lfstackpop_m)
-	node := (*LFNode)(unsafe.Pointer(mp.ptrarg[0]))
-	mp.ptrarg[0] = nil
-	releasem(mp)
-	return node
+	return (*LFNode)(unsafe.Pointer(lfstackpop(head)))
 }
 
 type ParFor struct {
@@ -68,69 +58,49 @@
 func parforiters_m()
 
 func NewParFor(nthrmax uint32) *ParFor {
-	mp := acquirem()
-	mp.scalararg[0] = uintptr(nthrmax)
-	onM(newparfor_m)
-	desc := (*ParFor)(mp.ptrarg[0])
-	mp.ptrarg[0] = nil
-	releasem(mp)
+	var desc *ParFor
+	systemstack(func() {
+		desc = (*ParFor)(unsafe.Pointer(parforalloc(nthrmax)))
+	})
 	return desc
 }
 
 func ParForSetup(desc *ParFor, nthr, n uint32, ctx *byte, wait bool, body func(*ParFor, uint32)) {
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(desc)
-	mp.ptrarg[1] = unsafe.Pointer(ctx)
-	mp.ptrarg[2] = unsafe.Pointer(funcPC(body)) // TODO(rsc): Should be a scalar.
-	mp.scalararg[0] = uintptr(nthr)
-	mp.scalararg[1] = uintptr(n)
-	mp.scalararg[2] = 0
-	if wait {
-		mp.scalararg[2] = 1
-	}
-	onM(parforsetup_m)
-	releasem(mp)
+	systemstack(func() {
+		parforsetup((*parfor)(unsafe.Pointer(desc)), nthr, n, unsafe.Pointer(ctx), wait,
+			*(*func(*parfor, uint32))(unsafe.Pointer(&body)))
+	})
 }
 
 func ParForDo(desc *ParFor) {
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(desc)
-	onM(parfordo_m)
-	releasem(mp)
+	systemstack(func() {
+		parfordo((*parfor)(unsafe.Pointer(desc)))
+	})
 }
 
 func ParForIters(desc *ParFor, tid uint32) (uint32, uint32) {
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(desc)
-	mp.scalararg[0] = uintptr(tid)
-	onM(parforiters_m)
-	begin := uint32(mp.scalararg[0])
-	end := uint32(mp.scalararg[1])
-	releasem(mp)
-	return begin, end
+	desc1 := (*parfor)(unsafe.Pointer(desc))
+	pos := desc_thr_index(desc1, tid).pos
+	return uint32(pos), uint32(pos >> 32)
 }
 
-// in mgc0.c
-//go:noescape
-func getgcmask(data unsafe.Pointer, typ *_type, array **byte, len *uint)
-
 func GCMask(x interface{}) (ret []byte) {
 	e := (*eface)(unsafe.Pointer(&x))
 	s := (*slice)(unsafe.Pointer(&ret))
-	onM(func() {
-		getgcmask(e.data, e._type, &s.array, &s.len)
+	systemstack(func() {
+		var len uintptr
+		getgcmask(e.data, e._type, &s.array, &len)
+		s.len = uint(len)
 		s.cap = s.len
 	})
 	return
 }
 
-func testSchedLocalQueue()
-func testSchedLocalQueueSteal()
 func RunSchedLocalQueueTest() {
-	onM(testSchedLocalQueue)
+	systemstack(testSchedLocalQueue)
 }
 func RunSchedLocalQueueStealTest() {
-	onM(testSchedLocalQueueSteal)
+	systemstack(testSchedLocalQueueSteal)
 }
 
 var HaveGoodHash = haveGoodHash
@@ -149,13 +119,9 @@
 	return _RuntimeGogoBytes
 }
 
-// in string.c
-//go:noescape
-func gostringw(w *uint16) string
-
 // entry point for testing
 func GostringW(w []uint16) (s string) {
-	onM(func() {
+	systemstack(func() {
 		s = gostringw(&w[0])
 	})
 	return
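The rewritten test exports illustrate the general migration in this CL: instead of marshalling arguments through m.ptrarg/m.scalararg and jumping via onM, callers pass a closure to systemstack. A minimal sketch of the pattern (runOnSystemStack is a hypothetical name):

	// Run work on the system stack and capture a result via the closure.
	func runOnSystemStack(work func() int) (n int) {
		systemstack(func() {
			n = work()
		})
		return n
	}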
diff --git a/src/runtime/extern.go b/src/runtime/extern.go
index 6cc5df8..34fdeb2 100644
--- a/src/runtime/extern.go
+++ b/src/runtime/extern.go
@@ -112,7 +112,8 @@
 	if xpc > f.entry && (g == nil || g.entry != funcPC(sigpanic)) {
 		xpc--
 	}
-	line = int(funcline(f, xpc, &file))
+	file, line32 := funcline(f, xpc)
+	line = int(line32)
 	ok = true
 	return
 }
diff --git a/src/runtime/float.c b/src/runtime/float.c
deleted file mode 100644
index 42082e4..0000000
--- a/src/runtime/float.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-
-// used as float64 via runtime· names
-uint64	·nan		= 0x7FF8000000000001ULL;
-uint64	·posinf	= 0x7FF0000000000000ULL;
-uint64	·neginf	= 0xFFF0000000000000ULL;
diff --git a/src/runtime/funcdata.h b/src/runtime/funcdata.h
index d6c14fc..ce62dab 100644
--- a/src/runtime/funcdata.h
+++ b/src/runtime/funcdata.h
@@ -3,9 +3,10 @@
 // license that can be found in the LICENSE file.
 
 // This file defines the IDs for PCDATA and FUNCDATA instructions
-// in Go binaries. It is included by both C and assembly, so it must
-// be written using #defines. It is included by the runtime package
-// as well as the compilers.
+// in Go binaries. It is included by assembly sources, so it must
+// be written using #defines.
+//
+// The Go compiler also #includes this file, for now.
 //
 // symtab.go also contains a copy of these constants.
 
@@ -50,8 +51,7 @@
 
 /*c2go
 enum {
-	PCDATA_ArgSize = 0,
-	PCDATA_StackMapIndex = 1,
+	PCDATA_StackMapIndex = 0,
 	FUNCDATA_ArgsPointerMaps = 0,
 	FUNCDATA_LocalsPointerMaps = 1,
 	FUNCDATA_DeadValueMaps = 2,
diff --git a/src/runtime/futex_test.go b/src/runtime/futex_test.go
index f57fc52..b85249a 100644
--- a/src/runtime/futex_test.go
+++ b/src/runtime/futex_test.go
@@ -44,9 +44,9 @@
 	start := time.Now()
 	for _, tt := range futexsleepTests {
 		go func(tt futexsleepTest) {
-			runtime.Entersyscall()
+			runtime.Entersyscall(0)
 			runtime.Futexsleep(&tt.mtx, tt.mtx, tt.ns)
-			runtime.Exitsyscall()
+			runtime.Exitsyscall(0)
 			tt.ch <- tt
 		}(tt)
 	}
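The extra argument to Entersyscall/Exitsyscall is a dummy; the assumption (not shown in this diff) is that nosplit runtime code takes its address to recover the caller's PC and SP. A hedged sketch of that idiom:

	// Assumed idiom: a dummy int32 argument whose address lets the callee
	// locate the caller's PC without any extra bookkeeping.
	func callerPCOf(dummy int32) uintptr {
		return getcallerpc(unsafe.Pointer(&dummy))
	}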
diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go
index 662b754..2b45c81 100644
--- a/src/runtime/gcinfo_test.go
+++ b/src/runtime/gcinfo_test.go
@@ -62,12 +62,10 @@
 func nonStackInfo(mask []byte) []byte {
 	// BitsDead is replaced with BitsScalar everywhere except stacks.
 	mask1 := make([]byte, len(mask))
-	mw := false
 	for i, v := range mask {
-		if !mw && v == BitsDead {
+		if v == BitsDead {
 			v = BitsScalar
 		}
-		mw = !mw && v == BitsMultiWord
 		mask1[i] = v
 	}
 	return mask1
@@ -84,7 +82,6 @@
 	BitsDead = iota
 	BitsScalar
 	BitsPointer
-	BitsMultiWord
 )
 
 const (
diff --git a/src/runtime/go_tls.h b/src/runtime/go_tls.h
new file mode 100644
index 0000000..6a707cf
--- /dev/null
+++ b/src/runtime/go_tls.h
@@ -0,0 +1,22 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+#ifdef GOARCH_arm
+#define LR R14
+#endif
+
+#ifdef GOARCH_amd64
+#define	get_tls(r)	MOVQ TLS, r
+#define	g(r)	0(r)(TLS*1)
+#endif
+
+#ifdef GOARCH_amd64p32
+#define	get_tls(r)	MOVL TLS, r
+#define	g(r)	0(r)(TLS*1)
+#endif
+
+#ifdef GOARCH_386
+#define	get_tls(r)	MOVL TLS, r
+#define	g(r)	0(r)(TLS*1)
+#endif
diff --git a/src/runtime/heapdump.c b/src/runtime/heapdump.c
deleted file mode 100644
index da14f2d..0000000
--- a/src/runtime/heapdump.c
+++ /dev/null
@@ -1,851 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Implementation of runtime/debug.WriteHeapDump.  Writes all
-// objects in the heap plus additional info (roots, threads,
-// finalizers, etc.) to a file.
-
-// The format of the dumped file is described at
-// http://code.google.com/p/go-wiki/wiki/heapdump14
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "mgc0.h"
-#include "type.h"
-#include "typekind.h"
-#include "funcdata.h"
-#include "zaexperiment.h"
-#include "textflag.h"
-
-extern byte runtime·data[];
-extern byte runtime·edata[];
-extern byte runtime·bss[];
-extern byte runtime·ebss[];
-
-enum {
-	FieldKindEol = 0,
-	FieldKindPtr = 1,
-	FieldKindIface = 2,
-	FieldKindEface = 3,
-
-	TagEOF = 0,
-	TagObject = 1,
-	TagOtherRoot = 2,
-	TagType = 3,
-	TagGoRoutine = 4,
-	TagStackFrame = 5,
-	TagParams = 6,
-	TagFinalizer = 7,
-	TagItab = 8,
-	TagOSThread = 9,
-	TagMemStats = 10,
-	TagQueuedFinalizer = 11,
-	TagData = 12,
-	TagBss = 13,
-	TagDefer = 14,
-	TagPanic = 15,
-	TagMemProf = 16,
-	TagAllocSample = 17,
-};
-
-static uintptr* playgcprog(uintptr offset, uintptr *prog, void (*callback)(void*,uintptr,uintptr), void *arg);
-static void dumpfields(BitVector bv);
-static void dumpbvtypes(BitVector *bv, byte *base);
-static BitVector makeheapobjbv(byte *p, uintptr size);
-
-// fd to write the dump to.
-static uintptr	dumpfd;
-
-#pragma dataflag NOPTR /* tmpbuf not a heap pointer at least */
-static byte	*tmpbuf;
-static uintptr	tmpbufsize;
-
-// buffer of pending write data
-enum {
-	BufSize = 4096,
-};
-#pragma dataflag NOPTR
-static byte buf[BufSize];
-static uintptr nbuf;
-
-static void
-write(byte *data, uintptr len)
-{
-	if(len + nbuf <= BufSize) {
-		runtime·memmove(buf + nbuf, data, len);
-		nbuf += len;
-		return;
-	}
-	runtime·write(dumpfd, buf, nbuf);
-	if(len >= BufSize) {
-		runtime·write(dumpfd, data, len);
-		nbuf = 0;
-	} else {
-		runtime·memmove(buf, data, len);
-		nbuf = len;
-	}
-}
-
-static void
-flush(void)
-{
-	runtime·write(dumpfd, buf, nbuf);
-	nbuf = 0;
-}
-
-// Cache of types that have been serialized already.
-// We use a type's hash field to pick a bucket.
-// Inside a bucket, we keep a list of types that
-// have been serialized so far, most recently used first.
-// Note: when a bucket overflows we may end up
-// serializing a type more than once.  That's ok.
-enum {
-	TypeCacheBuckets = 256, // must be a power of 2
-	TypeCacheAssoc = 4,
-};
-typedef struct TypeCacheBucket TypeCacheBucket;
-struct TypeCacheBucket {
-	Type *t[TypeCacheAssoc];
-};
-#pragma dataflag NOPTR /* only initialized and used while world is stopped */
-static TypeCacheBucket typecache[TypeCacheBuckets];
-
-// dump a uint64 in a varint format parseable by encoding/binary
-static void
-dumpint(uint64 v)
-{
-	byte buf[10];
-	int32 n;
-	n = 0;
-	while(v >= 0x80) {
-		buf[n++] = v | 0x80;
-		v >>= 7;
-	}
-	buf[n++] = v;
-	write(buf, n);
-}
-
-static void
-dumpbool(bool b)
-{
-	dumpint(b ? 1 : 0);
-}
-
-// dump varint uint64 length followed by memory contents
-static void
-dumpmemrange(byte *data, uintptr len)
-{
-	dumpint(len);
-	write(data, len);
-}
-
-static void
-dumpstr(String s)
-{
-	dumpmemrange(s.str, s.len);
-}
-
-static void
-dumpcstr(int8 *c)
-{
-	dumpmemrange((byte*)c, runtime·findnull((byte*)c));
-}
-
-// dump information for a type
-static void
-dumptype(Type *t)
-{
-	TypeCacheBucket *b;
-	int32 i, j;
-
-	if(t == nil) {
-		return;
-	}
-
-	// If we've definitely serialized the type before,
-	// no need to do it again.
-	b = &typecache[t->hash & (TypeCacheBuckets-1)];
-	if(t == b->t[0]) return;
-	for(i = 1; i < TypeCacheAssoc; i++) {
-		if(t == b->t[i]) {
-			// Move-to-front
-			for(j = i; j > 0; j--) {
-				b->t[j] = b->t[j-1];
-			}
-			b->t[0] = t;
-			return;
-		}
-	}
-	// Might not have been dumped yet.  Dump it and
-	// remember we did so.
-	for(j = TypeCacheAssoc-1; j > 0; j--) {
-		b->t[j] = b->t[j-1];
-	}
-	b->t[0] = t;
-	
-	// dump the type
-	dumpint(TagType);
-	dumpint((uintptr)t);
-	dumpint(t->size);
-	if(t->x == nil || t->x->pkgPath == nil || t->x->name == nil) {
-		dumpstr(*t->string);
-	} else {
-		dumpint(t->x->pkgPath->len + 1 + t->x->name->len);
-		write(t->x->pkgPath->str, t->x->pkgPath->len);
-		write((byte*)".", 1);
-		write(t->x->name->str, t->x->name->len);
-	}
-	dumpbool((t->kind & KindDirectIface) == 0 || (t->kind & KindNoPointers) == 0);
-}
-
-// dump an object
-static void
-dumpobj(byte *obj, uintptr size, BitVector bv)
-{
-	dumpbvtypes(&bv, obj);
-	dumpint(TagObject);
-	dumpint((uintptr)obj);
-	dumpmemrange(obj, size);
-	dumpfields(bv);
-}
-
-static void
-dumpotherroot(int8 *description, byte *to)
-{
-	dumpint(TagOtherRoot);
-	dumpcstr(description);
-	dumpint((uintptr)to);
-}
-
-static void
-dumpfinalizer(byte *obj, FuncVal *fn, Type* fint, PtrType *ot)
-{
-	dumpint(TagFinalizer);
-	dumpint((uintptr)obj);
-	dumpint((uintptr)fn);
-	dumpint((uintptr)fn->fn);
-	dumpint((uintptr)fint);
-	dumpint((uintptr)ot);
-}
-
-typedef struct ChildInfo ChildInfo;
-struct ChildInfo {
-	// Information passed up from the callee frame about
-	// the layout of the outargs region.
-	uintptr argoff;     // where the arguments start in the frame
-	uintptr arglen;     // size of args region
-	BitVector args;    // if args.n >= 0, pointer map of args region
-
-	byte *sp;           // callee sp
-	uintptr depth;      // depth in call stack (0 == most recent)
-};
-
-// dump kinds & offsets of interesting fields in bv
-static void
-dumpbv(BitVector *bv, uintptr offset)
-{
-	uintptr i;
-
-	for(i = 0; i < bv->n; i += BitsPerPointer) {
-		switch(bv->bytedata[i/8] >> i%8 & 3) {
-		case BitsDead:
-			// BitsDead has already been processed in makeheapobjbv.
-			// We should only see it in stack maps, in which case we should continue processing.
-			break;
-		case BitsScalar:
-			break;
-		case BitsPointer:
-			dumpint(FieldKindPtr);
-			dumpint(offset + i / BitsPerPointer * PtrSize);
-			break;
-		case BitsMultiWord:
-			runtime·throw("bumpbv unexpected garbage collection bits");
-		}
-	}
-}
-
-static bool
-dumpframe(Stkframe *s, void *arg)
-{
-	Func *f;
-	ChildInfo *child;
-	uintptr pc, off, size;
-	int32 pcdata;
-	StackMap *stackmap;
-	int8 *name;
-	BitVector bv;
-
-	child = (ChildInfo*)arg;
-	f = s->fn;
-
-	// Figure out what we can about our stack map
-	pc = s->pc;
-	if(pc != f->entry)
-		pc--;
-	pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, pc);
-	if(pcdata == -1) {
-		// We do not have a valid pcdata value but there might be a
-		// stackmap for this function.  It is likely that we are looking
-		// at the function prologue, assume so and hope for the best.
-		pcdata = 0;
-	}
-	stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-
-	// Dump any types we will need to resolve Efaces.
-	if(child->args.n >= 0)
-		dumpbvtypes(&child->args, (byte*)s->sp + child->argoff);
-	if(stackmap != nil && stackmap->n > 0) {
-		bv = runtime·stackmapdata(stackmap, pcdata);
-		dumpbvtypes(&bv, (byte*)(s->varp - bv.n / BitsPerPointer * PtrSize));
-	} else {
-		bv.n = -1;
-	}
-
-	// Dump main body of stack frame.
-	dumpint(TagStackFrame);
-	dumpint(s->sp); // lowest address in frame
-	dumpint(child->depth); // # of frames deep on the stack
-	dumpint((uintptr)child->sp); // sp of child, or 0 if bottom of stack
-	dumpmemrange((byte*)s->sp, s->fp - s->sp);  // frame contents
-	dumpint(f->entry);
-	dumpint(s->pc);
-	dumpint(s->continpc);
-	name = runtime·funcname(f);
-	if(name == nil)
-		name = "unknown function";
-	dumpcstr(name);
-
-	// Dump fields in the outargs section
-	if(child->args.n >= 0) {
-		dumpbv(&child->args, child->argoff);
-	} else {
-		// conservative - everything might be a pointer
-		for(off = child->argoff; off < child->argoff + child->arglen; off += PtrSize) {
-			dumpint(FieldKindPtr);
-			dumpint(off);
-		}
-	}
-
-	// Dump fields in the local vars section
-	if(stackmap == nil) {
-		// No locals information, dump everything.
-		for(off = child->arglen; off < s->varp - s->sp; off += PtrSize) {
-			dumpint(FieldKindPtr);
-			dumpint(off);
-		}
-	} else if(stackmap->n < 0) {
-		// Locals size information, dump just the locals.
-		size = -stackmap->n;
-		for(off = s->varp - size - s->sp; off <  s->varp - s->sp; off += PtrSize) {
-			dumpint(FieldKindPtr);
-			dumpint(off);
-		}
-	} else if(stackmap->n > 0) {
-		// Locals bitmap information, scan just the pointers in
-		// locals.
-		dumpbv(&bv, s->varp - bv.n / BitsPerPointer * PtrSize - s->sp);
-	}
-	dumpint(FieldKindEol);
-
-	// Record arg info for parent.
-	child->argoff = s->argp - s->fp;
-	child->arglen = s->arglen;
-	child->sp = (byte*)s->sp;
-	child->depth++;
-	stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
-	if(stackmap != nil)
-		child->args = runtime·stackmapdata(stackmap, pcdata);
-	else
-		child->args.n = -1;
-	return true;
-}
-
-static void
-dumpgoroutine(G *gp)
-{
-	uintptr sp, pc, lr;
-	ChildInfo child;
-	Defer *d;
-	Panic *p;
-	bool (*fn)(Stkframe*, void*);
-
-	if(gp->syscallsp != (uintptr)nil) {
-		sp = gp->syscallsp;
-		pc = gp->syscallpc;
-		lr = 0;
-	} else {
-		sp = gp->sched.sp;
-		pc = gp->sched.pc;
-		lr = gp->sched.lr;
-	}
-
-	dumpint(TagGoRoutine);
-	dumpint((uintptr)gp);
-	dumpint((uintptr)sp);
-	dumpint(gp->goid);
-	dumpint(gp->gopc);
-	dumpint(runtime·readgstatus(gp));
-	dumpbool(gp->issystem);
-	dumpbool(false);  // isbackground
-	dumpint(gp->waitsince);
-	dumpstr(gp->waitreason);
-	dumpint((uintptr)gp->sched.ctxt);
-	dumpint((uintptr)gp->m);
-	dumpint((uintptr)gp->defer);
-	dumpint((uintptr)gp->panic);
-
-	// dump stack
-	child.args.n = -1;
-	child.arglen = 0;
-	child.sp = nil;
-	child.depth = 0;
-	fn = dumpframe;
-	runtime·gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, &fn, &child, 0);
-
-	// dump defer & panic records
-	for(d = gp->defer; d != nil; d = d->link) {
-		dumpint(TagDefer);
-		dumpint((uintptr)d);
-		dumpint((uintptr)gp);
-		dumpint((uintptr)d->argp);
-		dumpint((uintptr)d->pc);
-		dumpint((uintptr)d->fn);
-		dumpint((uintptr)d->fn->fn);
-		dumpint((uintptr)d->link);
-	}
-	for (p = gp->panic; p != nil; p = p->link) {
-		dumpint(TagPanic);
-		dumpint((uintptr)p);
-		dumpint((uintptr)gp);
-		dumpint((uintptr)p->arg.type);
-		dumpint((uintptr)p->arg.data);
-		dumpint(0); // was p->defer, no longer recorded
-		dumpint((uintptr)p->link);
-	}
-}
-
-static void
-dumpgs(void)
-{
-	G *gp;
-	uint32 i;
-	uint32 status;
-
-	// goroutines & stacks
-	for(i = 0; i < runtime·allglen; i++) {
-		gp = runtime·allg[i];
-		status = runtime·readgstatus(gp); // The world is stopped so gp will not be in a scan state.
-		switch(status){
-		default:
-			runtime·printf("runtime: unexpected G.status %d\n", status);
-			runtime·throw("dumpgs in STW - bad status");
-		case Gdead:
-			break;
-		case Grunnable:
-		case Gsyscall:
-		case Gwaiting:
-			dumpgoroutine(gp);
-			break;
-		}
-	}
-}
-
-static void
-finq_callback(FuncVal *fn, byte *obj, uintptr nret, Type *fint, PtrType *ot)
-{
-	dumpint(TagQueuedFinalizer);
-	dumpint((uintptr)obj);
-	dumpint((uintptr)fn);
-	dumpint((uintptr)fn->fn);
-	dumpint((uintptr)fint);
-	dumpint((uintptr)ot);
-	USED(&nret);
-}
-
-
-static void
-dumproots(void)
-{
-	MSpan *s, **allspans;
-	uint32 spanidx;
-	Special *sp;
-	SpecialFinalizer *spf;
-	byte *p;
-
-	// data segment
-	dumpbvtypes(&runtime·gcdatamask, runtime·data);
-	dumpint(TagData);
-	dumpint((uintptr)runtime·data);
-	dumpmemrange(runtime·data, runtime·edata - runtime·data);
-	dumpfields(runtime·gcdatamask);
-
-	// bss segment
-	dumpbvtypes(&runtime·gcbssmask, runtime·bss);
-	dumpint(TagBss);
-	dumpint((uintptr)runtime·bss);
-	dumpmemrange(runtime·bss, runtime·ebss - runtime·bss);
-	dumpfields(runtime·gcbssmask);
-
-	// MSpan.types
-	allspans = runtime·mheap.allspans;
-	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
-		s = allspans[spanidx];
-		if(s->state == MSpanInUse) {
-			// Finalizers
-			for(sp = s->specials; sp != nil; sp = sp->next) {
-				if(sp->kind != KindSpecialFinalizer)
-					continue;
-				spf = (SpecialFinalizer*)sp;
-				p = (byte*)((s->start << PageShift) + spf->special.offset);
-				dumpfinalizer(p, spf->fn, spf->fint, spf->ot);
-			}
-		}
-	}
-
-	// Finalizer queue
-	runtime·iterate_finq(finq_callback);
-}
-
-// Bit vector of free marks.	
-// Needs to be as big as the largest number of objects per span.	
-#pragma dataflag NOPTR
-static byte free[PageSize/8];	
-
-static void
-dumpobjs(void)
-{
-	uintptr i, j, size, n;
-	MSpan *s;
-	MLink *l;
-	byte *p;
-
-	for(i = 0; i < runtime·mheap.nspan; i++) {
-		s = runtime·mheap.allspans[i];
-		if(s->state != MSpanInUse)
-			continue;
-		p = (byte*)(s->start << PageShift);
-		size = s->elemsize;
-		n = (s->npages << PageShift) / size;
-		if(n > nelem(free))	
-			runtime·throw("free array doesn't have enough entries");	
-		for(l = s->freelist; l != nil; l = l->next)
-			free[((byte*)l - p) / size] = true;	
-		for(j = 0; j < n; j++, p += size) {
-			if(free[j]) {	
-				free[j] = false;	
-				continue;	
-			}
-			dumpobj(p, size, makeheapobjbv(p, size));
-		}
-	}
-}
-
-static void
-dumpparams(void)
-{
-	byte *x;
-
-	dumpint(TagParams);
-	x = (byte*)1;
-	if(*(byte*)&x == 1)
-		dumpbool(false); // little-endian ptrs
-	else
-		dumpbool(true); // big-endian ptrs
-	dumpint(PtrSize);
-	dumpint((uintptr)runtime·mheap.arena_start);
-	dumpint((uintptr)runtime·mheap.arena_used);
-	dumpint(thechar);
-	dumpcstr(GOEXPERIMENT);
-	dumpint(runtime·ncpu);
-}
-
-static void
-itab_callback(Itab *tab)
-{
-	Type *t;
-
-	t = tab->type;
-	// Dump a map from itab* to the type of its data field.
-	// We want this map so we can deduce types of interface referents.
-	if((t->kind & KindDirectIface) == 0) {
-		// indirect - data slot is a pointer to t.
-		dumptype(t->ptrto);
-		dumpint(TagItab);
-		dumpint((uintptr)tab);
-		dumpint((uintptr)t->ptrto);
-	} else if((t->kind & KindNoPointers) == 0) {
-		// t is pointer-like - data slot is a t.
-		dumptype(t);
-		dumpint(TagItab);
-		dumpint((uintptr)tab);
-		dumpint((uintptr)t);
-	} else {
-		// Data slot is a scalar.  Dump type just for fun.
-		// With pointer-only interfaces, this shouldn't happen.
-		dumptype(t);
-		dumpint(TagItab);
-		dumpint((uintptr)tab);
-		dumpint((uintptr)t);
-	}
-}
-
-static void
-dumpitabs(void)
-{
-	void (*fn)(Itab*);
-	
-	fn = itab_callback;
-	runtime·iterate_itabs(&fn);
-}
-
-static void
-dumpms(void)
-{
-	M *mp;
-
-	for(mp = runtime·allm; mp != nil; mp = mp->alllink) {
-		dumpint(TagOSThread);
-		dumpint((uintptr)mp);
-		dumpint(mp->id);
-		dumpint(mp->procid);
-	}
-}
-
-static void
-dumpmemstats(void)
-{
-	int32 i;
-
-	dumpint(TagMemStats);
-	dumpint(mstats.alloc);
-	dumpint(mstats.total_alloc);
-	dumpint(mstats.sys);
-	dumpint(mstats.nlookup);
-	dumpint(mstats.nmalloc);
-	dumpint(mstats.nfree);
-	dumpint(mstats.heap_alloc);
-	dumpint(mstats.heap_sys);
-	dumpint(mstats.heap_idle);
-	dumpint(mstats.heap_inuse);
-	dumpint(mstats.heap_released);
-	dumpint(mstats.heap_objects);
-	dumpint(mstats.stacks_inuse);
-	dumpint(mstats.stacks_sys);
-	dumpint(mstats.mspan_inuse);
-	dumpint(mstats.mspan_sys);
-	dumpint(mstats.mcache_inuse);
-	dumpint(mstats.mcache_sys);
-	dumpint(mstats.buckhash_sys);
-	dumpint(mstats.gc_sys);
-	dumpint(mstats.other_sys);
-	dumpint(mstats.next_gc);
-	dumpint(mstats.last_gc);
-	dumpint(mstats.pause_total_ns);
-	for(i = 0; i < 256; i++)
-		dumpint(mstats.pause_ns[i]);
-	dumpint(mstats.numgc);
-}
-
-static void
-dumpmemprof_callback(Bucket *b, uintptr nstk, uintptr *stk, uintptr size, uintptr allocs, uintptr frees)
-{
-	uintptr i, pc;
-	Func *f;
-	byte buf[20];
-	String file;
-	int32 line;
-
-	dumpint(TagMemProf);
-	dumpint((uintptr)b);
-	dumpint(size);
-	dumpint(nstk);
-	for(i = 0; i < nstk; i++) {
-		pc = stk[i];
-		f = runtime·findfunc(pc);
-		if(f == nil) {
-			runtime·snprintf(buf, sizeof(buf), "%X", (uint64)pc);
-			dumpcstr((int8*)buf);
-			dumpcstr("?");
-			dumpint(0);
-		} else {
-			dumpcstr(runtime·funcname(f));
-			// TODO: Why do we need to back up to a call instruction here?
-			// Maybe profiler should do this.
-			if(i > 0 && pc > f->entry) {
-				if(thechar == '6' || thechar == '8')
-					pc--;
-				else
-					pc -= 4; // arm, etc
-			}
-			line = runtime·funcline(f, pc, &file);
-			dumpstr(file);
-			dumpint(line);
-		}
-	}
-	dumpint(allocs);
-	dumpint(frees);
-}
-
-static void
-dumpmemprof(void)
-{
-	MSpan *s, **allspans;
-	uint32 spanidx;
-	Special *sp;
-	SpecialProfile *spp;
-	byte *p;
-	void (*fn)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr);
-	
-	fn = dumpmemprof_callback;
-	runtime·iterate_memprof(&fn);
-
-	allspans = runtime·mheap.allspans;
-	for(spanidx=0; spanidx<runtime·mheap.nspan; spanidx++) {
-		s = allspans[spanidx];
-		if(s->state != MSpanInUse)
-			continue;
-		for(sp = s->specials; sp != nil; sp = sp->next) {
-			if(sp->kind != KindSpecialProfile)
-				continue;
-			spp = (SpecialProfile*)sp;
-			p = (byte*)((s->start << PageShift) + spp->special.offset);
-			dumpint(TagAllocSample);
-			dumpint((uintptr)p);
-			dumpint((uintptr)spp->b);
-		}
-	}
-}
-
-static void
-mdump(void)
-{
-	byte *hdr;
-	uintptr i;
-	MSpan *s;
-
-	// make sure we're done sweeping
-	for(i = 0; i < runtime·mheap.nspan; i++) {
-		s = runtime·mheap.allspans[i];
-		if(s->state == MSpanInUse)
-			runtime·MSpan_EnsureSwept(s);
-	}
-
-	runtime·memclr((byte*)&typecache[0], sizeof(typecache));
-	hdr = (byte*)"go1.4 heap dump\n";
-	write(hdr, runtime·findnull(hdr));
-	dumpparams();
-	dumpitabs();
-	dumpobjs();
-	dumpgs();
-	dumpms();
-	dumproots();
-	dumpmemstats();
-	dumpmemprof();
-	dumpint(TagEOF);
-	flush();
-}
-
-void
-runtime·writeheapdump_m(void)
-{
-	uintptr fd;
-	
-	fd = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-
-	runtime·casgstatus(g->m->curg, Grunning, Gwaiting);
-	g->waitreason = runtime·gostringnocopy((byte*)"dumping heap");
-
-	// Update stats so we can dump them.
-	// As a side effect, flushes all the MCaches so the MSpan.freelist
-	// lists contain all the free objects.
-	runtime·updatememstats(nil);
-
-	// Set dump file.
-	dumpfd = fd;
-
-	// Call dump routine.
-	mdump();
-
-	// Reset dump file.
-	dumpfd = 0;
-	if(tmpbuf != nil) {
-		runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
-		tmpbuf = nil;
-		tmpbufsize = 0;
-	}
-
-	runtime·casgstatus(g->m->curg, Gwaiting, Grunning);
-}
-
-// dumpint() the kind & offset of each field in an object.
-static void
-dumpfields(BitVector bv)
-{
-	dumpbv(&bv, 0);
-	dumpint(FieldKindEol);
-}
-
-// The heap dump reader needs to be able to disambiguate
-// Eface entries.  So it needs to know every type that might
-// appear in such an entry.  The following routine accomplishes that.
-
-// Dump all the types that appear in the type field of
-// any Eface described by this bit vector.
-static void
-dumpbvtypes(BitVector *bv, byte *base)
-{
-	uintptr i;
-
-	for(i = 0; i < bv->n; i += BitsPerPointer) {
-		if((bv->bytedata[i/8] >> i%8 & 3) != BitsMultiWord)
-			continue;
-		switch(bv->bytedata[(i+BitsPerPointer)/8] >> (i+BitsPerPointer)%8 & 3) {
-		default:
-			runtime·throw("unexpected garbage collection bits");
-		case BitsIface:
-			i += BitsPerPointer;
-			break;
-		case BitsEface:
-			dumptype(*(Type**)(base + i / BitsPerPointer * PtrSize));
-			i += BitsPerPointer;
-			break;
-		}
-	}
-}
-
-static BitVector
-makeheapobjbv(byte *p, uintptr size)
-{
-	uintptr off, nptr, i;
-	byte shift, *bitp, bits;
-	bool mw;
-
-	// Extend the temp buffer if necessary.
-	nptr = size/PtrSize;
-	if(tmpbufsize < nptr*BitsPerPointer/8+1) {
-		if(tmpbuf != nil)
-			runtime·SysFree(tmpbuf, tmpbufsize, &mstats.other_sys);
-		tmpbufsize = nptr*BitsPerPointer/8+1;
-		tmpbuf = runtime·sysAlloc(tmpbufsize, &mstats.other_sys);
-		if(tmpbuf == nil)
-			runtime·throw("heapdump: out of memory");
-	}
-
-	// Copy and compact the bitmap.
-	mw = false;
-	for(i = 0; i < nptr; i++) {
-		off = (uintptr*)(p + i*PtrSize) - (uintptr*)runtime·mheap.arena_start;
-		bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-		shift = (off % wordsPerBitmapByte) * gcBits;
-		bits = (*bitp >> (shift + 2)) & BitsMask;
-		if(!mw && bits == BitsDead)
-			break;  // end of heap object
-		mw = !mw && bits == BitsMultiWord;
-		tmpbuf[i*BitsPerPointer/8] &= ~(BitsMask<<((i*BitsPerPointer)%8));
-		tmpbuf[i*BitsPerPointer/8] |= bits<<((i*BitsPerPointer)%8);
-	}
-	return (BitVector){i*BitsPerPointer, tmpbuf};
-}
diff --git a/src/runtime/heapdump.go b/src/runtime/heapdump.go
new file mode 100644
index 0000000..c942e01
--- /dev/null
+++ b/src/runtime/heapdump.go
@@ -0,0 +1,729 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implementation of runtime/debug.WriteHeapDump.  Writes all
+// objects in the heap plus additional info (roots, threads,
+// finalizers, etc.) to a file.
+
+// The format of the dumped file is described at
+// http://code.google.com/p/go-wiki/wiki/heapdump14
+
+package runtime
+
+import "unsafe"
+
+const (
+	fieldKindEol       = 0
+	fieldKindPtr       = 1
+	fieldKindIface     = 2
+	fieldKindEface     = 3
+	tagEOF             = 0
+	tagObject          = 1
+	tagOtherRoot       = 2
+	tagType            = 3
+	tagGoroutine       = 4
+	tagStackFrame      = 5
+	tagParams          = 6
+	tagFinalizer       = 7
+	tagItab            = 8
+	tagOSThread        = 9
+	tagMemStats        = 10
+	tagQueuedFinalizer = 11
+	tagData            = 12
+	tagBSS             = 13
+	tagDefer           = 14
+	tagPanic           = 15
+	tagMemProf         = 16
+	tagAllocSample     = 17
+)
+
+var dumpfd uintptr // fd to write the dump to.
+var tmpbuf []byte
+
+// buffer of pending write data
+const (
+	bufSize = 4096
+)
+
+var buf [bufSize]byte
+var nbuf uintptr
+
+func dwrite(data unsafe.Pointer, len uintptr) {
+	if len == 0 {
+		return
+	}
+	if nbuf+len <= bufSize {
+		copy(buf[nbuf:], (*[bufSize]byte)(data)[:len])
+		nbuf += len
+		return
+	}
+
+	write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
+	if len >= bufSize {
+		write(dumpfd, data, int32(len))
+		nbuf = 0
+	} else {
+		copy(buf[:], (*[bufSize]byte)(data)[:len])
+		nbuf = len
+	}
+}
+
+func dwritebyte(b byte) {
+	dwrite(unsafe.Pointer(&b), 1)
+}
+
+func flush() {
+	write(dumpfd, (unsafe.Pointer)(&buf), int32(nbuf))
+	nbuf = 0
+}
+
+// Cache of types that have been serialized already.
+// We use a type's hash field to pick a bucket.
+// Inside a bucket, we keep a list of types that
+// have been serialized so far, most recently used first.
+// Note: when a bucket overflows we may end up
+// serializing a type more than once.  That's ok.
+const (
+	typeCacheBuckets = 256
+	typeCacheAssoc   = 4
+)
+
+type typeCacheBucket struct {
+	t [typeCacheAssoc]*_type
+}
+
+var typecache [typeCacheBuckets]typeCacheBucket
+
+// dump a uint64 in a varint format parseable by encoding/binary
+func dumpint(v uint64) {
+	var buf [10]byte
+	var n int
+	for v >= 0x80 {
+		buf[n] = byte(v | 0x80)
+		n++
+		v >>= 7
+	}
+	buf[n] = byte(v)
+	n++
+	dwrite(unsafe.Pointer(&buf), uintptr(n))
+}
+
+func dumpbool(b bool) {
+	if b {
+		dumpint(1)
+	} else {
+		dumpint(0)
+	}
+}
+
+// dump varint uint64 length followed by memory contents
+func dumpmemrange(data unsafe.Pointer, len uintptr) {
+	dumpint(uint64(len))
+	dwrite(data, len)
+}
+
+func dumpslice(b []byte) {
+	dumpint(uint64(len(b)))
+	if len(b) > 0 {
+		dwrite(unsafe.Pointer(&b[0]), uintptr(len(b)))
+	}
+}
+
+func dumpstr(s string) {
+	sp := (*stringStruct)(unsafe.Pointer(&s))
+	dumpmemrange(sp.str, uintptr(sp.len))
+}
+
+// dump information for a type
+func dumptype(t *_type) {
+	if t == nil {
+		return
+	}
+
+	// If we've definitely serialized the type before,
+	// no need to do it again.
+	b := &typecache[t.hash&(typeCacheBuckets-1)]
+	if t == b.t[0] {
+		return
+	}
+	for i := 1; i < typeCacheAssoc; i++ {
+		if t == b.t[i] {
+			// Move-to-front
+			for j := i; j > 0; j-- {
+				b.t[j] = b.t[j-1]
+			}
+			b.t[0] = t
+			return
+		}
+	}
+
+	// Might not have been dumped yet.  Dump it and
+	// remember we did so.
+	for j := typeCacheAssoc - 1; j > 0; j-- {
+		b.t[j] = b.t[j-1]
+	}
+	b.t[0] = t
+
+	// dump the type
+	dumpint(tagType)
+	dumpint(uint64(uintptr(unsafe.Pointer(t))))
+	dumpint(uint64(t.size))
+	if t.x == nil || t.x.pkgpath == nil || t.x.name == nil {
+		dumpstr(*t._string)
+	} else {
+		pkgpath := (*stringStruct)(unsafe.Pointer(&t.x.pkgpath))
+		name := (*stringStruct)(unsafe.Pointer(&t.x.name))
+		dumpint(uint64(uintptr(pkgpath.len) + 1 + uintptr(name.len)))
+		dwrite(pkgpath.str, uintptr(pkgpath.len))
+		dwritebyte('.')
+		dwrite(name.str, uintptr(name.len))
+	}
+	dumpbool(t.kind&kindDirectIface == 0 || t.kind&kindNoPointers == 0)
+}
+
+// dump an object
+func dumpobj(obj unsafe.Pointer, size uintptr, bv bitvector) {
+	dumpbvtypes(&bv, obj)
+	dumpint(tagObject)
+	dumpint(uint64(uintptr(obj)))
+	dumpmemrange(obj, size)
+	dumpfields(bv)
+}
+
+func dumpotherroot(description string, to unsafe.Pointer) {
+	dumpint(tagOtherRoot)
+	dumpstr(description)
+	dumpint(uint64(uintptr(to)))
+}
+
+func dumpfinalizer(obj unsafe.Pointer, fn *funcval, fint *_type, ot *ptrtype) {
+	dumpint(tagFinalizer)
+	dumpint(uint64(uintptr(obj)))
+	dumpint(uint64(uintptr(unsafe.Pointer(fn))))
+	dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
+	dumpint(uint64(uintptr(unsafe.Pointer(fint))))
+	dumpint(uint64(uintptr(unsafe.Pointer(ot))))
+}
+
+type childInfo struct {
+	// Information passed up from the callee frame about
+	// the layout of the outargs region.
+	argoff uintptr   // where the arguments start in the frame
+	arglen uintptr   // size of args region
+	args   bitvector // if args.n >= 0, pointer map of args region
+	sp     *uint8    // callee sp
+	depth  uintptr   // depth in call stack (0 == most recent)
+}
+
+// dump kinds & offsets of interesting fields in bv
+func dumpbv(cbv *bitvector, offset uintptr) {
+	bv := gobv(*cbv)
+	for i := uintptr(0); i < uintptr(bv.n); i += bitsPerPointer {
+		switch bv.bytedata[i/8] >> (i % 8) & 3 {
+		default:
+			gothrow("unexpected pointer bits")
+		case _BitsDead:
+			// BitsDead has already been processed in makeheapobjbv.
+			// We should only see it in stack maps, in which case we should continue processing.
+		case _BitsScalar:
+			// ok
+		case _BitsPointer:
+			dumpint(fieldKindPtr)
+			dumpint(uint64(offset + i/_BitsPerPointer*ptrSize))
+		}
+	}
+}
+
+func dumpframe(s *stkframe, arg unsafe.Pointer) bool {
+	child := (*childInfo)(arg)
+	f := s.fn
+
+	// Figure out what we can about our stack map
+	pc := s.pc
+	if pc != f.entry {
+		pc--
+	}
+	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, pc)
+	if pcdata == -1 {
+		// We do not have a valid pcdata value but there might be a
+		// stackmap for this function.  It is likely that we are looking
+		// at the function prologue, assume so and hope for the best.
+		pcdata = 0
+	}
+	stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+
+	// Dump any types we will need to resolve Efaces.
+	if child.args.n >= 0 {
+		dumpbvtypes(&child.args, unsafe.Pointer(s.sp+child.argoff))
+	}
+	var bv bitvector
+	if stkmap != nil && stkmap.n > 0 {
+		bv = stackmapdata(stkmap, pcdata)
+		dumpbvtypes(&bv, unsafe.Pointer(s.varp-uintptr(bv.n/_BitsPerPointer*ptrSize)))
+	} else {
+		bv.n = -1
+	}
+
+	// Dump main body of stack frame.
+	dumpint(tagStackFrame)
+	dumpint(uint64(s.sp))                              // lowest address in frame
+	dumpint(uint64(child.depth))                       // # of frames deep on the stack
+	dumpint(uint64(uintptr(unsafe.Pointer(child.sp)))) // sp of child, or 0 if bottom of stack
+	dumpmemrange(unsafe.Pointer(s.sp), s.fp-s.sp)      // frame contents
+	dumpint(uint64(f.entry))
+	dumpint(uint64(s.pc))
+	dumpint(uint64(s.continpc))
+	name := gofuncname(f)
+	if name == "" {
+		name = "unknown function"
+	}
+	dumpstr(name)
+
+	// Dump fields in the outargs section
+	if child.args.n >= 0 {
+		dumpbv(&child.args, child.argoff)
+	} else {
+		// conservative - everything might be a pointer
+		for off := child.argoff; off < child.argoff+child.arglen; off += ptrSize {
+			dumpint(fieldKindPtr)
+			dumpint(uint64(off))
+		}
+	}
+
+	// Dump fields in the local vars section
+	if stkmap == nil {
+		// No locals information, dump everything.
+		for off := child.arglen; off < s.varp-s.sp; off += ptrSize {
+			dumpint(fieldKindPtr)
+			dumpint(uint64(off))
+		}
+	} else if stkmap.n < 0 {
+		// Locals size information, dump just the locals.
+		size := uintptr(-stkmap.n)
+		for off := s.varp - size - s.sp; off < s.varp-s.sp; off += ptrSize {
+			dumpint(fieldKindPtr)
+			dumpint(uint64(off))
+		}
+	} else if stkmap.n > 0 {
+		// Locals bitmap information, scan just the pointers in
+		// locals.
+		dumpbv(&bv, s.varp-uintptr(bv.n)/_BitsPerPointer*ptrSize-s.sp)
+	}
+	dumpint(fieldKindEol)
+
+	// Record arg info for parent.
+	child.argoff = s.argp - s.fp
+	child.arglen = s.arglen
+	child.sp = (*uint8)(unsafe.Pointer(s.sp))
+	child.depth++
+	stkmap = (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+	if stkmap != nil {
+		child.args = stackmapdata(stkmap, pcdata)
+	} else {
+		child.args.n = -1
+	}
+	return true
+}
+
+func dumpgoroutine(gp *g) {
+	var sp, pc, lr uintptr
+	if gp.syscallsp != 0 {
+		sp = gp.syscallsp
+		pc = gp.syscallpc
+		lr = 0
+	} else {
+		sp = gp.sched.sp
+		pc = gp.sched.pc
+		lr = gp.sched.lr
+	}
+
+	dumpint(tagGoroutine)
+	dumpint(uint64(uintptr(unsafe.Pointer(gp))))
+	dumpint(uint64(sp))
+	dumpint(uint64(gp.goid))
+	dumpint(uint64(gp.gopc))
+	dumpint(uint64(readgstatus(gp)))
+	dumpbool(gp.issystem)
+	dumpbool(false) // isbackground
+	dumpint(uint64(gp.waitsince))
+	dumpstr(gp.waitreason)
+	dumpint(uint64(uintptr(gp.sched.ctxt)))
+	dumpint(uint64(uintptr(unsafe.Pointer(gp.m))))
+	dumpint(uint64(uintptr(unsafe.Pointer(gp._defer))))
+	dumpint(uint64(uintptr(unsafe.Pointer(gp._panic))))
+
+	// dump stack
+	var child childInfo
+	child.args.n = -1
+	child.arglen = 0
+	child.sp = nil
+	child.depth = 0
+	gentraceback(pc, sp, lr, gp, 0, nil, 0x7fffffff, dumpframe, noescape(unsafe.Pointer(&child)), 0)
+
+	// dump defer & panic records
+	for d := gp._defer; d != nil; d = d.link {
+		dumpint(tagDefer)
+		dumpint(uint64(uintptr(unsafe.Pointer(d))))
+		dumpint(uint64(uintptr(unsafe.Pointer(gp))))
+		dumpint(uint64(d.argp))
+		dumpint(uint64(d.pc))
+		dumpint(uint64(uintptr(unsafe.Pointer(d.fn))))
+		dumpint(uint64(uintptr(unsafe.Pointer(d.fn.fn))))
+		dumpint(uint64(uintptr(unsafe.Pointer(d.link))))
+	}
+	for p := gp._panic; p != nil; p = p.link {
+		dumpint(tagPanic)
+		dumpint(uint64(uintptr(unsafe.Pointer(p))))
+		dumpint(uint64(uintptr(unsafe.Pointer(gp))))
+		eface := (*eface)(unsafe.Pointer(&p.arg))
+		dumpint(uint64(uintptr(unsafe.Pointer(eface._type))))
+		dumpint(uint64(uintptr(unsafe.Pointer(eface.data))))
+		dumpint(0) // was p->defer, no longer recorded
+		dumpint(uint64(uintptr(unsafe.Pointer(p.link))))
+	}
+}
+
+func dumpgs() {
+	// goroutines & stacks
+	for i := 0; uintptr(i) < allglen; i++ {
+		gp := allgs[i]
+		status := readgstatus(gp) // The world is stopped so gp will not be in a scan state.
+		switch status {
+		default:
+			print("runtime: unexpected G.status ", hex(status), "\n")
+			gothrow("dumpgs in STW - bad status")
+		case _Gdead:
+			// ok
+		case _Grunnable,
+			_Gsyscall,
+			_Gwaiting:
+			dumpgoroutine(gp)
+		}
+	}
+}
+
+func finq_callback(fn *funcval, obj unsafe.Pointer, nret uintptr, fint *_type, ot *ptrtype) {
+	dumpint(tagQueuedFinalizer)
+	dumpint(uint64(uintptr(obj)))
+	dumpint(uint64(uintptr(unsafe.Pointer(fn))))
+	dumpint(uint64(uintptr(unsafe.Pointer(fn.fn))))
+	dumpint(uint64(uintptr(unsafe.Pointer(fint))))
+	dumpint(uint64(uintptr(unsafe.Pointer(ot))))
+}
+
+func dumproots() {
+	// data segment
+	dumpbvtypes(&gcdatamask, unsafe.Pointer(&data))
+	dumpint(tagData)
+	dumpint(uint64(uintptr(unsafe.Pointer(&data))))
+	dumpmemrange(unsafe.Pointer(&data), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
+	dumpfields(gcdatamask)
+
+	// bss segment
+	dumpbvtypes(&gcbssmask, unsafe.Pointer(&bss))
+	dumpint(tagBSS)
+	dumpint(uint64(uintptr(unsafe.Pointer(&bss))))
+	dumpmemrange(unsafe.Pointer(&bss), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
+	dumpfields(gcbssmask)
+
+	// MSpan.types
+	allspans := h_allspans
+	for spanidx := uint32(0); spanidx < mheap_.nspan; spanidx++ {
+		s := allspans[spanidx]
+		if s.state == _MSpanInUse {
+			// Finalizers
+			for sp := s.specials; sp != nil; sp = sp.next {
+				if sp.kind != _KindSpecialFinalizer {
+					continue
+				}
+				spf := (*specialfinalizer)(unsafe.Pointer(sp))
+				p := unsafe.Pointer((uintptr(s.start) << _PageShift) + uintptr(spf.special.offset))
+				dumpfinalizer(p, spf.fn, spf.fint, spf.ot)
+			}
+		}
+	}
+
+	// Finalizer queue
+	iterate_finq(finq_callback)
+}
+
+// Bit vector of free marks.
+// Needs to be as big as the largest number of objects per span.
+var freemark [_PageSize / 8]bool
+
+func dumpobjs() {
+	for i := uintptr(0); i < uintptr(mheap_.nspan); i++ {
+		s := h_allspans[i]
+		if s.state != _MSpanInUse {
+			continue
+		}
+		p := uintptr(s.start << _PageShift)
+		size := s.elemsize
+		n := (s.npages << _PageShift) / size
+		if n > uintptr(len(freemark)) {
+			gothrow("freemark array doesn't have enough entries")
+		}
+		for l := s.freelist; l != nil; l = l.next {
+			freemark[(uintptr(unsafe.Pointer(l))-p)/size] = true
+		}
+		for j := uintptr(0); j < n; j, p = j+1, p+size {
+			if freemark[j] {
+				freemark[j] = false
+				continue
+			}
+			dumpobj(unsafe.Pointer(p), size, makeheapobjbv(p, size))
+		}
+	}
+}
+
+func dumpparams() {
+	dumpint(tagParams)
+	x := uintptr(1)
+	if *(*byte)(unsafe.Pointer(&x)) == 1 {
+		dumpbool(false) // little-endian ptrs
+	} else {
+		dumpbool(true) // big-endian ptrs
+	}
+	dumpint(ptrSize)
+	dumpint(uint64(mheap_.arena_start))
+	dumpint(uint64(mheap_.arena_used))
+	dumpint(thechar)
+	dumpstr(goexperiment)
+	dumpint(uint64(ncpu))
+}
+
+func itab_callback(tab *itab) {
+	t := tab._type
+	// Dump a map from itab* to the type of its data field.
+	// We want this map so we can deduce types of interface referents.
+	if t.kind&kindDirectIface == 0 {
+		// indirect - data slot is a pointer to t.
+		dumptype(t.ptrto)
+		dumpint(tagItab)
+		dumpint(uint64(uintptr(unsafe.Pointer(tab))))
+		dumpint(uint64(uintptr(unsafe.Pointer(t.ptrto))))
+	} else if t.kind&kindNoPointers == 0 {
+		// t is pointer-like - data slot is a t.
+		dumptype(t)
+		dumpint(tagItab)
+		dumpint(uint64(uintptr(unsafe.Pointer(tab))))
+		dumpint(uint64(uintptr(unsafe.Pointer(t))))
+	} else {
+		// Data slot is a scalar.  Dump type just for fun.
+		// With pointer-only interfaces, this shouldn't happen.
+		dumptype(t)
+		dumpint(tagItab)
+		dumpint(uint64(uintptr(unsafe.Pointer(tab))))
+		dumpint(uint64(uintptr(unsafe.Pointer(t))))
+	}
+}
+
+func dumpitabs() {
+	iterate_itabs(itab_callback)
+}
+
+func dumpms() {
+	for mp := allm; mp != nil; mp = mp.alllink {
+		dumpint(tagOSThread)
+		dumpint(uint64(uintptr(unsafe.Pointer(mp))))
+		dumpint(uint64(mp.id))
+		dumpint(mp.procid)
+	}
+}
+
+func dumpmemstats() {
+	dumpint(tagMemStats)
+	dumpint(memstats.alloc)
+	dumpint(memstats.total_alloc)
+	dumpint(memstats.sys)
+	dumpint(memstats.nlookup)
+	dumpint(memstats.nmalloc)
+	dumpint(memstats.nfree)
+	dumpint(memstats.heap_alloc)
+	dumpint(memstats.heap_sys)
+	dumpint(memstats.heap_idle)
+	dumpint(memstats.heap_inuse)
+	dumpint(memstats.heap_released)
+	dumpint(memstats.heap_objects)
+	dumpint(memstats.stacks_inuse)
+	dumpint(memstats.stacks_sys)
+	dumpint(memstats.mspan_inuse)
+	dumpint(memstats.mspan_sys)
+	dumpint(memstats.mcache_inuse)
+	dumpint(memstats.mcache_sys)
+	dumpint(memstats.buckhash_sys)
+	dumpint(memstats.gc_sys)
+	dumpint(memstats.other_sys)
+	dumpint(memstats.next_gc)
+	dumpint(memstats.last_gc)
+	dumpint(memstats.pause_total_ns)
+	for i := 0; i < 256; i++ {
+		dumpint(memstats.pause_ns[i])
+	}
+	dumpint(uint64(memstats.numgc))
+}
+
+func dumpmemprof_callback(b *bucket, nstk uintptr, pstk *uintptr, size, allocs, frees uintptr) {
+	stk := (*[100000]uintptr)(unsafe.Pointer(pstk))
+	dumpint(tagMemProf)
+	dumpint(uint64(uintptr(unsafe.Pointer(b))))
+	dumpint(uint64(size))
+	dumpint(uint64(nstk))
+	for i := uintptr(0); i < nstk; i++ {
+		pc := stk[i]
+		f := findfunc(pc)
+		if f == nil {
+			var buf [64]byte
+			n := len(buf)
+			n--
+			buf[n] = ')'
+			if pc == 0 {
+				n--
+				buf[n] = '0'
+			} else {
+				for pc > 0 {
+					n--
+					buf[n] = "0123456789abcdef"[pc&15]
+					pc >>= 4
+				}
+			}
+			n--
+			buf[n] = 'x'
+			n--
+			buf[n] = '0'
+			n--
+			buf[n] = '('
+			dumpslice(buf[n:])
+			dumpstr("?")
+			dumpint(0)
+		} else {
+			dumpstr(gofuncname(f))
+			if i > 0 && pc > f.entry {
+				pc--
+			}
+			file, line := funcline(f, pc)
+			dumpstr(file)
+			dumpint(uint64(line))
+		}
+	}
+	dumpint(uint64(allocs))
+	dumpint(uint64(frees))
+}
+
+func dumpmemprof() {
+	iterate_memprof(dumpmemprof_callback)
+	allspans := h_allspans
+	for spanidx := uint32(0); spanidx < mheap_.nspan; spanidx++ {
+		s := allspans[spanidx]
+		if s.state != _MSpanInUse {
+			continue
+		}
+		for sp := s.specials; sp != nil; sp = sp.next {
+			if sp.kind != _KindSpecialProfile {
+				continue
+			}
+			spp := (*specialprofile)(unsafe.Pointer(sp))
+			p := uintptr(s.start<<_PageShift) + uintptr(spp.special.offset)
+			dumpint(tagAllocSample)
+			dumpint(uint64(p))
+			dumpint(uint64(uintptr(unsafe.Pointer(spp.b))))
+		}
+	}
+}
+
+var dumphdr = []byte("go1.4 heap dump\n")
+
+func mdump() {
+	// make sure we're done sweeping
+	for i := uintptr(0); i < uintptr(mheap_.nspan); i++ {
+		s := h_allspans[i]
+		if s.state == _MSpanInUse {
+			mSpan_EnsureSwept(s)
+		}
+	}
+	memclr(unsafe.Pointer(&typecache), unsafe.Sizeof(typecache))
+	dwrite(unsafe.Pointer(&dumphdr[0]), uintptr(len(dumphdr)))
+	dumpparams()
+	dumpitabs()
+	dumpobjs()
+	dumpgs()
+	dumpms()
+	dumproots()
+	dumpmemstats()
+	dumpmemprof()
+	dumpint(tagEOF)
+	flush()
+}
+
+func writeheapdump_m(fd uintptr) {
+	_g_ := getg()
+	casgstatus(_g_.m.curg, _Grunning, _Gwaiting)
+	_g_.waitreason = "dumping heap"
+
+	// Update stats so we can dump them.
+	// As a side effect, flushes all the MCaches so the MSpan.freelist
+	// lists contain all the free objects.
+	updatememstats(nil)
+
+	// Set dump file.
+	dumpfd = fd
+
+	// Call dump routine.
+	mdump()
+
+	// Reset dump file.
+	dumpfd = 0
+	if tmpbuf != nil {
+		sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
+		tmpbuf = nil
+	}
+
+	casgstatus(_g_.m.curg, _Gwaiting, _Grunning)
+}
+
+// dumpint() the kind & offset of each field in an object.
+func dumpfields(bv bitvector) {
+	dumpbv(&bv, 0)
+	dumpint(fieldKindEol)
+}
+
+// The heap dump reader needs to be able to disambiguate
+// Eface entries.  So it needs to know every type that might
+// appear in such an entry.  The following routine accomplishes that.
+// TODO(rsc, khr): Delete - no longer possible.
+
+// Dump all the types that appear in the type field of
+// any Eface described by this bit vector.
+func dumpbvtypes(bv *bitvector, base unsafe.Pointer) {
+}
+
+func makeheapobjbv(p uintptr, size uintptr) bitvector {
+	// Extend the temp buffer if necessary.
+	nptr := size / ptrSize
+	if uintptr(len(tmpbuf)) < nptr*_BitsPerPointer/8+1 {
+		if tmpbuf != nil {
+			sysFree(unsafe.Pointer(&tmpbuf[0]), uintptr(len(tmpbuf)), &memstats.other_sys)
+		}
+		n := nptr*_BitsPerPointer/8 + 1
+		p := sysAlloc(n, &memstats.other_sys)
+		if p == nil {
+			gothrow("heapdump: out of memory")
+		}
+		tmpbuf = (*[1 << 30]byte)(p)[:n]
+	}
+	// Copy and compact the bitmap.
+	var i uintptr
+	for i = 0; i < nptr; i++ {
+		off := (p + i*ptrSize - mheap_.arena_start) / ptrSize
+		bitp := (*uint8)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1))
+		shift := uint8((off % wordsPerBitmapByte) * gcBits)
+		bits := (*bitp >> (shift + 2)) & _BitsMask
+		if bits == _BitsDead {
+			break // end of heap object
+		}
+		tmpbuf[i*_BitsPerPointer/8] &^= (_BitsMask << ((i * _BitsPerPointer) % 8))
+		tmpbuf[i*_BitsPerPointer/8] |= bits << ((i * _BitsPerPointer) % 8)
+	}
+	return bitvector{int32(i * _BitsPerPointer), &tmpbuf[0]}
+}
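
The stream written above is a plain-text header line followed by records whose integers all pass through dumpint, i.e. the uvarint encoding that encoding/binary understands. Below is a minimal reader sketch under that assumption; the file name "heapdump" and reading only the header plus the first tag are illustrative, not part of this CL.

package main

import (
	"bufio"
	"encoding/binary"
	"fmt"
	"os"
)

func main() {
	f, err := os.Open("heapdump") // hypothetical file produced by debug.WriteHeapDump
	if err != nil {
		panic(err)
	}
	defer f.Close()

	r := bufio.NewReader(f)
	hdr, err := r.ReadString('\n') // "go1.4 heap dump\n"
	if err != nil {
		panic(err)
	}
	fmt.Printf("header: %q\n", hdr)

	// Every integer in the dump is written by dumpint, so it can be
	// decoded with encoding/binary's uvarint reader.
	tag, err := binary.ReadUvarint(r)
	if err != nil {
		panic(err)
	}
	fmt.Println("first record tag:", tag) // tagParams (6) for a fresh dump
}
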
diff --git a/src/runtime/lfstack.c b/src/runtime/lfstack.c
deleted file mode 100644
index 0ced839..0000000
--- a/src/runtime/lfstack.c
+++ /dev/null
@@ -1,85 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Lock-free stack.
-// The following code runs only on g0 stack.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-
-#ifdef _64BIT
-// Amd64 uses 48-bit virtual addresses, 47-th bit is used as kernel/user flag.
-// So we use 17msb of pointers as ABA counter.
-# define PTR_BITS 47
-#else
-# define PTR_BITS 32
-#endif
-#define PTR_MASK ((1ull<<PTR_BITS)-1)
-#define CNT_MASK (0ull-1)
-
-#ifdef _64BIT
-#ifdef GOOS_solaris
-// SPARC64 and Solaris on AMD64 uses all 64 bits of virtual addresses.
-// Use low-order three bits as ABA counter.
-// http://docs.oracle.com/cd/E19120-01/open.solaris/816-5138/6mba6ua5p/index.html
-#undef PTR_BITS
-#undef CNT_MASK
-#undef PTR_MASK
-#define PTR_BITS 0
-#define CNT_MASK 7
-#define PTR_MASK ((0ull-1)<<3)
-#endif
-#endif
-
-void
-runtime·lfstackpush(uint64 *head, LFNode *node)
-{
-	uint64 old, new;
-
-	if((uintptr)node != ((uintptr)node&PTR_MASK)) {
-		runtime·printf("p=%p\n", node);
-		runtime·throw("runtime·lfstackpush: invalid pointer");
-	}
-
-	node->pushcnt++;
-	new = (uint64)(uintptr)node|(((uint64)node->pushcnt&CNT_MASK)<<PTR_BITS);
-	for(;;) {
-		old = runtime·atomicload64(head);
-		node->next = old;
-		if(runtime·cas64(head, old, new))
-			break;
-	}
-}
-
-LFNode*
-runtime·lfstackpop(uint64 *head)
-{
-	LFNode *node;
-	uint64 old, next;
-
-	for(;;) {
-		old = runtime·atomicload64(head);
-		if(old == 0)
-			return nil;
-		node = (LFNode*)(uintptr)(old&PTR_MASK);
-		next = runtime·atomicload64(&node->next);
-
-		if(runtime·cas64(head, old, next))
-			return node;
-	}
-}
-
-void
-runtime·lfstackpush_m(void)
-{
-	runtime·lfstackpush(g->m->ptrarg[0], g->m->ptrarg[1]);
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-}
-
-void
-runtime·lfstackpop_m(void)
-{
-	g->m->ptrarg[0] = runtime·lfstackpop(g->m->ptrarg[0]);
-}
diff --git a/src/runtime/lfstack.go b/src/runtime/lfstack.go
new file mode 100644
index 0000000..a4ad8a1
--- /dev/null
+++ b/src/runtime/lfstack.go
@@ -0,0 +1,36 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Lock-free stack.
+// The following code runs only on g0 stack.
+
+package runtime
+
+import "unsafe"
+
+func lfstackpush(head *uint64, node *lfnode) {
+	node.pushcnt++
+	new := lfstackPack(node, node.pushcnt)
+	for {
+		old := atomicload64(head)
+		node.next = old
+		if cas64(head, old, new) {
+			break
+		}
+	}
+}
+
+func lfstackpop(head *uint64) unsafe.Pointer {
+	for {
+		old := atomicload64(head)
+		if old == 0 {
+			return nil
+		}
+		node, _ := lfstackUnpack(old)
+		next := atomicload64(&node.next)
+		if cas64(head, old, next) {
+			return unsafe.Pointer(node)
+		}
+	}
+}
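
lfstackpush and lfstackpop above are a Treiber-style lock-free stack: load the head, link the node, CAS the head, retry on contention; the push count packed into the uint64 guards against ABA reuse of nodes. Below is a rough user-space analogue with sync/atomic, sketched without the ABA counter (so it is only safe if nodes are never recycled); the types and names are illustrative, not runtime API.

package main

import (
	"fmt"
	"sync/atomic"
	"unsafe"
)

type node struct {
	next unsafe.Pointer // *node
	val  int
}

type stack struct {
	head unsafe.Pointer // *node
}

func (s *stack) push(n *node) {
	for {
		old := atomic.LoadPointer(&s.head)
		n.next = old
		if atomic.CompareAndSwapPointer(&s.head, old, unsafe.Pointer(n)) {
			return
		}
	}
}

func (s *stack) pop() *node {
	for {
		old := atomic.LoadPointer(&s.head)
		if old == nil {
			return nil
		}
		n := (*node)(old)
		next := atomic.LoadPointer(&n.next)
		if atomic.CompareAndSwapPointer(&s.head, old, next) {
			return n
		}
	}
}

func main() {
	var s stack
	s.push(&node{val: 1})
	s.push(&node{val: 2})
	fmt.Println(s.pop().val, s.pop().val) // 2 1
}
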
diff --git a/src/runtime/lfstack_32bit.go b/src/runtime/lfstack_32bit.go
new file mode 100644
index 0000000..61d8678
--- /dev/null
+++ b/src/runtime/lfstack_32bit.go
@@ -0,0 +1,21 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 arm
+
+package runtime
+
+import "unsafe"
+
+// On 32-bit systems, the stored uint64 has a 32-bit pointer and 32-bit count.
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<32 | uint64(cnt)
+}
+
+func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
+	node = (*lfnode)(unsafe.Pointer(uintptr(val >> 32)))
+	cnt = uintptr(val)
+	return
+}
diff --git a/src/runtime/lfstack_amd64.go b/src/runtime/lfstack_amd64.go
new file mode 100644
index 0000000..84e2851
--- /dev/null
+++ b/src/runtime/lfstack_amd64.go
@@ -0,0 +1,24 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// On AMD64, virtual addresses are 48-bit numbers sign extended to 64.
+// We shift the address left 16 to eliminate the sign extended part and make
+// room in the bottom for the count.
+// In addition to the 16 bits taken from the top, we can take 3 from the
+// bottom, because node must be pointer-aligned, giving a total of 19 bits
+// of count.
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<16 | uint64(cnt&(1<<19-1))
+}
+
+func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
+	node = (*lfnode)(unsafe.Pointer(uintptr(int64(val) >> 19 << 3)))
+	cnt = uintptr(val & (1<<19 - 1))
+	return
+}
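
A standalone check of the arithmetic above: shifting a pointer-aligned canonical address left 16 and ORing in a 19-bit count round-trips through the arithmetic shift right 19 then left 3 used by lfstackUnpack. Sketch only, using plain uint64 values in place of real *lfnode pointers.

package main

import "fmt"

// pack/unpack mirror lfstackPack/lfstackUnpack for amd64, but on uint64s.
func pack(addr, cnt uint64) uint64 {
	return addr<<16 | cnt&(1<<19-1)
}

func unpack(val uint64) (addr, cnt uint64) {
	addr = uint64(int64(val) >> 19 << 3)
	cnt = val & (1<<19 - 1)
	return
}

func main() {
	addr := uint64(0x00c0_1234_5678) // pointer-aligned, canonical user address
	cnt := uint64(7)
	a, c := unpack(pack(addr, cnt))
	fmt.Printf("%#x count %d -> %#x count %d\n", addr, cnt, a, c)
}
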
diff --git a/src/runtime/lfstack_linux_power64x.go b/src/runtime/lfstack_linux_power64x.go
new file mode 100644
index 0000000..7a122bf
--- /dev/null
+++ b/src/runtime/lfstack_linux_power64x.go
@@ -0,0 +1,26 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build power64 power64le
+// +build linux
+
+package runtime
+
+import "unsafe"
+
+// On Power64, Linux limits the user address space to 43 bits.
+// (https://www.kernel.org/doc/ols/2001/ppc64.pdf)
+// In addition to the 21 bits taken from the top, we can take 3 from the
+// bottom, because node must be pointer-aligned, giving a total of 24 bits
+// of count.
+
+func lfstackPack(node *lfnode, cnt uintptr) uint64 {
+	return uint64(uintptr(unsafe.Pointer(node)))<<21 | uint64(cnt&(1<<24-1))
+}
+
+func lfstackUnpack(val uint64) (node *lfnode, cnt uintptr) {
+	node = (*lfnode)(unsafe.Pointer(uintptr(val >> 24 << 3)))
+	cnt = uintptr(val & (1<<24 - 1))
+	return
+}
diff --git a/src/runtime/lock_futex.go b/src/runtime/lock_futex.go
index 7259623..11c3a3f 100644
--- a/src/runtime/lock_futex.go
+++ b/src/runtime/lock_futex.go
@@ -34,9 +34,6 @@
 // Note that there can be spinning threads during all states - they do not
 // affect mutex's state.
 
-func futexsleep(addr *uint32, val uint32, ns int64)
-func futexwakeup(addr *uint32, cnt uint32)
-
 // We use the uintptr mutex.key and note.key as a uint32.
 func key32(p *uintptr) *uint32 {
 	return (*uint32)(unsafe.Pointer(p))
@@ -198,8 +195,8 @@
 		gothrow("notetsleepg on g0")
 	}
 
-	entersyscallblock()
+	entersyscallblock(0)
 	ok := notetsleep_internal(n, ns)
-	exitsyscall()
+	exitsyscall(0)
 	return ok
 }
diff --git a/src/runtime/lock_sema.go b/src/runtime/lock_sema.go
index d136b828..a2a87ba 100644
--- a/src/runtime/lock_sema.go
+++ b/src/runtime/lock_sema.go
@@ -31,10 +31,6 @@
 	passive_spin    = 1
 )
 
-func semacreate() uintptr
-func semasleep(int64) int32
-func semawakeup(mp *m)
-
 func lock(l *mutex) {
 	gp := getg()
 	if gp.m.locks < 0 {
@@ -263,8 +259,8 @@
 	if gp.m.waitsema == 0 {
 		gp.m.waitsema = semacreate()
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	ok := notetsleep_internal(n, ns, nil, 0)
-	exitsyscall()
+	exitsyscall(0)
 	return ok
 }
diff --git a/src/runtime/malloc.c b/src/runtime/malloc.c
deleted file mode 100644
index b79c30b..0000000
--- a/src/runtime/malloc.c
+++ /dev/null
@@ -1,396 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// See malloc.h for overview.
-//
-// TODO(rsc): double-check stats.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "type.h"
-#include "typekind.h"
-#include "race.h"
-#include "stack.h"
-#include "textflag.h"
-
-// Mark mheap as 'no pointers', it does not contain interesting pointers but occupies ~45K.
-#pragma dataflag NOPTR
-MHeap runtime·mheap;
-#pragma dataflag NOPTR
-MStats runtime·memstats;
-
-int32
-runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **sp)
-{
-	uintptr n, i;
-	byte *p;
-	MSpan *s;
-
-	g->m->mcache->local_nlookup++;
-	if (sizeof(void*) == 4 && g->m->mcache->local_nlookup >= (1<<30)) {
-		// purge cache stats to prevent overflow
-		runtime·lock(&runtime·mheap.lock);
-		runtime·purgecachedstats(g->m->mcache);
-		runtime·unlock(&runtime·mheap.lock);
-	}
-
-	s = runtime·MHeap_LookupMaybe(&runtime·mheap, v);
-	if(sp)
-		*sp = s;
-	if(s == nil) {
-		if(base)
-			*base = nil;
-		if(size)
-			*size = 0;
-		return 0;
-	}
-
-	p = (byte*)((uintptr)s->start<<PageShift);
-	if(s->sizeclass == 0) {
-		// Large object.
-		if(base)
-			*base = p;
-		if(size)
-			*size = s->npages<<PageShift;
-		return 1;
-	}
-
-	n = s->elemsize;
-	if(base) {
-		i = ((byte*)v - p)/n;
-		*base = p + i*n;
-	}
-	if(size)
-		*size = n;
-
-	return 1;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·purgecachedstats(MCache *c)
-{
-	MHeap *h;
-	int32 i;
-
-	// Protected by either heap or GC lock.
-	h = &runtime·mheap;
-	mstats.heap_alloc += c->local_cachealloc;
-	c->local_cachealloc = 0;
-	mstats.tinyallocs += c->local_tinyallocs;
-	c->local_tinyallocs = 0;
-	mstats.nlookup += c->local_nlookup;
-	c->local_nlookup = 0;
-	h->largefree += c->local_largefree;
-	c->local_largefree = 0;
-	h->nlargefree += c->local_nlargefree;
-	c->local_nlargefree = 0;
-	for(i=0; i<nelem(c->local_nsmallfree); i++) {
-		h->nsmallfree[i] += c->local_nsmallfree[i];
-		c->local_nsmallfree[i] = 0;
-	}
-}
-
-// Size of the trailing by_size array differs between Go and C,
-// and all data after by_size is local to C, not exported to Go.
-// NumSizeClasses was changed, but we can not change Go struct because of backward compatibility.
-// sizeof_C_MStats is what C thinks about size of Go struct.
-uintptr runtime·sizeof_C_MStats = offsetof(MStats, by_size[61]);
-
-#define MaxArena32 (2U<<30)
-
-// For use by Go. If it were a C enum it would be made available automatically,
-// but the value of MaxMem is too large for enum.
-uintptr runtime·maxmem = MaxMem;
-
-void
-runtime·mallocinit(void)
-{
-	byte *p, *p1;
-	uintptr arena_size, bitmap_size, spans_size, p_size;
-	extern byte runtime·end[];
-	uintptr limit;
-	uint64 i;
-	bool reserved;
-
-	p = nil;
-	p_size = 0;
-	arena_size = 0;
-	bitmap_size = 0;
-	spans_size = 0;
-	reserved = false;
-
-	// for 64-bit build
-	USED(p);
-	USED(p_size);
-	USED(arena_size);
-	USED(bitmap_size);
-	USED(spans_size);
-
-	runtime·InitSizes();
-
-	if(runtime·class_to_size[TinySizeClass] != TinySize)
-		runtime·throw("bad TinySizeClass");
-
-	// limit = runtime·memlimit();
-	// See https://code.google.com/p/go/issues/detail?id=5049
-	// TODO(rsc): Fix after 1.1.
-	limit = 0;
-
-	// Set up the allocation arena, a contiguous area of memory where
-	// allocated data will be found.  The arena begins with a bitmap large
-	// enough to hold 4 bits per allocated word.
-	if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
-		// On a 64-bit machine, allocate from a single contiguous reservation.
-		// 128 GB (MaxMem) should be big enough for now.
-		//
-		// The code will work with the reservation at any address, but ask
-		// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
-		// Allocating a 128 GB region takes away 37 bits, and the amd64
-		// doesn't let us choose the top 17 bits, so that leaves the 11 bits
-		// in the middle of 0x00c0 for us to choose.  Choosing 0x00c0 means
-		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
-		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
-		// UTF-8 sequences, and they are otherwise as far away from 
-		// ff (likely a common byte) as possible.  If that fails, we try other 0xXXc0
-		// addresses.  An earlier attempt to use 0x11f8 caused out of memory errors
-		// on OS X during thread allocations.  0x00c0 causes conflicts with
-		// AddressSanitizer which reserves all memory up to 0x0100.
-		// These choices are both for debuggability and to reduce the
-		// odds of the conservative garbage collector not collecting memory
-		// because some non-pointer block of memory had a bit pattern
-		// that matched a memory address.
-		//
-		// Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
-		// but it hardly matters: e0 00 is not valid UTF-8 either.
-		//
-		// If this fails we fall back to the 32 bit memory mechanism
-		arena_size = MaxMem;
-		bitmap_size = arena_size / (sizeof(void*)*8/4);
-		spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
-		spans_size = ROUND(spans_size, PageSize);
-		for(i = 0; i <= 0x7f; i++) {
-			p = (void*)(i<<40 | 0x00c0ULL<<32);
-			p_size = bitmap_size + spans_size + arena_size + PageSize;
-			p = runtime·SysReserve(p, p_size, &reserved);
-			if(p != nil)
-				break;
-		}
-	}
-	if (p == nil) {
-		// On a 32-bit machine, we can't typically get away
-		// with a giant virtual address space reservation.
-		// Instead we map the memory information bitmap
-		// immediately after the data segment, large enough
-		// to handle another 2GB of mappings (256 MB),
-		// along with a reservation for another 512 MB of memory.
-		// When that gets used up, we'll start asking the kernel
-		// for any memory anywhere and hope it's in the 2GB
-		// following the bitmap (presumably the executable begins
-		// near the bottom of memory, so we'll have to use up
-		// most of memory before the kernel resorts to giving out
-		// memory before the beginning of the text segment).
-		//
-		// Alternatively we could reserve 512 MB bitmap, enough
-		// for 4GB of mappings, and then accept any memory the
-		// kernel threw at us, but normally that's a waste of 512 MB
-		// of address space, which is probably too much in a 32-bit world.
-		bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
-		arena_size = 512<<20;
-		spans_size = MaxArena32 / PageSize * sizeof(runtime·mheap.spans[0]);
-		if(limit > 0 && arena_size+bitmap_size+spans_size > limit) {
-			bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
-			arena_size = bitmap_size * 8;
-			spans_size = arena_size / PageSize * sizeof(runtime·mheap.spans[0]);
-		}
-		spans_size = ROUND(spans_size, PageSize);
-
-		// SysReserve treats the address we ask for, end, as a hint,
-		// not as an absolute requirement.  If we ask for the end
-		// of the data segment but the operating system requires
-		// a little more space before we can start allocating, it will
-		// give out a slightly higher pointer.  Except QEMU, which
-		// is buggy, as usual: it won't adjust the pointer upward.
-		// So adjust it upward a little bit ourselves: 1/4 MB to get
-		// away from the running binary image and then round up
-		// to a MB boundary.
-		p = (byte*)ROUND((uintptr)runtime·end + (1<<18), 1<<20);
-		p_size = bitmap_size + spans_size + arena_size + PageSize;
-		p = runtime·SysReserve(p, p_size, &reserved);
-		if(p == nil)
-			runtime·throw("runtime: cannot reserve arena virtual address space");
-	}
-
-	// PageSize can be larger than OS definition of page size,
-	// so SysReserve can give us a PageSize-unaligned pointer.
-	// To overcome this we ask for PageSize more and round up the pointer.
-	p1 = (byte*)ROUND((uintptr)p, PageSize);
-
-	runtime·mheap.spans = (MSpan**)p1;
-	runtime·mheap.bitmap = p1 + spans_size;
-	runtime·mheap.arena_start = p1 + spans_size + bitmap_size;
-	runtime·mheap.arena_used = runtime·mheap.arena_start;
-	runtime·mheap.arena_end = p + p_size;
-	runtime·mheap.arena_reserved = reserved;
-
-	if(((uintptr)runtime·mheap.arena_start & (PageSize-1)) != 0)
-		runtime·throw("misrounded allocation in mallocinit");
-
-	// Initialize the rest of the allocator.	
-	runtime·MHeap_Init(&runtime·mheap);
-	g->m->mcache = runtime·allocmcache();
-}
-
-void*
-runtime·MHeap_SysAlloc(MHeap *h, uintptr n)
-{
-	byte *p, *p_end;
-	uintptr p_size;
-	bool reserved;
-
-	if(n > h->arena_end - h->arena_used) {
-		// We are in 32-bit mode, maybe we didn't use all possible address space yet.
-		// Reserve some more space.
-		byte *new_end;
-
-		p_size = ROUND(n + PageSize, 256<<20);
-		new_end = h->arena_end + p_size;
-		if(new_end <= h->arena_start + MaxArena32) {
-			// TODO: It would be bad if part of the arena
-			// is reserved and part is not.
-			p = runtime·SysReserve(h->arena_end, p_size, &reserved);
-			if(p == h->arena_end) {
-				h->arena_end = new_end;
-				h->arena_reserved = reserved;
-			}
-			else if(p+p_size <= h->arena_start + MaxArena32) {
-				// Keep everything page-aligned.
-				// Our pages are bigger than hardware pages.
-				h->arena_end = p+p_size;
-				h->arena_used = p + (-(uintptr)p&(PageSize-1));
-				h->arena_reserved = reserved;
-			} else {
-				uint64 stat;
-				stat = 0;
-				runtime·SysFree(p, p_size, &stat);
-			}
-		}
-	}
-	if(n <= h->arena_end - h->arena_used) {
-		// Keep taking from our reservation.
-		p = h->arena_used;
-		runtime·SysMap(p, n, h->arena_reserved, &mstats.heap_sys);
-		h->arena_used += n;
-		runtime·MHeap_MapBits(h);
-		runtime·MHeap_MapSpans(h);
-		if(raceenabled)
-			runtime·racemapshadow(p, n);
-		
-		if(((uintptr)p & (PageSize-1)) != 0)
-			runtime·throw("misrounded allocation in MHeap_SysAlloc");
-		return p;
-	}
-	
-	// If using 64-bit, our reservation is all we have.
-	if(h->arena_end - h->arena_start >= MaxArena32)
-		return nil;
-
-	// On 32-bit, once the reservation is gone we can
-	// try to get memory at a location chosen by the OS
-	// and hope that it is in the range we allocated bitmap for.
-	p_size = ROUND(n, PageSize) + PageSize;
-	p = runtime·sysAlloc(p_size, &mstats.heap_sys);
-	if(p == nil)
-		return nil;
-
-	if(p < h->arena_start || p+p_size - h->arena_start >= MaxArena32) {
-		runtime·printf("runtime: memory allocated by OS (%p) not in usable range [%p,%p)\n",
-			p, h->arena_start, h->arena_start+MaxArena32);
-		runtime·SysFree(p, p_size, &mstats.heap_sys);
-		return nil;
-	}
-	
-	p_end = p + p_size;
-	p += -(uintptr)p & (PageSize-1);
-	if(p+n > h->arena_used) {
-		h->arena_used = p+n;
-		if(p_end > h->arena_end)
-			h->arena_end = p_end;
-		runtime·MHeap_MapBits(h);
-		runtime·MHeap_MapSpans(h);
-		if(raceenabled)
-			runtime·racemapshadow(p, n);
-	}
-	
-	if(((uintptr)p & (PageSize-1)) != 0)
-		runtime·throw("misrounded allocation in MHeap_SysAlloc");
-	return p;
-}
-
-void
-runtime·setFinalizer_m(void)
-{
-	FuncVal *fn;
-	void *arg;
-	uintptr nret;
-	Type *fint;
-	PtrType *ot;
-
-	fn = g->m->ptrarg[0];
-	arg = g->m->ptrarg[1];
-	nret = g->m->scalararg[0];
-	fint = g->m->ptrarg[2];
-	ot = g->m->ptrarg[3];
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-	g->m->ptrarg[2] = nil;
-	g->m->ptrarg[3] = nil;
-
-	g->m->scalararg[0] = runtime·addfinalizer(arg, fn, nret, fint, ot);
-}
-
-void
-runtime·removeFinalizer_m(void)
-{
-	void *p;
-
-	p = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	runtime·removefinalizer(p);
-}
-
-// mcallable cache refill
-void 
-runtime·mcacheRefill_m(void)
-{
-	runtime·MCache_Refill(g->m->mcache, (int32)g->m->scalararg[0]);
-}
-
-void
-runtime·largeAlloc_m(void)
-{
-	uintptr npages, size;
-	MSpan *s;
-	void *v;
-	int32 flag;
-
-	//runtime·printf("largeAlloc size=%D\n", g->m->scalararg[0]);
-	// Allocate directly from heap.
-	size = g->m->scalararg[0];
-	flag = (int32)g->m->scalararg[1];
-	if(size + PageSize < size)
-		runtime·throw("out of memory");
-	npages = size >> PageShift;
-	if((size & PageMask) != 0)
-		npages++;
-	s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero));
-	if(s == nil)
-		runtime·throw("out of memory");
-	s->limit = (byte*)(s->start<<PageShift) + size;
-	v = (void*)(s->start << PageShift);
-	// setup for mark sweep
-	runtime·markspan(v, 0, 0, true);
-	g->m->ptrarg[0] = s;
-}
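
For reference, the 64-bit reservation loop in mallocinit above tries addresses of the form i<<40 | 0x00c0<<32 for i = 0x00..0x7f. A tiny sketch that just prints the first few candidates; illustrative only, not part of the Go rewrite in this CL.

package main

import "fmt"

func main() {
	// First few of the 0x0000XXc000000000 arena candidates that
	// mallocinit asks SysReserve for on 64-bit systems.
	for i := uint64(0); i < 4; i++ {
		fmt.Printf("%#016x\n", i<<40|0x00c0<<32)
	}
}
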
diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index fab8cf2..f90a8f8 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -26,10 +26,11 @@
 	maxGCMask       = _MaxGCMask
 	bitsDead        = _BitsDead
 	bitsPointer     = _BitsPointer
+	bitsScalar      = _BitsScalar
 
 	mSpanInUse = _MSpanInUse
 
-	concurrentSweep = _ConcurrentSweep != 0
+	concurrentSweep = _ConcurrentSweep
 )
 
 // Page number (address>>pageShift)
@@ -54,7 +55,7 @@
 	// This function must be atomic wrt GC, but for performance reasons
 	// we don't acquirem/releasem on fast path. The code below does not have
 	// split stack checks, so it can't be preempted by GC.
-	// Functions like roundup/add are inlined. And onM/racemalloc are nosplit.
+	// Functions like roundup/add are inlined. And systemstack/racemalloc are nosplit.
 	// If debugMalloc = true, these assumptions are checked below.
 	if debugMalloc {
 		mp := acquirem()
@@ -140,10 +141,9 @@
 			s = c.alloc[tinySizeClass]
 			v := s.freelist
 			if v == nil {
-				mp := acquirem()
-				mp.scalararg[0] = tinySizeClass
-				onM(mcacheRefill_m)
-				releasem(mp)
+				systemstack(func() {
+					mCache_Refill(c, tinySizeClass)
+				})
 				s = c.alloc[tinySizeClass]
 				v = s.freelist
 			}
@@ -171,10 +171,9 @@
 			s = c.alloc[sizeclass]
 			v := s.freelist
 			if v == nil {
-				mp := acquirem()
-				mp.scalararg[0] = uintptr(sizeclass)
-				onM(mcacheRefill_m)
-				releasem(mp)
+				systemstack(func() {
+					mCache_Refill(c, int32(sizeclass))
+				})
 				s = c.alloc[sizeclass]
 				v = s.freelist
 			}
@@ -191,13 +190,10 @@
 		}
 		c.local_cachealloc += intptr(size)
 	} else {
-		mp := acquirem()
-		mp.scalararg[0] = uintptr(size)
-		mp.scalararg[1] = uintptr(flags)
-		onM(largeAlloc_m)
-		s = (*mspan)(mp.ptrarg[0])
-		mp.ptrarg[0] = nil
-		releasem(mp)
+		var s *mspan
+		systemstack(func() {
+			s = largeAlloc(size, uint32(flags))
+		})
 		x = unsafe.Pointer(uintptr(s.start << pageShift))
 		size = uintptr(s.elemsize)
 	}
@@ -251,13 +247,9 @@
 				// into the GC bitmap. It's 7 times slower than copying
 				// from the pre-unrolled mask, but saves 1/16 of type size
 				// memory for the mask.
-				mp := acquirem()
-				mp.ptrarg[0] = x
-				mp.ptrarg[1] = unsafe.Pointer(typ)
-				mp.scalararg[0] = uintptr(size)
-				mp.scalararg[1] = uintptr(size0)
-				onM(unrollgcproginplace_m)
-				releasem(mp)
+				systemstack(func() {
+					unrollgcproginplace_m(x, typ, size, size0)
+				})
 				goto marked
 			}
 			ptrmask = (*uint8)(unsafe.Pointer(uintptr(typ.gc[0])))
@@ -265,10 +257,9 @@
 			// by checking if the unroll flag byte is set
 			maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
 			if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
-				mp := acquirem()
-				mp.ptrarg[0] = unsafe.Pointer(typ)
-				onM(unrollgcprog_m)
-				releasem(mp)
+				systemstack(func() {
+					unrollgcprog_m(typ)
+				})
 			}
 			ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
 		} else {
@@ -312,10 +303,9 @@
 	// This may be racing with GC so do it atomically if there can be
 	// a race marking the bit.
 	if gcphase == _GCmarktermination {
-		mp := acquirem()
-		mp.ptrarg[0] = x
-		onM(gcmarknewobject_m)
-		releasem(mp)
+		systemstack(func() {
+			gcmarknewobject_m(uintptr(x))
+		})
 	}
 
 	if raceenabled {
@@ -377,10 +367,9 @@
 		// by checking if the unroll flag byte is set
 		maskword := uintptr(atomicloadp(unsafe.Pointer(ptrmask)))
 		if *(*uint8)(unsafe.Pointer(&maskword)) == 0 {
-			mp := acquirem()
-			mp.ptrarg[0] = unsafe.Pointer(typ)
-			onM(unrollgcprog_m)
-			releasem(mp)
+			systemstack(func() {
+				unrollgcprog_m(typ)
+			})
 		}
 		ptrmask = (*uint8)(add(unsafe.Pointer(ptrmask), 1)) // skip the unroll flag byte
 	} else {
@@ -404,7 +393,7 @@
 	if typ.kind&kindNoPointers != 0 {
 		flags |= flagNoScan
 	}
-	if int(n) < 0 || (typ.size > 0 && n > maxmem/uintptr(typ.size)) {
+	if int(n) < 0 || (typ.size > 0 && n > _MaxMem/uintptr(typ.size)) {
 		panic("runtime: allocation size out of range")
 	}
 	return mallocgc(uintptr(typ.size)*n, typ, flags)
@@ -484,19 +473,20 @@
 	mp.gcing = 1
 	releasem(mp)
 
-	onM(stoptheworld)
-	onM(finishsweep_m) // finish sweep before we start concurrent scan.
-	if false {         // To turn on concurrent scan and mark set to true...
-		onM(starttheworld)
+	systemstack(stoptheworld)
+	systemstack(finishsweep_m) // finish sweep before we start concurrent scan.
+	if false {                 // To turn on concurrent scan and mark set to true...
+		systemstack(starttheworld)
 		// Do a concurrent heap scan before we stop the world.
-		onM(gcscan_m)
-		onM(stoptheworld)
-		onM(gcinstallmarkwb_m)
-		onM(starttheworld)
-		onM(gcmark_m)
-		onM(stoptheworld)
-		onM(gcinstalloffwb_m)
+		systemstack(gcscan_m)
+		systemstack(stoptheworld)
+		systemstack(gcinstallmarkwb_m)
+		systemstack(starttheworld)
+		systemstack(gcmark_m)
+		systemstack(stoptheworld)
+		systemstack(gcinstalloffwb_m)
 	}
+
 	if mp != acquirem() {
 		gothrow("gogc: rescheduled")
 	}
@@ -512,27 +502,25 @@
 	if debug.gctrace > 1 {
 		n = 2
 	}
+	eagersweep := force >= 2
 	for i := 0; i < n; i++ {
 		if i > 0 {
 			startTime = nanotime()
 		}
 		// switch to g0, call gc, then switch back
-		mp.scalararg[0] = uintptr(uint32(startTime)) // low 32 bits
-		mp.scalararg[1] = uintptr(startTime >> 32)   // high 32 bits
-		if force >= 2 {
-			mp.scalararg[2] = 1 // eagersweep
-		} else {
-			mp.scalararg[2] = 0
-		}
-		onM(gc_m)
+		systemstack(func() {
+			gc_m(startTime, eagersweep)
+		})
 	}
 
-	onM(gccheckmark_m)
+	systemstack(func() {
+		gccheckmark_m(startTime, eagersweep)
+	})
 
 	// all done
 	mp.gcing = 0
 	semrelease(&worldsema)
-	onM(starttheworld)
+	systemstack(starttheworld)
 	releasem(mp)
 	mp = nil
 
@@ -544,11 +532,11 @@
 }
 
 func GCcheckmarkenable() {
-	onM(gccheckmarkenable_m)
+	systemstack(gccheckmarkenable_m)
 }
 
 func GCcheckmarkdisable() {
-	onM(gccheckmarkdisable_m)
+	systemstack(gccheckmarkdisable_m)
 }
 
 // GC runs a garbage collection.
@@ -652,11 +640,10 @@
 	f := (*eface)(unsafe.Pointer(&finalizer))
 	ftyp := f._type
 	if ftyp == nil {
-		// switch to M stack and remove finalizer
-		mp := acquirem()
-		mp.ptrarg[0] = e.data
-		onM(removeFinalizer_m)
-		releasem(mp)
+		// switch to system stack and remove finalizer
+		systemstack(func() {
+			removefinalizer(e.data)
+		})
 		return
 	}
 
@@ -701,18 +688,11 @@
 	// make sure we have a finalizer goroutine
 	createfing()
 
-	// switch to M stack to add finalizer record
-	mp := acquirem()
-	mp.ptrarg[0] = f.data
-	mp.ptrarg[1] = e.data
-	mp.scalararg[0] = nret
-	mp.ptrarg[2] = unsafe.Pointer(fint)
-	mp.ptrarg[3] = unsafe.Pointer(ot)
-	onM(setFinalizer_m)
-	if mp.scalararg[0] != 1 {
-		gothrow("runtime.SetFinalizer: finalizer already set")
-	}
-	releasem(mp)
+	systemstack(func() {
+		if !addfinalizer(e.data, (*funcval)(f.data), nret, fint, ot) {
+			gothrow("runtime.SetFinalizer: finalizer already set")
+		}
+	})
 }
 
 // round n up to a multiple of a.  a must be a power of 2.
diff --git a/src/runtime/malloc.h b/src/runtime/malloc.h
deleted file mode 100644
index 522b11b..0000000
--- a/src/runtime/malloc.h
+++ /dev/null
@@ -1,620 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Memory allocator, based on tcmalloc.
-// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
-
-// The main allocator works in runs of pages.
-// Small allocation sizes (up to and including 32 kB) are
-// rounded to one of about 100 size classes, each of which
-// has its own free list of objects of exactly that size.
-// Any free page of memory can be split into a set of objects
-// of one size class, which are then managed using free list
-// allocators.
-//
-// The allocator's data structures are:
-//
-//	FixAlloc: a free-list allocator for fixed-size objects,
-//		used to manage storage used by the allocator.
-//	MHeap: the malloc heap, managed at page (4096-byte) granularity.
-//	MSpan: a run of pages managed by the MHeap.
-//	MCentral: a shared free list for a given size class.
-//	MCache: a per-thread (in Go, per-P) cache for small objects.
-//	MStats: allocation statistics.
-//
-// Allocating a small object proceeds up a hierarchy of caches:
-//
-//	1. Round the size up to one of the small size classes
-//	   and look in the corresponding MCache free list.
-//	   If the list is not empty, allocate an object from it.
-//	   This can all be done without acquiring a lock.
-//
-//	2. If the MCache free list is empty, replenish it by
-//	   taking a bunch of objects from the MCentral free list.
-//	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
-//
-//	3. If the MCentral free list is empty, replenish it by
-//	   allocating a run of pages from the MHeap and then
-//	   chopping that memory into a objects of the given size.
-//	   Allocating many objects amortizes the cost of locking
-//	   the heap.
-//
-//	4. If the MHeap is empty or has no page runs large enough,
-//	   allocate a new group of pages (at least 1MB) from the
-//	   operating system.  Allocating a large run of pages
-//	   amortizes the cost of talking to the operating system.
-//
-// Freeing a small object proceeds up the same hierarchy:
-//
-//	1. Look up the size class for the object and add it to
-//	   the MCache free list.
-//
-//	2. If the MCache free list is too long or the MCache has
-//	   too much memory, return some to the MCentral free lists.
-//
-//	3. If all the objects in a given span have returned to
-//	   the MCentral list, return that span to the page heap.
-//
-//	4. If the heap has too much memory, return some to the
-//	   operating system.
-//
-//	TODO(rsc): Step 4 is not implemented.
-//
-// Allocating and freeing a large object uses the page heap
-// directly, bypassing the MCache and MCentral free lists.
-//
-// The small objects on the MCache and MCentral free lists
-// may or may not be zeroed.  They are zeroed if and only if
-// the second word of the object is zero.  A span in the
-// page heap is zeroed unless s->needzero is set. When a span
-// is allocated to break into small objects, it is zeroed if needed
-// and s->needzero is set. There are two main benefits to delaying the
-// zeroing this way:
-//
-//	1. stack frames allocated from the small object lists
-//	   or the page heap can avoid zeroing altogether.
-//	2. the cost of zeroing when reusing a small object is
-//	   charged to the mutator, not the garbage collector.
-//
-// This C code was written with an eye toward translating to Go
-// in the future.  Methods have the form Type_Method(Type *t, ...).
-
-typedef struct MCentral	MCentral;
-typedef struct MHeap	MHeap;
-typedef struct MSpan	MSpan;
-typedef struct MStats	MStats;
-typedef struct MLink	MLink;
-typedef struct GCStats	GCStats;
-typedef struct Workbuf  Workbuf;
-
-enum
-{
-	PageShift	= 13,
-	PageSize	= 1<<PageShift,
-	PageMask	= PageSize - 1,
-};
-typedef	uintptr	pageID;		// address >> PageShift
-
-enum
-{
-	// Computed constant.  The definition of MaxSmallSize and the
-	// algorithm in msize.c produce some number of different allocation
-	// size classes.  NumSizeClasses is that number.  It's needed here
-	// because there are static arrays of this length; when msize runs its
-	// size choosing algorithm it double-checks that NumSizeClasses agrees.
-	NumSizeClasses = 67,
-
-	// Tunable constants.
-	MaxSmallSize = 32<<10,
-
-	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
-	TinySize = 16,
-	TinySizeClass = 2,
-
-	FixAllocChunk = 16<<10,		// Chunk size for FixAlloc
-	MaxMHeapList = 1<<(20 - PageShift),	// Maximum page length for fixed-size list in MHeap.
-	HeapAllocChunk = 1<<20,		// Chunk size for heap growth
-
-	// Per-P, per order stack segment cache size.
-	StackCacheSize = 32*1024,
-	// Number of orders that get caching.  Order 0 is FixedStack
-	// and each successive order is twice as large.
-	NumStackOrders = 3,
-
-	// Number of bits in page to span calculations (4k pages).
-	// On Windows 64-bit we limit the arena to 32GB or 35 bits (see below for reason).
-	// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
-	// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
-#ifdef _64BIT
-#ifdef GOOS_windows
-	// Windows counts memory used by page table into committed memory
-	// of the process, so we can't reserve too much memory.
-	// See http://golang.org/issue/5402 and http://golang.org/issue/5236.
-	MHeapMap_Bits = 35 - PageShift,
-#else
-	MHeapMap_Bits = 37 - PageShift,
-#endif
-#else
-	MHeapMap_Bits = 32 - PageShift,
-#endif
-
-	// Max number of threads to run garbage collection.
-	// 2, 3, and 4 are all plausible maximums depending
-	// on the hardware details of the machine.  The garbage
-	// collector scales well to 32 cpus.
-	MaxGcproc = 32,
-};
-
-// Maximum memory allocation size, a hint for callers.
-// This must be a #define instead of an enum because it
-// is so large.
-#ifdef _64BIT
-#define	MaxMem	(1ULL<<(MHeapMap_Bits+PageShift))	/* 128 GB or 32 GB */
-#else
-#define	MaxMem	((uintptr)-1)
-#endif
-
-// A generic linked list of blocks.  (Typically the block is bigger than sizeof(MLink).)
-struct MLink
-{
-	MLink *next;
-};
-
-// sysAlloc obtains a large chunk of zeroed memory from the
-// operating system, typically on the order of a hundred kilobytes
-// or a megabyte.
-// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysAlloc.
-//
-// SysUnused notifies the operating system that the contents
-// of the memory region are no longer needed and can be reused
-// for other purposes.
-// SysUsed notifies the operating system that the contents
-// of the memory region are needed again.
-//
-// SysFree returns it unconditionally; this is only used if
-// an out-of-memory error has been detected midway through
-// an allocation.  It is okay if SysFree is a no-op.
-//
-// SysReserve reserves address space without allocating memory.
-// If the pointer passed to it is non-nil, the caller wants the
-// reservation there, but SysReserve can still choose another
-// location if that one is unavailable.  On some systems and in some
-// cases SysReserve will simply check that the address space is
-// available and not actually reserve it.  If SysReserve returns
-// non-nil, it sets *reserved to true if the address space is
-// reserved, false if it has merely been checked.
-// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
-// may use larger alignment, so the caller must be careful to realign the
-// memory obtained by sysAlloc.
-//
-// SysMap maps previously reserved address space for use.
-// The reserved argument is true if the address space was really
-// reserved, not merely checked.
-//
-// SysFault marks a (already sysAlloc'd) region to fault
-// if accessed.  Used only for debugging the runtime.
-
-void*	runtime·sysAlloc(uintptr nbytes, uint64 *stat);
-void	runtime·SysFree(void *v, uintptr nbytes, uint64 *stat);
-void	runtime·SysUnused(void *v, uintptr nbytes);
-void	runtime·SysUsed(void *v, uintptr nbytes);
-void	runtime·SysMap(void *v, uintptr nbytes, bool reserved, uint64 *stat);
-void*	runtime·SysReserve(void *v, uintptr nbytes, bool *reserved);
-void	runtime·SysFault(void *v, uintptr nbytes);
-
-// FixAlloc is a simple free-list allocator for fixed size objects.
-// Malloc uses a FixAlloc wrapped around sysAlloc to manages its
-// MCache and MSpan objects.
-//
-// Memory returned by FixAlloc_Alloc is not zeroed.
-// The caller is responsible for locking around FixAlloc calls.
-// Callers can keep state in the object but the first word is
-// smashed by freeing and reallocating.
-struct FixAlloc
-{
-	uintptr	size;
-	void	(*first)(void *arg, byte *p);	// called first time p is returned
-	void*	arg;
-	MLink*	list;
-	byte*	chunk;
-	uint32	nchunk;
-	uintptr	inuse;	// in-use bytes now
-	uint64*	stat;
-};
-
-void	runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat);
-void*	runtime·FixAlloc_Alloc(FixAlloc *f);
-void	runtime·FixAlloc_Free(FixAlloc *f, void *p);
-
-
-// Statistics.
-// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
-struct MStats
-{
-	// General statistics.
-	uint64	alloc;		// bytes allocated and still in use
-	uint64	total_alloc;	// bytes allocated (even if freed)
-	uint64	sys;		// bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
-	uint64	nlookup;	// number of pointer lookups
-	uint64	nmalloc;	// number of mallocs
-	uint64	nfree;  // number of frees
-
-	// Statistics about malloc heap.
-	// protected by mheap.lock
-	uint64	heap_alloc;	// bytes allocated and still in use
-	uint64	heap_sys;	// bytes obtained from system
-	uint64	heap_idle;	// bytes in idle spans
-	uint64	heap_inuse;	// bytes in non-idle spans
-	uint64	heap_released;	// bytes released to the OS
-	uint64	heap_objects;	// total number of allocated objects
-
-	// Statistics about allocation of low-level fixed-size structures.
-	// Protected by FixAlloc locks.
-	uint64	stacks_inuse;	// this number is included in heap_inuse above
-	uint64	stacks_sys;	// always 0 in mstats
-	uint64	mspan_inuse;	// MSpan structures
-	uint64	mspan_sys;
-	uint64	mcache_inuse;	// MCache structures
-	uint64	mcache_sys;
-	uint64	buckhash_sys;	// profiling bucket hash table
-	uint64	gc_sys;
-	uint64	other_sys;
-
-	// Statistics about garbage collector.
-	// Protected by mheap or stopping the world during GC.
-	uint64	next_gc;	// next GC (in heap_alloc time)
-	uint64  last_gc;	// last GC (in absolute time)
-	uint64	pause_total_ns;
-	uint64	pause_ns[256];  // circular buffer of recent GC pause lengths
-	uint64	pause_end[256]; // circular buffer of recent GC end times (nanoseconds since 1970)
-	uint32	numgc;
-	bool	enablegc;
-	bool	debuggc;
-
-	// Statistics about allocation size classes.
-	
-	struct MStatsBySize {
-		uint32 size;
-		uint64 nmalloc;
-		uint64 nfree;
-	} by_size[NumSizeClasses];
-	
-	uint64	tinyallocs;	// number of tiny allocations that didn't cause actual allocation; not exported to Go directly
-};
-
-
-#define mstats runtime·memstats
-extern MStats mstats;
-void	runtime·updatememstats(GCStats *stats);
-void	runtime·ReadMemStats(MStats *stats);
-
-// Size classes.  Computed and initialized by InitSizes.
-//
-// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
-//	1 <= sizeclass < NumSizeClasses, for n.
-//	Size class 0 is reserved to mean "not small".
-//
-// class_to_size[i] = largest size in class i
-// class_to_allocnpages[i] = number of pages to allocate when
-//	making new objects in class i
-
-int32	runtime·SizeToClass(int32);
-uintptr	runtime·roundupsize(uintptr);
-extern	int32	runtime·class_to_size[NumSizeClasses];
-extern	int32	runtime·class_to_allocnpages[NumSizeClasses];
-extern	int8	runtime·size_to_class8[1024/8 + 1];
-extern	int8	runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
-extern	void	runtime·InitSizes(void);
-
-typedef struct MCacheList MCacheList;
-struct MCacheList
-{
-	MLink *list;
-	uint32 nlist;
-};
-
-typedef struct StackFreeList StackFreeList;
-struct StackFreeList
-{
-	MLink *list;  // linked list of free stacks
-	uintptr size; // total size of stacks in list
-};
-
-typedef struct SudoG SudoG;
-
-// Per-thread (in Go, per-P) cache for small objects.
-// No locking needed because it is per-thread (per-P).
-struct MCache
-{
-	// The following members are accessed on every malloc,
-	// so they are grouped here for better caching.
-	int32 next_sample;		// trigger heap sample after allocating this many bytes
-	intptr local_cachealloc;	// bytes allocated (or freed) from cache since last lock of heap
-	// Allocator cache for tiny objects w/o pointers.
-	// See "Tiny allocator" comment in malloc.goc.
-	byte*	tiny;
-	uintptr	tinysize;
-	uintptr	local_tinyallocs;	// number of tiny allocs not counted in other stats
-	// The rest is not accessed on every malloc.
-	MSpan*	alloc[NumSizeClasses];	// spans to allocate from
-
-	StackFreeList stackcache[NumStackOrders];
-
-	SudoG*	sudogcache;
-
-	// Local allocator stats, flushed during GC.
-	uintptr local_nlookup;		// number of pointer lookups
-	uintptr local_largefree;	// bytes freed for large objects (>MaxSmallSize)
-	uintptr local_nlargefree;	// number of frees for large objects (>MaxSmallSize)
-	uintptr local_nsmallfree[NumSizeClasses];	// number of frees for small objects (<=MaxSmallSize)
-};
-
-MSpan*	runtime·MCache_Refill(MCache *c, int32 sizeclass);
-void	runtime·MCache_ReleaseAll(MCache *c);
-void	runtime·stackcache_clear(MCache *c);
-void	runtime·gcworkbuffree(Workbuf *b);
-
-enum
-{
-	KindSpecialFinalizer = 1,
-	KindSpecialProfile = 2,
-	// Note: The finalizer special must be first because if we're freeing
-	// an object, a finalizer special will cause the freeing operation
-	// to abort, and we want to keep the other special records around
-	// if that happens.
-};
-
-typedef struct Special Special;
-struct Special
-{
-	Special*	next;	// linked list in span
-	uint16		offset;	// span offset of object
-	byte		kind;	// kind of Special
-};
-
-// The described object has a finalizer set for it.
-typedef struct SpecialFinalizer SpecialFinalizer;
-struct SpecialFinalizer
-{
-	Special		special;
-	FuncVal*	fn;
-	uintptr		nret;
-	Type*		fint;
-	PtrType*	ot;
-};
-
-// The described object is being heap profiled.
-typedef struct Bucket Bucket; // from mprof.h
-typedef struct SpecialProfile SpecialProfile;
-struct SpecialProfile
-{
-	Special	special;
-	Bucket*	b;
-};
-
-// An MSpan is a run of pages.
-enum
-{
-	MSpanInUse = 0, // allocated for garbage collected heap
-	MSpanStack,     // allocated for use by stack allocator
-	MSpanFree,
-	MSpanListHead,
-	MSpanDead,
-};
-struct MSpan
-{
-	MSpan	*next;		// in a span linked list
-	MSpan	*prev;		// in a span linked list
-	pageID	start;		// starting page number
-	uintptr	npages;		// number of pages in span
-	MLink	*freelist;	// list of free objects
-	// sweep generation:
-	// if sweepgen == h->sweepgen - 2, the span needs sweeping
-	// if sweepgen == h->sweepgen - 1, the span is currently being swept
-	// if sweepgen == h->sweepgen, the span is swept and ready to use
-	// h->sweepgen is incremented by 2 after every GC
-	uint32	sweepgen;
-	uint16	ref;		// capacity - number of objects in freelist
-	uint8	sizeclass;	// size class
-	bool	incache;	// being used by an MCache
-	uint8	state;		// MSpanInUse etc
-	uint8	needzero;	// needs to be zeroed before allocation
-	uintptr	elemsize;	// computed from sizeclass or from npages
-	int64   unusedsince;	// First time spotted by GC in MSpanFree state
-	uintptr npreleased;	// number of pages released to the OS
-	byte	*limit;		// end of data in span
-	Mutex	specialLock;	// guards specials list
-	Special	*specials;	// linked list of special records sorted by offset.
-};
-
-void	runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages);
-void	runtime·MSpan_EnsureSwept(MSpan *span);
-bool	runtime·MSpan_Sweep(MSpan *span, bool preserve);
-
-// Every MSpan is in one doubly-linked list,
-// either one of the MHeap's free lists or one of the
-// MCentral's span lists.  We use empty MSpan structures as list heads.
-void	runtime·MSpanList_Init(MSpan *list);
-bool	runtime·MSpanList_IsEmpty(MSpan *list);
-void	runtime·MSpanList_Insert(MSpan *list, MSpan *span);
-void	runtime·MSpanList_InsertBack(MSpan *list, MSpan *span);
-void	runtime·MSpanList_Remove(MSpan *span);	// from whatever list it is in
-
-
-// Central list of free objects of a given size.
-struct MCentral
-{
-	Mutex  lock;
-	int32 sizeclass;
-	MSpan nonempty;	// list of spans with a free object
-	MSpan empty;	// list of spans with no free objects (or cached in an MCache)
-};
-
-void	runtime·MCentral_Init(MCentral *c, int32 sizeclass);
-MSpan*	runtime·MCentral_CacheSpan(MCentral *c);
-void	runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s);
-bool	runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve);
-
-// Main malloc heap.
-// The heap itself is the "free[]" and "large" arrays,
-// but all the other global data is here too.
-struct MHeap
-{
-	Mutex  lock;
-	MSpan free[MaxMHeapList];	// free lists of given length
-	MSpan freelarge;		// free lists length >= MaxMHeapList
-	MSpan busy[MaxMHeapList];	// busy lists of large objects of given length
-	MSpan busylarge;		// busy lists of large objects length >= MaxMHeapList
-	MSpan **allspans;		// all spans out there
-	MSpan **gcspans;		// copy of allspans referenced by GC marker or sweeper
-	uint32	nspan;
-	uint32	nspancap;
-	uint32	sweepgen;		// sweep generation, see comment in MSpan
-	uint32	sweepdone;		// all spans are swept
-
-	// span lookup
-	MSpan**	spans;
-	uintptr	spans_mapped;
-
-	// range of addresses we might see in the heap
-	byte *bitmap;
-	uintptr bitmap_mapped;
-	byte *arena_start;
-	byte *arena_used;
-	byte *arena_end;
-	bool arena_reserved;
-
-	// central free lists for small size classes.
-	// the padding makes sure that the MCentrals are
-	// spaced CacheLineSize bytes apart, so that each MCentral.lock
-	// gets its own cache line.
-	struct MHeapCentral {
-		MCentral mcentral;
-		byte pad[CacheLineSize];
-	} central[NumSizeClasses];
-
-	FixAlloc spanalloc;	// allocator for Span*
-	FixAlloc cachealloc;	// allocator for MCache*
-	FixAlloc specialfinalizeralloc;	// allocator for SpecialFinalizer*
-	FixAlloc specialprofilealloc;	// allocator for SpecialProfile*
-	Mutex speciallock; // lock for sepcial record allocators.
-
-	// Malloc stats.
-	uint64 largefree;	// bytes freed for large objects (>MaxSmallSize)
-	uint64 nlargefree;	// number of frees for large objects (>MaxSmallSize)
-	uint64 nsmallfree[NumSizeClasses];	// number of frees for small objects (<=MaxSmallSize)
-};
-#define runtime·mheap runtime·mheap_
-extern MHeap runtime·mheap;
-
-void	runtime·MHeap_Init(MHeap *h);
-MSpan*	runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero);
-MSpan*	runtime·MHeap_AllocStack(MHeap *h, uintptr npage);
-void	runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct);
-void	runtime·MHeap_FreeStack(MHeap *h, MSpan *s);
-MSpan*	runtime·MHeap_Lookup(MHeap *h, void *v);
-MSpan*	runtime·MHeap_LookupMaybe(MHeap *h, void *v);
-void*	runtime·MHeap_SysAlloc(MHeap *h, uintptr n);
-void	runtime·MHeap_MapBits(MHeap *h);
-void	runtime·MHeap_MapSpans(MHeap *h);
-void	runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit);
-
-void*	runtime·persistentalloc(uintptr size, uintptr align, uint64 *stat);
-int32	runtime·mlookup(void *v, byte **base, uintptr *size, MSpan **s);
-uintptr	runtime·sweepone(void);
-void	runtime·markspan(void *v, uintptr size, uintptr n, bool leftover);
-void	runtime·unmarkspan(void *v, uintptr size);
-void	runtime·purgecachedstats(MCache*);
-void	runtime·tracealloc(void*, uintptr, Type*);
-void	runtime·tracefree(void*, uintptr);
-void	runtime·tracegc(void);
-
-int32	runtime·gcpercent;
-int32	runtime·readgogc(void);
-void	runtime·clearpools(void);
-
-enum
-{
-	// flags to malloc
-	FlagNoScan	= 1<<0,	// GC doesn't have to scan object
-	FlagNoZero	= 1<<1, // don't zero memory
-};
-
-void	runtime·mProf_Malloc(void*, uintptr);
-void	runtime·mProf_Free(Bucket*, uintptr, bool);
-void	runtime·mProf_GC(void);
-void	runtime·iterate_memprof(void (**callback)(Bucket*, uintptr, uintptr*, uintptr, uintptr, uintptr));
-int32	runtime·gcprocs(void);
-void	runtime·helpgc(int32 nproc);
-void	runtime·gchelper(void);
-void	runtime·createfing(void);
-G*	runtime·wakefing(void);
-void	runtime·getgcmask(byte*, Type*, byte**, uintptr*);
-
-// NOTE: Layout known to queuefinalizer.
-typedef struct Finalizer Finalizer;
-struct Finalizer
-{
-	FuncVal *fn;	// function to call
-	void *arg;	// ptr to object
-	uintptr nret;	// bytes of return values from fn
-	Type *fint;	// type of first argument of fn
-	PtrType *ot;	// type of ptr to object
-};
-
-typedef struct FinBlock FinBlock;
-struct FinBlock
-{
-	FinBlock *alllink;
-	FinBlock *next;
-	int32 cnt;
-	int32 cap;
-	Finalizer fin[1];
-};
-extern Mutex	runtime·finlock;	// protects the following variables
-extern G*	runtime·fing;
-extern bool	runtime·fingwait;
-extern bool	runtime·fingwake;
-extern FinBlock	*runtime·finq;		// list of finalizers that are to be executed
-extern FinBlock	*runtime·finc;		// cache of free blocks
-
-void	runtime·setprofilebucket_m(void);
-
-bool	runtime·addfinalizer(void*, FuncVal *fn, uintptr, Type*, PtrType*);
-void	runtime·removefinalizer(void*);
-void	runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot);
-bool	runtime·freespecial(Special *s, void *p, uintptr size, bool freed);
-
-// Information from the compiler about the layout of stack frames.
-struct BitVector
-{
-	int32 n; // # of bits
-	uint8 *bytedata;
-};
-typedef struct StackMap StackMap;
-struct StackMap
-{
-	int32 n; // number of bitmaps
-	int32 nbit; // number of bits in each bitmap
-	uint8 bytedata[]; // bitmaps, each starting on a 32-bit boundary
-};
-// Returns pointer map data for the given stackmap index
-// (the index is encoded in PCDATA_StackMapIndex).
-BitVector	runtime·stackmapdata(StackMap *stackmap, int32 n);
-
-extern	BitVector	runtime·gcdatamask;
-extern	BitVector	runtime·gcbssmask;
-
-// defined in mgc0.go
-void	runtime·gc_m_ptr(Eface*);
-void	runtime·gc_g_ptr(Eface*);
-void	runtime·gc_itab_ptr(Eface*);
-
-void  runtime·setgcpercent_m(void);
-
-// Value we use to mark dead pointers when GODEBUG=gcdead=1.
-#define PoisonGC ((uintptr)0xf969696969696969ULL)
-#define PoisonStack ((uintptr)0x6868686868686868ULL)
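
The #ifdef ladder that chose MHeapMap_Bits in the deleted header above becomes, in malloc2.go below, arithmetic on 0/1 constants. The following is a standalone sketch of that idiom under the assumption of a 64-bit, non-Windows target; _64bit and windows are hard-coded here rather than build-derived, and the names are illustrative only.

	package main

	import "fmt"

	// Sketch of replacing a C #ifdef ladder with arithmetic on 0/1 constants.
	const (
		_64bit  = 1 // pretend 64-bit
		windows = 0 // pretend non-Windows

		pageShift = 13

		// 35 arena bits on 64-bit Windows, 37 on other 64-bit systems,
		// 32 on 32-bit systems: exactly one of the three terms is nonzero.
		heapMapTotalBits = (_64bit*windows)*35 + (_64bit*(1-windows))*37 + (1-_64bit)*32
		heapMapBits      = heapMapTotalBits - pageShift
	)

	func main() {
		fmt.Println("arena address bits:", heapMapTotalBits, "page-map bits:", heapMapBits)
	}
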
diff --git a/src/runtime/malloc1.go b/src/runtime/malloc1.go
new file mode 100644
index 0000000..db02d9c
--- /dev/null
+++ b/src/runtime/malloc1.go
@@ -0,0 +1,318 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// See malloc2.go for overview.
+//
+// TODO(rsc): double-check stats.
+
+package runtime
+
+import "unsafe"
+
+const _MaxArena32 = 2 << 30
+
+// For use by Go. If it were a C enum it would be made available automatically,
+// but the value of MaxMem is too large for enum.
+// XXX - uintptr runtime·maxmem = MaxMem;
+
+func mlookup(v uintptr, base *uintptr, size *uintptr, sp **mspan) int32 {
+	_g_ := getg()
+
+	_g_.m.mcache.local_nlookup++
+	if ptrSize == 4 && _g_.m.mcache.local_nlookup >= 1<<30 {
+		// purge cache stats to prevent overflow
+		lock(&mheap_.lock)
+		purgecachedstats(_g_.m.mcache)
+		unlock(&mheap_.lock)
+	}
+
+	s := mHeap_LookupMaybe(&mheap_, unsafe.Pointer(v))
+	if sp != nil {
+		*sp = s
+	}
+	if s == nil {
+		if base != nil {
+			*base = 0
+		}
+		if size != nil {
+			*size = 0
+		}
+		return 0
+	}
+
+	p := uintptr(s.start) << _PageShift
+	if s.sizeclass == 0 {
+		// Large object.
+		if base != nil {
+			*base = p
+		}
+		if size != nil {
+			*size = s.npages << _PageShift
+		}
+		return 1
+	}
+
+	n := s.elemsize
+	if base != nil {
+		i := (uintptr(v) - uintptr(p)) / n
+		*base = p + i*n
+	}
+	if size != nil {
+		*size = n
+	}
+
+	return 1
+}
+
+//go:nosplit
+func purgecachedstats(c *mcache) {
+	// Protected by either heap or GC lock.
+	h := &mheap_
+	memstats.heap_alloc += uint64(c.local_cachealloc)
+	c.local_cachealloc = 0
+	memstats.tinyallocs += uint64(c.local_tinyallocs)
+	c.local_tinyallocs = 0
+	memstats.nlookup += uint64(c.local_nlookup)
+	c.local_nlookup = 0
+	h.largefree += uint64(c.local_largefree)
+	c.local_largefree = 0
+	h.nlargefree += uint64(c.local_nlargefree)
+	c.local_nlargefree = 0
+	for i := 0; i < len(c.local_nsmallfree); i++ {
+		h.nsmallfree[i] += uint64(c.local_nsmallfree[i])
+		c.local_nsmallfree[i] = 0
+	}
+}
+
+func mallocinit() {
+	initSizes()
+
+	if class_to_size[_TinySizeClass] != _TinySize {
+		gothrow("bad TinySizeClass")
+	}
+
+	var p, arena_size, bitmap_size, spans_size, p_size, limit uintptr
+	var reserved bool
+
+	// limit = runtime.memlimit();
+	// See https://code.google.com/p/go/issues/detail?id=5049
+	// TODO(rsc): Fix after 1.1.
+	limit = 0
+
+	// Set up the allocation arena, a contiguous area of memory where
+	// allocated data will be found.  The arena begins with a bitmap large
+	// enough to hold 4 bits per allocated word.
+	if ptrSize == 8 && (limit == 0 || limit > 1<<30) {
+		// On a 64-bit machine, allocate from a single contiguous reservation.
+		// 128 GB (MaxMem) should be big enough for now.
+		//
+		// The code will work with the reservation at any address, but ask
+		// SysReserve to use 0x0000XXc000000000 if possible (XX=00...7f).
+		// Allocating a 128 GB region takes away 37 bits, and the amd64
+		// doesn't let us choose the top 17 bits, so that leaves the 11 bits
+		// in the middle of 0x00c0 for us to choose.  Choosing 0x00c0 means
+		// that the valid memory addresses will begin 0x00c0, 0x00c1, ..., 0x00df.
+		// In little-endian, that's c0 00, c1 00, ..., df 00. None of those are valid
+		// UTF-8 sequences, and they are otherwise as far away from
+		// ff (likely a common byte) as possible.  If that fails, we try other 0xXXc0
+		// addresses.  An earlier attempt to use 0x11f8 caused out of memory errors
+		// on OS X during thread allocations.  0x00c0 causes conflicts with
+		// AddressSanitizer which reserves all memory up to 0x0100.
+		// These choices are both for debuggability and to reduce the
+		// odds of the conservative garbage collector not collecting memory
+		// because some non-pointer block of memory had a bit pattern
+		// that matched a memory address.
+		//
+		// Actually we reserve 136 GB (because the bitmap ends up being 8 GB)
+		// but it hardly matters: e0 00 is not valid UTF-8 either.
+		//
+		// If this fails we fall back to the 32 bit memory mechanism
+		arena_size = round(_MaxMem, _PageSize)
+		bitmap_size = arena_size / (ptrSize * 8 / 4)
+		spans_size = arena_size / _PageSize * ptrSize
+		spans_size = round(spans_size, _PageSize)
+		for i := 0; i <= 0x7f; i++ {
+			p = uintptr(i)<<40 | uintptrMask&(0x00c0<<32)
+			p_size = bitmap_size + spans_size + arena_size + _PageSize
+			p = uintptr(sysReserve(unsafe.Pointer(p), p_size, &reserved))
+			if p != 0 {
+				break
+			}
+		}
+	}
+
+	if p == 0 {
+		// On a 32-bit machine, we can't typically get away
+		// with a giant virtual address space reservation.
+		// Instead we map the memory information bitmap
+		// immediately after the data segment, large enough
+		// to handle another 2GB of mappings (256 MB),
+		// along with a reservation for another 512 MB of memory.
+		// When that gets used up, we'll start asking the kernel
+		// for any memory anywhere and hope it's in the 2GB
+		// following the bitmap (presumably the executable begins
+		// near the bottom of memory, so we'll have to use up
+		// most of memory before the kernel resorts to giving out
+		// memory before the beginning of the text segment).
+		//
+		// Alternatively we could reserve 512 MB bitmap, enough
+		// for 4GB of mappings, and then accept any memory the
+		// kernel threw at us, but normally that's a waste of 512 MB
+		// of address space, which is probably too much in a 32-bit world.
+		bitmap_size = _MaxArena32 / (ptrSize * 8 / 4)
+		arena_size = 512 << 20
+		spans_size = _MaxArena32 / _PageSize * ptrSize
+		if limit > 0 && arena_size+bitmap_size+spans_size > limit {
+			bitmap_size = (limit / 9) &^ ((1 << _PageShift) - 1)
+			arena_size = bitmap_size * 8
+			spans_size = arena_size / _PageSize * ptrSize
+		}
+		spans_size = round(spans_size, _PageSize)
+
+		// SysReserve treats the address we ask for, end, as a hint,
+		// not as an absolute requirement.  If we ask for the end
+		// of the data segment but the operating system requires
+		// a little more space before we can start allocating, it will
+		// give out a slightly higher pointer.  Except QEMU, which
+		// is buggy, as usual: it won't adjust the pointer upward.
+		// So adjust it upward a little bit ourselves: 1/4 MB to get
+		// away from the running binary image and then round up
+		// to a MB boundary.
+		p = round(uintptr(unsafe.Pointer(&end))+(1<<18), 1<<20)
+		p_size = bitmap_size + spans_size + arena_size + _PageSize
+		p = uintptr(sysReserve(unsafe.Pointer(p), p_size, &reserved))
+		if p == 0 {
+			gothrow("runtime: cannot reserve arena virtual address space")
+		}
+	}
+
+	// PageSize can be larger than OS definition of page size,
+	// so SysReserve can give us a PageSize-unaligned pointer.
+	// To overcome this we ask for PageSize more and round up the pointer.
+	p1 := round(p, _PageSize)
+
+	mheap_.spans = (**mspan)(unsafe.Pointer(p1))
+	mheap_.bitmap = p1 + spans_size
+	mheap_.arena_start = p1 + (spans_size + bitmap_size)
+	mheap_.arena_used = mheap_.arena_start
+	mheap_.arena_end = p + p_size
+	mheap_.arena_reserved = reserved
+
+	if mheap_.arena_start&(_PageSize-1) != 0 {
+		println("bad pagesize", hex(p), hex(p1), hex(spans_size), hex(bitmap_size), hex(_PageSize), "start", hex(mheap_.arena_start))
+		gothrow("misrounded allocation in mallocinit")
+	}
+
+	// Initialize the rest of the allocator.
+	mHeap_Init(&mheap_, spans_size)
+	_g_ := getg()
+	_g_.m.mcache = allocmcache()
+}
+
+func mHeap_SysAlloc(h *mheap, n uintptr) unsafe.Pointer {
+	if n > uintptr(h.arena_end)-uintptr(h.arena_used) {
+		// We are in 32-bit mode; maybe we didn't use all of the possible address space yet.
+		// Reserve some more space.
+		p_size := round(n+_PageSize, 256<<20)
+		new_end := h.arena_end + p_size
+		if new_end <= h.arena_start+_MaxArena32 {
+			// TODO: It would be bad if part of the arena
+			// is reserved and part is not.
+			var reserved bool
+			p := uintptr(sysReserve((unsafe.Pointer)(h.arena_end), p_size, &reserved))
+			if p == h.arena_end {
+				h.arena_end = new_end
+				h.arena_reserved = reserved
+			} else if p+p_size <= h.arena_start+_MaxArena32 {
+				// Keep everything page-aligned.
+				// Our pages are bigger than hardware pages.
+				h.arena_end = p + p_size
+				h.arena_used = p + (-uintptr(p) & (_PageSize - 1))
+				h.arena_reserved = reserved
+			} else {
+				var stat uint64
+				sysFree((unsafe.Pointer)(p), p_size, &stat)
+			}
+		}
+	}
+
+	if n <= uintptr(h.arena_end)-uintptr(h.arena_used) {
+		// Keep taking from our reservation.
+		p := h.arena_used
+		sysMap((unsafe.Pointer)(p), n, h.arena_reserved, &memstats.heap_sys)
+		h.arena_used += n
+		mHeap_MapBits(h)
+		mHeap_MapSpans(h)
+		if raceenabled {
+			racemapshadow((unsafe.Pointer)(p), n)
+		}
+
+		if uintptr(p)&(_PageSize-1) != 0 {
+			gothrow("misrounded allocation in MHeap_SysAlloc")
+		}
+		return (unsafe.Pointer)(p)
+	}
+
+	// If using 64-bit, our reservation is all we have.
+	if uintptr(h.arena_end)-uintptr(h.arena_start) >= _MaxArena32 {
+		return nil
+	}
+
+	// On 32-bit, once the reservation is gone we can
+	// try to get memory at a location chosen by the OS
+	// and hope that it is in the range we allocated bitmap for.
+	p_size := round(n, _PageSize) + _PageSize
+	p := uintptr(sysAlloc(p_size, &memstats.heap_sys))
+	if p == 0 {
+		return nil
+	}
+
+	if p < h.arena_start || uintptr(p)+p_size-uintptr(h.arena_start) >= _MaxArena32 {
+		print("runtime: memory allocated by OS (", p, ") not in usable range [", hex(h.arena_start), ",", hex(h.arena_start+_MaxArena32), ")\n")
+		sysFree((unsafe.Pointer)(p), p_size, &memstats.heap_sys)
+		return nil
+	}
+
+	p_end := p + p_size
+	p += -p & (_PageSize - 1)
+	if uintptr(p)+n > uintptr(h.arena_used) {
+		h.arena_used = p + n
+		if p_end > h.arena_end {
+			h.arena_end = p_end
+		}
+		mHeap_MapBits(h)
+		mHeap_MapSpans(h)
+		if raceenabled {
+			racemapshadow((unsafe.Pointer)(p), n)
+		}
+	}
+
+	if uintptr(p)&(_PageSize-1) != 0 {
+		gothrow("misrounded allocation in MHeap_SysAlloc")
+	}
+	return (unsafe.Pointer)(p)
+}
+
+var end struct{}
+
+func largeAlloc(size uintptr, flag uint32) *mspan {
+	// print("largeAlloc size=", size, "\n")
+
+	if size+_PageSize < size {
+		gothrow("out of memory")
+	}
+	npages := size >> _PageShift
+	if size&_PageMask != 0 {
+		npages++
+	}
+	s := mHeap_Alloc(&mheap_, npages, 0, true, flag&_FlagNoZero == 0)
+	if s == nil {
+		gothrow("out of memory")
+	}
+	s.limit = uintptr(s.start)<<_PageShift + size
+	v := unsafe.Pointer(uintptr(s.start) << _PageShift)
+	// setup for mark sweep
+	markspan(v, 0, 0, true)
+	return s
+}
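
largeAlloc above rounds the request up to whole pages before asking the heap for a span. A small, self-contained sketch of that rounding with the 8 KB pages implied by _PageShift = 13; pagesFor is a made-up name, not a runtime function.

	package main

	import "fmt"

	const (
		pageShift = 13 // mirrors _PageShift: 8 KB pages
		pageSize  = 1 << pageShift
		pageMask  = pageSize - 1
	)

	// pagesFor mirrors the rounding in largeAlloc: count whole pages, then add
	// one more if the size is not an exact multiple of the page size.
	func pagesFor(size uintptr) uintptr {
		npages := size >> pageShift
		if size&pageMask != 0 {
			npages++
		}
		return npages
	}

	func main() {
		for _, size := range []uintptr{1, pageSize, pageSize + 1, 100 << 10} {
			fmt.Printf("size %6d bytes -> %d pages\n", size, pagesFor(size))
		}
	}
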
diff --git a/src/runtime/malloc2.go b/src/runtime/malloc2.go
new file mode 100644
index 0000000..4ac0207
--- /dev/null
+++ b/src/runtime/malloc2.go
@@ -0,0 +1,473 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Memory allocator, based on tcmalloc.
+// http://goog-perftools.sourceforge.net/doc/tcmalloc.html
+
+// The main allocator works in runs of pages.
+// Small allocation sizes (up to and including 32 kB) are
+// rounded to one of about 100 size classes, each of which
+// has its own free list of objects of exactly that size.
+// Any free page of memory can be split into a set of objects
+// of one size class, which are then managed using free list
+// allocators.
+//
+// The allocator's data structures are:
+//
+//	FixAlloc: a free-list allocator for fixed-size objects,
+//		used to manage storage used by the allocator.
+//	MHeap: the malloc heap, managed at page (8192-byte) granularity.
+//	MSpan: a run of pages managed by the MHeap.
+//	MCentral: a shared free list for a given size class.
+//	MCache: a per-thread (in Go, per-P) cache for small objects.
+//	MStats: allocation statistics.
+//
+// Allocating a small object proceeds up a hierarchy of caches:
+//
+//	1. Round the size up to one of the small size classes
+//	   and look in the corresponding MCache free list.
+//	   If the list is not empty, allocate an object from it.
+//	   This can all be done without acquiring a lock.
+//
+//	2. If the MCache free list is empty, replenish it by
+//	   taking a bunch of objects from the MCentral free list.
+//	   Moving a bunch amortizes the cost of acquiring the MCentral lock.
+//
+//	3. If the MCentral free list is empty, replenish it by
+//	   allocating a run of pages from the MHeap and then
+//	   chopping that memory into objects of the given size.
+//	   Allocating many objects amortizes the cost of locking
+//	   the heap.
+//
+//	4. If the MHeap is empty or has no page runs large enough,
+//	   allocate a new group of pages (at least 1MB) from the
+//	   operating system.  Allocating a large run of pages
+//	   amortizes the cost of talking to the operating system.
+//
+// Freeing a small object proceeds up the same hierarchy:
+//
+//	1. Look up the size class for the object and add it to
+//	   the MCache free list.
+//
+//	2. If the MCache free list is too long or the MCache has
+//	   too much memory, return some to the MCentral free lists.
+//
+//	3. If all the objects in a given span have returned to
+//	   the MCentral list, return that span to the page heap.
+//
+//	4. If the heap has too much memory, return some to the
+//	   operating system.
+//
+//	TODO(rsc): Step 4 is not implemented.
+//
+// Allocating and freeing a large object uses the page heap
+// directly, bypassing the MCache and MCentral free lists.
+//
+// The small objects on the MCache and MCentral free lists
+// may or may not be zeroed.  They are zeroed if and only if
+// the second word of the object is zero.  A span in the
+// page heap is zeroed unless s->needzero is set. When a span
+// is allocated to break into small objects, it is zeroed if needed
+// and s->needzero is set. There are two main benefits to delaying the
+// zeroing this way:
+//
+//	1. stack frames allocated from the small object lists
+//	   or the page heap can avoid zeroing altogether.
+//	2. the cost of zeroing when reusing a small object is
+//	   charged to the mutator, not the garbage collector.
+//
+// This code was originally written in C with an eye toward translating it
+// to Go.  Methods have the form Type_Method(Type *t, ...).
+
+const (
+	_PageShift = 13
+	_PageSize  = 1 << _PageShift
+	_PageMask  = _PageSize - 1
+)
+
+const (
+	// _64bit = 1 on 64-bit systems, 0 on 32-bit systems
+	_64bit = 1 << (^uintptr(0) >> 63) / 2
+
+	// Computed constant.  The definition of MaxSmallSize and the
+	// algorithm in msize.c produce some number of different allocation
+	// size classes.  NumSizeClasses is that number.  It's needed here
+	// because there are static arrays of this length; when msize runs its
+	// size choosing algorithm it double-checks that NumSizeClasses agrees.
+	_NumSizeClasses = 67
+
+	// Tunable constants.
+	_MaxSmallSize = 32 << 10
+
+	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.goc.
+	_TinySize      = 16
+	_TinySizeClass = 2
+
+	_FixAllocChunk  = 16 << 10               // Chunk size for FixAlloc
+	_MaxMHeapList   = 1 << (20 - _PageShift) // Maximum page length for fixed-size list in MHeap.
+	_HeapAllocChunk = 1 << 20                // Chunk size for heap growth
+
+	// Per-P, per order stack segment cache size.
+	_StackCacheSize = 32 * 1024
+
+	// Number of orders that get caching.  Order 0 is FixedStack
+	// and each successive order is twice as large.
+	_NumStackOrders = 3
+
+	// Number of bits in page to span calculations (8k pages).
+	// On Windows 64-bit we limit the arena to 32GB or 35 bits.
+	// Windows counts memory used by page table into committed memory
+	// of the process, so we can't reserve too much memory.
+	// See http://golang.org/issue/5402 and http://golang.org/issue/5236.
+	// On other 64-bit platforms, we limit the arena to 128GB, or 37 bits.
+	// On 32-bit, we don't bother limiting anything, so we use the full 32-bit address.
+	_MHeapMap_TotalBits = (_64bit*_Windows)*35 + (_64bit*(1-_Windows))*37 + (1-_64bit)*32
+	_MHeapMap_Bits      = _MHeapMap_TotalBits - _PageShift
+
+	_MaxMem = uintptr(1<<_MHeapMap_TotalBits - 1)
+
+	// Max number of threads to run garbage collection.
+	// 2, 3, and 4 are all plausible maximums depending
+	// on the hardware details of the machine.  The garbage
+	// collector scales well to 32 cpus.
+	_MaxGcproc = 32
+)
+
+// A generic linked list of blocks.  (Typically the block is bigger than sizeof(MLink).)
+type mlink struct {
+	next *mlink
+}
+
+// sysAlloc obtains a large chunk of zeroed memory from the
+// operating system, typically on the order of a hundred kilobytes
+// or a megabyte.
+// NOTE: sysAlloc returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysUnused notifies the operating system that the contents
+// of the memory region are no longer needed and can be reused
+// for other purposes.
+// SysUsed notifies the operating system that the contents
+// of the memory region are needed again.
+//
+// SysFree returns it unconditionally; this is only used if
+// an out-of-memory error has been detected midway through
+// an allocation.  It is okay if SysFree is a no-op.
+//
+// SysReserve reserves address space without allocating memory.
+// If the pointer passed to it is non-nil, the caller wants the
+// reservation there, but SysReserve can still choose another
+// location if that one is unavailable.  On some systems and in some
+// cases SysReserve will simply check that the address space is
+// available and not actually reserve it.  If SysReserve returns
+// non-nil, it sets *reserved to true if the address space is
+// reserved, false if it has merely been checked.
+// NOTE: SysReserve returns OS-aligned memory, but the heap allocator
+// may use larger alignment, so the caller must be careful to realign the
+// memory obtained by sysAlloc.
+//
+// SysMap maps previously reserved address space for use.
+// The reserved argument is true if the address space was really
+// reserved, not merely checked.
+//
+// SysFault marks a (already sysAlloc'd) region to fault
+// if accessed.  Used only for debugging the runtime.
+
+// FixAlloc is a simple free-list allocator for fixed size objects.
+// Malloc uses a FixAlloc wrapped around sysAlloc to manage its
+// MCache and MSpan objects.
+//
+// Memory returned by FixAlloc_Alloc is not zeroed.
+// The caller is responsible for locking around FixAlloc calls.
+// Callers can keep state in the object but the first word is
+// smashed by freeing and reallocating.
+type fixalloc struct {
+	size   uintptr
+	first  unsafe.Pointer // go func(unsafe.pointer, unsafe.pointer); f(arg, p) called first time p is returned
+	arg    unsafe.Pointer
+	list   *mlink
+	chunk  *byte
+	nchunk uint32
+	inuse  uintptr // in-use bytes now
+	stat   *uint64
+}
+
+// Statistics.
+// Shared with Go: if you edit this structure, also edit type MemStats in mem.go.
+type mstats struct {
+	// General statistics.
+	alloc       uint64 // bytes allocated and still in use
+	total_alloc uint64 // bytes allocated (even if freed)
+	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
+	nlookup     uint64 // number of pointer lookups
+	nmalloc     uint64 // number of mallocs
+	nfree       uint64 // number of frees
+
+	// Statistics about malloc heap.
+	// protected by mheap.lock
+	heap_alloc    uint64 // bytes allocated and still in use
+	heap_sys      uint64 // bytes obtained from system
+	heap_idle     uint64 // bytes in idle spans
+	heap_inuse    uint64 // bytes in non-idle spans
+	heap_released uint64 // bytes released to the os
+	heap_objects  uint64 // total number of allocated objects
+
+	// Statistics about allocation of low-level fixed-size structures.
+	// Protected by FixAlloc locks.
+	stacks_inuse uint64 // this number is included in heap_inuse above
+	stacks_sys   uint64 // always 0 in mstats
+	mspan_inuse  uint64 // mspan structures
+	mspan_sys    uint64
+	mcache_inuse uint64 // mcache structures
+	mcache_sys   uint64
+	buckhash_sys uint64 // profiling bucket hash table
+	gc_sys       uint64
+	other_sys    uint64
+
+	// Statistics about garbage collector.
+	// Protected by mheap or stopping the world during GC.
+	next_gc        uint64 // next gc (in heap_alloc time)
+	last_gc        uint64 // last gc (in absolute time)
+	pause_total_ns uint64
+	pause_ns       [256]uint64 // circular buffer of recent gc pause lengths
+	pause_end      [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
+	numgc          uint32
+	enablegc       bool
+	debuggc        bool
+
+	// Statistics about allocation size classes.
+
+	by_size [_NumSizeClasses]struct {
+		size    uint32
+		nmalloc uint64
+		nfree   uint64
+	}
+
+	tinyallocs uint64 // number of tiny allocations that didn't cause actual allocation; not exported to go directly
+}
+
+var memstats mstats
+
+// Size classes.  Computed and initialized by InitSizes.
+//
+// SizeToClass(0 <= n <= MaxSmallSize) returns the size class,
+//	1 <= sizeclass < NumSizeClasses, for n.
+//	Size class 0 is reserved to mean "not small".
+//
+// class_to_size[i] = largest size in class i
+// class_to_allocnpages[i] = number of pages to allocate when
+//	making new objects in class i
+
+var class_to_size [_NumSizeClasses]int32
+var class_to_allocnpages [_NumSizeClasses]int32
+var size_to_class8 [1024/8 + 1]int8
+var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+
+type mcachelist struct {
+	list  *mlink
+	nlist uint32
+}
+
+type stackfreelist struct {
+	list *mlink  // linked list of free stacks
+	size uintptr // total size of stacks in list
+}
+
+// Per-thread (in Go, per-P) cache for small objects.
+// No locking needed because it is per-thread (per-P).
+type mcache struct {
+	// The following members are accessed on every malloc,
+	// so they are grouped here for better caching.
+	next_sample      int32  // trigger heap sample after allocating this many bytes
+	local_cachealloc intptr // bytes allocated (or freed) from cache since last lock of heap
+	// Allocator cache for tiny objects w/o pointers.
+	// See "Tiny allocator" comment in malloc.goc.
+	tiny             *byte
+	tinysize         uintptr
+	local_tinyallocs uintptr // number of tiny allocs not counted in other stats
+
+	// The rest is not accessed on every malloc.
+	alloc [_NumSizeClasses]*mspan // spans to allocate from
+
+	stackcache [_NumStackOrders]stackfreelist
+
+	sudogcache *sudog
+
+	// Local allocator stats, flushed during GC.
+	local_nlookup    uintptr                  // number of pointer lookups
+	local_largefree  uintptr                  // bytes freed for large objects (>maxsmallsize)
+	local_nlargefree uintptr                  // number of frees for large objects (>maxsmallsize)
+	local_nsmallfree [_NumSizeClasses]uintptr // number of frees for small objects (<=maxsmallsize)
+}
+
+const (
+	_KindSpecialFinalizer = 1
+	_KindSpecialProfile   = 2
+	// Note: The finalizer special must be first because if we're freeing
+	// an object, a finalizer special will cause the freeing operation
+	// to abort, and we want to keep the other special records around
+	// if that happens.
+)
+
+type special struct {
+	next   *special // linked list in span
+	offset uint16   // span offset of object
+	kind   byte     // kind of special
+}
+
+// The described object has a finalizer set for it.
+type specialfinalizer struct {
+	special special
+	fn      *funcval
+	nret    uintptr
+	fint    *_type
+	ot      *ptrtype
+}
+
+// The described object is being heap profiled.
+type specialprofile struct {
+	special special
+	b       *bucket
+}
+
+// An MSpan is a run of pages.
+const (
+	_MSpanInUse = iota // allocated for garbage collected heap
+	_MSpanStack        // allocated for use by stack allocator
+	_MSpanFree
+	_MSpanListHead
+	_MSpanDead
+)
+
+type mspan struct {
+	next     *mspan  // in a span linked list
+	prev     *mspan  // in a span linked list
+	start    pageID  // starting page number
+	npages   uintptr // number of pages in span
+	freelist *mlink  // list of free objects
+	// sweep generation:
+	// if sweepgen == h->sweepgen - 2, the span needs sweeping
+	// if sweepgen == h->sweepgen - 1, the span is currently being swept
+	// if sweepgen == h->sweepgen, the span is swept and ready to use
+	// h->sweepgen is incremented by 2 after every GC
+	sweepgen    uint32
+	ref         uint16   // capacity - number of objects in freelist
+	sizeclass   uint8    // size class
+	incache     bool     // being used by an mcache
+	state       uint8    // mspaninuse etc
+	needzero    uint8    // needs to be zeroed before allocation
+	elemsize    uintptr  // computed from sizeclass or from npages
+	unusedsince int64    // first time spotted by gc in mspanfree state
+	npreleased  uintptr  // number of pages released to the os
+	limit       uintptr  // end of data in span
+	speciallock mutex    // guards specials list
+	specials    *special // linked list of special records sorted by offset.
+}
+
+// Every MSpan is in one doubly-linked list,
+// either one of the MHeap's free lists or one of the
+// MCentral's span lists.  We use empty MSpan structures as list heads.
+
+// Central list of free objects of a given size.
+type mcentral struct {
+	lock      mutex
+	sizeclass int32
+	nonempty  mspan // list of spans with a free object
+	empty     mspan // list of spans with no free objects (or cached in an mcache)
+}
+
+// Main malloc heap.
+// The heap itself is the "free[]" and "large" arrays,
+// but all the other global data is here too.
+type mheap struct {
+	lock      mutex
+	free      [_MaxMHeapList]mspan // free lists of given length
+	freelarge mspan                // free lists length >= _MaxMHeapList
+	busy      [_MaxMHeapList]mspan // busy lists of large objects of given length
+	busylarge mspan                // busy lists of large objects length >= _MaxMHeapList
+	allspans  **mspan              // all spans out there
+	gcspans   **mspan              // copy of allspans referenced by gc marker or sweeper
+	nspan     uint32
+	sweepgen  uint32 // sweep generation, see comment in mspan
+	sweepdone uint32 // all spans are swept
+
+	// span lookup
+	spans        **mspan
+	spans_mapped uintptr
+
+	// range of addresses we might see in the heap
+	bitmap         uintptr
+	bitmap_mapped  uintptr
+	arena_start    uintptr
+	arena_used     uintptr
+	arena_end      uintptr
+	arena_reserved bool
+
+	// central free lists for small size classes.
+	// the padding makes sure that the MCentrals are
+	// spaced CacheLineSize bytes apart, so that each MCentral.lock
+	// gets its own cache line.
+	central [_NumSizeClasses]struct {
+		mcentral mcentral
+		pad      [_CacheLineSize]byte
+	}
+
+	spanalloc             fixalloc // allocator for span*
+	cachealloc            fixalloc // allocator for mcache*
+	specialfinalizeralloc fixalloc // allocator for specialfinalizer*
+	specialprofilealloc   fixalloc // allocator for specialprofile*
+	speciallock           mutex    // lock for special record allocators.
+
+	// Malloc stats.
+	largefree  uint64                  // bytes freed for large objects (>maxsmallsize)
+	nlargefree uint64                  // number of frees for large objects (>maxsmallsize)
+	nsmallfree [_NumSizeClasses]uint64 // number of frees for small objects (<=maxsmallsize)
+}
+
+var mheap_ mheap
+
+const (
+	// flags to malloc
+	_FlagNoScan = 1 << 0 // GC doesn't have to scan object
+	_FlagNoZero = 1 << 1 // don't zero memory
+)
+
+// NOTE: Layout known to queuefinalizer.
+type finalizer struct {
+	fn   *funcval       // function to call
+	arg  unsafe.Pointer // ptr to object
+	nret uintptr        // bytes of return values from fn
+	fint *_type         // type of first argument of fn
+	ot   *ptrtype       // type of ptr to object
+}
+
+type finblock struct {
+	alllink *finblock
+	next    *finblock
+	cnt     int32
+	cap     int32
+	fin     [1]finalizer
+}
+
+// Information from the compiler about the layout of stack frames.
+type bitvector struct {
+	n        int32 // # of bits
+	bytedata *uint8
+}
+
+type stackmap struct {
+	n        int32   // number of bitmaps
+	nbit     int32   // number of bits in each bitmap
+	bytedata [0]byte // bitmaps, each starting on a 32-bit boundary
+}
+
+// Returns pointer map data for the given stackmap index
+// (the index is encoded in PCDATA_StackMapIndex).
+
+// defined in mgc0.go
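
The sweepgen comment on mspan above defines a small state machine between a span and the heap. The following standalone sketch just restates those three cases as code; sweepState is illustrative only and not part of the runtime.

	package main

	import "fmt"

	// sweepState interprets the sweepgen relationship documented on mspan:
	//   sweepgen == h.sweepgen-2: span needs sweeping
	//   sweepgen == h.sweepgen-1: span is currently being swept
	//   sweepgen == h.sweepgen:   span is swept and ready to use
	// h.sweepgen is incremented by 2 after every GC, so states never collide.
	func sweepState(spanGen, heapGen uint32) string {
		switch spanGen {
		case heapGen - 2:
			return "needs sweeping"
		case heapGen - 1:
			return "being swept"
		case heapGen:
			return "swept, ready to use"
		}
		return "invalid sweepgen"
	}

	func main() {
		heapGen := uint32(6)
		for _, g := range []uint32{4, 5, 6} {
			fmt.Printf("span sweepgen %d vs heap sweepgen %d: %s\n", g, heapGen, sweepState(g, heapGen))
		}
	}
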
diff --git a/src/runtime/mcache.c b/src/runtime/mcache.c
deleted file mode 100644
index 95ddced..0000000
--- a/src/runtime/mcache.c
+++ /dev/null
@@ -1,115 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Per-P malloc cache for small objects.
-//
-// See malloc.h for an overview.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-extern volatile intgo runtime·MemProfileRate;
-
-// dummy MSpan that contains no free objects.
-MSpan runtime·emptymspan;
-
-MCache*
-runtime·allocmcache(void)
-{
-	intgo rate;
-	MCache *c;
-	int32 i;
-
-	runtime·lock(&runtime·mheap.lock);
-	c = runtime·FixAlloc_Alloc(&runtime·mheap.cachealloc);
-	runtime·unlock(&runtime·mheap.lock);
-	runtime·memclr((byte*)c, sizeof(*c));
-	for(i = 0; i < NumSizeClasses; i++)
-		c->alloc[i] = &runtime·emptymspan;
-
-	// Set first allocation sample size.
-	rate = runtime·MemProfileRate;
-	if(rate > 0x3fffffff)	// make 2*rate not overflow
-		rate = 0x3fffffff;
-	if(rate != 0)
-		c->next_sample = runtime·fastrand1() % (2*rate);
-
-	return c;
-}
-
-// mheap.lock needs to be held to release the gcworkbuf.
-static void
-freemcache(MCache *c)
-{
-	runtime·MCache_ReleaseAll(c);
-	runtime·stackcache_clear(c);
-	runtime·lock(&runtime·mheap.lock);
-	runtime·purgecachedstats(c);
-	runtime·FixAlloc_Free(&runtime·mheap.cachealloc, c);
-	runtime·unlock(&runtime·mheap.lock);
-}
-
-static void
-freemcache_m(void)
-{
-	MCache *c;
-
-	c = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	freemcache(c);
-}
-
-void
-runtime·freemcache(MCache *c)
-{
-	void (*fn)(void);
-
-	g->m->ptrarg[0] = c;
-	fn = freemcache_m;
-	runtime·onM(&fn);
-}
-
-// Gets a span that has a free object in it and assigns it
-// to be the cached span for the given sizeclass.  Returns this span.
-MSpan*
-runtime·MCache_Refill(MCache *c, int32 sizeclass)
-{
-	MSpan *s;
-
-	g->m->locks++;
-	// Return the current cached span to the central lists.
-	s = c->alloc[sizeclass];
-	if(s->freelist != nil)
-		runtime·throw("refill on a nonempty span");
-	if(s != &runtime·emptymspan)
-		s->incache = false;
-
-	// Get a new cached span from the central lists.
-	s = runtime·MCentral_CacheSpan(&runtime·mheap.central[sizeclass].mcentral);
-	if(s == nil)
-		runtime·throw("out of memory");
-	if(s->freelist == nil) {
-		runtime·printf("%d %d\n", s->ref, (int32)((s->npages << PageShift) / s->elemsize));
-		runtime·throw("empty span");
-	}
-	c->alloc[sizeclass] = s;
-	g->m->locks--;
-	return s;
-}
-
-void
-runtime·MCache_ReleaseAll(MCache *c)
-{
-	int32 i;
-	MSpan *s;
-
-	for(i=0; i<NumSizeClasses; i++) {
-		s = c->alloc[i];
-		if(s != &runtime·emptymspan) {
-			runtime·MCentral_UncacheSpan(&runtime·mheap.central[i].mcentral, s);
-			c->alloc[i] = &runtime·emptymspan;
-		}
-	}
-}
diff --git a/src/runtime/mcache.go b/src/runtime/mcache.go
new file mode 100644
index 0000000..08b1bc3
--- /dev/null
+++ b/src/runtime/mcache.go
@@ -0,0 +1,91 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Per-P malloc cache for small objects.
+//
+// See malloc2.go for an overview.
+
+package runtime
+
+import "unsafe"
+
+// dummy MSpan that contains no free objects.
+var emptymspan mspan
+
+func allocmcache() *mcache {
+	lock(&mheap_.lock)
+	c := (*mcache)(fixAlloc_Alloc(&mheap_.cachealloc))
+	unlock(&mheap_.lock)
+	memclr(unsafe.Pointer(c), unsafe.Sizeof(*c))
+	for i := 0; i < _NumSizeClasses; i++ {
+		c.alloc[i] = &emptymspan
+	}
+
+	// Set first allocation sample size.
+	rate := MemProfileRate
+	if rate > 0x3fffffff { // make 2*rate not overflow
+		rate = 0x3fffffff
+	}
+	if rate != 0 {
+		c.next_sample = int32(int(fastrand1()) % (2 * rate))
+	}
+
+	return c
+}
+
+func freemcache(c *mcache) {
+	systemstack(func() {
+		mCache_ReleaseAll(c)
+		stackcache_clear(c)
+
+		// NOTE(rsc,rlh): If gcworkbuffree comes back, we need to coordinate
+		// with the stealing of gcworkbufs during garbage collection to avoid
+		// a race where the workbuf is double-freed.
+		// gcworkbuffree(c.gcworkbuf)
+
+		lock(&mheap_.lock)
+		purgecachedstats(c)
+		fixAlloc_Free(&mheap_.cachealloc, unsafe.Pointer(c))
+		unlock(&mheap_.lock)
+	})
+}
+
+// Gets a span that has a free object in it and assigns it
+// to be the cached span for the given sizeclass.  Returns this span.
+func mCache_Refill(c *mcache, sizeclass int32) *mspan {
+	_g_ := getg()
+
+	_g_.m.locks++
+	// Return the current cached span to the central lists.
+	s := c.alloc[sizeclass]
+	if s.freelist != nil {
+		gothrow("refill on a nonempty span")
+	}
+	if s != &emptymspan {
+		s.incache = false
+	}
+
+	// Get a new cached span from the central lists.
+	s = mCentral_CacheSpan(&mheap_.central[sizeclass].mcentral)
+	if s == nil {
+		gothrow("out of memory")
+	}
+	if s.freelist == nil {
+		println(s.ref, (s.npages<<_PageShift)/s.elemsize)
+		gothrow("empty span")
+	}
+	c.alloc[sizeclass] = s
+	_g_.m.locks--
+	return s
+}
+
+func mCache_ReleaseAll(c *mcache) {
+	for i := 0; i < _NumSizeClasses; i++ {
+		s := c.alloc[i]
+		if s != &emptymspan {
+			mCentral_UncacheSpan(&mheap_.central[i].mcentral, s)
+			c.alloc[i] = &emptymspan
+		}
+	}
+}
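
allocmcache above caps the profiling rate so that 2*rate cannot overflow before it picks the first heap-sample point. A standalone sketch of that computation, using math/rand in place of the runtime's fastrand1; nextSample is a made-up helper, and 512 KB is only an example rate, matching the default runtime.MemProfileRate.

	package main

	import (
		"fmt"
		"math/rand"
	)

	// nextSample mirrors the sampling setup in allocmcache: the rate is capped
	// so 2*rate still fits in an int32, and the first sample point is drawn
	// from [0, 2*rate) so samples average one per rate bytes of allocation.
	func nextSample(memProfileRate int) int32 {
		rate := memProfileRate
		if rate > 0x3fffffff { // make 2*rate not overflow
			rate = 0x3fffffff
		}
		if rate == 0 {
			return 0
		}
		return int32(rand.Intn(2 * rate))
	}

	func main() {
		fmt.Println("first sample after", nextSample(512*1024), "bytes")
	}
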
diff --git a/src/runtime/mcentral.c b/src/runtime/mcentral.c
deleted file mode 100644
index fe6bcfe..0000000
--- a/src/runtime/mcentral.c
+++ /dev/null
@@ -1,214 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Central free lists.
-//
-// See malloc.h for an overview.
-//
-// The MCentral doesn't actually contain the list of free objects; the MSpan does.
-// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
-// and those that are completely allocated (c->empty).
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-static MSpan* MCentral_Grow(MCentral *c);
-
-// Initialize a single central free list.
-void
-runtime·MCentral_Init(MCentral *c, int32 sizeclass)
-{
-	c->sizeclass = sizeclass;
-	runtime·MSpanList_Init(&c->nonempty);
-	runtime·MSpanList_Init(&c->empty);
-}
-
-// Allocate a span to use in an MCache.
-MSpan*
-runtime·MCentral_CacheSpan(MCentral *c)
-{
-	MSpan *s;
-	int32 cap, n;
-	uint32 sg;
-
-	runtime·lock(&c->lock);
-	sg = runtime·mheap.sweepgen;
-retry:
-	for(s = c->nonempty.next; s != &c->nonempty; s = s->next) {
-		if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-			runtime·MSpanList_Remove(s);
-			runtime·MSpanList_InsertBack(&c->empty, s);
-			runtime·unlock(&c->lock);
-			runtime·MSpan_Sweep(s, true);
-			goto havespan;
-		}
-		if(s->sweepgen == sg-1) {
-			// the span is being swept by background sweeper, skip
-			continue;
-		}
-		// we have a nonempty span that does not require sweeping, allocate from it
-		runtime·MSpanList_Remove(s);
-		runtime·MSpanList_InsertBack(&c->empty, s);
-		runtime·unlock(&c->lock);
-		goto havespan;
-	}
-
-	for(s = c->empty.next; s != &c->empty; s = s->next) {
-		if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-			// we have an empty span that requires sweeping,
-			// sweep it and see if we can free some space in it
-			runtime·MSpanList_Remove(s);
-			// swept spans are at the end of the list
-			runtime·MSpanList_InsertBack(&c->empty, s);
-			runtime·unlock(&c->lock);
-			runtime·MSpan_Sweep(s, true);
-			if(s->freelist != nil)
-				goto havespan;
-			runtime·lock(&c->lock);
-			// the span is still empty after sweep
-			// it is already in the empty list, so just retry
-			goto retry;
-		}
-		if(s->sweepgen == sg-1) {
-			// the span is being swept by background sweeper, skip
-			continue;
-		}
-		// already swept empty span,
-		// all subsequent ones must also be either swept or in process of sweeping
-		break;
-	}
-	runtime·unlock(&c->lock);
-
-	// Replenish central list if empty.
-	s = MCentral_Grow(c);
-	if(s == nil)
-		return nil;
-	runtime·lock(&c->lock);
-	runtime·MSpanList_InsertBack(&c->empty, s);
-	runtime·unlock(&c->lock);
-
-havespan:
-	// At this point s is a non-empty span, queued at the end of the empty list,
-	// c is unlocked.
-	cap = (s->npages << PageShift) / s->elemsize;
-	n = cap - s->ref;
-	if(n == 0)
-		runtime·throw("empty span");
-	if(s->freelist == nil)
-		runtime·throw("freelist empty");
-	s->incache = true;
-	return s;
-}
-
-// Return span from an MCache.
-void
-runtime·MCentral_UncacheSpan(MCentral *c, MSpan *s)
-{
-	int32 cap, n;
-
-	runtime·lock(&c->lock);
-
-	s->incache = false;
-
-	if(s->ref == 0)
-		runtime·throw("uncaching full span");
-
-	cap = (s->npages << PageShift) / s->elemsize;
-	n = cap - s->ref;
-	if(n > 0) {
-		runtime·MSpanList_Remove(s);
-		runtime·MSpanList_Insert(&c->nonempty, s);
-	}
-	runtime·unlock(&c->lock);
-}
-
-// Free n objects from a span s back into the central free list c.
-// Called during sweep.
-// Returns true if the span was returned to heap.  Sets sweepgen to
-// the latest generation.
-// If preserve=true, don't return the span to heap nor relink in MCentral lists;
-// caller takes care of it.
-bool
-runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end, bool preserve)
-{
-	bool wasempty;
-
-	if(s->incache)
-		runtime·throw("freespan into cached span");
-
-	// Add the objects back to s's free list.
-	wasempty = s->freelist == nil;
-	end->next = s->freelist;
-	s->freelist = start;
-	s->ref -= n;
-
-	if(preserve) {
-		// preserve is set only when called from MCentral_CacheSpan above,
-		// the span must be in the empty list.
-		if(s->next == nil)
-			runtime·throw("can't preserve unlinked span");
-		runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
-		return false;
-	}
-
-	runtime·lock(&c->lock);
-
-	// Move to nonempty if necessary.
-	if(wasempty) {
-		runtime·MSpanList_Remove(s);
-		runtime·MSpanList_Insert(&c->nonempty, s);
-	}
-
-	// delay updating sweepgen until here.  This is the signal that
-	// the span may be used in an MCache, so it must come after the
-	// linked list operations above (actually, just after the
-	// lock of c above.)
-	runtime·atomicstore(&s->sweepgen, runtime·mheap.sweepgen);
-
-	if(s->ref != 0) {
-		runtime·unlock(&c->lock);
-		return false;
-	}
-
-	// s is completely freed, return it to the heap.
-	runtime·MSpanList_Remove(s);
-	s->needzero = 1;
-	s->freelist = nil;
-	runtime·unlock(&c->lock);
-	runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
-	runtime·MHeap_Free(&runtime·mheap, s, 0);
-	return true;
-}
-
-// Fetch a new span from the heap and carve into objects for the free list.
-static MSpan*
-MCentral_Grow(MCentral *c)
-{
-	uintptr size, npages, i, n;
-	MLink **tailp, *v;
-	byte *p;
-	MSpan *s;
-
-	npages = runtime·class_to_allocnpages[c->sizeclass];
-	size = runtime·class_to_size[c->sizeclass];
-	n = (npages << PageShift) / size;
-	s = runtime·MHeap_Alloc(&runtime·mheap, npages, c->sizeclass, 0, 1);
-	if(s == nil)
-		return nil;
-
-	// Carve span into sequence of blocks.
-	tailp = &s->freelist;
-	p = (byte*)(s->start << PageShift);
-	s->limit = p + size*n;
-	for(i=0; i<n; i++) {
-		v = (MLink*)p;
-		*tailp = v;
-		tailp = &v->next;
-		p += size;
-	}
-	*tailp = nil;
-	runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
-	return s;
-}
diff --git a/src/runtime/mcentral.go b/src/runtime/mcentral.go
new file mode 100644
index 0000000..0d172a0
--- /dev/null
+++ b/src/runtime/mcentral.go
@@ -0,0 +1,199 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Central free lists.
+//
+// See malloc.h for an overview.
+//
+// The MCentral doesn't actually contain the list of free objects; the MSpan does.
+// Each MCentral is two lists of MSpans: those with free objects (c->nonempty)
+// and those that are completely allocated (c->empty).
+
+package runtime
+
+import "unsafe"
+
+// Initialize a single central free list.
+func mCentral_Init(c *mcentral, sizeclass int32) {
+	c.sizeclass = sizeclass
+	mSpanList_Init(&c.nonempty)
+	mSpanList_Init(&c.empty)
+}
+
+// Allocate a span to use in an MCache.
+func mCentral_CacheSpan(c *mcentral) *mspan {
+	lock(&c.lock)
+	sg := mheap_.sweepgen
+retry:
+	var s *mspan
+	for s = c.nonempty.next; s != &c.nonempty; s = s.next {
+		if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
+			mSpanList_Remove(s)
+			mSpanList_InsertBack(&c.empty, s)
+			unlock(&c.lock)
+			mSpan_Sweep(s, true)
+			goto havespan
+		}
+		if s.sweepgen == sg-1 {
+			// the span is being swept by background sweeper, skip
+			continue
+		}
+		// we have a nonempty span that does not require sweeping, allocate from it
+		mSpanList_Remove(s)
+		mSpanList_InsertBack(&c.empty, s)
+		unlock(&c.lock)
+		goto havespan
+	}
+
+	for s = c.empty.next; s != &c.empty; s = s.next {
+		if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
+			// we have an empty span that requires sweeping,
+			// sweep it and see if we can free some space in it
+			mSpanList_Remove(s)
+			// swept spans are at the end of the list
+			mSpanList_InsertBack(&c.empty, s)
+			unlock(&c.lock)
+			mSpan_Sweep(s, true)
+			if s.freelist != nil {
+				goto havespan
+			}
+			lock(&c.lock)
+			// the span is still empty after sweep
+			// it is already in the empty list, so just retry
+			goto retry
+		}
+		if s.sweepgen == sg-1 {
+			// the span is being swept by background sweeper, skip
+			continue
+		}
+		// already swept empty span,
+		// all subsequent ones must also be either swept or in process of sweeping
+		break
+	}
+	unlock(&c.lock)
+
+	// Replenish central list if empty.
+	s = mCentral_Grow(c)
+	if s == nil {
+		return nil
+	}
+	lock(&c.lock)
+	mSpanList_InsertBack(&c.empty, s)
+	unlock(&c.lock)
+
+	// At this point s is a non-empty span, queued at the end of the empty list,
+	// c is unlocked.
+havespan:
+	cap := int32((s.npages << _PageShift) / s.elemsize)
+	n := cap - int32(s.ref)
+	if n == 0 {
+		gothrow("empty span")
+	}
+	if s.freelist == nil {
+		gothrow("freelist empty")
+	}
+	s.incache = true
+	return s
+}
+
+// Return span from an MCache.
+func mCentral_UncacheSpan(c *mcentral, s *mspan) {
+	lock(&c.lock)
+
+	s.incache = false
+
+	if s.ref == 0 {
+		gothrow("uncaching full span")
+	}
+
+	cap := int32((s.npages << _PageShift) / s.elemsize)
+	n := cap - int32(s.ref)
+	if n > 0 {
+		mSpanList_Remove(s)
+		mSpanList_Insert(&c.nonempty, s)
+	}
+	unlock(&c.lock)
+}
+
+// Free n objects from a span s back into the central free list c.
+// Called during sweep.
+// Returns true if the span was returned to heap.  Sets sweepgen to
+// the latest generation.
+// If preserve=true, don't return the span to heap nor relink in MCentral lists;
+// caller takes care of it.
+func mCentral_FreeSpan(c *mcentral, s *mspan, n int32, start *mlink, end *mlink, preserve bool) bool {
+	if s.incache {
+		gothrow("freespan into cached span")
+	}
+
+	// Add the objects back to s's free list.
+	wasempty := s.freelist == nil
+	end.next = s.freelist
+	s.freelist = start
+	s.ref -= uint16(n)
+
+	if preserve {
+		// preserve is set only when called from MCentral_CacheSpan above,
+		// the span must be in the empty list.
+		if s.next == nil {
+			gothrow("can't preserve unlinked span")
+		}
+		atomicstore(&s.sweepgen, mheap_.sweepgen)
+		return false
+	}
+
+	lock(&c.lock)
+
+	// Move to nonempty if necessary.
+	if wasempty {
+		mSpanList_Remove(s)
+		mSpanList_Insert(&c.nonempty, s)
+	}
+
+	// delay updating sweepgen until here.  This is the signal that
+	// the span may be used in an MCache, so it must come after the
+	// linked list operations above (actually, just after the
+	// lock of c above.)
+	atomicstore(&s.sweepgen, mheap_.sweepgen)
+
+	if s.ref != 0 {
+		unlock(&c.lock)
+		return false
+	}
+
+	// s is completely freed, return it to the heap.
+	mSpanList_Remove(s)
+	s.needzero = 1
+	s.freelist = nil
+	unlock(&c.lock)
+	unmarkspan(uintptr(s.start)<<_PageShift, s.npages<<_PageShift)
+	mHeap_Free(&mheap_, s, 0)
+	return true
+}
+
+// Fetch a new span from the heap and carve into objects for the free list.
+func mCentral_Grow(c *mcentral) *mspan {
+	npages := uintptr(class_to_allocnpages[c.sizeclass])
+	size := uintptr(class_to_size[c.sizeclass])
+	n := (npages << _PageShift) / size
+
+	s := mHeap_Alloc(&mheap_, npages, c.sizeclass, false, true)
+	if s == nil {
+		return nil
+	}
+
+	// Carve span into sequence of blocks.
+	tailp := &s.freelist
+	p := uintptr(s.start << _PageShift)
+	s.limit = p + size*n
+	for i := uintptr(0); i < n; i++ {
+		v := (*mlink)(unsafe.Pointer(p))
+		*tailp = v
+		tailp = &v.next
+		p += size
+	}
+	*tailp = nil
+	markspan(unsafe.Pointer(uintptr(s.start)<<_PageShift), size, n, size*n < s.npages<<_PageShift)
+	return s
+}
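
mCentral_CacheSpan leans on the sweepgen protocol: a span whose sweepgen is two behind the heap's still needs sweeping, one behind means a sweep is in progress, and equal means swept, and a span is claimed for sweeping with a compare-and-swap. A minimal sketch of that state machine, using sync/atomic in ordinary Go rather than the runtime's internal cas (the span type here is a stand-in, not the runtime's mspan):

package main

import (
	"fmt"
	"sync/atomic"
)

// Sweepgen states relative to the heap's sweepgen hg:
//   s.sweepgen == hg-2  -> span needs sweeping
//   s.sweepgen == hg-1  -> span is being swept
//   s.sweepgen == hg    -> span has been swept
type span struct {
	sweepgen uint32
}

// trySweep claims the span for sweeping iff it still needs it.
func trySweep(s *span, hg uint32) bool {
	return atomic.CompareAndSwapUint32(&s.sweepgen, hg-2, hg-1)
}

func main() {
	const hg = 10
	s := &span{sweepgen: hg - 2}

	if trySweep(s, hg) {
		// ... sweep the span's free list here ...
		atomic.StoreUint32(&s.sweepgen, hg) // publish "swept" last
		fmt.Println("swept, sweepgen =", s.sweepgen)
	}
	// A second claim must fail: the span was already swept.
	fmt.Println("second claim succeeded:", trySweep(s, hg))
}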
diff --git a/src/runtime/mem.go b/src/runtime/mem.go
index e6f1eb0..1835672 100644
--- a/src/runtime/mem.go
+++ b/src/runtime/mem.go
@@ -59,7 +59,11 @@
 	}
 }
 
-var sizeof_C_MStats uintptr // filled in by malloc.goc
+// Size of the trailing by_size array differs between Go and C,
+// and all data after by_size is local to runtime, not exported.
+// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+// sizeof_C_MStats is what C thinks the size of the Go struct is.

+var sizeof_C_MStats = unsafe.Offsetof(memstats.by_size) + 61*unsafe.Sizeof(memstats.by_size[0])
 
 func init() {
 	var memStats MemStats
@@ -78,15 +82,16 @@
 	semacquire(&worldsema, false)
 	gp := getg()
 	gp.m.gcing = 1
-	onM(stoptheworld)
+	systemstack(stoptheworld)
 
-	gp.m.ptrarg[0] = noescape(unsafe.Pointer(m))
-	onM(readmemstats_m)
+	systemstack(func() {
+		readmemstats_m(m)
+	})
 
 	gp.m.gcing = 0
 	gp.m.locks++
 	semrelease(&worldsema)
-	onM(starttheworld)
+	systemstack(starttheworld)
 	gp.m.locks--
 }
 
@@ -95,14 +100,15 @@
 	semacquire(&worldsema, false)
 	gp := getg()
 	gp.m.gcing = 1
-	onM(stoptheworld)
+	systemstack(stoptheworld)
 
-	gp.m.scalararg[0] = fd
-	onM(writeheapdump_m)
+	systemstack(func() {
+		writeheapdump_m(fd)
+	})
 
 	gp.m.gcing = 0
 	gp.m.locks++
 	semrelease(&worldsema)
-	onM(starttheworld)
+	systemstack(starttheworld)
 	gp.m.locks--
 }
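
The mem.go hunk above is one instance of a pattern that recurs throughout this change: instead of smuggling arguments through m.ptrarg/m.scalararg and calling a no-argument helper via onM, the helpers now take normal parameters and are invoked through a closure passed to systemstack. A small before/after sketch of that calling convention, using a stand-in runOnSystemStack since the real systemstack switches to the g0 stack:

package main

import "fmt"

// runOnSystemStack stands in for runtime.systemstack; here it simply
// calls fn, since the point is the calling convention, not the stack
// switch.
func runOnSystemStack(fn func()) { fn() }

// Old style (as in the deleted onM code): arguments were parked in
// per-M scratch slots before calling a no-argument helper.
var scratchFD uintptr

func writeHeapDumpOld() { fmt.Println("dump to fd", scratchFD) }

// New style (as in the updated mem.go): capture the arguments in a
// closure, so the helper keeps an ordinary Go signature.
func writeHeapDumpNew(fd uintptr) { fmt.Println("dump to fd", fd) }

func main() {
	// before:
	scratchFD = 3
	runOnSystemStack(writeHeapDumpOld)

	// after:
	fd := uintptr(3)
	runOnSystemStack(func() {
		writeHeapDumpNew(fd)
	})
}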
diff --git a/src/runtime/mem_bsd.go b/src/runtime/mem_bsd.go
new file mode 100644
index 0000000..4bd40a3
--- /dev/null
+++ b/src/runtime/mem_bsd.go
@@ -0,0 +1,88 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build dragonfly freebsd netbsd openbsd solaris
+
+package runtime
+
+import "unsafe"
+
+//go:nosplit
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
+	v := unsafe.Pointer(mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+	if uintptr(v) < 4096 {
+		return nil
+	}
+	xadd64(stat, int64(n))
+	return v
+}
+
+func sysUnused(v unsafe.Pointer, n uintptr) {
+	madvise(v, n, _MADV_FREE)
+}
+
+func sysUsed(v unsafe.Pointer, n uintptr) {
+}
+
+func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
+	xadd64(stat, -int64(n))
+	munmap(v, n)
+}
+
+func sysFault(v unsafe.Pointer, n uintptr) {
+	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
+}
+
+func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
+	// On 64-bit, people with ulimit -v set complain if we reserve too
+	// much address space.  Instead, assume that the reservation is okay
+	// and check the assumption in SysMap.
+	if ptrSize == 8 && uint64(n) > 1<<32 {
+		*reserved = false
+		return v
+	}
+
+	p := unsafe.Pointer(mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+	if uintptr(p) < 4096 {
+		return nil
+	}
+	*reserved = true
+	return p
+}
+
+func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
+	const _ENOMEM = 12
+
+	xadd64(stat, int64(n))
+
+	// On 64-bit, we don't actually have v reserved, so tread carefully.
+	if !reserved {
+		flags := int32(_MAP_ANON | _MAP_PRIVATE)
+		if GOOS == "dragonfly" {
+			// TODO(jsing): For some reason DragonFly seems to return
+			// memory at a different address than we requested, even when
+			// there should be no reason for it to do so. This can be
+			// avoided by using MAP_FIXED, but I'm not sure we should need
+			// to do this - we do not on other platforms.
+			flags |= _MAP_FIXED
+		}
+		p := mmap(v, n, _PROT_READ|_PROT_WRITE, flags, -1, 0)
+		if uintptr(p) == _ENOMEM {
+			gothrow("runtime: out of memory")
+		}
+		if p != v {
+			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
+			gothrow("runtime: address space conflict")
+		}
+		return
+	}
+
+	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
+	if uintptr(p) == _ENOMEM {
+		gothrow("runtime: out of memory")
+	}
+	if p != v {
+		gothrow("runtime: cannot map pages in arena address space")
+	}
+}
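
sysReserve and sysMap cooperate on 64-bit systems: huge arena reservations are not actually made (so users running with ulimit -v are not penalized), the reserved flag records that fact, and sysMap later verifies that the eventual mapping landed where the runtime expected, reporting an address-space conflict otherwise. A simplified sketch of that contract with a mock mapper (mockOS and its methods are illustrative assumptions, not real syscalls; assumes a 64-bit platform):

package main

import (
	"errors"
	"fmt"
)

// mockOS stands in for mmap without MAP_FIXED: it honours the hint
// only if nothing is mapped there yet.
type mockOS struct {
	taken map[uintptr]bool
}

func (m *mockOS) mapAt(hint, n uintptr) uintptr {
	if m.taken[hint] {
		return hint + n // kernel chose a different address
	}
	m.taken[hint] = true
	return hint
}

// reserve mirrors sysReserve on 64-bit: huge reservations are not made;
// reserved=false records that the range is only assumed to be free.
func reserve(v, n uintptr) (addr uintptr, reserved bool) {
	if n > 1<<32 {
		return v, false
	}
	return v, true
}

// commit mirrors sysMap: if the range was never truly reserved, the
// mapping may land elsewhere, which is reported as a conflict.
func commit(m *mockOS, v, n uintptr, reserved bool) error {
	if reserved {
		m.taken[v] = true // a MAP_FIXED mapping cannot move
		return nil
	}
	if p := m.mapAt(v, n); p != v {
		return errors.New("runtime: address space conflict")
	}
	return nil
}

func main() {
	m := &mockOS{taken: map[uintptr]bool{}}
	v, reserved := reserve(1<<40, 1<<33)          // huge arena "reservation"
	fmt.Println(commit(m, v, 1<<20, reserved))    // <nil>: hint was honoured
	fmt.Println(commit(m, v, 1<<20, reserved))    // conflict: hint now taken
}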
diff --git a/src/runtime/mem_darwin.c b/src/runtime/mem_darwin.c
deleted file mode 100644
index bf3ede5..0000000
--- a/src/runtime/mem_darwin.c
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *v;
-
-	v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(v < (void*)4096)
-		return nil;
-	runtime·xadd64(stat, n);
-	return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	// Linux's MADV_DONTNEED is like BSD's MADV_FREE.
-	runtime·madvise(v, n, MADV_FREE);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	*reserved = true;
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096)
-		return nil;
-	return p;
-}
-
-enum
-{
-	ENOMEM = 12,
-};
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	USED(reserved);
-
-	runtime·xadd64(stat, n);
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_darwin.go b/src/runtime/mem_darwin.go
new file mode 100644
index 0000000..1bee933
--- /dev/null
+++ b/src/runtime/mem_darwin.go
@@ -0,0 +1,58 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//go:nosplit
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
+	v := (unsafe.Pointer)(mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+	if uintptr(v) < 4096 {
+		return nil
+	}
+	xadd64(stat, int64(n))
+	return v
+}
+
+func sysUnused(v unsafe.Pointer, n uintptr) {
+	// Linux's MADV_DONTNEED is like BSD's MADV_FREE.
+	madvise(v, n, _MADV_FREE)
+}
+
+func sysUsed(v unsafe.Pointer, n uintptr) {
+}
+
+func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
+	xadd64(stat, -int64(n))
+	munmap(v, n)
+}
+
+func sysFault(v unsafe.Pointer, n uintptr) {
+	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
+}
+
+func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
+	*reserved = true
+	p := (unsafe.Pointer)(mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0))
+	if uintptr(p) < 4096 {
+		return nil
+	}
+	return p
+}
+
+const (
+	_ENOMEM = 12
+)
+
+func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
+	xadd64(stat, int64(n))
+	p := (unsafe.Pointer)(mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0))
+	if uintptr(p) == _ENOMEM {
+		gothrow("runtime: out of memory")
+	}
+	if p != v {
+		gothrow("runtime: cannot map pages in arena address space")
+	}
+}
diff --git a/src/runtime/mem_dragonfly.c b/src/runtime/mem_dragonfly.c
deleted file mode 100644
index 11457b2..0000000
--- a/src/runtime/mem_dragonfly.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-	ENOMEM = 12,
-};
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *v;
-
-	v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(v < (void*)4096)
-		return nil;
-	runtime·xadd64(stat, n);
-	return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	runtime·madvise(v, n, MADV_FREE);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	// On 64-bit, people with ulimit -v set complain if we reserve too
-	// much address space.  Instead, assume that the reservation is okay
-	// and check the assumption in SysMap.
-	if(sizeof(void*) == 8 && n > 1LL<<32) {
-		*reserved = false;
-		return v;
-	}
-
-	*reserved = true;
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096)
-		return nil;
-	return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	runtime·xadd64(stat, n);
-
-	// On 64-bit, we don't actually have v reserved, so tread carefully.
-	if(!reserved) {
-		// TODO(jsing): For some reason DragonFly seems to return
-		// memory at a different address than we requested, even when
-		// there should be no reason for it to do so. This can be
-		// avoided by using MAP_FIXED, but I'm not sure we should need
-		// to do this - we do not on other platforms.
-		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-		if(p == (void*)ENOMEM)
-			runtime·throw("runtime: out of memory");
-		if(p != v) {
-			runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-			runtime·throw("runtime: address space conflict");
-		}
-		return;
-	}
-
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_freebsd.c b/src/runtime/mem_freebsd.c
deleted file mode 100644
index 18a9a2f..0000000
--- a/src/runtime/mem_freebsd.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-	ENOMEM = 12,
-};
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *v;
-
-	v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(v < (void*)4096)
-		return nil;
-	runtime·xadd64(stat, n);
-	return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	runtime·madvise(v, n, MADV_FREE);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	// On 64-bit, people with ulimit -v set complain if we reserve too
-	// much address space.  Instead, assume that the reservation is okay
-	// and check the assumption in SysMap.
-	if(sizeof(void*) == 8 && n > 1LL<<32) {
-		*reserved = false;
-		return v;
-	}
-
-	*reserved = true;
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096)
-		return nil;
-	return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	runtime·xadd64(stat, n);
-
-	// On 64-bit, we don't actually have v reserved, so tread carefully.
-	if(!reserved) {
-		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-		if(p == (void*)ENOMEM)
-			runtime·throw("runtime: out of memory");
-		if(p != v) {
-			runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-			runtime·throw("runtime: address space conflict");
-		}
-		return;
-	}
-
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_linux.c b/src/runtime/mem_linux.c
deleted file mode 100644
index 52e02b3..0000000
--- a/src/runtime/mem_linux.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-	_PAGE_SIZE = PhysPageSize,
-	EACCES = 13,
-};
-
-static int32
-addrspace_free(void *v, uintptr n)
-{
-	int32 errval;
-	uintptr chunk;
-	uintptr off;
-	
-	// NOTE: vec must be just 1 byte long here.
-	// Mincore returns ENOMEM if any of the pages are unmapped,
-	// but we want to know that all of the pages are unmapped.
-	// To make these the same, we can only ask about one page
-	// at a time. See golang.org/issue/7476.
-	static byte vec[1];
-
-	for(off = 0; off < n; off += chunk) {
-		chunk = _PAGE_SIZE * sizeof vec;
-		if(chunk > (n - off))
-			chunk = n - off;
-		errval = runtime·mincore((int8*)v + off, chunk, vec);
-		// ENOMEM means unmapped, which is what we want.
-		// Anything else we assume means the pages are mapped.
-		if (errval != -ENOMEM && errval != ENOMEM) {
-			return 0;
-		}
-	}
-	return 1;
-}
-
-static void *
-mmap_fixed(byte *v, uintptr n, int32 prot, int32 flags, int32 fd, uint32 offset)
-{
-	void *p;
-
-	p = runtime·mmap(v, n, prot, flags, fd, offset);
-	if(p != v) {
-		if(p > (void*)4096) {
-			runtime·munmap(p, n);
-			p = nil;
-		}
-		// On some systems, mmap ignores v without
-		// MAP_FIXED, so retry if the address space is free.
-		if(addrspace_free(v, n))
-			p = runtime·mmap(v, n, prot, flags|MAP_FIXED, fd, offset);
-	}
-	return p;
-}
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *p;
-
-	p = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096) {
-		if(p == (void*)EACCES) {
-			runtime·printf("runtime: mmap: access denied\n");
-			runtime·printf("if you're running SELinux, enable execmem for this process.\n");
-			runtime·exit(2);
-		}
-		if(p == (void*)EAGAIN) {
-			runtime·printf("runtime: mmap: too much locked memory (check 'ulimit -l').\n");
-			runtime·exit(2);
-		}
-		return nil;
-	}
-	runtime·xadd64(stat, n);
-	return p;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	runtime·madvise(v, n, MADV_DONTNEED);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	// On 64-bit, people with ulimit -v set complain if we reserve too
-	// much address space.  Instead, assume that the reservation is okay
-	// if we can reserve at least 64K and check the assumption in SysMap.
-	// Only user-mode Linux (UML) rejects these requests.
-	if(sizeof(void*) == 8 && n > 1LL<<32) {
-		p = mmap_fixed(v, 64<<10, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-		if (p != v) {
-			if(p >= (void*)4096)
-				runtime·munmap(p, 64<<10);
-			return nil;
-		}
-		runtime·munmap(p, 64<<10);
-		*reserved = false;
-		return v;
-	}
-
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if((uintptr)p < 4096)
-		return nil;
-	*reserved = true;
-	return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	runtime·xadd64(stat, n);
-
-	// On 64-bit, we don't actually have v reserved, so tread carefully.
-	if(!reserved) {
-		p = mmap_fixed(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-		if(p == (void*)ENOMEM)
-			runtime·throw("runtime: out of memory");
-		if(p != v) {
-			runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-			runtime·throw("runtime: address space conflict");
-		}
-		return;
-	}
-
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
new file mode 100644
index 0000000..85b55ef
--- /dev/null
+++ b/src/runtime/mem_linux.go
@@ -0,0 +1,135 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+	_PAGE_SIZE = _PhysPageSize
+	_EACCES    = 13
+)
+
+// NOTE: vec must be just 1 byte long here.
+// Mincore returns ENOMEM if any of the pages are unmapped,
+// but we want to know that all of the pages are unmapped.
+// To make these the same, we can only ask about one page
+// at a time. See golang.org/issue/7476.
+var addrspace_vec [1]byte
+
+func addrspace_free(v unsafe.Pointer, n uintptr) bool {
+	var chunk uintptr
+	for off := uintptr(0); off < n; off += chunk {
+		chunk = _PAGE_SIZE * uintptr(len(addrspace_vec))
+		if chunk > (n - off) {
+			chunk = n - off
+		}
+		errval := mincore(unsafe.Pointer(uintptr(v)+off), chunk, &addrspace_vec[0])
+		// ENOMEM means unmapped, which is what we want.
+		// Anything else we assume means the pages are mapped.
+		if errval != -_ENOMEM {
+			return false
+		}
+	}
+	return true
+}
+
+func mmap_fixed(v unsafe.Pointer, n uintptr, prot, flags, fd int32, offset uint32) unsafe.Pointer {
+	p := mmap(v, n, prot, flags, fd, offset)
+	// On some systems, mmap ignores v without
+	// MAP_FIXED, so retry if the address space is free.
+	if p != v && addrspace_free(v, n) {
+		if uintptr(p) > 4096 {
+			munmap(p, n)
+		}
+		p = mmap(v, n, prot, flags|_MAP_FIXED, fd, offset)
+	}
+	return p
+}
+
+//go:nosplit
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
+	p := mmap(nil, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+	if uintptr(p) < 4096 {
+		if uintptr(p) == _EACCES {
+			print("runtime: mmap: access denied\n")
+			print("if you're running SELinux, enable execmem for this process.\n")
+			exit(2)
+		}
+		if uintptr(p) == _EAGAIN {
+			print("runtime: mmap: too much locked memory (check 'ulimit -l').\n")
+			exit(2)
+		}
+		return nil
+	}
+	xadd64(stat, int64(n))
+	return p
+}
+
+func sysUnused(v unsafe.Pointer, n uintptr) {
+	madvise(v, n, _MADV_DONTNEED)
+}
+
+func sysUsed(v unsafe.Pointer, n uintptr) {
+}
+
+func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
+	xadd64(stat, -int64(n))
+	munmap(v, n)
+}
+
+func sysFault(v unsafe.Pointer, n uintptr) {
+	mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE|_MAP_FIXED, -1, 0)
+}
+
+func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
+	// On 64-bit, people with ulimit -v set complain if we reserve too
+	// much address space.  Instead, assume that the reservation is okay
+	// if we can reserve at least 64K and check the assumption in SysMap.
+	// Only user-mode Linux (UML) rejects these requests.
+	if ptrSize == 8 && uint64(n) > 1<<32 {
+		p := mmap_fixed(v, 64<<10, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+		if p != v {
+			if uintptr(p) >= 4096 {
+				munmap(p, 64<<10)
+			}
+			return nil
+		}
+		munmap(p, 64<<10)
+		*reserved = false
+		return v
+	}
+
+	p := mmap(v, n, _PROT_NONE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+	if uintptr(p) < 4096 {
+		return nil
+	}
+	*reserved = true
+	return p
+}
+
+func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
+	xadd64(stat, int64(n))
+
+	// On 64-bit, we don't actually have v reserved, so tread carefully.
+	if !reserved {
+		p := mmap_fixed(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_PRIVATE, -1, 0)
+		if uintptr(p) == _ENOMEM {
+			gothrow("runtime: out of memory")
+		}
+		if p != v {
+			print("runtime: address space conflict: map(", v, ") = ", p, "\n")
+			gothrow("runtime: address space conflict")
+		}
+		return
+	}
+
+	p := mmap(v, n, _PROT_READ|_PROT_WRITE, _MAP_ANON|_MAP_FIXED|_MAP_PRIVATE, -1, 0)
+	if uintptr(p) == _ENOMEM {
+		gothrow("runtime: out of memory")
+	}
+	if p != v {
+		gothrow("runtime: cannot map pages in arena address space")
+	}
+}
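
addrspace_free above probes the target range one page per mincore call, because mincore reports ENOMEM if any page in a query is unmapped while the runtime wants to know that all of them are. A pure-Go sketch of that probing loop with a mock mincore (mockMincore and the mapped set are stand-ins for the real system call):

package main

import "fmt"

const pageSize = 4096

// mockMincore stands in for the mincore system call: for a one-page
// query it reports whether that page is already mapped. Querying one
// page at a time is what lets addrspace_free treat "any ENOMEM" as
// "fully unmapped" (see golang.org/issue/7476).
func mockMincore(addr, length uintptr, mapped map[uintptr]bool) bool {
	return mapped[addr/pageSize*pageSize]
}

// addrspaceFree mirrors the probing loop above: the range is free only
// if every page in it is unmapped.
func addrspaceFree(v, n uintptr, mapped map[uintptr]bool) bool {
	for off := uintptr(0); off < n; off += pageSize {
		if mockMincore(v+off, pageSize, mapped) {
			return false
		}
	}
	return true
}

func main() {
	mapped := map[uintptr]bool{0x30000: true}
	fmt.Println(addrspaceFree(0x10000, 2*pageSize, mapped)) // true: nothing mapped there
	fmt.Println(addrspaceFree(0x2f000, 3*pageSize, mapped)) // false: overlaps a mapped page
}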
diff --git a/src/runtime/mem_netbsd.c b/src/runtime/mem_netbsd.c
deleted file mode 100644
index 31820e5..0000000
--- a/src/runtime/mem_netbsd.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-	ENOMEM = 12,
-};
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *v;
-
-	v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(v < (void*)4096)
-		return nil;
-	runtime·xadd64(stat, n);
-	return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	runtime·madvise(v, n, MADV_FREE);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	// On 64-bit, people with ulimit -v set complain if we reserve too
-	// much address space.  Instead, assume that the reservation is okay
-	// and check the assumption in SysMap.
-	if(sizeof(void*) == 8 && n > 1LL<<32) {
-		*reserved = false;
-		return v;
-	}
-
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096)
-		return nil;
-	*reserved = true;
-	return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	runtime·xadd64(stat, n);
-
-	// On 64-bit, we don't actually have v reserved, so tread carefully.
-	if(!reserved) {
-		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-		if(p == (void*)ENOMEM)
-			runtime·throw("runtime: out of memory");
-		if(p != v) {
-			runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-			runtime·throw("runtime: address space conflict");
-		}
-		return;
-	}
-
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_openbsd.c b/src/runtime/mem_openbsd.c
deleted file mode 100644
index 31820e5..0000000
--- a/src/runtime/mem_openbsd.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-	ENOMEM = 12,
-};
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *v;
-
-	v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(v < (void*)4096)
-		return nil;
-	runtime·xadd64(stat, n);
-	return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	runtime·madvise(v, n, MADV_FREE);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	// On 64-bit, people with ulimit -v set complain if we reserve too
-	// much address space.  Instead, assume that the reservation is okay
-	// and check the assumption in SysMap.
-	if(sizeof(void*) == 8 && n > 1LL<<32) {
-		*reserved = false;
-		return v;
-	}
-
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096)
-		return nil;
-	*reserved = true;
-	return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	runtime·xadd64(stat, n);
-
-	// On 64-bit, we don't actually have v reserved, so tread carefully.
-	if(!reserved) {
-		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-		if(p == (void*)ENOMEM)
-			runtime·throw("runtime: out of memory");
-		if(p != v) {
-			runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-			runtime·throw("runtime: address space conflict");
-		}
-		return;
-	}
-
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_solaris.c b/src/runtime/mem_solaris.c
deleted file mode 100644
index 8e90ba1..0000000
--- a/src/runtime/mem_solaris.c
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum
-{
-	ENOMEM = 12,
-};
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	void *v;
-
-	v = runtime·mmap(nil, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(v < (void*)4096)
-		return nil;
-	runtime·xadd64(stat, n);
-	return v;
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	USED(v);
-	USED(n);
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, -(uint64)n);
-	runtime·munmap(v, n);
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	void *p;
-
-	// On 64-bit, people with ulimit -v set complain if we reserve too
-	// much address space.  Instead, assume that the reservation is okay
-	// and check the assumption in SysMap.
-	if(sizeof(void*) == 8 && n > 1LL<<32) {
-		*reserved = false;
-		return v;
-	}
-	
-	p = runtime·mmap(v, n, PROT_NONE, MAP_ANON|MAP_PRIVATE, -1, 0);
-	if(p < (void*)4096)
-		return nil;
-	*reserved = true;
-	return p;
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-	
-	runtime·xadd64(stat, n);
-
-	// On 64-bit, we don't actually have v reserved, so tread carefully.
-	if(!reserved) {
-		p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
-		if(p == (void*)ENOMEM)
-			runtime·throw("runtime: out of memory");
-		if(p != v) {
-			runtime·printf("runtime: address space conflict: map(%p) = %p\n", v, p);
-			runtime·throw("runtime: address space conflict");
-		}
-		return;
-	}
-
-	p = runtime·mmap(v, n, PROT_READ|PROT_WRITE, MAP_ANON|MAP_FIXED|MAP_PRIVATE, -1, 0);
-	if(p == (void*)ENOMEM)
-		runtime·throw("runtime: out of memory");
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_windows.c b/src/runtime/mem_windows.c
deleted file mode 100644
index 6ea9920..0000000
--- a/src/runtime/mem_windows.c
+++ /dev/null
@@ -1,132 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "os_GOOS.h"
-#include "defs_GOOS_GOARCH.h"
-#include "malloc.h"
-#include "textflag.h"
-
-enum {
-	MEM_COMMIT = 0x1000,
-	MEM_RESERVE = 0x2000,
-	MEM_DECOMMIT = 0x4000,
-	MEM_RELEASE = 0x8000,
-	
-	PAGE_READWRITE = 0x0004,
-	PAGE_NOACCESS = 0x0001,
-};
-
-#pragma dynimport runtime·VirtualAlloc VirtualAlloc "kernel32.dll"
-#pragma dynimport runtime·VirtualFree VirtualFree "kernel32.dll"
-#pragma dynimport runtime·VirtualProtect VirtualProtect "kernel32.dll"
-extern void *runtime·VirtualAlloc;
-extern void *runtime·VirtualFree;
-extern void *runtime·VirtualProtect;
-
-#pragma textflag NOSPLIT
-void*
-runtime·sysAlloc(uintptr n, uint64 *stat)
-{
-	runtime·xadd64(stat, n);
-	return runtime·stdcall4(runtime·VirtualAlloc, 0, n, MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
-}
-
-void
-runtime·SysUnused(void *v, uintptr n)
-{
-	void *r;
-	uintptr small;
-
-	r = runtime·stdcall3(runtime·VirtualFree, (uintptr)v, n, MEM_DECOMMIT);
-	if(r != nil)
-		return;
-
-	// Decommit failed. Usual reason is that we've merged memory from two different
-	// VirtualAlloc calls, and Windows will only let each VirtualFree handle pages from
-	// a single VirtualAlloc. It is okay to specify a subset of the pages from a single alloc,
-	// just not pages from multiple allocs. This is a rare case, arising only when we're
-	// trying to give memory back to the operating system, which happens on a time
-	// scale of minutes. It doesn't have to be terribly fast. Instead of extra bookkeeping
-	// on all our VirtualAlloc calls, try freeing successively smaller pieces until
-	// we manage to free something, and then repeat. This ends up being O(n log n)
-	// in the worst case, but that's fast enough.
-	while(n > 0) {
-		small = n;
-		while(small >= 4096 && runtime·stdcall3(runtime·VirtualFree, (uintptr)v, small, MEM_DECOMMIT) == nil)
-			small = (small / 2) & ~(4096-1);
-		if(small < 4096)
-			runtime·throw("runtime: failed to decommit pages");
-		v = (byte*)v + small;
-		n -= small;
-	}
-}
-
-void
-runtime·SysUsed(void *v, uintptr n)
-{
-	void *r;
-	uintptr small;
-
-	r = runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, n, MEM_COMMIT, PAGE_READWRITE);
-	if(r != v)
-		runtime·throw("runtime: failed to commit pages");
-
-	// Commit failed. See SysUnused.
-	while(n > 0) {
-		small = n;
-		while(small >= 4096 && runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, small, MEM_COMMIT, PAGE_READWRITE) == nil)
-			small = (small / 2) & ~(4096-1);
-		if(small < 4096)
-			runtime·throw("runtime: failed to decommit pages");
-		v = (byte*)v + small;
-		n -= small;
-	}
-}
-
-void
-runtime·SysFree(void *v, uintptr n, uint64 *stat)
-{
-	uintptr r;
-
-	runtime·xadd64(stat, -(uint64)n);
-	r = (uintptr)runtime·stdcall3(runtime·VirtualFree, (uintptr)v, 0, MEM_RELEASE);
-	if(r == 0)
-		runtime·throw("runtime: failed to release pages");
-}
-
-void
-runtime·SysFault(void *v, uintptr n)
-{
-	// SysUnused makes the memory inaccessible and prevents its reuse
-	runtime·SysUnused(v, n);
-}
-
-void*
-runtime·SysReserve(void *v, uintptr n, bool *reserved)
-{
-	*reserved = true;
-	// v is just a hint.
-	// First try at v.
-	v = runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, n, MEM_RESERVE, PAGE_READWRITE);
-	if(v != nil)
-		return v;
-	
-	// Next let the kernel choose the address.
-	return runtime·stdcall4(runtime·VirtualAlloc, 0, n, MEM_RESERVE, PAGE_READWRITE);
-}
-
-void
-runtime·SysMap(void *v, uintptr n, bool reserved, uint64 *stat)
-{
-	void *p;
-
-	USED(reserved);
-
-	runtime·xadd64(stat, n);
-	p = runtime·stdcall4(runtime·VirtualAlloc, (uintptr)v, n, MEM_COMMIT, PAGE_READWRITE);
-	if(p != v)
-		runtime·throw("runtime: cannot map pages in arena address space");
-}
diff --git a/src/runtime/mem_windows.go b/src/runtime/mem_windows.go
new file mode 100644
index 0000000..a1dcad0
--- /dev/null
+++ b/src/runtime/mem_windows.go
@@ -0,0 +1,119 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"unsafe"
+)
+
+const (
+	_MEM_COMMIT   = 0x1000
+	_MEM_RESERVE  = 0x2000
+	_MEM_DECOMMIT = 0x4000
+	_MEM_RELEASE  = 0x8000
+
+	_PAGE_READWRITE = 0x0004
+	_PAGE_NOACCESS  = 0x0001
+)
+
+//go:cgo_import_dynamic runtime._VirtualAlloc VirtualAlloc "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualFree VirtualFree "kernel32.dll"
+//go:cgo_import_dynamic runtime._VirtualProtect VirtualProtect "kernel32.dll"
+
+var (
+	_VirtualAlloc,
+	_VirtualFree,
+	_VirtualProtect stdFunction
+)
+
+//go:nosplit
+func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer {
+	xadd64(stat, int64(n))
+	return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_COMMIT|_MEM_RESERVE, _PAGE_READWRITE))
+}
+
+func sysUnused(v unsafe.Pointer, n uintptr) {
+	r := stdcall3(_VirtualFree, uintptr(v), n, _MEM_DECOMMIT)
+	if r != 0 {
+		return
+	}
+
+	// Decommit failed. Usual reason is that we've merged memory from two different
+	// VirtualAlloc calls, and Windows will only let each VirtualFree handle pages from
+	// a single VirtualAlloc. It is okay to specify a subset of the pages from a single alloc,
+	// just not pages from multiple allocs. This is a rare case, arising only when we're
+	// trying to give memory back to the operating system, which happens on a time
+	// scale of minutes. It doesn't have to be terribly fast. Instead of extra bookkeeping
+	// on all our VirtualAlloc calls, try freeing successively smaller pieces until
+	// we manage to free something, and then repeat. This ends up being O(n log n)
+	// in the worst case, but that's fast enough.
+	for n > 0 {
+		small := n
+		for small >= 4096 && stdcall3(_VirtualFree, uintptr(v), small, _MEM_DECOMMIT) == 0 {
+			small /= 2
+			small &^= 4096 - 1
+		}
+		if small < 4096 {
+			gothrow("runtime: failed to decommit pages")
+		}
+		v = add(v, small)
+		n -= small
+	}
+}
+
+func sysUsed(v unsafe.Pointer, n uintptr) {
+	r := stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_COMMIT, _PAGE_READWRITE)
+	if r != uintptr(v) {
+		gothrow("runtime: failed to commit pages")
+	}
+
+	// Commit failed. See SysUnused.
+	for n > 0 {
+		small := n
+		for small >= 4096 && stdcall4(_VirtualAlloc, uintptr(v), small, _MEM_COMMIT, _PAGE_READWRITE) == 0 {
+			small /= 2
+			small &^= 4096 - 1
+		}
+		if small < 4096 {
+			gothrow("runtime: failed to decommit pages")
+		}
+		v = add(v, small)
+		n -= small
+	}
+}
+
+func sysFree(v unsafe.Pointer, n uintptr, stat *uint64) {
+	xadd64(stat, -int64(n))
+	r := stdcall3(_VirtualFree, uintptr(v), 0, _MEM_RELEASE)
+	if r == 0 {
+		gothrow("runtime: failed to release pages")
+	}
+}
+
+func sysFault(v unsafe.Pointer, n uintptr) {
+	// SysUnused makes the memory inaccessible and prevents its reuse
+	sysUnused(v, n)
+}
+
+func sysReserve(v unsafe.Pointer, n uintptr, reserved *bool) unsafe.Pointer {
+	*reserved = true
+	// v is just a hint.
+	// First try at v.
+	v = unsafe.Pointer(stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_RESERVE, _PAGE_READWRITE))
+	if v != nil {
+		return v
+	}
+
+	// Next let the kernel choose the address.
+	return unsafe.Pointer(stdcall4(_VirtualAlloc, 0, n, _MEM_RESERVE, _PAGE_READWRITE))
+}
+
+func sysMap(v unsafe.Pointer, n uintptr, reserved bool, stat *uint64) {
+	xadd64(stat, int64(n))
+	p := stdcall4(_VirtualAlloc, uintptr(v), n, _MEM_COMMIT, _PAGE_READWRITE)
+	if p != uintptr(v) {
+		gothrow("runtime: cannot map pages in arena address space")
+	}
+}
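
sysUnused's fallback loop deserves a closer look: when a single VirtualFree cannot decommit the whole range (because the range was merged from several VirtualAlloc calls), it frees successively smaller page-aligned pieces until one succeeds and then advances past it, which is O(n log n) in the worst case but only runs on the slow give-memory-back path. A standalone sketch with a mock decommit that fails across a reservation boundary (the two-reservation model is an assumption made purely for illustration):

package main

import "fmt"

const pageSize = 4096

// decommitRegions models the constraint described above: a single
// decommit call may only cover pages from one reservation, so a
// request spanning the boundary fails.
type decommitRegions struct {
	boundary uintptr // start of the second reservation
}

func (d *decommitRegions) decommit(v, n uintptr) bool {
	return v+n <= d.boundary || v >= d.boundary
}

// decommitAll mirrors the fallback loop in sysUnused: halve the request
// (keeping it page-aligned) until something succeeds, then continue
// from the freed boundary.
func decommitAll(d *decommitRegions, v, n uintptr) {
	for n > 0 {
		small := n
		for small >= pageSize && !d.decommit(v, small) {
			small /= 2
			small &^= pageSize - 1
		}
		if small < pageSize {
			panic("failed to decommit pages")
		}
		fmt.Printf("decommitted %#x..%#x\n", v, v+small)
		v += small
		n -= small
	}
}

func main() {
	// Two adjacent 64 KiB reservations that were merged into one span.
	d := &decommitRegions{boundary: 64 << 10}
	decommitAll(d, 0, 128<<10)
}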
diff --git a/src/runtime/mfixalloc.c b/src/runtime/mfixalloc.c
deleted file mode 100644
index d670629..0000000
--- a/src/runtime/mfixalloc.c
+++ /dev/null
@@ -1,64 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Fixed-size object allocator.  Returned memory is not zeroed.
-//
-// See malloc.h for overview.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-// Initialize f to allocate objects of the given size,
-// using the allocator to obtain chunks of memory.
-void
-runtime·FixAlloc_Init(FixAlloc *f, uintptr size, void (*first)(void*, byte*), void *arg, uint64 *stat)
-{
-	f->size = size;
-	f->first = first;
-	f->arg = arg;
-	f->list = nil;
-	f->chunk = nil;
-	f->nchunk = 0;
-	f->inuse = 0;
-	f->stat = stat;
-}
-
-void*
-runtime·FixAlloc_Alloc(FixAlloc *f)
-{
-	void *v;
-	
-	if(f->size == 0) {
-		runtime·printf("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n");
-		runtime·throw("runtime: internal error");
-	}
-
-	if(f->list) {
-		v = f->list;
-		f->list = *(void**)f->list;
-		f->inuse += f->size;
-		return v;
-	}
-	if(f->nchunk < f->size) {
-		f->chunk = runtime·persistentalloc(FixAllocChunk, 0, f->stat);
-		f->nchunk = FixAllocChunk;
-	}
-	v = f->chunk;
-	if(f->first)
-		f->first(f->arg, v);
-	f->chunk += f->size;
-	f->nchunk -= f->size;
-	f->inuse += f->size;
-	return v;
-}
-
-void
-runtime·FixAlloc_Free(FixAlloc *f, void *p)
-{
-	f->inuse -= f->size;
-	*(void**)p = f->list;
-	f->list = p;
-}
-
diff --git a/src/runtime/mfixalloc.go b/src/runtime/mfixalloc.go
new file mode 100644
index 0000000..b66a17e
--- /dev/null
+++ b/src/runtime/mfixalloc.go
@@ -0,0 +1,59 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Fixed-size object allocator.  Returned memory is not zeroed.
+//
+// See malloc.h for overview.
+
+package runtime
+
+import "unsafe"
+
+// Initialize f to allocate objects of the given size,
+// using the allocator to obtain chunks of memory.
+func fixAlloc_Init(f *fixalloc, size uintptr, first func(unsafe.Pointer, unsafe.Pointer), arg unsafe.Pointer, stat *uint64) {
+	f.size = size
+	f.first = *(*unsafe.Pointer)(unsafe.Pointer(&first))
+	f.arg = arg
+	f.list = nil
+	f.chunk = nil
+	f.nchunk = 0
+	f.inuse = 0
+	f.stat = stat
+}
+
+func fixAlloc_Alloc(f *fixalloc) unsafe.Pointer {
+	if f.size == 0 {
+		print("runtime: use of FixAlloc_Alloc before FixAlloc_Init\n")
+		gothrow("runtime: internal error")
+	}
+
+	if f.list != nil {
+		v := unsafe.Pointer(f.list)
+		f.list = f.list.next
+		f.inuse += f.size
+		return v
+	}
+	if uintptr(f.nchunk) < f.size {
+		f.chunk = (*uint8)(persistentalloc(_FixAllocChunk, 0, f.stat))
+		f.nchunk = _FixAllocChunk
+	}
+
+	v := (unsafe.Pointer)(f.chunk)
+	if f.first != nil {
+		fn := *(*func(unsafe.Pointer, unsafe.Pointer))(unsafe.Pointer(&f.first))
+		fn(f.arg, v)
+	}
+	f.chunk = (*byte)(add(unsafe.Pointer(f.chunk), f.size))
+	f.nchunk -= uint32(f.size)
+	f.inuse += f.size
+	return v
+}
+
+func fixAlloc_Free(f *fixalloc, p unsafe.Pointer) {
+	f.inuse -= f.size
+	v := (*mlink)(p)
+	v.next = f.list
+	f.list = v
+}
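
fixalloc is the classic chunk-plus-free-list allocator: objects of one fixed size are carved out of large chunks, and freed objects are threaded onto an intrusive free list for reuse. A safe-Go analogue of the same recipe, using slot indices instead of raw pointers (fixedPool is illustrative, not the runtime type):

package main

import "fmt"

// fixedPool hands out fixed-size slots carved from a growing backing
// array, and recycles freed slots through an intrusive free list.
type fixedPool struct {
	slots    []int // slots[i] = next free slot when i is on the free list
	freeHead int   // -1 when the free list is empty
}

func newFixedPool() *fixedPool { return &fixedPool{freeHead: -1} }

func (p *fixedPool) alloc() int {
	if p.freeHead >= 0 {
		i := p.freeHead
		p.freeHead = p.slots[i]
		return i
	}
	// No recycled slot: carve a fresh one (the runtime would take it
	// from a persistentalloc'd chunk).
	p.slots = append(p.slots, -1)
	return len(p.slots) - 1
}

func (p *fixedPool) free(i int) {
	p.slots[i] = p.freeHead
	p.freeHead = i
}

func main() {
	p := newFixedPool()
	a, b := p.alloc(), p.alloc()
	fmt.Println("fresh slots:", a, b) // 0 1
	p.free(a)
	fmt.Println("recycled slot:", p.alloc()) // 0 again
}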
diff --git a/src/runtime/mgc.go b/src/runtime/mgc.go
new file mode 100644
index 0000000..57bd8b3
--- /dev/null
+++ b/src/runtime/mgc.go
@@ -0,0 +1,2422 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// TODO(rsc): The code having to do with the heap bitmap needs very serious cleanup.
+// It has gotten completely out of control.
+
+// Garbage collector (GC).
+//
+// The GC runs concurrently with mutator threads, is type accurate (aka precise), allows multiple GC
+// thread to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is
+// non-generational and non-compacting. Allocation is done using size segregated per P allocation
+// areas to minimize fragmentation while eliminating locks in the common case.
+//
+// The algorithm decomposes into several steps.
+// This is a high level description of the algorithm being used. For an overview of GC a good
+// place to start is Richard Jones' gchandbook.org.
+//
+// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
+// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978.
+// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975.
+// For journal quality proofs that these steps are complete, correct, and terminate see
+// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world.
+// Concurrency and Computation: Practice and Experience 15(3-5), 2003.
+//
+//  0. Set phase = GCscan from GCoff.
+//  1. Wait for all P's to acknowledge phase change.
+//         At this point all goroutines have passed through a GC safepoint and
+//         know we are in the GCscan phase.
+//  2. GC scans all goroutine stacks, marks and enqueues all encountered pointers
+//       (marking avoids most duplicate enqueuing but races may produce duplication which is benign).
+//       Preempted goroutines are scanned before P schedules next goroutine.
+//  3. Set phase = GCmark.
+//  4. Wait for all P's to acknowledge phase change.
+//  5. Now write barrier marks and enqueues black, grey, or white to white pointers.
+//       Malloc still allocates white (non-marked) objects.
+//  6. Meanwhile GC transitively walks the heap marking reachable objects.
+//  7. When GC finishes marking heap, it preempts P's one-by-one and
+//       retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
+//       currently scheduled on the P).
+//  8. Once the GC has exhausted all available marking work it sets phase = marktermination.
+//  9. Wait for all P's to acknowledge phase change.
+// 10. Malloc now allocates black objects, so number of unmarked reachable objects
+//        monotonically decreases.
+// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects.
+// 12. When GC completes a full cycle over P's and discovers no new grey
+//         objects, (which means all reachable objects are marked) set phase = GCsweep.
+// 13. Wait for all P's to acknowledge phase change.
+// 14. Now malloc allocates white (but sweeps spans before use).
+//         Write barrier becomes nop.
+// 15. GC does background sweeping, see description below.
+// 16. When sweeping is complete set phase to GCoff.
+// 17. When sufficient allocation has taken place replay the sequence starting at 0 above,
+//         see discussion of GC rate below.
+
+// Changing phases.
+// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
+// All phase action must be benign in the presence of a change.
+// Starting with GCoff
+// GCoff to GCscan
+//     GCscan scans stacks and globals greying them and never marks an object black.
+//     Once all the P's are aware of the new phase they will scan gs on preemption.
+//     This means that the scanning of preempted gs can't start until all the Ps
+//     have acknowledged.
+// GCscan to GCmark
+//     GCMark turns on the write barrier which also only greys objects. No scanning
+//     of objects (making them black) can happen until all the Ps have acknowledged
+//     the phase change.
+// GCmark to GCmarktermination
+//     The only change here is that we start allocating black so the Ps must acknowledge
+//     the change before we begin the termination algorithm
+// GCmarktermination to GCsweep
+//     Objects currently on the freelist must be marked black for this to work.
+//     Are things on the free lists black or white? How does the sweep phase work?
+
+// Concurrent sweep.
+// The sweep phase proceeds concurrently with normal program execution.
+// The heap is swept span-by-span both lazily (when a goroutine needs another span)
+// and concurrently in a background goroutine (this helps programs that are not CPU bound).
+// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
+// and so next_gc calculation is tricky and happens as follows.
+// At the end of the stop-the-world phase next_gc is conservatively set based on total
+// heap size; all spans are marked as "needs sweeping".
+// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
+// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
+// closer to the target value. However, this is not enough to avoid over-allocating memory.
+// Consider that a goroutine wants to allocate a new span for a large object and
+// there are no free swept spans, but there are small-object unswept spans.
+// If the goroutine naively allocates a new span, it can surpass the yet-unknown
+// target next_gc value. In order to prevent such cases (1) when a goroutine needs
+// to allocate a new small-object span, it sweeps small-object spans for the same
+// object size until it frees at least one object; (2) when a goroutine needs to
+// allocate large-object span from heap, it sweeps spans until it frees at least
+// that many pages into heap. Together these two measures ensure that we don't surpass
+// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
+// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
+// but there can still be other one-page unswept spans which could be combined into a two-page span.
+// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
+// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
+// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
+// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
+// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
+// The finalizer goroutine is kicked off only when all spans are swept.
+// When the next GC starts, it sweeps all not-yet-swept spans (if any).
+
+// GC rate.
+// Next GC is after we've allocated an extra amount of memory proportional to
+// the amount already in use. The proportion is controlled by the GOGC environment variable
+// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
+// (this mark is tracked in the next_gc variable). This keeps the GC cost in linear
+// proportion to the allocation cost. Adjusting GOGC just changes the linear constant
+// (and also the amount of extra memory used).
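+//
+// In terms of the code below (see the assignment near the end of gc), the
+// trigger recomputed after each collection is
+//
+//	next_gc = heap_alloc + heap_alloc*gcpercent/100
+//
+// so with GOGC=100 and 4M of live heap, next_gc = 4M + 4M*100/100 = 8M.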
+
+package runtime
+
+import "unsafe"
+
+const (
+	_DebugGC         = 0
+	_DebugGCPtrs     = false // if true, print trace of every pointer load during GC
+	_ConcurrentSweep = true
+
+	_WorkbufSize     = 4 * 1024
+	_FinBlockSize    = 4 * 1024
+	_RootData        = 0
+	_RootBss         = 1
+	_RootFinalizers  = 2
+	_RootSpans       = 3
+	_RootFlushCaches = 4
+	_RootCount       = 5
+)
+
+// ptrmask for an allocation containing a single pointer.
+var oneptr = [...]uint8{bitsPointer}
+
+// Initialized from $GOGC.  GOGC=off means no GC.
+var gcpercent int32
+
+// Holding worldsema grants an M the right to try to stop the world.
+// The procedure is:
+//
+//	semacquire(&worldsema);
+//	m.gcing = 1;
+//	stoptheworld();
+//
+//	... do stuff ...
+//
+//	m.gcing = 0;
+//	semrelease(&worldsema);
+//	starttheworld();
+//
+var worldsema uint32 = 1
+
+// It is a bug if bits does not have bitBoundary set, but
+// there are still some cases where this happens, related
+// to stack spans.
+type markbits struct {
+	bitp  *byte   // pointer to the byte holding xbits
+	shift uintptr // bits xbits needs to be shifted to get bits
+	xbits byte    // byte holding all the bits from *bitp
+	bits  byte    // mark and boundary bits relevant to corresponding slot.
+	tbits byte    // pointer||scalar bits relevant to corresponding slot.
+}
+
+type workbuf struct {
+	node lfnode // must be first
+	nobj uintptr
+	obj  [(_WorkbufSize - unsafe.Sizeof(lfnode{}) - ptrSize) / ptrSize]uintptr
+}
+
+var data, edata, bss, ebss, gcdata, gcbss struct{}
+
+var finlock mutex  // protects the following variables
+var fing *g        // goroutine that runs finalizers
+var finq *finblock // list of finalizers that are to be executed
+var finc *finblock // cache of free blocks
+var finptrmask [_FinBlockSize / ptrSize / pointersPerByte]byte
+var fingwait bool
+var fingwake bool
+var allfin *finblock // list of all blocks
+
+var gcdatamask bitvector
+var gcbssmask bitvector
+
+var gclock mutex
+
+var badblock [1024]uintptr
+var nbadblock int32
+
+type workdata struct {
+	full    uint64                // lock-free list of full blocks
+	empty   uint64                // lock-free list of empty blocks
+	partial uint64                // lock-free list of partially filled blocks
+	pad0    [_CacheLineSize]uint8 // prevents false-sharing between full/empty and nproc/nwait
+	nproc   uint32
+	tstart  int64
+	nwait   uint32
+	ndone   uint32
+	alldone note
+	markfor *parfor
+
+	// Copy of mheap.allspans for marker or sweeper.
+	spans []*mspan
+}
+
+var work workdata
+
+//go:linkname weak_cgo_allocate go.weak.runtime._cgo_allocate_internal
+var weak_cgo_allocate byte
+
+// Is _cgo_allocate linked into the binary?
+func have_cgo_allocate() bool {
+	return &weak_cgo_allocate != nil
+}
+
+// To help debug the concurrent GC we remark with the world
+// stopped, ensuring that any object encountered has its normal
+// mark bit set. To do this we use an orthogonal bit
+// pattern to indicate the object is marked. The following pattern
+// uses the upper two bits in the object's boundary nibble.
+// 01: scalar  not marked
+// 10: pointer not marked
+// 11: pointer     marked
+// 00: scalar      marked
+// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
+// The higher bit is 1 for pointers and 0 for scalars, whether the object
+// is marked or not.
+// The first nibble no longer holds the bitsDead pattern indicating that
+// there are no more pointers in the object. This information is held
+// in the second nibble.
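+//
+// For example, xoring the type bits with 01 (_BitsCheckMarkXor) flips between
+// the unmarked and marked encodings:
+//
+//	01 (scalar,  unmarked) ^ 01 = 00 (scalar,  marked)
+//	10 (pointer, unmarked) ^ 01 = 11 (pointer, marked)
+//
+// docheckmark below applies exactly this transformation to the upper two bits
+// of the boundary nibble.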
+
+// When marking an object, if the bool checkmark is true one uses the above
+// encoding; otherwise one uses the bitMarked bit in the lower two bits
+// of the nibble.
+var (
+	checkmark         = false
+	gccheckmarkenable = true
+)
+
+// inheap reports whether address b is in the known heap. If b doesn't have a
+// valid gcmap it returns false. For example, pointers into stacks will return false.
+func inheap(b uintptr) bool {
+	if b == 0 || b < mheap_.arena_start || b >= mheap_.arena_used {
+		return false
+	}
+	// Not a beginning of a block, consult span table to find the block beginning.
+	k := b >> _PageShift
+	x := k
+	x -= mheap_.arena_start >> _PageShift
+	s := h_spans[x]
+	if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
+		return false
+	}
+	return true
+}
+
+// Given an address in the heap, return the relevant byte from the gcmap. This routine
+// can be used on addresses to the start of an object or to the interior of an object.
+func slottombits(obj uintptr, mbits *markbits) {
+	off := (obj&^(ptrSize-1) - mheap_.arena_start) / ptrSize
+	mbits.bitp = (*byte)(unsafe.Pointer(mheap_.arena_start - off/wordsPerBitmapByte - 1))
+	mbits.shift = off % wordsPerBitmapByte * gcBits
+	mbits.xbits = *mbits.bitp
+	mbits.bits = (mbits.xbits >> mbits.shift) & bitMask
+	mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2
+}
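+
+// As a concrete example of the mapping above (illustrative numbers only): with
+// wordsPerBitmapByte = 2 and gcBits = 4, the word at obj = arena_start + 16*ptrSize
+// has off = 16, so its bits live in the byte at arena_start - 16/2 - 1 = arena_start - 9,
+// at shift (16 % 2) * 4 = 0, i.e. the low nibble of that byte.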
+
+// b is a pointer into the heap.
+// Find the start of the object referred to by b.
+// Set mbits to the associated bits from the bit map.
+// If b is not a valid heap object return nil and
+// undefined values in mbits.
+func objectstart(b uintptr, mbits *markbits) uintptr {
+	obj := b &^ (ptrSize - 1)
+	for {
+		slottombits(obj, mbits)
+		if mbits.bits&bitBoundary == bitBoundary {
+			break
+		}
+
+		// Not a beginning of a block, consult span table to find the block beginning.
+		k := b >> _PageShift
+		x := k
+		x -= mheap_.arena_start >> _PageShift
+		s := h_spans[x]
+		if s == nil || pageID(k) < s.start || b >= s.limit || s.state != mSpanInUse {
+			if s != nil && s.state == _MSpanStack {
+				return 0 // This is legit.
+			}
+
+			// The following ensures that we are rigorous about what data
+			// structures hold valid pointers
+			if false {
+				// Still happens sometimes. We don't know why.
+				printlock()
+				print("runtime:objectstart Span weird: obj=", hex(obj), " k=", hex(k))
+				if s == nil {
+					print(" s=nil\n")
+				} else {
+					print(" s.start=", hex(s.start<<_PageShift), " s.limit=", hex(s.limit), " s.state=", s.state, "\n")
+				}
+				printunlock()
+				gothrow("objectstart: bad pointer in unexpected span")
+			}
+			return 0
+		}
+
+		p := uintptr(s.start) << _PageShift
+		if s.sizeclass != 0 {
+			size := s.elemsize
+			idx := (obj - p) / size
+			p = p + idx*size
+		}
+		if p == obj {
+			print("runtime: failed to find block beginning for ", hex(p), " s=", hex(s.start*_PageSize), " s.limit=", s.limit, "\n")
+			gothrow("failed to find block beginning")
+		}
+		obj = p
+	}
+
+	// If size(obj.firstfield) < PtrSize, then &obj.secondfield could map to the boundary bit.
+	// Clear any low bits to get to the start of the object.
+	// greyobject depends on this.
+	return obj
+}
+
+// Slow for now as we serialize this; since this is on a debug path,
+// speed is not critical at this point.
+var andlock mutex
+
+func atomicand8(src *byte, val byte) {
+	lock(&andlock)
+	*src &= val
+	unlock(&andlock)
+}
+
+// Mark using the checkmark scheme.
+func docheckmark(mbits *markbits) {
+	// xor 01 moves 01(scalar unmarked) to 00(scalar marked)
+	// and 10(pointer unmarked) to 11(pointer marked)
+	if mbits.tbits == _BitsScalar {
+		atomicand8(mbits.bitp, ^byte(_BitsCheckMarkXor<<mbits.shift<<2))
+	} else if mbits.tbits == _BitsPointer {
+		atomicor8(mbits.bitp, byte(_BitsCheckMarkXor<<mbits.shift<<2))
+	}
+
+	// reload bits for ischeckmarked
+	mbits.xbits = *mbits.bitp
+	mbits.bits = (mbits.xbits >> mbits.shift) & bitMask
+	mbits.tbits = ((mbits.xbits >> mbits.shift) & bitPtrMask) >> 2
+}
+
+// ismarked reports whether, in the default scheme, mbits refers to a marked object.
+func ismarked(mbits *markbits) bool {
+	if mbits.bits&bitBoundary != bitBoundary {
+		gothrow("ismarked: bits should have boundary bit set")
+	}
+	return mbits.bits&bitMarked == bitMarked
+}
+
+// ischeckmarked reports whether, in the checkmark scheme, mbits refers to a marked object.
+func ischeckmarked(mbits *markbits) bool {
+	if mbits.bits&bitBoundary != bitBoundary {
+		gothrow("ischeckmarked: bits should have boundary bit set")
+	}
+	return mbits.tbits == _BitsScalarMarked || mbits.tbits == _BitsPointerMarked
+}
+
+// When in the GCmarktermination phase we allocate black.
+func gcmarknewobject_m(obj uintptr) {
+	if gcphase != _GCmarktermination {
+		gothrow("marking new object while not in mark termination phase")
+	}
+	if checkmark { // The world should be stopped so this should not happen.
+		gothrow("gcmarknewobject called while doing checkmark")
+	}
+
+	var mbits markbits
+	slottombits(obj, &mbits)
+	if mbits.bits&bitMarked != 0 {
+		return
+	}
+
+	// Each byte of GC bitmap holds info for two words.
+	// If the current object is larger than two words, or if the object is one word
+	// but the object it shares the byte with is already marked,
+	// then all the possible concurrent updates are trying to set the same bit,
+	// so we can use a non-atomic update.
+	if mbits.xbits&(bitMask|(bitMask<<gcBits)) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
+		*mbits.bitp = mbits.xbits | bitMarked<<mbits.shift
+	} else {
+		atomicor8(mbits.bitp, bitMarked<<mbits.shift)
+	}
+}
+
+// obj is the start of an object with mark mbits.
+// If it isn't already marked, mark it and enqueue into workbuf.
+// Return possibly new workbuf to use.
+func greyobject(obj uintptr, mbits *markbits, wbuf *workbuf) *workbuf {
+	// obj should be start of allocation, and so must be at least pointer-aligned.
+	if obj&(ptrSize-1) != 0 {
+		gothrow("greyobject: obj not pointer-aligned")
+	}
+
+	if checkmark {
+		if !ismarked(mbits) {
+			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), ", mbits->bits=", hex(mbits.bits), " *mbits->bitp=", hex(*mbits.bitp), "\n")
+
+			k := obj >> _PageShift
+			x := k
+			x -= mheap_.arena_start >> _PageShift
+			s := h_spans[x]
+			printlock()
+			print("runtime:greyobject Span: obj=", hex(obj), " k=", hex(k))
+			if s == nil {
+				print(" s=nil\n")
+			} else {
+				print(" s.start=", hex(s.start*_PageSize), " s.limit=", hex(s.limit), " s.sizeclass=", s.sizeclass, " s.elemsize=", s.elemsize, "\n")
+				// NOTE(rsc): This code is using s.sizeclass as an approximation of the
+				// number of pointer-sized words in an object. Perhaps not what was intended.
+				for i := 0; i < int(s.sizeclass); i++ {
+					print(" *(obj+", i*ptrSize, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + uintptr(i)*ptrSize))), "\n")
+				}
+			}
+			gothrow("checkmark found unmarked object")
+		}
+		if ischeckmarked(mbits) {
+			return wbuf
+		}
+		docheckmark(mbits)
+		if !ischeckmarked(mbits) {
+			print("mbits xbits=", hex(mbits.xbits), " bits=", hex(mbits.bits), " tbits=", hex(mbits.tbits), " shift=", mbits.shift, "\n")
+			gothrow("docheckmark and ischeckmarked disagree")
+		}
+	} else {
+		// If marked we have nothing to do.
+		if mbits.bits&bitMarked != 0 {
+			return wbuf
+		}
+
+		// Each byte of GC bitmap holds info for two words.
+		// If the current object is larger than two words, or if the object is one word
+		// but the object it shares the byte with is already marked,
+		// then all the possible concurrent updates are trying to set the same bit,
+		// so we can use a non-atomic update.
+		if mbits.xbits&(bitMask|bitMask<<gcBits) != bitBoundary|bitBoundary<<gcBits || work.nproc == 1 {
+			*mbits.bitp = mbits.xbits | bitMarked<<mbits.shift
+		} else {
+			atomicor8(mbits.bitp, bitMarked<<mbits.shift)
+		}
+	}
+
+	if !checkmark && (mbits.xbits>>(mbits.shift+2))&_BitsMask == _BitsDead {
+		return wbuf // noscan object
+	}
+
+	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
+	// seems like a nice optimization that can be added back in.
+	// There needs to be time between the PREFETCH and the use.
+	// Previously we put the obj in an 8 element buffer that is drained at a rate
+	// to give the PREFETCH time to do its work.
+	// Use of PREFETCHNTA might be more appropriate than PREFETCH
+
+	// If workbuf is full, obtain an empty one.
+	if wbuf.nobj >= uintptr(len(wbuf.obj)) {
+		wbuf = getempty(wbuf)
+	}
+
+	wbuf.obj[wbuf.nobj] = obj
+	wbuf.nobj++
+	return wbuf
+}
+
+// Scan the object b of size n, adding pointers to wbuf.
+// Return possibly new wbuf to use.
+// If ptrmask != nil, it specifies where pointers are in b.
+// If ptrmask == nil, the GC bitmap should be consulted.
+// In this case, n may be an overestimate of the size; the GC bitmap
+// must also be used to make sure the scan stops at the end of b.
+func scanobject(b, n uintptr, ptrmask *uint8, wbuf *workbuf) *workbuf {
+	arena_start := mheap_.arena_start
+	arena_used := mheap_.arena_used
+
+	// Find bits of the beginning of the object.
+	var ptrbitp unsafe.Pointer
+	var mbits markbits
+	if ptrmask == nil {
+		b = objectstart(b, &mbits)
+		if b == 0 {
+			return wbuf
+		}
+		ptrbitp = unsafe.Pointer(mbits.bitp)
+	}
+	for i := uintptr(0); i < n; i += ptrSize {
+		// Find bits for this word.
+		var bits uintptr
+		if ptrmask != nil {
+			// dense mask (stack or data)
+			bits = (uintptr(*(*byte)(add(unsafe.Pointer(ptrmask), (i/ptrSize)/4))) >> (((i / ptrSize) % 4) * bitsPerPointer)) & bitsMask
+		} else {
+			// Check if we have reached end of span.
+			// n is an overestimate of the size of the object.
+			if (b+i)%_PageSize == 0 && h_spans[(b-arena_start)>>_PageShift] != h_spans[(b+i-arena_start)>>_PageShift] {
+				break
+			}
+
+			// Consult GC bitmap.
+			bits = uintptr(*(*byte)(ptrbitp))
+			if wordsPerBitmapByte != 2 {
+				gothrow("alg doesn't work for wordsPerBitmapByte != 2")
+			}
+			j := (uintptr(b) + i) / ptrSize & 1 // j indicates upper nibble or lower nibble
+			bits >>= gcBits * j
+			if i == 0 {
+				bits &^= bitBoundary
+			}
+			ptrbitp = add(ptrbitp, -j)
+
+			if bits&bitBoundary != 0 && i != 0 {
+				break // reached beginning of the next object
+			}
+			bits = (bits & bitPtrMask) >> 2 // bits refer to the type bits.
+
+			if i != 0 && bits == bitsDead { // BitsDead in first nibble not valid during checkmark
+				break // reached no-scan part of the object
+			}
+		}
+
+		if bits <= _BitsScalar { // _BitsScalar, _BitsDead, _BitsScalarMarked
+			continue
+		}
+
+		if bits&_BitsPointer != _BitsPointer {
+			print("gc checkmark=", checkmark, " b=", hex(b), " ptrmask=", ptrmask, " mbits.bitp=", mbits.bitp, " mbits.xbits=", hex(mbits.xbits), " bits=", hex(bits), "\n")
+			gothrow("unexpected garbage collection bits")
+		}
+
+		obj := *(*uintptr)(unsafe.Pointer(b + i))
+
+		// At this point we have extracted the next potential pointer.
+		// Check if it points into heap.
+		if obj == 0 || obj < arena_start || obj >= arena_used {
+			continue
+		}
+
+		// Mark the object and return some important bits.
+		// If we combine the following two routines we don't have to pass mbits or obj around.
+		var mbits markbits
+		obj = objectstart(obj, &mbits)
+		if obj == 0 {
+			continue
+		}
+		wbuf = greyobject(obj, &mbits, wbuf)
+	}
+	return wbuf
+}
+
+// scanblock starts by scanning b as scanobject would.
+// If the gcphase is GCscan, that's all scanblock does.
+// Otherwise it traverses some fraction of the pointers it found in b, recursively.
+// As a special case, scanblock(nil, 0, nil) means to scan previously queued work,
+// stopping only when no work is left in the system.
+func scanblock(b, n uintptr, ptrmask *uint8) {
+	wbuf := getpartialorempty()
+	if b != 0 {
+		wbuf = scanobject(b, n, ptrmask, wbuf)
+		if gcphase == _GCscan {
+			if inheap(b) && ptrmask == nil {
+				// b is in heap, we are in GCscan so there should be a ptrmask.
+				gothrow("scanblock: In GCscan phase and inheap is true.")
+			}
+			// GCscan only goes one level deep since mark wb not turned on.
+			putpartial(wbuf)
+			return
+		}
+	}
+	if gcphase == _GCscan {
+		gothrow("scanblock: In GCscan phase but no b passed in.")
+	}
+
+	keepworking := b == 0
+
+	// ptrmask can have 2 possible values:
+	// 1. nil - obtain pointer mask from GC bitmap.
+	// 2. pointer to a compact mask (for stacks and data).
+	for {
+		if wbuf.nobj == 0 {
+			if !keepworking {
+				putempty(wbuf)
+				return
+			}
+			// Refill workbuf from global queue.
+			wbuf = getfull(wbuf)
+			if wbuf == nil { // nil means out of work barrier reached
+				return
+			}
+
+			if wbuf.nobj <= 0 {
+				gothrow("runtime:scanblock getfull returns empty buffer")
+			}
+		}
+
+		// If another proc wants a pointer, give it some.
+		if work.nwait > 0 && wbuf.nobj > 4 && work.full == 0 {
+			wbuf = handoff(wbuf)
+		}
+
+		// This might be a good place to add prefetch code...
+		// if(wbuf->nobj > 4) {
+		//         PREFETCH(wbuf->obj[wbuf->nobj - 3]);
+		// }
+		wbuf.nobj--
+		b = wbuf.obj[wbuf.nobj]
+		wbuf = scanobject(b, mheap_.arena_used-b, nil, wbuf)
+	}
+}
+
+func markroot(desc *parfor, i uint32) {
+	// Note: if you add a case here, please also update heapdump.c:dumproots.
+	switch i {
+	case _RootData:
+		scanblock(uintptr(unsafe.Pointer(&data)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)), gcdatamask.bytedata)
+
+	case _RootBss:
+		scanblock(uintptr(unsafe.Pointer(&bss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)), gcbssmask.bytedata)
+
+	case _RootFinalizers:
+		for fb := allfin; fb != nil; fb = fb.alllink {
+			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), uintptr(fb.cnt)*unsafe.Sizeof(fb.fin[0]), &finptrmask[0])
+		}
+
+	case _RootSpans:
+		// mark MSpan.specials
+		sg := mheap_.sweepgen
+		for spanidx := uint32(0); spanidx < uint32(len(work.spans)); spanidx++ {
+			s := work.spans[spanidx]
+			if s.state != mSpanInUse {
+				continue
+			}
+			if !checkmark && s.sweepgen != sg {
+				// sweepgen was updated (+2) during non-checkmark GC pass
+				print("sweep ", s.sweepgen, " ", sg, "\n")
+				gothrow("gc: unswept span")
+			}
+			for sp := s.specials; sp != nil; sp = sp.next {
+				if sp.kind != _KindSpecialFinalizer {
+					continue
+				}
+				// don't mark finalized object, but scan it so we
+				// retain everything it points to.
+				spf := (*specialfinalizer)(unsafe.Pointer(sp))
+				// A finalizer can be set for an inner byte of an object, find object beginning.
+				p := uintptr(s.start<<_PageShift) + uintptr(spf.special.offset)/s.elemsize*s.elemsize
+				if gcphase != _GCscan {
+					scanblock(p, s.elemsize, nil) // scanned during mark phase
+				}
+				scanblock(uintptr(unsafe.Pointer(&spf.fn)), ptrSize, &oneptr[0])
+			}
+		}
+
+	case _RootFlushCaches:
+		if gcphase != _GCscan { // Do not flush mcaches during GCscan phase.
+			flushallmcaches()
+		}
+
+	default:
+		// the rest is scanning goroutine stacks
+		if uintptr(i-_RootCount) >= allglen {
+			gothrow("markroot: bad index")
+		}
+		gp := allgs[i-_RootCount]
+
+		// Remember when we first observed the G blocked;
+		// needed only for output in traceback.
+		status := readgstatus(gp) // We are not in a scan state
+		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
+			gp.waitsince = work.tstart
+		}
+
+		// Shrink a stack if not much of it is being used, but not during the scan phase.
+		if gcphase != _GCscan { // Do not shrink during GCscan phase.
+			shrinkstack(gp)
+		}
+		if readgstatus(gp) == _Gdead {
+			gp.gcworkdone = true
+		} else {
+			gp.gcworkdone = false
+		}
+		restart := stopg(gp)
+
+		// goroutine will scan its own stack when it stops running.
+		// Wait until it has.
+		for readgstatus(gp) == _Grunning && !gp.gcworkdone {
+		}
+
+		// scanstack(gp) is done as part of gcphasework.
+		// But to make sure we have finished we need to make sure that
+		// the stack traps have all responded, so drop into
+		// this loop until they respond.
+		for !gp.gcworkdone {
+			status = readgstatus(gp)
+			if status == _Gdead {
+				gp.gcworkdone = true // scan is a noop
+				break
+			}
+			if status == _Gwaiting || status == _Grunnable {
+				restart = stopg(gp)
+			}
+		}
+		if restart {
+			restartg(gp)
+		}
+	}
+}
+
+// Get an empty work buffer off the work.empty list,
+// allocating new buffers as needed.
+func getempty(b *workbuf) *workbuf {
+	if b != nil {
+		putfull(b)
+		b = nil
+	}
+	if work.empty != 0 {
+		b = (*workbuf)(lfstackpop(&work.empty))
+	}
+	if b != nil && b.nobj != 0 {
+		_g_ := getg()
+		print("m", _g_.m.id, ": getempty: popped b=", b, " with non-zero b.nobj=", b.nobj, "\n")
+		gothrow("getempty: workbuffer not empty, b->nobj not 0")
+	}
+	if b == nil {
+		b = (*workbuf)(persistentalloc(unsafe.Sizeof(*b), _CacheLineSize, &memstats.gc_sys))
+		b.nobj = 0
+	}
+	return b
+}
+
+func putempty(b *workbuf) {
+	if b.nobj != 0 {
+		gothrow("putempty: b->nobj not 0")
+	}
+	lfstackpush(&work.empty, &b.node)
+}
+
+func putfull(b *workbuf) {
+	if b.nobj <= 0 {
+		gothrow("putfull: b->nobj <= 0")
+	}
+	lfstackpush(&work.full, &b.node)
+}
+
+// Get a partially empty work buffer;
+// if none are available, get an empty one.
+func getpartialorempty() *workbuf {
+	b := (*workbuf)(lfstackpop(&work.partial))
+	if b == nil {
+		b = getempty(nil)
+	}
+	return b
+}
+
+func putpartial(b *workbuf) {
+	if b.nobj == 0 {
+		lfstackpush(&work.empty, &b.node)
+	} else if b.nobj < uintptr(len(b.obj)) {
+		lfstackpush(&work.partial, &b.node)
+	} else if b.nobj == uintptr(len(b.obj)) {
+		lfstackpush(&work.full, &b.node)
+	} else {
+		print("b=", b, " b.nobj=", b.nobj, " len(b.obj)=", len(b.obj), "\n")
+		gothrow("putpartial: bad Workbuf b.nobj")
+	}
+}
+
+// Get a full work buffer off the work.full list or a partially
+// filled one off the work.partial list. If nothing is available,
+// wait until all the other gc helpers have finished and then
+// return nil.
+// getfull acts as a barrier for work.nproc helpers. As long as one
+// gchelper is actively marking objects it
+// may create a workbuffer that the other helpers can work on.
+// The for loop either exits when a work buffer is found
+// or when _all_ of the work.nproc GC helpers are in the loop
+// looking for work and thus not capable of creating new work.
+// This is in fact the termination condition for the STW mark
+// phase.
+func getfull(b *workbuf) *workbuf {
+	if b != nil {
+		putempty(b)
+	}
+
+	b = (*workbuf)(lfstackpop(&work.full))
+	if b == nil {
+		b = (*workbuf)(lfstackpop(&work.partial))
+	}
+	if b != nil || work.nproc == 1 {
+		return b
+	}
+
+	xadd(&work.nwait, +1)
+	for i := 0; ; i++ {
+		if work.full != 0 {
+			xadd(&work.nwait, -1)
+			b = (*workbuf)(lfstackpop(&work.full))
+			if b == nil {
+				b = (*workbuf)(lfstackpop(&work.partial))
+			}
+			if b != nil {
+				return b
+			}
+			xadd(&work.nwait, +1)
+		}
+		if work.nwait == work.nproc {
+			return nil
+		}
+		_g_ := getg()
+		if i < 10 {
+			_g_.m.gcstats.nprocyield++
+			procyield(20)
+		} else if i < 20 {
+			_g_.m.gcstats.nosyield++
+			osyield()
+		} else {
+			_g_.m.gcstats.nsleep++
+			usleep(100)
+		}
+	}
+}
+
+func handoff(b *workbuf) *workbuf {
+	// Make new buffer with half of b's pointers.
+	b1 := getempty(nil)
+	n := b.nobj / 2
+	b.nobj -= n
+	b1.nobj = n
+	memmove(unsafe.Pointer(&b1.obj[0]), unsafe.Pointer(&b.obj[b.nobj]), n*unsafe.Sizeof(b1.obj[0]))
+	_g_ := getg()
+	_g_.m.gcstats.nhandoff++
+	_g_.m.gcstats.nhandoffcnt += uint64(n)
+
+	// Put b on full list - let first half of b get stolen.
+	lfstackpush(&work.full, &b.node)
+	return b1
+}
+
+func stackmapdata(stkmap *stackmap, n int32) bitvector {
+	if n < 0 || n >= stkmap.n {
+		gothrow("stackmapdata: index out of range")
+	}
+	return bitvector{stkmap.nbit, (*byte)(add(unsafe.Pointer(&stkmap.bytedata), uintptr(n*((stkmap.nbit+31)/32*4))))}
+}
+
+// Scan a stack frame: local variables and function arguments/results.
+func scanframe(frame *stkframe, unused unsafe.Pointer) bool {
+
+	f := frame.fn
+	targetpc := frame.continpc
+	if targetpc == 0 {
+		// Frame is dead.
+		return true
+	}
+	if _DebugGC > 1 {
+		print("scanframe ", gofuncname(f), "\n")
+	}
+	if targetpc != f.entry {
+		targetpc--
+	}
+	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+	if pcdata == -1 {
+		// We do not have a valid pcdata value but there might be a
+		// stackmap for this function.  It is likely that we are looking
+		// at the function prologue, assume so and hope for the best.
+		pcdata = 0
+	}
+
+	// Scan local variables if stack frame has been allocated.
+	size := frame.varp - frame.sp
+	var minsize uintptr
+	if thechar != '6' && thechar != '8' {
+		minsize = ptrSize
+	} else {
+		minsize = 0
+	}
+	if size > minsize {
+		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+		if stkmap == nil || stkmap.n <= 0 {
+			print("runtime: frame ", gofuncname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
+			gothrow("missing stackmap")
+		}
+
+		// Locals bitmap information, scan just the pointers in locals.
+		if pcdata < 0 || pcdata >= stkmap.n {
+			// don't know where we are
+			print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " locals stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n")
+			gothrow("scanframe: bad symbol table")
+		}
+		bv := stackmapdata(stkmap, pcdata)
+		size = (uintptr(bv.n) * ptrSize) / bitsPerPointer
+		scanblock(frame.varp-size, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata)
+	}
+
+	// Scan arguments.
+	if frame.arglen > 0 {
+		var bv bitvector
+		if frame.argmap != nil {
+			bv = *frame.argmap
+		} else {
+			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+			if stkmap == nil || stkmap.n <= 0 {
+				print("runtime: frame ", gofuncname(f), " untyped args ", hex(frame.argp), "+", hex(frame.arglen), "\n")
+				gothrow("missing stackmap")
+			}
+			if pcdata < 0 || pcdata >= stkmap.n {
+				// don't know where we are
+				print("runtime: pcdata is ", pcdata, " and ", stkmap.n, " args stack map entries for ", gofuncname(f), " (targetpc=", targetpc, ")\n")
+				gothrow("scanframe: bad symbol table")
+			}
+			bv = stackmapdata(stkmap, pcdata)
+		}
+		scanblock(frame.argp, uintptr(bv.n)/bitsPerPointer*ptrSize, bv.bytedata)
+	}
+	return true
+}
+
+func scanstack(gp *g) {
+	// TODO(rsc): Due to a precedence error, this was never checked in the original C version.
+	// If you enable the check, the gothrow happens.
+	/*
+		if readgstatus(gp)&_Gscan == 0 {
+			print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+			gothrow("mark - bad status")
+		}
+	*/
+
+	switch readgstatus(gp) &^ _Gscan {
+	default:
+		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+		gothrow("mark - bad status")
+	case _Gdead:
+		return
+	case _Grunning:
+		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+		gothrow("scanstack: goroutine not stopped")
+	case _Grunnable, _Gsyscall, _Gwaiting:
+		// ok
+	}
+
+	if gp == getg() {
+		gothrow("can't scan our own stack")
+	}
+	mp := gp.m
+	if mp != nil && mp.helpgc != 0 {
+		gothrow("can't scan gchelper stack")
+	}
+
+	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, scanframe, nil, 0)
+	tracebackdefers(gp, scanframe, nil)
+}
+
+// If the slot is grey or black return true; if white return false.
+// If the slot is not in the known heap and thus does not have a valid GC bitmap then
+// it is considered grey. Globals and stacks can hold such slots.
+// The slot is grey if its mark bit is set and it is enqueued to be scanned.
+// The slot is black if it has already been scanned.
+// It is white if it has a valid mark bit and the bit is not set.
+func shaded(slot uintptr) bool {
+	if !inheap(slot) { // non-heap slots considered grey
+		return true
+	}
+
+	var mbits markbits
+	valid := objectstart(slot, &mbits)
+	if valid == 0 {
+		return true
+	}
+
+	if checkmark {
+		return ischeckmarked(&mbits)
+	}
+
+	return mbits.bits&bitMarked != 0
+}
+
+// Shade the object if it isn't already.
+// The object is not nil and known to be in the heap.
+func shade(b uintptr) {
+	if !inheap(b) {
+		gothrow("shade: passed an address not in the heap")
+	}
+
+	wbuf := getpartialorempty()
+	// Mark the object and return some important bits.
+	// If we combine the following two routines we don't have to pass mbits or obj around.
+	var mbits markbits
+	obj := objectstart(b, &mbits)
+	if obj != 0 {
+		wbuf = greyobject(obj, &mbits, wbuf) // augments the wbuf
+	}
+	putpartial(wbuf)
+}
+
+// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object.
+// The original Dijkstra barrier only shaded ptrs being placed in black slots.
+//
+// Shade indicates that it has seen a white pointer by adding the referent
+// to wbuf as well as marking it.
+//
+// slot is the destination (dst) in the Go code.
+// ptr is the value that goes into the slot (src) in the Go code.
+//
+// Dijkstra pointed out that maintaining the no-black-to-white-pointers
+// invariant means that white-to-white pointers need not
+// be noted by the write barrier. Furthermore, if either
+// white object dies before it is reached by the
+// GC then the object can be collected during this GC cycle
+// instead of waiting for the next cycle. Unfortunately the cost of
+// ensuring that the object holding the slot doesn't concurrently
+// change to black without the mutator noticing seems prohibitive.
+//
+// Consider the following example where the mutator writes into
+// a slot and then loads the slot's mark bit while the GC thread
+// writes to the slot's mark bit and then as part of scanning reads
+// the slot.
+//
+// Initially both [slot] and [slotmark] are 0 (nil)
+// Mutator thread          GC thread
+// st [slot], ptr          st [slotmark], 1
+//
+// ld r1, [slotmark]       ld r2, [slot]
+//
+// This is a classic example of independent reads of independent writes,
+// aka IRIW. The question is whether r1==r2==0 is allowed, and for most HW the
+// answer is yes without inserting memory barriers between the st and the ld.
+// These barriers are expensive so we have decided that we will
+// always grey the ptr object regardless of the slot's color.
+func gcmarkwb_m(slot *uintptr, ptr uintptr) {
+	switch gcphase {
+	default:
+		gothrow("gcphasework in bad gcphase")
+
+	case _GCoff, _GCquiesce, _GCstw, _GCsweep, _GCscan:
+		// ok
+
+	case _GCmark, _GCmarktermination:
+		if ptr != 0 && inheap(ptr) {
+			shade(ptr)
+		}
+	}
+}
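+
+// Illustrative sketch only (not the compiler's actual lowering): with the
+// barrier active, a heap pointer write such as *slot = ptr behaves like the pair
+//
+//	gcmarkwb_m(slot, ptr) // shade ptr so a white referent cannot be hidden by the write
+//	*slot = ptr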
+
+// The gp has been moved to a GC safepoint. GC phase specific
+// work is done here.
+func gcphasework(gp *g) {
+	switch gcphase {
+	default:
+		gothrow("gcphasework in bad gcphase")
+	case _GCoff, _GCquiesce, _GCstw, _GCsweep:
+		// No work.
+	case _GCscan:
+		// scan the stack, mark the objects, put pointers in work buffers
+		// hanging off the P where this is being run.
+		scanstack(gp)
+	case _GCmark:
+		// No work.
+	case _GCmarktermination:
+		scanstack(gp)
+		// All available mark work will be emptied before returning.
+	}
+	gp.gcworkdone = true
+}
+
+var finalizer1 = [...]byte{
+	// Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
+	// Each byte describes 4 words.
+	// Need 4 Finalizers described by 5 bytes before pattern repeats:
+	//	ptr ptr uintptr ptr ptr
+	//	ptr ptr uintptr ptr ptr
+	//	ptr ptr uintptr ptr ptr
+	//	ptr ptr uintptr ptr ptr
+	// aka
+	//	ptr ptr uintptr ptr
+	//	ptr ptr ptr uintptr
+	//	ptr ptr ptr ptr
+	//	uintptr ptr ptr ptr
+	//	ptr uintptr ptr ptr
+	// Assumptions about Finalizer layout checked below.
+	bitsPointer | bitsPointer<<2 | bitsScalar<<4 | bitsPointer<<6,
+	bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsScalar<<6,
+	bitsPointer | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6,
+	bitsScalar | bitsPointer<<2 | bitsPointer<<4 | bitsPointer<<6,
+	bitsPointer | bitsScalar<<2 | bitsPointer<<4 | bitsPointer<<6,
+}
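+
+// Illustrative arithmetic only: with 5 words per Finalizer and 4 words described
+// per mask byte, the mask pattern repeats every lcm(5, 4) = 20 words, i.e. every
+// 5 bytes, which is why finalizer1 has exactly 5 entries; queuefinalizer tiles
+// it across finptrmask below.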
+
+func queuefinalizer(p unsafe.Pointer, fn *funcval, nret uintptr, fint *_type, ot *ptrtype) {
+	lock(&finlock)
+	if finq == nil || finq.cnt == finq.cap {
+		if finc == nil {
+			finc = (*finblock)(persistentalloc(_FinBlockSize, 0, &memstats.gc_sys))
+			finc.cap = int32((_FinBlockSize-unsafe.Sizeof(finblock{}))/unsafe.Sizeof(finalizer{}) + 1)
+			finc.alllink = allfin
+			allfin = finc
+			if finptrmask[0] == 0 {
+				// Build pointer mask for Finalizer array in block.
+				// Check assumptions made in finalizer1 array above.
+				if (unsafe.Sizeof(finalizer{}) != 5*ptrSize ||
+					unsafe.Offsetof(finalizer{}.fn) != 0 ||
+					unsafe.Offsetof(finalizer{}.arg) != ptrSize ||
+					unsafe.Offsetof(finalizer{}.nret) != 2*ptrSize ||
+					unsafe.Offsetof(finalizer{}.fint) != 3*ptrSize ||
+					unsafe.Offsetof(finalizer{}.ot) != 4*ptrSize ||
+					bitsPerPointer != 2) {
+					gothrow("finalizer out of sync")
+				}
+				for i := range finptrmask {
+					finptrmask[i] = finalizer1[i%len(finalizer1)]
+				}
+			}
+		}
+		block := finc
+		finc = block.next
+		block.next = finq
+		finq = block
+	}
+	f := (*finalizer)(add(unsafe.Pointer(&finq.fin[0]), uintptr(finq.cnt)*unsafe.Sizeof(finq.fin[0])))
+	finq.cnt++
+	f.fn = fn
+	f.nret = nret
+	f.fint = fint
+	f.ot = ot
+	f.arg = p
+	fingwake = true
+	unlock(&finlock)
+}
+
+func iterate_finq(callback func(*funcval, unsafe.Pointer, uintptr, *_type, *ptrtype)) {
+	for fb := allfin; fb != nil; fb = fb.alllink {
+		for i := int32(0); i < fb.cnt; i++ {
+			f := &fb.fin[i]
+			callback(f.fn, f.arg, f.nret, f.fint, f.ot)
+		}
+	}
+}
+
+// Returns only when span s has been swept.
+func mSpan_EnsureSwept(s *mspan) {
+	// Caller must disable preemption.
+	// Otherwise when this function returns the span can become unswept again
+	// (if GC is triggered on another goroutine).
+	_g_ := getg()
+	if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+		gothrow("MSpan_EnsureSwept: m is not locked")
+	}
+
+	sg := mheap_.sweepgen
+	if atomicload(&s.sweepgen) == sg {
+		return
+	}
+	// The caller must be sure that the span is a MSpanInUse span.
+	if cas(&s.sweepgen, sg-2, sg-1) {
+		mSpan_Sweep(s, false)
+		return
+	}
+	// unfortunate condition, and we don't have efficient means to wait
+	for atomicload(&s.sweepgen) != sg {
+		osyield()
+	}
+}
+
+// Sweep frees or collects finalizers for blocks not marked in the mark phase.
+// It clears the mark bits in preparation for the next GC round.
+// Returns true if the span was returned to heap.
+// If preserve=true, don't return it to heap nor relink in MCentral lists;
+// caller takes care of it.
+func mSpan_Sweep(s *mspan, preserve bool) bool {
+	if checkmark {
+		gothrow("MSpan_Sweep: checkmark only runs in STW and after the sweep")
+	}
+
+	// It's critical that we enter this function with preemption disabled;
+	// GC must not start while we are in the middle of this function.
+	_g_ := getg()
+	if _g_.m.locks == 0 && _g_.m.mallocing == 0 && _g_ != _g_.m.g0 {
+		gothrow("MSpan_Sweep: m is not locked")
+	}
+	sweepgen := mheap_.sweepgen
+	if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
+		print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+		gothrow("MSpan_Sweep: bad span state")
+	}
+	arena_start := mheap_.arena_start
+	cl := s.sizeclass
+	size := s.elemsize
+	var n int32
+	var npages int32
+	if cl == 0 {
+		n = 1
+	} else {
+		// Chunk full of small blocks.
+		npages = class_to_allocnpages[cl]
+		n = (npages << _PageShift) / int32(size)
+	}
+	res := false
+	nfree := 0
+	var head mlink
+	end := &head
+	c := _g_.m.mcache
+	sweepgenset := false
+
+	// Mark any free objects in this span so we don't collect them.
+	for link := s.freelist; link != nil; link = link.next {
+		off := (uintptr(unsafe.Pointer(link)) - arena_start) / ptrSize
+		bitp := arena_start - off/wordsPerBitmapByte - 1
+		shift := (off % wordsPerBitmapByte) * gcBits
+		*(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift
+	}
+
+	// Unlink & free special records for any objects we're about to free.
+	specialp := &s.specials
+	special := *specialp
+	for special != nil {
+		// A finalizer can be set for an inner byte of an object, find object beginning.
+		p := uintptr(s.start<<_PageShift) + uintptr(special.offset)/size*size
+		off := (p - arena_start) / ptrSize
+		bitp := arena_start - off/wordsPerBitmapByte - 1
+		shift := (off % wordsPerBitmapByte) * gcBits
+		bits := (*(*byte)(unsafe.Pointer(bitp)) >> shift) & bitMask
+		if bits&bitMarked == 0 {
+			// Find the exact byte for which the special was setup
+			// (as opposed to object beginning).
+			p := uintptr(s.start<<_PageShift) + uintptr(special.offset)
+			// about to free object: splice out special record
+			y := special
+			special = special.next
+			*specialp = special
+			if !freespecial(y, unsafe.Pointer(p), size, false) {
+				// stop freeing of object if it has a finalizer
+				*(*byte)(unsafe.Pointer(bitp)) |= bitMarked << shift
+			}
+		} else {
+			// object is still live: keep special record
+			specialp = &special.next
+			special = *specialp
+		}
+	}
+
+	// Sweep through n objects of given size starting at p.
+	// This thread owns the span now, so it can manipulate
+	// the block bitmap without atomic operations.
+	p := uintptr(s.start << _PageShift)
+	off := (p - arena_start) / ptrSize
+	bitp := arena_start - off/wordsPerBitmapByte - 1
+	shift := uint(0)
+	step := size / (ptrSize * wordsPerBitmapByte)
+	// Rewind to the previous quadruple as we move to the next
+	// in the beginning of the loop.
+	bitp += step
+	if step == 0 {
+		// 8-byte objects.
+		bitp++
+		shift = gcBits
+	}
+	for ; n > 0; n, p = n-1, p+size {
+		bitp -= step
+		if step == 0 {
+			if shift != 0 {
+				bitp--
+			}
+			shift = gcBits - shift
+		}
+
+		xbits := *(*byte)(unsafe.Pointer(bitp))
+		bits := (xbits >> shift) & bitMask
+
+		// Allocated and marked object, reset bits to allocated.
+		if bits&bitMarked != 0 {
+			*(*byte)(unsafe.Pointer(bitp)) &^= bitMarked << shift
+			continue
+		}
+
+		// At this point we know that we are looking at a garbage object
+		// that needs to be collected.
+		if debug.allocfreetrace != 0 {
+			tracefree(unsafe.Pointer(p), size)
+		}
+
+		// Reset to allocated+noscan.
+		*(*byte)(unsafe.Pointer(bitp)) = uint8(uintptr(xbits&^((bitMarked|bitsMask<<2)<<shift)) | uintptr(bitsDead)<<(shift+2))
+		if cl == 0 {
+			// Free large span.
+			if preserve {
+				gothrow("can't preserve large span")
+			}
+			unmarkspan(p, s.npages<<_PageShift)
+			s.needzero = 1
+
+			// important to set sweepgen before returning it to heap
+			atomicstore(&s.sweepgen, sweepgen)
+			sweepgenset = true
+
+			// NOTE(rsc,dvyukov): The original implementation of efence
+			// in CL 22060046 used SysFree instead of SysFault, so that
+			// the operating system would eventually give the memory
+			// back to us again, so that an efence program could run
+			// longer without running out of memory. Unfortunately,
+			// calling SysFree here without any kind of adjustment of the
+			// heap data structures means that when the memory does
+			// come back to us, we have the wrong metadata for it, either in
+			// the MSpan structures or in the garbage collection bitmap.
+			// Using SysFault here means that the program will run out of
+			// memory fairly quickly in efence mode, but at least it won't
+			// have mysterious crashes due to confused memory reuse.
+			// It should be possible to switch back to SysFree if we also
+			// implement and then call some kind of MHeap_DeleteSpan.
+			if debug.efence > 0 {
+				s.limit = 0 // prevent mlookup from finding this span
+				sysFault(unsafe.Pointer(p), size)
+			} else {
+				mHeap_Free(&mheap_, s, 1)
+			}
+			c.local_nlargefree++
+			c.local_largefree += size
+			xadd64(&memstats.next_gc, -int64(size)*int64(gcpercent+100)/100)
+			res = true
+		} else {
+			// Free small object.
+			if size > 2*ptrSize {
+				*(*uintptr)(unsafe.Pointer(p + ptrSize)) = uintptrMask & 0xdeaddeaddeaddead // mark as "needs to be zeroed"
+			} else if size > ptrSize {
+				*(*uintptr)(unsafe.Pointer(p + ptrSize)) = 0
+			}
+			end.next = (*mlink)(unsafe.Pointer(p))
+			end = end.next
+			nfree++
+		}
+	}
+
+	// We need to set s.sweepgen = h.sweepgen only when all blocks are swept,
+	// because of the potential for a concurrent free/SetFinalizer.
+	// But we need to set it before we make the span available for allocation
+	// (return it to heap or mcentral), because allocation code assumes that a
+	// span is already swept if available for allocation.
+	if !sweepgenset && nfree == 0 {
+		// The span must be in our exclusive ownership until we update sweepgen,
+		// check for potential races.
+		if s.state != mSpanInUse || s.sweepgen != sweepgen-1 {
+			print("MSpan_Sweep: state=", s.state, " sweepgen=", s.sweepgen, " mheap.sweepgen=", sweepgen, "\n")
+			gothrow("MSpan_Sweep: bad span state after sweep")
+		}
+		atomicstore(&s.sweepgen, sweepgen)
+	}
+	if nfree > 0 {
+		c.local_nsmallfree[cl] += uintptr(nfree)
+		c.local_cachealloc -= intptr(uintptr(nfree) * size)
+		xadd64(&memstats.next_gc, -int64(nfree)*int64(size)*int64(gcpercent+100)/100)
+		res = mCentral_FreeSpan(&mheap_.central[cl].mcentral, s, int32(nfree), head.next, end, preserve)
+		// MCentral_FreeSpan updates sweepgen
+	}
+	return res
+}
+
+// State of background sweep.
+// Protected by gclock.
+type sweepdata struct {
+	g       *g
+	parked  bool
+	started bool
+
+	spanidx uint32 // background sweeper position
+
+	nbgsweep    uint32
+	npausesweep uint32
+}
+
+var sweep sweepdata
+
+// sweepone sweeps one span and returns the number of pages returned
+// to the heap, or ^uintptr(0) if there is nothing to sweep.
+func sweepone() uintptr {
+	_g_ := getg()
+
+	// Increment locks to ensure that the goroutine is not preempted
+	// in the middle of sweep, thus leaving the span in an inconsistent state for the next GC.
+	_g_.m.locks++
+	sg := mheap_.sweepgen
+	for {
+		idx := xadd(&sweep.spanidx, 1) - 1
+		if idx >= uint32(len(work.spans)) {
+			mheap_.sweepdone = 1
+			_g_.m.locks--
+			return ^uintptr(0)
+		}
+		s := work.spans[idx]
+		if s.state != mSpanInUse {
+			s.sweepgen = sg
+			continue
+		}
+		if s.sweepgen != sg-2 || !cas(&s.sweepgen, sg-2, sg-1) {
+			continue
+		}
+		npages := s.npages
+		if !mSpan_Sweep(s, false) {
+			npages = 0
+		}
+		_g_.m.locks--
+		return npages
+	}
+}
+
+func gosweepone() uintptr {
+	var ret uintptr
+	systemstack(func() {
+		ret = sweepone()
+	})
+	return ret
+}
+
+func gosweepdone() bool {
+	return mheap_.sweepdone != 0
+}
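+
+// A sketch of how a background sweeper goroutine can drive the helpers above
+// (illustrative only; the actual bgsweep loop lives elsewhere in the runtime):
+//
+//	for gosweepone() != ^uintptr(0) {
+//		sweep.nbgsweep++
+//		Gosched() // yield so sweeping stays in the background
+//	}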
+
+func gchelper() {
+	_g_ := getg()
+	_g_.m.traceback = 2
+	gchelperstart()
+
+	// parallel mark over GC roots
+	parfordo(work.markfor)
+	if gcphase != _GCscan {
+		scanblock(0, 0, nil) // blocks in getfull
+	}
+
+	nproc := work.nproc // work.nproc can change right after we increment work.ndone
+	if xadd(&work.ndone, +1) == nproc-1 {
+		notewakeup(&work.alldone)
+	}
+	_g_.m.traceback = 0
+}
+
+func cachestats() {
+	for i := 0; ; i++ {
+		p := allp[i]
+		if p == nil {
+			break
+		}
+		c := p.mcache
+		if c == nil {
+			continue
+		}
+		purgecachedstats(c)
+	}
+}
+
+func flushallmcaches() {
+	for i := 0; ; i++ {
+		p := allp[i]
+		if p == nil {
+			break
+		}
+		c := p.mcache
+		if c == nil {
+			continue
+		}
+		mCache_ReleaseAll(c)
+		stackcache_clear(c)
+	}
+}
+
+func updatememstats(stats *gcstats) {
+	if stats != nil {
+		*stats = gcstats{}
+	}
+	for mp := allm; mp != nil; mp = mp.alllink {
+		if stats != nil {
+			src := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(&mp.gcstats))
+			dst := (*[unsafe.Sizeof(gcstats{}) / 8]uint64)(unsafe.Pointer(stats))
+			for i, v := range src {
+				dst[i] += v
+			}
+			mp.gcstats = gcstats{}
+		}
+	}
+
+	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
+	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
+	memstats.sys = memstats.heap_sys + memstats.stacks_sys + memstats.mspan_sys +
+		memstats.mcache_sys + memstats.buckhash_sys + memstats.gc_sys + memstats.other_sys
+
+	// Calculate memory allocator stats.
+	// During program execution we only count number of frees and amount of freed memory.
+	// Current number of alive objects in the heap and amount of alive heap memory
+	// are calculated by scanning all spans.
+	// Total number of mallocs is calculated as number of frees plus number of alive objects.
+	// Similarly, total amount of allocated memory is calculated as amount of freed memory
+	// plus amount of alive heap memory.
+	memstats.alloc = 0
+	memstats.total_alloc = 0
+	memstats.nmalloc = 0
+	memstats.nfree = 0
+	for i := 0; i < len(memstats.by_size); i++ {
+		memstats.by_size[i].nmalloc = 0
+		memstats.by_size[i].nfree = 0
+	}
+
+	// Flush MCache's to MCentral.
+	systemstack(flushallmcaches)
+
+	// Aggregate local stats.
+	cachestats()
+
+	// Scan all spans and count number of alive objects.
+	lock(&mheap_.lock)
+	for i := uint32(0); i < mheap_.nspan; i++ {
+		s := h_allspans[i]
+		if s.state != mSpanInUse {
+			continue
+		}
+		if s.sizeclass == 0 {
+			memstats.nmalloc++
+			memstats.alloc += uint64(s.elemsize)
+		} else {
+			memstats.nmalloc += uint64(s.ref)
+			memstats.by_size[s.sizeclass].nmalloc += uint64(s.ref)
+			memstats.alloc += uint64(s.ref) * uint64(s.elemsize)
+		}
+	}
+	unlock(&mheap_.lock)
+
+	// Aggregate by size class.
+	smallfree := uint64(0)
+	memstats.nfree = mheap_.nlargefree
+	for i := 0; i < len(memstats.by_size); i++ {
+		memstats.nfree += mheap_.nsmallfree[i]
+		memstats.by_size[i].nfree = mheap_.nsmallfree[i]
+		memstats.by_size[i].nmalloc += mheap_.nsmallfree[i]
+		smallfree += uint64(mheap_.nsmallfree[i]) * uint64(class_to_size[i])
+	}
+	memstats.nfree += memstats.tinyallocs
+	memstats.nmalloc += memstats.nfree
+
+	// Calculate derived stats.
+	memstats.total_alloc = uint64(memstats.alloc) + uint64(mheap_.largefree) + smallfree
+	memstats.heap_alloc = memstats.alloc
+	memstats.heap_objects = memstats.nmalloc - memstats.nfree
+}
+
+func gcinit() {
+	if unsafe.Sizeof(workbuf{}) != _WorkbufSize {
+		gothrow("runtime: size of Workbuf is suboptimal")
+	}
+
+	work.markfor = parforalloc(_MaxGcproc)
+	gcpercent = readgogc()
+	gcdatamask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcdata)), uintptr(unsafe.Pointer(&edata))-uintptr(unsafe.Pointer(&data)))
+	gcbssmask = unrollglobgcprog((*byte)(unsafe.Pointer(&gcbss)), uintptr(unsafe.Pointer(&ebss))-uintptr(unsafe.Pointer(&bss)))
+}
+
+// Called from malloc.go using onM, stopping and starting the world handled in caller.
+func gc_m(start_time int64, eagersweep bool) {
+	_g_ := getg()
+	gp := _g_.m.curg
+	casgstatus(gp, _Grunning, _Gwaiting)
+	gp.waitreason = "garbage collection"
+
+	gc(start_time, eagersweep)
+	casgstatus(gp, _Gwaiting, _Grunning)
+}
+
+// Similar to clearcheckmarkbits but works on a single span.
+// It performs two tasks.
+// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
+//    for nibbles with the BoundaryBit set.
+// 2. When used after the checkmark phase it converts BitsPointerMark (11) to BitsPointer (10) and
+//    BitsScalarMark (00) to BitsScalar (01), thus clearing the checkmark mark encoding.
+// For the second case it is possible to restore the BitsDead pattern but since
+// clearmark is a debug tool performance has a lower priority than simplicity.
+// The span is MSpanInUse and the world is stopped.
+func clearcheckmarkbitsspan(s *mspan) {
+	if s.state != _MSpanInUse {
+		print("runtime:clearcheckmarkbitsspan: state=", s.state, "\n")
+		gothrow("clearcheckmarkbitsspan: bad span state")
+	}
+
+	arena_start := mheap_.arena_start
+	cl := s.sizeclass
+	size := s.elemsize
+	var n int32
+	if cl == 0 {
+		n = 1
+	} else {
+		// Chunk full of small blocks
+		npages := class_to_allocnpages[cl]
+		n = npages << _PageShift / int32(size)
+	}
+
+	// MSpan_Sweep has similar code but instead of overloading and
+	// complicating that routine we do a simpler walk here.
+	// Sweep through n objects of given size starting at p.
+	// This thread owns the span now, so it can manipulate
+	// the block bitmap without atomic operations.
+	p := uintptr(s.start) << _PageShift
+
+	// Find bits for the beginning of the span.
+	off := (p - arena_start) / ptrSize
+	bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+	step := size / (ptrSize * wordsPerBitmapByte)
+
+	// The type bit values are:
+	//	00 - BitsDead, for us BitsScalarMarked
+	//	01 - BitsScalar
+	//	10 - BitsPointer
+	//	11 - unused, for us BitsPointerMarked
+	//
+	// When called to prepare for the checkmark phase (checkmark==1),
+	// we change BitsDead to BitsScalar, so that there are no BitsScalarMarked
+	// type bits anywhere.
+	//
+	// The checkmark phase marks by changing BitsScalar to BitsScalarMarked
+	// and BitsPointer to BitsPointerMarked.
+	//
+	// When called to clean up after the checkmark phase (checkmark==0),
+	// we unmark by changing BitsScalarMarked back to BitsScalar and
+	// BitsPointerMarked back to BitsPointer.
+	//
+	// There are two problems with the scheme as just described.
+	// First, the setup rewrites BitsDead to BitsScalar, but the type bits
+	// following a BitsDead are uninitialized and must not be used.
+	// Second, objects that are free are expected to have their type
+	// bits zeroed (BitsDead), so in the cleanup we need to restore
+	// any BitsDeads that were there originally.
+	//
+	// In a one-word object (8-byte allocation on 64-bit system),
+	// there is no difference between BitsScalar and BitsDead, because
+	// neither is a pointer and there are no more words in the object,
+	// so using BitsScalar during the checkmark is safe and mapping
+	// both back to BitsDead during cleanup is also safe.
+	//
+	// In a larger object, we need to be more careful. During setup,
+	// if the type of the first word is BitsDead, we change it to BitsScalar
+	// (as we must) but also initialize the type of the second
+	// word to BitsDead, so that a scan during the checkmark phase
+	// will still stop before seeing the uninitialized type bits in the
+	// rest of the object. The sequence 'BitsScalar BitsDead' never
+	// happens in real type bitmaps - BitsDead is always as early
+	// as possible, so immediately after the last BitsPointer.
+	// During cleanup, if we see a BitsScalar, we can check to see if it
+	// is followed by BitsDead. If so, it was originally BitsDead and
+	// we can change it back.
+
+	if step == 0 {
+		// updating top and bottom nibbles, all boundaries
+		for i := int32(0); i < n/2; i, bitp = i+1, addb(bitp, uintptrMask&-1) {
+			if *bitp&bitBoundary == 0 {
+				gothrow("missing bitBoundary")
+			}
+			b := (*bitp & bitPtrMask) >> 2
+			if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) {
+				*bitp &^= 0x0c // convert to _BitsDead
+			} else if b == _BitsScalarMarked || b == _BitsPointerMarked {
+				*bitp &^= _BitsCheckMarkXor << 2
+			}
+
+			if (*bitp>>gcBits)&bitBoundary == 0 {
+				gothrow("missing bitBoundary")
+			}
+			b = ((*bitp >> gcBits) & bitPtrMask) >> 2
+			if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) {
+				*bitp &^= 0xc0 // convert to _BitsDead
+			} else if b == _BitsScalarMarked || b == _BitsPointerMarked {
+				*bitp &^= _BitsCheckMarkXor << (2 + gcBits)
+			}
+		}
+	} else {
+		// updating bottom nibble for first word of each object
+		for i := int32(0); i < n; i, bitp = i+1, addb(bitp, -step) {
+			if *bitp&bitBoundary == 0 {
+				gothrow("missing bitBoundary")
+			}
+			b := (*bitp & bitPtrMask) >> 2
+
+			if checkmark && b == _BitsDead {
+				// move BitsDead into second word.
+				// set bits to BitsScalar in preparation for checkmark phase.
+				*bitp &^= 0xc0
+				*bitp |= _BitsScalar << 2
+			} else if !checkmark && (b == _BitsScalar || b == _BitsScalarMarked) && *bitp&0xc0 == 0 {
+				// Cleaning up after checkmark phase.
+				// First word is scalar or dead (we forgot)
+				// and second word is dead.
+				// First word might as well be dead too.
+				*bitp &^= 0x0c
+			} else if b == _BitsScalarMarked || b == _BitsPointerMarked {
+				*bitp ^= _BitsCheckMarkXor << 2
+			}
+		}
+	}
+}
+
+// clearcheckmarkbits performs two tasks.
+// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
+//    for nibbles with the BoundaryBit set.
+// 2. When used after the checkmark phase it converts BitsPointerMark (11) to BitsPointer (10) and
+//    BitsScalarMark (00) to BitsScalar (01), thus clearing the checkmark mark encoding.
+// This is a bit expensive but preserves the BitsDead encoding during the normal marking.
+// BitsDead remains valid for every nibble except the ones with BitsBoundary set.
+func clearcheckmarkbits() {
+	for _, s := range work.spans {
+		if s.state == _MSpanInUse {
+			clearcheckmarkbitsspan(s)
+		}
+	}
+}
+
+// Called from malloc.go using onM.
+// The world is stopped. Rerun the scan and mark phases
+// using the bitMarkedCheck bit instead of the
+// bitMarked bit. If the marking encounters a
+// bitMarked bit that is not set then we throw.
+func gccheckmark_m(startTime int64, eagersweep bool) {
+	if !gccheckmarkenable {
+		return
+	}
+
+	if checkmark {
+		gothrow("gccheckmark_m, entered with checkmark already true")
+	}
+
+	checkmark = true
+	clearcheckmarkbits()        // Converts BitsDead to BitsScalar.
+	gc_m(startTime, eagersweep) // turns off checkmark
+	// Work done; fix up the GC bitmap to remove the checkmark bits.
+	clearcheckmarkbits()
+}
+
+func gccheckmarkenable_m() {
+	gccheckmarkenable = true
+}
+
+func gccheckmarkdisable_m() {
+	gccheckmarkenable = false
+}
+
+func finishsweep_m() {
+	// The world is stopped so we should be able to complete the sweeps
+	// quickly.
+	for sweepone() != ^uintptr(0) {
+		sweep.npausesweep++
+	}
+
+	// There may be some other spans being swept concurrently that
+	// we need to wait for. If finishsweep_m is done with the world stopped
+	// this code is not required.
+	sg := mheap_.sweepgen
+	for _, s := range work.spans {
+		if s.sweepgen != sg && s.state == _MSpanInUse {
+			mSpan_EnsureSwept(s)
+		}
+	}
+}
+
+// Scan all of the stacks, greying (or graying if in America) the referents
+// but not blackening them since the mark write barrier isn't installed.
+func gcscan_m() {
+	_g_ := getg()
+
+	// Grab the g that called us and potentially allow rescheduling.
+	// This allows it to be scanned like other goroutines.
+	mastergp := _g_.m.curg
+	casgstatus(mastergp, _Grunning, _Gwaiting)
+	mastergp.waitreason = "garbage collection scan"
+
+	// Span sweeping has been done by finishsweep_m.
+	// Long term we will want to make this goroutine runnable
+	// by placing it onto a scanenqueue state and then calling
+	// runtime·restartg(mastergp) to make it Grunnable.
+	// At the bottom we will want to return this p back to the scheduler.
+	oldphase := gcphase
+
+	// Prepare flag indicating that the scan has not been completed.
+	lock(&allglock)
+	local_allglen := allglen
+	for i := uintptr(0); i < local_allglen; i++ {
+		gp := allgs[i]
+		gp.gcworkdone = false // set to true in gcphasework
+	}
+	unlock(&allglock)
+
+	work.nwait = 0
+	work.ndone = 0
+	work.nproc = 1 // For now do not do this in parallel.
+	gcphase = _GCscan
+	//	ackgcphase is not needed since we are not scanning running goroutines.
+	parforsetup(work.markfor, work.nproc, uint32(_RootCount+local_allglen), nil, false, markroot)
+	parfordo(work.markfor)
+
+	lock(&allglock)
+	// Check that gc work is done.
+	for i := uintptr(0); i < local_allglen; i++ {
+		gp := allgs[i]
+		if !gp.gcworkdone {
+			gothrow("scan missed a g")
+		}
+	}
+	unlock(&allglock)
+
+	gcphase = oldphase
+	casgstatus(mastergp, _Gwaiting, _Grunning)
+	// Let the g that called us continue to run.
+}
+
+// Mark all objects that are known about.
+func gcmark_m() {
+	scanblock(0, 0, nil)
+}
+
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
+func gcinstallmarkwb_m() {
+	gcphase = _GCmark
+}
+
+// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
+// all goroutines see the new barrier.
+func gcinstalloffwb_m() {
+	gcphase = _GCoff
+}
+
+func gc(start_time int64, eagersweep bool) {
+	if _DebugGCPtrs {
+		print("GC start\n")
+	}
+
+	if debug.allocfreetrace > 0 {
+		tracegc()
+	}
+
+	_g_ := getg()
+	_g_.m.traceback = 2
+	t0 := start_time
+	work.tstart = start_time
+
+	var t1 int64
+	if debug.gctrace > 0 {
+		t1 = nanotime()
+	}
+
+	if !checkmark {
+		finishsweep_m() // skip during checkmark debug phase.
+	}
+
+	// Cache runtime.mheap_.allspans in work.spans to avoid conflicts with
+	// resizing/freeing allspans.
+	// New spans can be created while GC progresses, but they are not garbage for
+	// this round:
+	//  - new stack spans can be created even while the world is stopped.
+	//  - new malloc spans can be created during the concurrent sweep
+
+	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
+	lock(&mheap_.lock)
+	// Free the old cached sweep array if necessary.
+	if work.spans != nil && &work.spans[0] != &h_allspans[0] {
+		sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
+	}
+	// Cache the current array for marking.
+	mheap_.gcspans = mheap_.allspans
+	work.spans = h_allspans
+	unlock(&mheap_.lock)
+	oldphase := gcphase
+
+	work.nwait = 0
+	work.ndone = 0
+	work.nproc = uint32(gcprocs())
+	gcphase = _GCmarktermination
+
+	// World is stopped so allglen will not change.
+	for i := uintptr(0); i < allglen; i++ {
+		gp := allgs[i]
+		gp.gcworkdone = false // set to true in gcphasework
+	}
+
+	parforsetup(work.markfor, work.nproc, uint32(_RootCount+allglen), nil, false, markroot)
+	if work.nproc > 1 {
+		noteclear(&work.alldone)
+		helpgc(int32(work.nproc))
+	}
+
+	var t2 int64
+	if debug.gctrace > 0 {
+		t2 = nanotime()
+	}
+
+	gchelperstart()
+	parfordo(work.markfor)
+	scanblock(0, 0, nil)
+
+	if work.full != 0 {
+		gothrow("work.full != 0")
+	}
+	if work.partial != 0 {
+		gothrow("work.partial != 0")
+	}
+
+	gcphase = oldphase
+	var t3 int64
+	if debug.gctrace > 0 {
+		t3 = nanotime()
+	}
+
+	if work.nproc > 1 {
+		notesleep(&work.alldone)
+	}
+
+	shrinkfinish()
+
+	cachestats()
+	// next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
+	// estimate what was live heap size after previous GC (for printing only)
+	heap0 := memstats.next_gc * 100 / (uint64(gcpercent) + 100)
+	// conservatively set next_gc to high value assuming that everything is live
+	// concurrent/lazy sweep will reduce this number while discovering new garbage
+	memstats.next_gc = memstats.heap_alloc + memstats.heap_alloc*uint64(gcpercent)/100
+
+	t4 := nanotime()
+	atomicstore64(&memstats.last_gc, uint64(unixnanotime())) // must be Unix time to make sense to user
+	memstats.pause_ns[memstats.numgc%uint32(len(memstats.pause_ns))] = uint64(t4 - t0)
+	memstats.pause_end[memstats.numgc%uint32(len(memstats.pause_end))] = uint64(t4)
+	memstats.pause_total_ns += uint64(t4 - t0)
+	memstats.numgc++
+	if memstats.debuggc {
+		print("pause ", t4-t0, "\n")
+	}
+
+	if debug.gctrace > 0 {
+		heap1 := memstats.heap_alloc
+		var stats gcstats
+		updatememstats(&stats)
+		if heap1 != memstats.heap_alloc {
+			print("runtime: mstats skew: heap=", heap1, "/", memstats.heap_alloc, "\n")
+			gothrow("mstats skew")
+		}
+		obj := memstats.nmalloc - memstats.nfree
+
+		stats.nprocyield += work.markfor.nprocyield
+		stats.nosyield += work.markfor.nosyield
+		stats.nsleep += work.markfor.nsleep
+
+		print("gc", memstats.numgc, "(", work.nproc, "): ",
+			(t1-t0)/1000, "+", (t2-t1)/1000, "+", (t3-t2)/1000, "+", (t4-t3)/1000, " us, ",
+			heap0>>20, " -> ", heap1>>20, " MB, ",
+			obj, " (", memstats.nmalloc, "-", memstats.nfree, ") objects, ",
+			gcount(), " goroutines, ",
+			len(work.spans), "/", sweep.nbgsweep, "/", sweep.npausesweep, " sweeps, ",
+			stats.nhandoff, "(", stats.nhandoffcnt, ") handoff, ",
+			work.markfor.nsteal, "(", work.markfor.nstealcnt, ") steal, ",
+			stats.nprocyield, "/", stats.nosyield, "/", stats.nsleep, " yields\n")
+		sweep.nbgsweep = 0
+		sweep.npausesweep = 0
+	}
+
+	// See the comment in the beginning of this function as to why we need the following.
+	// Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
+	lock(&mheap_.lock)
+	// Free the old cached mark array if necessary.
+	if work.spans != nil && &work.spans[0] != &h_allspans[0] {
+		sysFree(unsafe.Pointer(&work.spans[0]), uintptr(len(work.spans))*unsafe.Sizeof(work.spans[0]), &memstats.other_sys)
+	}
+
+	if gccheckmarkenable {
+		if !checkmark {
+			// first half of two-pass; don't set up sweep
+			unlock(&mheap_.lock)
+			return
+		}
+		checkmark = false // done checking marks
+	}
+
+	// Cache the current array for sweeping.
+	mheap_.gcspans = mheap_.allspans
+	mheap_.sweepgen += 2
+	mheap_.sweepdone = 0
+	work.spans = h_allspans
+	sweep.spanidx = 0
+	unlock(&mheap_.lock)
+
+	if _ConcurrentSweep && !eagersweep {
+		lock(&gclock)
+		if !sweep.started {
+			go bgsweep()
+			sweep.started = true
+		} else if sweep.parked {
+			sweep.parked = false
+			ready(sweep.g)
+		}
+		unlock(&gclock)
+	} else {
+		// Sweep all spans eagerly.
+		for sweepone() != ^uintptr(0) {
+			sweep.npausesweep++
+		}
+		// Do an additional mProf_GC, because all 'free' events are now real as well.
+		mProf_GC()
+	}
+
+	mProf_GC()
+	_g_.m.traceback = 0
+
+	if _DebugGCPtrs {
+		print("GC end\n")
+	}
+}
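
As a rough, self-contained illustration of the GOGC arithmetic above (a standalone program with made-up variable names, not runtime code): next_gc lets the heap grow by gcpercent relative to the live heap, and heap0 back-computes the previous live-heap estimate from the old target, as used for the gctrace line.

package main

import "fmt"

func main() {
	const gcpercent = 100       // GOGC=100
	liveHeap := uint64(4 << 20) // assume 4 MB live after the previous GC

	// Conservative target: allow the heap to grow by gcpercent% before the next GC.
	nextGC := liveHeap + liveHeap*gcpercent/100
	fmt.Println(nextGC>>20, "MB next_gc") // 8 MB

	// Reverse direction, as in the heap0 estimate above (printing only).
	heap0 := nextGC * 100 / (gcpercent + 100)
	fmt.Println(heap0>>20, "MB estimated live heap") // 4 MB
}
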
+
+func readmemstats_m(stats *MemStats) {
+	updatememstats(nil)
+
+	// The size of the trailing by_size array differs between Go and C:
+	// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
+	memmove(unsafe.Pointer(stats), unsafe.Pointer(&memstats), sizeof_C_MStats)
+
+	// Stack numbers are part of the heap numbers; separate those out for user consumption.
+	stats.StackSys = stats.StackInuse
+	stats.HeapInuse -= stats.StackInuse
+	stats.HeapSys -= stats.StackInuse
+}
+
+//go:linkname readGCStats runtime/debug.readGCStats
+func readGCStats(pauses *[]uint64) {
+	systemstack(func() {
+		readGCStats_m(pauses)
+	})
+}
+
+func readGCStats_m(pauses *[]uint64) {
+	p := *pauses
+	// Calling code in runtime/debug should make the slice large enough.
+	if cap(p) < len(memstats.pause_ns)+3 {
+		gothrow("runtime: short slice passed to readGCStats")
+	}
+
+	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
+	lock(&mheap_.lock)
+
+	n := memstats.numgc
+	if n > uint32(len(memstats.pause_ns)) {
+		n = uint32(len(memstats.pause_ns))
+	}
+
+	// The pause buffer is circular. The most recent pause is at
+	// pause_ns[(numgc-1)%len(pause_ns)]; earlier pauses are found by
+	// walking backward from there. We deliver the times most recent
+	// first (in p[0]).
+	p = p[:cap(p)]
+	for i := uint32(0); i < n; i++ {
+		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
+		p[i] = memstats.pause_ns[j]
+		p[n+i] = memstats.pause_end[j]
+	}
+
+	p[n+n] = memstats.last_gc
+	p[n+n+1] = uint64(memstats.numgc)
+	p[n+n+2] = memstats.pause_total_ns
+	unlock(&mheap_.lock)
+	*pauses = p[:n+n+3]
+}
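
A minimal sketch, using made-up sample values, of how callers in runtime/debug could decode the slice layout that readGCStats_m produces: n pause durations (most recent first), n matching pause end times, then last_gc, numgc, and pause_total_ns.

package main

import "fmt"

func main() {
	// Pretend readGCStats returned data for n = 3 recorded pauses.
	n := 3
	p := []uint64{
		120, 90, 200, // pause durations in ns, most recent first
		1000, 900, 800, // pause end times, matching order
		1000, // last_gc (absolute time of the last collection)
		7,    // numgc (total collections so far)
		410,  // pause_total_ns
	}

	pauses := p[:n]
	ends := p[n : 2*n]
	lastGC, numGC, totalPause := p[2*n], p[2*n+1], p[2*n+2]
	fmt.Println(pauses, ends, lastGC, numGC, totalPause)
}
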
+
+func setGCPercent(in int32) (out int32) {
+	lock(&mheap_.lock)
+	out = gcpercent
+	if in < 0 {
+		in = -1
+	}
+	gcpercent = in
+	unlock(&mheap_.lock)
+	return out
+}
+
+func gchelperstart() {
+	_g_ := getg()
+
+	if _g_.m.helpgc < 0 || _g_.m.helpgc >= _MaxGcproc {
+		gothrow("gchelperstart: bad m->helpgc")
+	}
+	if _g_ != _g_.m.g0 {
+		gothrow("gchelper not running on g0 stack")
+	}
+}
+
+func wakefing() *g {
+	var res *g
+	lock(&finlock)
+	if fingwait && fingwake {
+		fingwait = false
+		fingwake = false
+		res = fing
+	}
+	unlock(&finlock)
+	return res
+}
+
+func addb(p *byte, n uintptr) *byte {
+	return (*byte)(add(unsafe.Pointer(p), n))
+}
+
+// Recursively unrolls GC program in prog.
+// mask is where to store the result.
+// ppos is a pointer to position in mask, in bits.
+// sparse says to generate a 4-bit-per-word mask for the heap (otherwise a 2-bit-per-word mask for data/bss).
+func unrollgcprog1(maskp *byte, prog *byte, ppos *uintptr, inplace, sparse bool) *byte {
+	arena_start := mheap_.arena_start
+	pos := *ppos
+	mask := (*[1 << 30]byte)(unsafe.Pointer(maskp))
+	for {
+		switch *prog {
+		default:
+			gothrow("unrollgcprog: unknown instruction")
+
+		case insData:
+			prog = addb(prog, 1)
+			siz := int(*prog)
+			prog = addb(prog, 1)
+			p := (*[1 << 30]byte)(unsafe.Pointer(prog))
+			for i := 0; i < siz; i++ {
+				v := p[i/_PointersPerByte]
+				v >>= (uint(i) % _PointersPerByte) * _BitsPerPointer
+				v &= _BitsMask
+				if inplace {
+					// Store directly into GC bitmap.
+					off := (uintptr(unsafe.Pointer(&mask[pos])) - arena_start) / ptrSize
+					bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+					shift := (off % wordsPerBitmapByte) * gcBits
+					if shift == 0 {
+						*bitp = 0
+					}
+					*bitp |= v << (shift + 2)
+					pos += ptrSize
+				} else if sparse {
+					// 4-bits per word
+					v <<= (pos % 8) + 2
+					mask[pos/8] |= v
+					pos += gcBits
+				} else {
+					// 2-bits per word
+					v <<= pos % 8
+					mask[pos/8] |= v
+					pos += _BitsPerPointer
+				}
+			}
+			prog = addb(prog, round(uintptr(siz)*_BitsPerPointer, 8)/8)
+
+		case insArray:
+			prog = (*byte)(add(unsafe.Pointer(prog), 1))
+			siz := uintptr(0)
+			for i := uintptr(0); i < ptrSize; i++ {
+				siz = (siz << 8) + uintptr(*(*byte)(add(unsafe.Pointer(prog), ptrSize-i-1)))
+			}
+			prog = (*byte)(add(unsafe.Pointer(prog), ptrSize))
+			var prog1 *byte
+			for i := uintptr(0); i < siz; i++ {
+				prog1 = unrollgcprog1(&mask[0], prog, &pos, inplace, sparse)
+			}
+			if *prog1 != insArrayEnd {
+				gothrow("unrollgcprog: array does not end with insArrayEnd")
+			}
+			prog = (*byte)(add(unsafe.Pointer(prog1), 1))
+
+		case insArrayEnd, insEnd:
+			*ppos = pos
+			return prog
+		}
+	}
+}
+
+// Unrolls GC program prog for data/bss, returns dense GC mask.
+func unrollglobgcprog(prog *byte, size uintptr) bitvector {
+	masksize := round(round(size, ptrSize)/ptrSize*bitsPerPointer, 8) / 8
+	mask := (*[1 << 30]byte)(persistentalloc(masksize+1, 0, &memstats.gc_sys))
+	mask[masksize] = 0xa1
+	pos := uintptr(0)
+	prog = unrollgcprog1(&mask[0], prog, &pos, false, false)
+	if pos != size/ptrSize*bitsPerPointer {
+		print("unrollglobgcprog: bad program size, got ", pos, ", expect ", size/ptrSize*bitsPerPointer, "\n")
+		gothrow("unrollglobgcprog: bad program size")
+	}
+	if *prog != insEnd {
+		gothrow("unrollglobgcprog: program does not end with insEnd")
+	}
+	if mask[masksize] != 0xa1 {
+		gothrow("unrollglobgcprog: overflow")
+	}
+	return bitvector{int32(masksize * 8), &mask[0]}
+}
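
A small standalone check of the masksize arithmetic in unrollglobgcprog, assuming 8-byte words and 2 bits of GC type info per word (the amd64 values); round here is the usual power-of-two round-up.

package main

import "fmt"

// round rounds n up to a multiple of a (a must be a power of two).
func round(n, a uintptr) uintptr {
	return (n + a - 1) &^ (a - 1)
}

func main() {
	const ptrSize = 8        // bytes per word
	const bitsPerPointer = 2 // bits of type info per word in the dense mask

	size := uintptr(40) // a 40-byte global: 5 words
	bits := round(size, ptrSize) / ptrSize * bitsPerPointer
	masksize := round(bits, 8) / 8                  // round bits up to whole bytes
	fmt.Println(bits, "bits ->", masksize, "bytes") // 10 bits -> 2 bytes
}
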
+
+func unrollgcproginplace_m(v unsafe.Pointer, typ *_type, size, size0 uintptr) {
+	pos := uintptr(0)
+	prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
+	for pos != size0 {
+		unrollgcprog1((*byte)(v), prog, &pos, true, true)
+	}
+
+	// Mark first word as bitAllocated.
+	arena_start := mheap_.arena_start
+	off := (uintptr(v) - arena_start) / ptrSize
+	bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+	shift := (off % wordsPerBitmapByte) * gcBits
+	*bitp |= bitBoundary << shift
+
+	// Mark word after last as BitsDead.
+	if size0 < size {
+		off := (uintptr(v) + size0 - arena_start) / ptrSize
+		bitp := (*byte)(unsafe.Pointer(arena_start - off/wordsPerBitmapByte - 1))
+		shift := (off % wordsPerBitmapByte) * gcBits
+		*bitp &= uint8(^(bitPtrMask << shift) | uintptr(bitsDead)<<(shift+2))
+	}
+}
+
+var unroll mutex
+
+// Unrolls GC program in typ.gc[1] into typ.gc[0]
+func unrollgcprog_m(typ *_type) {
+	lock(&unroll)
+	mask := (*byte)(unsafe.Pointer(uintptr(typ.gc[0])))
+	if *mask == 0 {
+		pos := uintptr(8) // skip the unroll flag
+		prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
+		prog = unrollgcprog1(mask, prog, &pos, false, true)
+		if *prog != insEnd {
+			gothrow("unrollgcprog: program does not end with insEnd")
+		}
+		if typ.size/ptrSize%2 != 0 {
+			// repeat the program
+			prog := (*byte)(unsafe.Pointer(uintptr(typ.gc[1])))
+			unrollgcprog1(mask, prog, &pos, false, true)
+		}
+
+		// atomic way to say mask[0] = 1
+		atomicor8(mask, 1)
+	}
+	unlock(&unroll)
+}
+
+// mark the span of memory at v as having n blocks of the given size.
+// if leftover is true, there is left over space at the end of the span.
+func markspan(v unsafe.Pointer, size uintptr, n uintptr, leftover bool) {
+	if uintptr(v)+size*n > mheap_.arena_used || uintptr(v) < mheap_.arena_start {
+		gothrow("markspan: bad pointer")
+	}
+
+	// Find bits of the beginning of the span.
+	off := (uintptr(v) - uintptr(mheap_.arena_start)) / ptrSize
+	if off%wordsPerBitmapByte != 0 {
+		gothrow("markspan: unaligned length")
+	}
+	b := mheap_.arena_start - off/wordsPerBitmapByte - 1
+
+	// Okay to use non-atomic ops here, because we control
+	// the entire span, and each bitmap byte has bits for only
+	// one span, so no other goroutines are changing these bitmap words.
+
+	if size == ptrSize {
+		// Possible only on 64-bit systems (the minimal size class is 8 bytes).
+		// Set memory to 0x11.
+		if (bitBoundary|bitsDead)<<gcBits|bitBoundary|bitsDead != 0x11 {
+			gothrow("markspan: bad bits")
+		}
+		if n%(wordsPerBitmapByte*ptrSize) != 0 {
+			gothrow("markspan: unaligned length")
+		}
+		b = b - n/wordsPerBitmapByte + 1 // find first byte
+		if b%ptrSize != 0 {
+			gothrow("markspan: unaligned pointer")
+		}
+		for i := uintptr(0); i < n; i, b = i+wordsPerBitmapByte*ptrSize, b+ptrSize {
+			*(*uintptr)(unsafe.Pointer(b)) = uintptrMask & 0x1111111111111111 // bitBoundary | bitsDead, repeated
+		}
+		return
+	}
+
+	if leftover {
+		n++ // mark a boundary just past end of last block too
+	}
+	step := size / (ptrSize * wordsPerBitmapByte)
+	for i := uintptr(0); i < n; i, b = i+1, b-step {
+		*(*byte)(unsafe.Pointer(b)) = bitBoundary | bitsDead<<2
+	}
+}
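
As a quick standalone check of the 0x11 pattern asserted above, using the values the expression implies (one nibble of bitmap per word, two words per byte); the constants below are assumptions for the example, not runtime exports.

package main

import "fmt"

func main() {
	const (
		bitBoundary = 1 // assumed: marks the first word of an object
		bitsDead    = 0 // assumed: type bits meaning "no pointers here"
		gcBits      = 4 // assumed: bitmap bits per heap word
	)

	// One nibble per word, two words per bitmap byte, as in markspan.
	b := (bitBoundary|bitsDead)<<gcBits | bitBoundary | bitsDead
	fmt.Printf("%#x\n", b) // 0x11, the byte repeated across a span of 8-byte objects
}
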
+
+// unmark the span of memory at v of length n bytes.
+func unmarkspan(v, n uintptr) {
+	if v+n > mheap_.arena_used || v < mheap_.arena_start {
+		gothrow("markspan: bad pointer")
+	}
+
+	off := (v - mheap_.arena_start) / ptrSize // word offset
+	if off%(ptrSize*wordsPerBitmapByte) != 0 {
+		gothrow("markspan: unaligned pointer")
+	}
+
+	b := mheap_.arena_start - off/wordsPerBitmapByte - 1
+	n /= ptrSize
+	if n%(ptrSize*wordsPerBitmapByte) != 0 {
+		gothrow("unmarkspan: unaligned length")
+	}
+
+	// Okay to use non-atomic ops here, because we control
+	// the entire span, and each bitmap word has bits for only
+	// one span, so no other goroutines are changing these
+	// bitmap words.
+	n /= wordsPerBitmapByte
+	memclr(unsafe.Pointer(b-n+1), n)
+}
+
+func mHeap_MapBits(h *mheap) {
+	// Caller has added extra mappings to the arena.
+	// Add extra mappings of bitmap words as needed.
+	// We allocate extra bitmap pieces in chunks of bitmapChunk.
+	const bitmapChunk = 8192
+
+	n := (h.arena_used - h.arena_start) / (ptrSize * wordsPerBitmapByte)
+	n = round(n, bitmapChunk)
+	n = round(n, _PhysPageSize)
+	if h.bitmap_mapped >= n {
+		return
+	}
+
+	sysMap(unsafe.Pointer(h.arena_start-n), n-h.bitmap_mapped, h.arena_reserved, &memstats.gc_sys)
+	h.bitmap_mapped = n
+}
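
For a sense of scale, a standalone calculation of the bitmap size implied by the mapping above, assuming 8-byte words and two words described per bitmap byte.

package main

import "fmt"

func main() {
	const ptrSize = 8            // bytes per heap word (amd64)
	const wordsPerBitmapByte = 2 // two 4-bit nibbles per bitmap byte

	arena := uintptr(1 << 30) // 1 GB of mapped heap arena
	bitmap := arena / (ptrSize * wordsPerBitmapByte)
	fmt.Println(bitmap>>20, "MB of bitmap") // 64 MB, before chunk and page rounding
}
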
+
+func getgcmaskcb(frame *stkframe, ctxt unsafe.Pointer) bool {
+	target := (*stkframe)(ctxt)
+	if frame.sp <= target.sp && target.sp < frame.varp {
+		*target = *frame
+		return false
+	}
+	return true
+}
+
+// Returns GC type info for object p for testing.
+func getgcmask(p unsafe.Pointer, t *_type, mask **byte, len *uintptr) {
+	*mask = nil
+	*len = 0
+
+	// data
+	if uintptr(unsafe.Pointer(&data)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&edata)) {
+		n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+		*len = n / ptrSize
+		*mask = &make([]byte, *len)[0]
+		for i := uintptr(0); i < n; i += ptrSize {
+			off := (uintptr(p) + i - uintptr(unsafe.Pointer(&data))) / ptrSize
+			bits := (*(*byte)(add(unsafe.Pointer(gcdatamask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask
+			*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+		}
+		return
+	}
+
+	// bss
+	if uintptr(unsafe.Pointer(&bss)) <= uintptr(p) && uintptr(p) < uintptr(unsafe.Pointer(&ebss)) {
+		n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+		*len = n / ptrSize
+		*mask = &make([]byte, *len)[0]
+		for i := uintptr(0); i < n; i += ptrSize {
+			off := (uintptr(p) + i - uintptr(unsafe.Pointer(&bss))) / ptrSize
+			bits := (*(*byte)(add(unsafe.Pointer(gcbssmask.bytedata), off/pointersPerByte)) >> ((off % pointersPerByte) * bitsPerPointer)) & bitsMask
+			*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+		}
+		return
+	}
+
+	// heap
+	var n uintptr
+	var base uintptr
+	if mlookup(uintptr(p), &base, &n, nil) != 0 {
+		*len = n / ptrSize
+		*mask = &make([]byte, *len)[0]
+		for i := uintptr(0); i < n; i += ptrSize {
+			off := (uintptr(base) + i - mheap_.arena_start) / ptrSize
+			b := mheap_.arena_start - off/wordsPerBitmapByte - 1
+			shift := (off % wordsPerBitmapByte) * gcBits
+			bits := (*(*byte)(unsafe.Pointer(b)) >> (shift + 2)) & bitsMask
+			*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+		}
+		return
+	}
+
+	// stack
+	var frame stkframe
+	frame.sp = uintptr(p)
+	_g_ := getg()
+	gentraceback(_g_.m.curg.sched.pc, _g_.m.curg.sched.sp, 0, _g_.m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&frame)), 0)
+	if frame.fn != nil {
+		f := frame.fn
+		targetpc := frame.continpc
+		if targetpc == 0 {
+			return
+		}
+		if targetpc != f.entry {
+			targetpc--
+		}
+		pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+		if pcdata == -1 {
+			return
+		}
+		stkmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+		if stkmap == nil || stkmap.n <= 0 {
+			return
+		}
+		bv := stackmapdata(stkmap, pcdata)
+		size := uintptr(bv.n) / bitsPerPointer * ptrSize
+		n := (*ptrtype)(unsafe.Pointer(t)).elem.size
+		*len = n / ptrSize
+		*mask = &make([]byte, *len)[0]
+		for i := uintptr(0); i < n; i += ptrSize {
+			off := (uintptr(p) + i - frame.varp + size) / ptrSize
+			bits := ((*(*byte)(add(unsafe.Pointer(bv.bytedata), off*bitsPerPointer/8))) >> ((off * bitsPerPointer) % 8)) & bitsMask
+			*(*byte)(add(unsafe.Pointer(*mask), i/ptrSize)) = bits
+		}
+	}
+}
+
+func unixnanotime() int64 {
+	var now int64
+	gc_unixnanotime(&now)
+	return now
+}
diff --git a/src/runtime/mgc0.c b/src/runtime/mgc0.c
deleted file mode 100644
index f37c01a..0000000
--- a/src/runtime/mgc0.c
+++ /dev/null
@@ -1,2682 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Garbage collector (GC).
-//
-// The GC runs concurrently with mutator threads, is type accurate (aka precise), and allows multiple GC
-// threads to run in parallel. It is a concurrent mark and sweep that uses a write barrier. It is
-// non-generational and non-compacting. Allocation is done using size segregated per P allocation 
-// areas to minimize fragmentation while eliminating locks in the common case. 
-//
-// The algorithm decomposes into several steps.
-// This is a high level description of the algorithm being used. For an overview of GC a good
-// place to start is Richard Jones' gchandbook.org.
-// 
-// The algorithm's intellectual heritage includes Dijkstra's on-the-fly algorithm, see
-// Edsger W. Dijkstra, Leslie Lamport, A. J. Martin, C. S. Scholten, and E. F. M. Steffens. 1978. 
-// On-the-fly garbage collection: an exercise in cooperation. Commun. ACM 21, 11 (November 1978), 966-975.
-// For journal quality proofs that these steps are complete, correct, and terminate see
-// Hudson, R., and Moss, J.E.B. Copying Garbage Collection without stopping the world. 
-// Concurrency and Computation: Practice and Experience 15(3-5), 2003. 
-//
-//  0. Set phase = GCscan from GCoff.
-//  1. Wait for all P's to acknowledge phase change.
-//         At this point all goroutines have passed through a GC safepoint and
-//         know we are in the GCscan phase.
-//  2. GC scans all goroutine stacks, mark and enqueues all encountered pointers
-//       (marking avoids most duplicate enqueuing but races may produce duplication which is benign).
-//       Preempted goroutines are scanned before P schedules next goroutine.
-//  3. Set phase = GCmark.
-//  4. Wait for all P's to acknowledge phase change.
-//  5. Now write barrier marks and enqueues black, grey, or white to white pointers.
-//       Malloc still allocates white (non-marked) objects.
-//  6. Meanwhile GC transitively walks the heap marking reachable objects.
-//  7. When GC finishes marking heap, it preempts P's one-by-one and
-//       retakes partial wbufs (filled by write barrier or during a stack scan of the goroutine
-//       currently scheduled on the P).
-//  8. Once the GC has exhausted all available marking work it sets phase = marktermination.
-//  9. Wait for all P's to acknowledge phase change.
-// 10. Malloc now allocates black objects, so number of unmarked reachable objects
-//        monotonically decreases.
-// 11. GC preempts P's one-by-one taking partial wbufs and marks all unmarked yet reachable objects.
-// 12. When GC completes a full cycle over P's and discovers no new grey
-//         objects, (which means all reachable objects are marked) set phase = GCsweep.
-// 13. Wait for all P's to acknowledge phase change.
-// 14. Now malloc allocates white (but sweeps spans before use).
-//         Write barrier becomes nop.
-// 15. GC does background sweeping, see description below.
-// 16. When sweeping is complete set phase to GCoff.
-// 17. When sufficient allocation has taken place replay the sequence starting at 0 above, 
-//         see discussion of GC rate below.
-
-// Changing phases.
-// Phases are changed by setting the gcphase to the next phase and possibly calling ackgcphase.
-// All phase action must be benign in the presence of a change.
-// Starting with GCoff
-// GCoff to GCscan
-//     GCscan scans stacks and globals, greying them, and never marks an object black.
-//     Once all the P's are aware of the new phase they will scan gs on preemption.
-//     This means that the scanning of preempted gs can't start until all the Ps
-//     have acknowledged.
-// GCscan to GCmark
-//     GCMark turns on the write barrier which also only greys objects. No scanning
-//     of objects (making them black) can happen until all the Ps have acknowledged 
-//     the phase change.
-// GCmark to GCmarktermination
-//     The only change here is that we start allocating black so the Ps must acknowledge
-//     the change before we begin the termination algorithm
-// GCmarktermination to GCsweep
-//     Objects currently on the freelist must be marked black for this to work.
-//     Are things on the free lists black or white? How does the sweep phase work?
-
-// Concurrent sweep.
-// The sweep phase proceeds concurrently with normal program execution.
-// The heap is swept span-by-span both lazily (when a goroutine needs another span)
-// and concurrently in a background goroutine (this helps programs that are not CPU bound).
-// However, at the end of the stop-the-world GC phase we don't know the size of the live heap,
-// and so next_gc calculation is tricky and happens as follows.
-// At the end of the stop-the-world phase next_gc is conservatively set based on total
-// heap size; all spans are marked as "needs sweeping".
-// Whenever a span is swept, next_gc is decremented by GOGC*newly_freed_memory.
-// The background sweeper goroutine simply sweeps spans one-by-one bringing next_gc
-// closer to the target value. However, this is not enough to avoid over-allocating memory.
-// Consider that a goroutine wants to allocate a new span for a large object and
-// there are no free swept spans, but there are small-object unswept spans.
-// If the goroutine naively allocates a new span, it can surpass the yet-unknown
-// target next_gc value. In order to prevent such cases (1) when a goroutine needs
-// to allocate a new small-object span, it sweeps small-object spans for the same
-// object size until it frees at least one object; (2) when a goroutine needs to
-// allocate large-object span from heap, it sweeps spans until it frees at least
-// that many pages into heap. Together these two measures ensure that we don't surpass
-// target next_gc value by a large margin. There is an exception: if a goroutine sweeps
-// and frees two nonadjacent one-page spans to the heap, it will allocate a new two-page span,
-// but there can still be other one-page unswept spans which could be combined into a two-page span.
-// It's critical to ensure that no operations proceed on unswept spans (that would corrupt
-// mark bits in GC bitmap). During GC all mcaches are flushed into the central cache,
-// so they are empty. When a goroutine grabs a new span into mcache, it sweeps it.
-// When a goroutine explicitly frees an object or sets a finalizer, it ensures that
-// the span is swept (either by sweeping it, or by waiting for the concurrent sweep to finish).
-// The finalizer goroutine is kicked off only when all spans are swept.
-// When the next GC starts, it sweeps all not-yet-swept spans (if any).
-
-// GC rate.
-// Next GC is after we've allocated an extra amount of memory proportional to
-// the amount already in use. The proportion is controlled by GOGC environment variable
-// (100 by default). If GOGC=100 and we're using 4M, we'll GC again when we get to 8M
-// (this mark is tracked in next_gc variable). This keeps the GC cost in linear 
-// proportion to the allocation cost. Adjusting GOGC just changes the linear constant	
-// (and also the amount of extra memory used).
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "stack.h"
-#include "mgc0.h"
-#include "chan.h"
-#include "race.h"
-#include "type.h"
-#include "typekind.h"
-#include "funcdata.h"
-#include "textflag.h"
-
-enum {
-	Debug		= 0,
-	DebugPtrs	= 0, // if 1, print trace of every pointer load during GC
-	ConcurrentSweep	= 1,
-
-	FinBlockSize	= 4*1024,
-	RootData	= 0,
-	RootBss		= 1,
-	RootFinalizers	= 2,
-	RootSpans	= 3,
-	RootFlushCaches = 4,
-	RootCount	= 5,
-};
-
-// ptrmask for an allocation containing a single pointer.
-static byte oneptr[] = {BitsPointer};
-
-// Initialized from $GOGC.  GOGC=off means no GC.
-extern int32 runtime·gcpercent;
-
-// Holding worldsema grants an M the right to try to stop the world.
-// The procedure is:
-//
-//	runtime·semacquire(&runtime·worldsema);
-//	m->gcing = 1;
-//	runtime·stoptheworld();
-//
-//	... do stuff ...
-//
-//	m->gcing = 0;
-//	runtime·semrelease(&runtime·worldsema);
-//	runtime·starttheworld();
-//
-uint32 runtime·worldsema = 1;
-
-// It is a bug if bits does not have bitBoundary set but
-// there are still some cases where this happens related
-// to stack spans.
-typedef struct Markbits Markbits;
-struct Markbits {
-	byte *bitp; // pointer to the byte holding xbits
- 	byte shift; // bits xbits needs to be shifted to get bits
-	byte xbits; // byte holding all the bits from *bitp
-	byte bits;  // mark and boundary bits relevant to corresponding slot.
-	byte tbits; // pointer||scalar bits relevant to corresponding slot.
-};
-
-extern byte runtime·data[];
-extern byte runtime·edata[];
-extern byte runtime·bss[];
-extern byte runtime·ebss[];
-
-extern byte runtime·gcdata[];
-extern byte runtime·gcbss[];
-
-Mutex	runtime·finlock;	// protects the following variables
-G*	runtime·fing;		// goroutine that runs finalizers
-FinBlock*	runtime·finq;	// list of finalizers that are to be executed
-FinBlock*	runtime·finc;	// cache of free blocks
-static byte finptrmask[FinBlockSize/PtrSize/PointersPerByte];
-bool	runtime·fingwait;
-bool	runtime·fingwake;
-FinBlock	*runtime·allfin;	// list of all blocks
-
-BitVector	runtime·gcdatamask;
-BitVector	runtime·gcbssmask;
-
-Mutex	runtime·gclock;
-
-static Workbuf* getpartialorempty(void);
-static void	putpartial(Workbuf*);
-static Workbuf* getempty(Workbuf*);
-static Workbuf* getfull(Workbuf*);
-static void	putempty(Workbuf*);
-static void	putfull(Workbuf*);
-static Workbuf* handoff(Workbuf*);
-static void	gchelperstart(void);
-static void	flushallmcaches(void);
-static bool	scanframe(Stkframe*, void*);
-static void	scanstack(G*);
-static BitVector	unrollglobgcprog(byte*, uintptr);
-static void     scanblock(byte*, uintptr, byte*);
-static byte*    objectstart(byte*, Markbits*);
-static Workbuf*	greyobject(byte*, Markbits*, Workbuf*);
-static bool     inheap(byte*);
-static bool     shaded(byte*);
-static void     shade(byte*);
-static void	slottombits(byte*, Markbits*);
-static void     atomicxor8(byte*, byte);
-static bool     ischeckmarked(Markbits*);
-static bool     ismarked(Markbits*);
-static void     clearcheckmarkbits(void);
-static void     clearcheckmarkbitsspan(MSpan*);
-
-void runtime·bgsweep(void);
-void runtime·finishsweep_m(void);
-static FuncVal bgsweepv = {runtime·bgsweep};
-
-typedef struct WorkData WorkData;
-struct WorkData {
-	uint64	full;    // lock-free list of full blocks
-	uint64	empty;   // lock-free list of empty blocks
-	uint64  partial; // lock-free list of partially filled blocks
-	byte	pad0[CacheLineSize]; // prevents false-sharing between full/empty and nproc/nwait
-	uint32	nproc;
-	int64	tstart;
-	volatile uint32	nwait;
-	volatile uint32	ndone;
-	Note	alldone;
-	ParFor*	markfor;
-
-	// Copy of mheap.allspans for marker or sweeper.
-	MSpan**	spans;
-	uint32	nspan;
-};
-WorkData runtime·work;
-
-// To help debug the concurrent GC we remark with the world
-// stopped, ensuring that any object encountered has its normal
-// mark bit set. To do this we use an orthogonal bit
-// pattern to indicate the object is marked. The following pattern
-// uses the upper two bits in the object's boundary nibble.
-// 01: scalar  not marked
-// 10: pointer not marked
-// 11: pointer     marked
-// 00: scalar      marked
-// XORing with 01 will flip the pattern from marked to unmarked and vice versa.
-// The higher bit is 1 for pointers and 0 for scalars, whether the object
-// is marked or not.
-// The first nibble no longer holds the bitsDead pattern indicating that
-// there are no more pointers in the object. This information is held
-// in the second nibble.
-
-// When marking an object if the bool checkmark is true one uses the above 
-// encoding, otherwise one uses the bitMarked bit in the lower two bits 
-// of the nibble.
-static bool checkmark = false;
-static bool gccheckmarkenable = true;
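
The checkmark encoding described above can be exercised in a few lines of standalone Go (the constant names are illustrative, taken from the table in the comment): the two type bits follow that table, and XOR with 01 toggles between the unmarked and marked forms.

package main

import "fmt"

func main() {
	// Two-bit type field from the comment above:
	// 01 scalar unmarked, 10 pointer unmarked, 11 pointer marked, 00 scalar marked.
	const (
		bitsScalar        = 0x1
		bitsPointer       = 0x2
		bitsScalarMarked  = 0x0
		bitsPointerMarked = 0x3
		checkMarkXor      = 0x1 // XOR with 01 flips marked <-> unmarked
	)

	fmt.Println(bitsScalar^checkMarkXor == bitsScalarMarked)   // true
	fmt.Println(bitsPointer^checkMarkXor == bitsPointerMarked) // true
	fmt.Println(bitsPointerMarked^checkMarkXor == bitsPointer) // true; the flip is its own inverse
}
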
-
-// Is address b in the known heap? If it doesn't have a valid gcmap
-// this returns false. For example, pointers into stacks will return false.
-static bool
-inheap(byte *b)
-{
-	MSpan *s;
-	pageID k;
-	uintptr x;
-
-	if(b == nil || b < runtime·mheap.arena_start || b >= runtime·mheap.arena_used)
-		return false;
-	// Not a beginning of a block, consult span table to find the block beginning.
-	k = (uintptr)b>>PageShift;
-	x = k;
-	x -= (uintptr)runtime·mheap.arena_start>>PageShift;
-	s = runtime·mheap.spans[x];
-	if(s == nil || k < s->start || b >= s->limit || s->state != MSpanInUse)
-		return false;
-	return true;
-}
-
-// Given an address in the heap return the relevant byte from the gcmap. This routine
-// can be used on addresses to the start of an object or to the interior of an object.
-static void
-slottombits(byte *obj, Markbits *mbits)
-{
-	uintptr off;
-
-	off = (uintptr*)((uintptr)obj&~(PtrSize-1)) - (uintptr*)runtime·mheap.arena_start;
-	mbits->bitp = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-	mbits->shift = (off % wordsPerBitmapByte) * gcBits;
-	mbits->xbits = *mbits->bitp;
-	mbits->bits = (mbits->xbits >> mbits->shift) & bitMask;
-	mbits->tbits = ((mbits->xbits >> mbits->shift) & bitPtrMask) >> 2;
-}
-
-// b is a pointer into the heap.
-// Find the start of the object referred to by b.
-// Set mbits to the associated bits from the bit map.
-// If b is not a valid heap object return nil and
-// undefined values in mbits.
-static byte*
-objectstart(byte *b, Markbits *mbits)
-{
-	byte *obj, *p;
-	MSpan *s;
-	pageID k;
-	uintptr x, size, idx;
-
-	obj = (byte*)((uintptr)b&~(PtrSize-1));
-	for(;;) {
-		slottombits(obj, mbits);
-		if((mbits->bits&bitBoundary) == bitBoundary)
-			break;
-
-		// Not a beginning of a block, consult span table to find the block beginning.
-		k = (uintptr)obj>>PageShift;
-		x = k;
-		x -= (uintptr)runtime·mheap.arena_start>>PageShift;
-		s = runtime·mheap.spans[x];
-		if(s == nil || k < s->start || obj >= s->limit || s->state != MSpanInUse){
-			if(s != nil && s->state == MSpanStack) {
-				return nil; // This is legit.
-			}
-
-			// The following ensures that we are rigorous about what data 
-			// structures hold valid pointers
-			if(0) {
-				// Still happens sometimes. We don't know why.
-				runtime·printf("runtime:objectstart Span weird: obj=%p, k=%p", obj, k);
-				if (s == nil)
-					runtime·printf(" s=nil\n");
-				else
-					runtime·printf(" s->start=%p s->limit=%p, s->state=%d\n", s->start*PageSize, s->limit, s->state);
-				runtime·throw("objectstart: bad pointer in unexpected span");
-			}
-			return nil;
-		}
-		p = (byte*)((uintptr)s->start<<PageShift);
-		if(s->sizeclass != 0) {
-			size = s->elemsize;
-			idx = ((byte*)obj - p)/size;
-			p = p+idx*size;
-		}
-		if(p == obj) {
-			runtime·printf("runtime: failed to find block beginning for %p s=%p s->limit=%p\n",
-				       p, s->start*PageSize, s->limit);
-			runtime·throw("failed to find block beginning");
-		}
-		obj = p;
-	}
-	// if size(obj.firstfield) < PtrSize, the &obj.secondfield could map to the boundary bit
-	// Clear any low bits to get to the start of the object.
-	// greyobject depends on this.
-	return obj;
-}
-
-// Slow for now as we serialize this; since this is on a debug path,
-// speed is not critical at this point.
-static Mutex andlock;
-static void
-atomicand8(byte *src, byte val)
-{
-	runtime·lock(&andlock);
-	*src = *src&val;
-	runtime·unlock(&andlock);
-}
-
-// Mark using the checkmark scheme.
-void
-docheckmark(Markbits *mbits)
-{
-	// xor 01 moves 01(scalar unmarked) to 00(scalar marked) 
-	// and 10(pointer unmarked) to 11(pointer marked)
-	if(mbits->tbits == BitsScalar)
-		atomicand8(mbits->bitp, ~(byte)(BitsCheckMarkXor<<mbits->shift<<2));
-	else if(mbits->tbits == BitsPointer)
-		runtime·atomicor8(mbits->bitp, BitsCheckMarkXor<<mbits->shift<<2);
-
-	// reload bits for ischeckmarked
-	mbits->xbits = *mbits->bitp;
-	mbits->bits = (mbits->xbits >> mbits->shift) & bitMask;
-	mbits->tbits = ((mbits->xbits >> mbits->shift) & bitPtrMask) >> 2;
-
-	return;
-}
-
-// In the default scheme does mbits refer to a marked object.
-static bool
-ismarked(Markbits *mbits)
-{
-	if((mbits->bits&bitBoundary) != bitBoundary)
-		runtime·throw("ismarked: bits should have boundary bit set");
-	return (mbits->bits&bitMarked) == bitMarked;
-}
-
-// In the checkmark scheme does mbits refer to a marked object.
-static bool
-ischeckmarked(Markbits *mbits)
-{
-	if((mbits->bits&bitBoundary) != bitBoundary)
-		runtime·printf("runtime:ischeckmarked: bits should have boundary bit set\n");
-	return mbits->tbits==BitsScalarMarked || mbits->tbits==BitsPointerMarked;
-}
-
-// When in the GCmarktermination phase we allocate black.
-void
-runtime·gcmarknewobject_m(void)
-{
-	Markbits mbits;
-	byte *obj;
-
-	if(runtime·gcphase != GCmarktermination)
-		runtime·throw("marking new object while not in mark termination phase");
-	if(checkmark) // The world should be stopped so this should not happen.
-		runtime·throw("gcmarknewobject called while doing checkmark");
-
-	obj = g->m->ptrarg[0];	
-	slottombits((byte*)((uintptr)obj & (PtrSize-1)), &mbits);
-
-	if((mbits.bits&bitMarked) != 0)
-		return;
-	
-	// Each byte of GC bitmap holds info for two words.
-	// If the current object is larger than two words, or if the object is one word
-	// but the object it shares the byte with is already marked,
-	// then all the possible concurrent updates are trying to set the same bit,
-	// so we can use a non-atomic update.
-	if((mbits.xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) || runtime·work.nproc == 1)
-		*mbits.bitp = mbits.xbits | (bitMarked<<mbits.shift);
-	else
-		runtime·atomicor8(mbits.bitp, bitMarked<<mbits.shift);
-	return;	
-}
-
-// obj is the start of an object with mark mbits.
-// If it isn't already marked, mark it and enqueue into workbuf.
-// Return possibly new workbuf to use.
-static Workbuf*
-greyobject(byte *obj, Markbits *mbits, Workbuf *wbuf) 
-{
-	// obj should be start of allocation, and so must be at least pointer-aligned.
-	if(((uintptr)obj & (PtrSize-1)) != 0)
-		runtime·throw("greyobject: obj not pointer-aligned");
-
-	if(checkmark) {
-		if(!ismarked(mbits)) {
-			MSpan *s;
-			pageID k;
-			uintptr x, i;
-
-			runtime·printf("runtime:greyobject: checkmarks finds unexpected unmarked object obj=%p, mbits->bits=%x, *mbits->bitp=%x\n", obj, mbits->bits, *mbits->bitp);
-
-			k = (uintptr)obj>>PageShift;
-			x = k;
-			x -= (uintptr)runtime·mheap.arena_start>>PageShift;
-			s = runtime·mheap.spans[x];
-			runtime·printf("runtime:greyobject Span: obj=%p, k=%p", obj, k);
-			if (s == nil) {
-				runtime·printf(" s=nil\n");
-			} else {
-				runtime·printf(" s->start=%p s->limit=%p, s->state=%d, s->sizeclass=%d, s->elemsize=%D \n", s->start*PageSize, s->limit, s->state, s->sizeclass, s->elemsize);
-				for(i=0; i<s->sizeclass; i++) {
-					runtime·printf(" ((uintptr*)obj)[%D]=%p\n", i, ((uintptr*)obj)[i]);
-				}
-			}
-			runtime·throw("checkmark found unmarked object");
-		}
-		if(ischeckmarked(mbits))
-			return wbuf;
-		docheckmark(mbits);
-		if(!ischeckmarked(mbits)) {
-			runtime·printf("mbits xbits=%x bits=%x tbits=%x shift=%d\n", mbits->xbits, mbits->bits, mbits->tbits, mbits->shift);
-			runtime·throw("docheckmark and ischeckmarked disagree");
-		}
-	} else {
-		// If marked we have nothing to do.
-		if((mbits->bits&bitMarked) != 0)
-			return wbuf;
-
-		// Each byte of GC bitmap holds info for two words.
-		// If the current object is larger than two words, or if the object is one word
-		// but the object it shares the byte with is already marked,
-		// then all the possible concurrent updates are trying to set the same bit,
-		// so we can use a non-atomic update.
-		if((mbits->xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) || runtime·work.nproc == 1)
-			*mbits->bitp = mbits->xbits | (bitMarked<<mbits->shift);
-		else
-			runtime·atomicor8(mbits->bitp, bitMarked<<mbits->shift);
-	}
-
-	if (!checkmark && (((mbits->xbits>>(mbits->shift+2))&BitsMask) == BitsDead))
-		return wbuf;  // noscan object
-
-	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
-	// seems like a nice optimization that can be added back in.
-	// There needs to be time between the PREFETCH and the use.
-	// Previously we put the obj in an 8 element buffer that is drained at a rate
-	// to give the PREFETCH time to do its work.
-	// Use of PREFETCHNTA might be more appropriate than PREFETCH
-
-	// If workbuf is full, obtain an empty one.
-	if(wbuf->nobj >= nelem(wbuf->obj)) {
-		wbuf = getempty(wbuf);
-	}
-
-	wbuf->obj[wbuf->nobj] = obj;
-	wbuf->nobj++;
-	return wbuf;                    
-}
-
-// Scan the object b of size n, adding pointers to wbuf.
-// Return possibly new wbuf to use.
-// If ptrmask != nil, it specifies where pointers are in b.
-// If ptrmask == nil, the GC bitmap should be consulted.
-// In this case, n may be an overestimate of the size; the GC bitmap
-// must also be used to make sure the scan stops at the end of b.
-static Workbuf*
-scanobject(byte *b, uintptr n, byte *ptrmask, Workbuf *wbuf)
-{
-	byte *obj, *arena_start, *arena_used, *ptrbitp;
-	uintptr i, j;
-	int32 bits;
-	Markbits mbits;
-
-	arena_start = (byte*)runtime·mheap.arena_start;
-	arena_used = runtime·mheap.arena_used;
-	ptrbitp = nil;
-
-	// Find bits of the beginning of the object.
-	if(ptrmask == nil) {
-		b = objectstart(b, &mbits);
-		if(b == nil)
-			return wbuf;
-		ptrbitp = mbits.bitp; //arena_start - off/wordsPerBitmapByte - 1;
-	}
-	for(i = 0; i < n; i += PtrSize) {
-		// Find bits for this word.
-		if(ptrmask != nil) {
-			// dense mask (stack or data)
-			bits = (ptrmask[(i/PtrSize)/4]>>(((i/PtrSize)%4)*BitsPerPointer))&BitsMask;
-		} else {
-			// Check if we have reached end of span.
-			// n is an overestimate of the size of the object.
-			if((((uintptr)b+i)%PageSize) == 0 &&
-				runtime·mheap.spans[(b-arena_start)>>PageShift] != runtime·mheap.spans[(b+i-arena_start)>>PageShift])
-				break;
-			// Consult GC bitmap.
-			bits = *ptrbitp;
-			if(wordsPerBitmapByte != 2)
-				runtime·throw("alg doesn't work for wordsPerBitmapByte != 2");
-			j = ((uintptr)b+i)/PtrSize & 1; // j indicates upper nibble or lower nibble
-			bits >>= gcBits*j;
-			if(i == 0)
-				bits &= ~bitBoundary;
-			ptrbitp -= j;
-		
-			if((bits&bitBoundary) != 0 && i != 0)
-				break; // reached beginning of the next object
-			bits = (bits&bitPtrMask)>>2; // bits refer to the type bits.
-			
-			if(i != 0 && bits == BitsDead) // BitsDead in first nibble not valid during checkmark
-				break; // reached no-scan part of the object
-		}
-
-		if(bits <= BitsScalar) // Bits Scalar ||
-			               // BitsDead    ||       // default encoding 
-			               // BitsScalarMarked     // checkmark encoding
-				continue;
-
-		if((bits&BitsPointer) != BitsPointer) {
-			runtime·printf("gc checkmark=%d, b=%p ptrmask=%p, mbits.bitp=%p, mbits.xbits=%x, bits=%x\n", checkmark, b, ptrmask, mbits.bitp, mbits.xbits, bits);
-			runtime·throw("unexpected garbage collection bits");
-		}
-
-		obj = *(byte**)(b+i);
-		// At this point we have extracted the next potential pointer.
-		// Check if it points into heap.
-		if(obj == nil || obj < arena_start || obj >= arena_used)
-			continue;
-		// Mark the object. return some important bits.
-		// If we combine the following two routines we don't have to pass mbits or obj around.
-		obj = objectstart(obj, &mbits);
-		// In the case of the span being MSpan_Stack mbits is useless and will not have 
-		// the boundary bit set. It does not need to be greyed since it will be
-		// scanned using the scan stack mechanism.
-		if(obj == nil)
-			continue;
-		wbuf = greyobject(obj, &mbits, wbuf);
-	}
-	return wbuf;
-}
-
-// scanblock starts by scanning b as scanobject would.
-// If the gcphase is GCscan, that's all scanblock does.
-// Otherwise it traverses some fraction of the pointers it found in b, recursively.
-// As a special case, scanblock(nil, 0, nil) means to scan previously queued work,
-// stopping only when no work is left in the system.
-static void
-scanblock(byte *b, uintptr n, byte *ptrmask)
-{
-	Workbuf *wbuf;
-	bool keepworking;
-
-	wbuf = getpartialorempty();
-	if(b != nil) {
-		wbuf = scanobject(b, n, ptrmask, wbuf);
-		if(runtime·gcphase == GCscan) {
-			if(inheap(b) && !ptrmask)
-				// b is in heap, we are in GCscan so there should be a ptrmask.
-				runtime·throw("scanblock: In GCscan phase and inheap is true.");
-			// GCscan only goes one level deep since mark wb not turned on.
-			putpartial(wbuf);
-			return;
-		}
-	}
-	if(runtime·gcphase == GCscan) {
-		runtime·throw("scanblock: In GCscan phase but no b passed in.");
-	}
-	
-	keepworking = b == nil;
-
-	// ptrmask can have 2 possible values:
-	// 1. nil - obtain pointer mask from GC bitmap.
-	// 2. pointer to a compact mask (for stacks and data).
-	for(;;) {
-		if(wbuf->nobj == 0) {
-			if(!keepworking) {
-				putempty(wbuf);
-				return;
-			}
-			// Refill workbuf from global queue.
-			wbuf = getfull(wbuf);
-			if(wbuf == nil) // nil means out of work barrier reached
-				return;
-
-			if(wbuf->nobj<=0) {
-				runtime·throw("runtime:scanblock getfull returns empty buffer");
-			}
-
-		}
-
-		// If another proc wants a pointer, give it some.
-		if(runtime·work.nwait > 0 && wbuf->nobj > 4 && runtime·work.full == 0) {
-			wbuf = handoff(wbuf);
-		}
-
-		// This might be a good place to add prefetch code...
-		// if(wbuf->nobj > 4) {
-		//         PREFETCH(wbuf->obj[wbuf->nobj - 3];
-		//  }
-		--wbuf->nobj;
-		b = wbuf->obj[wbuf->nobj];
-		wbuf = scanobject(b, runtime·mheap.arena_used - b, nil, wbuf);
-	}
-}
-
-static void
-markroot(ParFor *desc, uint32 i)
-{
-	FinBlock *fb;
-	MSpan *s;
-	uint32 spanidx, sg;
-	G *gp;
-	void *p;
-	uint32 status;
-	bool restart;
- 
-	USED(&desc);
-	// Note: if you add a case here, please also update heapdump.c:dumproots.
-	switch(i) {
-	case RootData:
-		scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
-		break;
-
-	case RootBss:
-		scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
-		break;
-
-	case RootFinalizers:
-		for(fb=runtime·allfin; fb; fb=fb->alllink)
-			scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
-		break;
-
-	case RootSpans:
-		// mark MSpan.specials
-		sg = runtime·mheap.sweepgen;
-		for(spanidx=0; spanidx<runtime·work.nspan; spanidx++) {
-			Special *sp;
-			SpecialFinalizer *spf;
-
-			s = runtime·work.spans[spanidx];
-			if(s->state != MSpanInUse)
-				continue;
-			if(!checkmark && s->sweepgen != sg) { 
-				// sweepgen was updated (+2) during non-checkmark GC pass
-				runtime·printf("sweep %d %d\n", s->sweepgen, sg);
-				runtime·throw("gc: unswept span");
-			}
-			for(sp = s->specials; sp != nil; sp = sp->next) {
-				if(sp->kind != KindSpecialFinalizer)
-					continue;
-				// don't mark finalized object, but scan it so we
-				// retain everything it points to.
-				spf = (SpecialFinalizer*)sp;
-				// A finalizer can be set for an inner byte of an object, find object beginning.
-				p = (void*)((s->start << PageShift) + spf->special.offset/s->elemsize*s->elemsize);
-				if(runtime·gcphase != GCscan)
-					scanblock(p, s->elemsize, nil); // Scanned during mark phase
-				scanblock((void*)&spf->fn, PtrSize, oneptr);
-			}
-		}
-		break;
-
-	case RootFlushCaches:
-		if (runtime·gcphase != GCscan) // Do not flush mcaches during GCscan phase.
-			flushallmcaches();
-		break;
-
-	default:
-		// the rest is scanning goroutine stacks
-		if(i - RootCount >= runtime·allglen)
-			runtime·throw("markroot: bad index");
-		gp = runtime·allg[i - RootCount];
-		// remember when we've first observed the G blocked
-		// needed only to output in traceback
-		status = runtime·readgstatus(gp); // We are not in a scan state
-		if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
-			gp->waitsince = runtime·work.tstart;
-		// Shrink a stack if not much of it is being used but not in the scan phase.
-		if (runtime·gcphase != GCscan) // Do not shrink during GCscan phase.
-			runtime·shrinkstack(gp);
-		if(runtime·readgstatus(gp) == Gdead)
-			gp->gcworkdone = true;
-		else 
-			gp->gcworkdone = false; 
-		restart = runtime·stopg(gp);
-
-		// goroutine will scan its own stack when it stops running.
-		// Wait until it has.
-		while(runtime·readgstatus(gp) == Grunning && !gp->gcworkdone) {
-		}
-
-		// scanstack(gp) is done as part of gcphasework
-		// But to make sure we have finished we need to make sure that
-		// the stack traps have all responded, so drop into
-		// this while loop until they respond.
-		while(!gp->gcworkdone){
-			status = runtime·readgstatus(gp);
-			if(status == Gdead) {
-				gp->gcworkdone = true; // scan is a noop
-				break;
-				//do nothing, scan not needed. 
-			}
-			if(status == Gwaiting || status == Grunnable)
-				restart = runtime·stopg(gp);
-		}
-		if(restart)
-			runtime·restartg(gp);
-		break;
-	}
-}
-
-// Get an empty work buffer off the work.empty list,
-// allocating new buffers as needed.
-static Workbuf*
-getempty(Workbuf *b)
-{
-	if(b != nil) {
-		putfull(b);
-		b = nil;
-	}
-	if(runtime·work.empty)
-		b = (Workbuf*)runtime·lfstackpop(&runtime·work.empty);
-
-	if(b && b->nobj != 0) {
-		runtime·printf("m%d: getempty: popped b=%p with non-zero b->nobj=%d\n", g->m->id, b, (uint32)b->nobj);
-		runtime·throw("getempty: workbuffer not empty, b->nobj not 0");
-	}
-	if(b == nil) {
-		b = runtime·persistentalloc(sizeof(*b), CacheLineSize, &mstats.gc_sys);
-		b->nobj = 0;
-	}
-	return b;
-}
-
-static void
-putempty(Workbuf *b)
-{
-	if(b->nobj != 0) {
-		runtime·throw("putempty: b->nobj not 0\n");
-	}
-	runtime·lfstackpush(&runtime·work.empty, &b->node);
-}
-
-// Put a full or partially full workbuf on the full list.
-static void
-putfull(Workbuf *b)
-{
-	if(b->nobj <= 0) {
-		runtime·throw("putfull: b->nobj <= 0\n");
-	}
-	runtime·lfstackpush(&runtime·work.full, &b->node);
-}
-
-// Get a partially empty work buffer;
-// if none are available get an empty one.
-static Workbuf*
-getpartialorempty(void)
-{
-	Workbuf *b;
-
-	b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
-	if(b == nil)
-		b = getempty(nil);
-	return b;
-}
-
-static void
-putpartial(Workbuf *b)
-{
-
-	if(b->nobj == 0)
-		runtime·lfstackpush(&runtime·work.empty, &b->node);
-	else if (b->nobj < nelem(b->obj))
-		runtime·lfstackpush(&runtime·work.partial, &b->node);
-	else if (b->nobj == nelem(b->obj))
-		runtime·lfstackpush(&runtime·work.full, &b->node);
-	else {
-		runtime·printf("b=%p, b->nobj=%d, nelem(b->obj)=%d\n", b, (uint32)b->nobj, (uint32)nelem(b->obj));
-		runtime·throw("putpartial: bad Workbuf b->nobj");
-	}
-}
-
-// Get a full work buffer off the work.full or a partially
-// filled one off the work.partial list. If nothing is available
-// wait until all the other gc helpers have finished and then
-// return nil.
-// getfull acts as a barrier for work.nproc helpers. As long as one
-// gchelper is actively marking objects it
-// may create a workbuffer that the other helpers can work on.
-// The for loop either exits when a work buffer is found
-// or when _all_ of the work.nproc GC helpers are in the loop 
-// looking for work and thus not capable of creating new work.
-// This is in fact the termination condition for the STW mark 
-// phase.
-static Workbuf*
-getfull(Workbuf *b)
-{
-	int32 i;
-
-	if(b != nil)
-		putempty(b);
-
-	b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
-	if(b==nil)
-		b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
-	if(b != nil || runtime·work.nproc == 1)
-		return b;
-
-	runtime·xadd(&runtime·work.nwait, +1);
-	for(i=0;; i++) {
-		if(runtime·work.full != 0) {
-			runtime·xadd(&runtime·work.nwait, -1);
-			b = (Workbuf*)runtime·lfstackpop(&runtime·work.full);
-			if(b==nil)
-				b = (Workbuf*)runtime·lfstackpop(&runtime·work.partial);
-			if(b != nil) 
-				return b;
-			runtime·xadd(&runtime·work.nwait, +1);
-		}
-		if(runtime·work.nwait == runtime·work.nproc)
-			return nil;
-		if(i < 10) {
-			g->m->gcstats.nprocyield++;
-			runtime·procyield(20);
-		} else if(i < 20) {
-			g->m->gcstats.nosyield++;
-			runtime·osyield();
-		} else {
-			g->m->gcstats.nsleep++;
-			runtime·usleep(100);
-		}
-	}
-}
-
-static Workbuf*
-handoff(Workbuf *b)
-{
-	int32 n;
-	Workbuf *b1;
-
-	// Make new buffer with half of b's pointers.
-	b1 = getempty(nil);
-	n = b->nobj/2;
-	b->nobj -= n;
-	b1->nobj = n;
-	runtime·memmove(b1->obj, b->obj+b->nobj, n*sizeof b1->obj[0]);
-	g->m->gcstats.nhandoff++;
-	g->m->gcstats.nhandoffcnt += n;
-
-	// Put b on full list - let first half of b get stolen.
-	runtime·lfstackpush(&runtime·work.full, &b->node);
-	return b1;
-}
-
-BitVector
-runtime·stackmapdata(StackMap *stackmap, int32 n)
-{
-	if(n < 0 || n >= stackmap->n)
-		runtime·throw("stackmapdata: index out of range");
-	return (BitVector){stackmap->nbit, stackmap->bytedata + n*((stackmap->nbit+31)/32*4)};
-}
-
-// Scan a stack frame: local variables and function arguments/results.
-static bool
-scanframe(Stkframe *frame, void *unused)
-{
-	Func *f;
-	StackMap *stackmap;
-	BitVector bv;
-	uintptr size, minsize;
-	uintptr targetpc;
-	int32 pcdata;
-
-	USED(unused);
-	f = frame->fn;
-	targetpc = frame->continpc;
-	if(targetpc == 0) {
-		// Frame is dead.
-		return true;
-	}
-	if(Debug > 1)
-		runtime·printf("scanframe %s\n", runtime·funcname(f));
-	if(targetpc != f->entry)
-		targetpc--;
-	pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
-	if(pcdata == -1) {
-		// We do not have a valid pcdata value but there might be a
-		// stackmap for this function.  It is likely that we are looking
-		// at the function prologue, assume so and hope for the best.
-		pcdata = 0;
-	}
-
-	// Scan local variables if stack frame has been allocated.
-	size = frame->varp - frame->sp;
-	if(thechar != '6' && thechar != '8')
-		minsize = sizeof(uintptr);
-	else
-		minsize = 0;
-	if(size > minsize) {
-		stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-		if(stackmap == nil || stackmap->n <= 0) {
-			runtime·printf("runtime: frame %s untyped locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
-			runtime·throw("missing stackmap");
-		}
-
-		// Locals bitmap information, scan just the pointers in locals.
-		if(pcdata < 0 || pcdata >= stackmap->n) {
-			// don't know where we are
-			runtime·printf("runtime: pcdata is %d and %d locals stack map entries for %s (targetpc=%p)\n",
-				pcdata, stackmap->n, runtime·funcname(f), targetpc);
-			runtime·throw("scanframe: bad symbol table");
-		}
-		bv = runtime·stackmapdata(stackmap, pcdata);
-		size = (bv.n * PtrSize) / BitsPerPointer;
-		scanblock((byte*)(frame->varp - size), bv.n/BitsPerPointer*PtrSize, bv.bytedata);
-	}
-
-	// Scan arguments.
-	if(frame->arglen > 0) {
-		if(frame->argmap != nil)
-			bv = *frame->argmap;
-		else {
-			stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
-			if(stackmap == nil || stackmap->n <= 0) {
-				runtime·printf("runtime: frame %s untyped args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
-				runtime·throw("missing stackmap");
-			}
-			if(pcdata < 0 || pcdata >= stackmap->n) {
-				// don't know where we are
-				runtime·printf("runtime: pcdata is %d and %d args stack map entries for %s (targetpc=%p)\n",
-					pcdata, stackmap->n, runtime·funcname(f), targetpc);
-				runtime·throw("scanframe: bad symbol table");
-			}
- 			bv = runtime·stackmapdata(stackmap, pcdata);
-		}
-		scanblock((byte*)frame->argp, bv.n/BitsPerPointer*PtrSize, bv.bytedata);
- 	}
- 	return true;
-}
-
-static void
-scanstack(G *gp)
-{
-	M *mp;
-	bool (*fn)(Stkframe*, void*);
-
-	if(runtime·readgstatus(gp)&Gscan == 0) {
-		runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
-		runtime·throw("mark - bad status");
-	}
-
-	switch(runtime·readgstatus(gp)&~Gscan) {
-	default:
-		runtime·printf("runtime: gp=%p, goid=%D, gp->atomicstatus=%d\n", gp, gp->goid, runtime·readgstatus(gp));
-		runtime·throw("mark - bad status");
-	case Gdead:
-		return;
-	case Grunning:
-		runtime·throw("scanstack: - goroutine not stopped");
-	case Grunnable:
-	case Gsyscall:
-	case Gwaiting:
-		break;
-	}
-
-	if(gp == g)
-		runtime·throw("can't scan our own stack");
-	if((mp = gp->m) != nil && mp->helpgc)
-		runtime·throw("can't scan gchelper stack");
-
-	fn = scanframe;
-	runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &fn, nil, 0);
-	runtime·tracebackdefers(gp, &fn, nil);
-}
-
-// If the slot is grey or black return true, if white return false.
-// If the slot is not in the known heap and thus does not have a valid GC bitmap then
-// it is considered grey. Globals and stacks can hold such slots.
-// The slot is grey if its mark bit is set and it is enqueued to be scanned.
-// The slot is black if it has already been scanned.
-// It is white if it has a valid mark bit and the bit is not set. 
-static bool
-shaded(byte *slot)
-{
-	Markbits mbits;
-	byte *valid;
-
-	if(!inheap(slot)) // non-heap slots considered grey
-		return true;
-
-	valid = objectstart(slot, &mbits);
-	if(valid == nil)
-		return true;
-
-	if(checkmark)
-		return ischeckmarked(&mbits);
-
-	return (mbits.bits&bitMarked) != 0;
-}
-
-// Shade the object if it isn't already.
-// The object is not nil and known to be in the heap.
-static void
-shade(byte *b)
-{
-	byte *obj;
-	Workbuf *wbuf;
-	Markbits mbits;
-	
-	if(!inheap(b))
-		runtime·throw("shade: passed an address not in the heap");
-	
-	wbuf = getpartialorempty();
-	// Mark the object, return some important bits.
-	// If we combine the following two routines we don't have to pass mbits or obj around.
-	obj = objectstart(b, &mbits);
-	if(obj != nil)
-		wbuf = greyobject(obj, &mbits, wbuf); // augments the wbuf
-
-	putpartial(wbuf);
-	return;
-}
-
-// This is the Dijkstra barrier coarsened to always shade the ptr (dst) object.
-// The original Dijkstra barrier only shaded ptrs being placed in black slots.
-//
-// Shade indicates that it has seen a white pointer by adding the referent
-// to wbuf as well as marking it.
-//
-// slot is the destination (dst) in go code
-// ptr is the value that goes into the slot (src) in the go code
-//
-// Dijkstra pointed out that maintaining the no-black-to-white
-// pointers invariant means that white-to-white pointers need not
-// be noted by the write barrier. Furthermore, if either
-// white object dies before it is reached by the
-// GC then the object can be collected during this GC cycle
-// instead of waiting for the next cycle. Unfortunately the cost of
-// ensuring that the object holding the slot doesn't concurrently
-// change to black without the mutator noticing seems prohibitive.
-//
-// Consider the following example where the mutator writes into 
-// a slot and then loads the slot's mark bit while the GC thread 
-// writes to the slot's mark bit and then as part of scanning reads 
-// the slot.
-// 
-// Initially both [slot] and [slotmark] are 0 (nil)
-// Mutator thread          GC thread
-// st [slot], ptr          st [slotmark], 1
-// 
-// ld r1, [slotmark]       ld r2, [slot]
-//
-// This is a classic example of independent reads of independent writes,
-// aka IRIW. The question is if r1==r2==0 is allowed and for most HW the 
-// answer is yes without inserting memory barriers between the st and the ld.
-// These barriers are expensive so we have decided that we will 
-// always grey the ptr object regardless of the slot's color.
-// 
-void
-runtime·gcmarkwb_m()
-{
-	byte *ptr;
-	ptr = (byte*)g->m->scalararg[1];
-
-	switch(runtime·gcphase) {
-	default:
-		runtime·throw("gcphasework in bad gcphase");
-	case GCoff:
-	case GCquiesce:
-	case GCstw:
-	case GCsweep:
-	case GCscan:
-		break;
-	case GCmark:
-		if(ptr != nil && inheap(ptr))
-			shade(ptr);
-		break;
-	case GCmarktermination:
-		if(ptr != nil && inheap(ptr))
-			shade(ptr);
-		break;
-	}
-}
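
As an aside, here is a minimal Go sketch of the coarsened Dijkstra barrier described above. It is not the runtime's implementation; gcphase, inheap, and shade are local stand-ins defined only for the illustration:

	package main

	import "unsafe"

	const (
		gcOff = iota
		gcMark
		gcMarkTermination
	)

	var gcphase = gcMark

	// Placeholders standing in for the runtime's helpers.
	func inheap(p unsafe.Pointer) bool { return p != nil }
	func shade(p unsafe.Pointer)       {}

	// writebarrier always shades the incoming pointer during the mark
	// phases and never loads the destination slot's mark bit, so no
	// fence is needed to close the IRIW window discussed above.
	func writebarrier(slot *unsafe.Pointer, ptr unsafe.Pointer) {
		if gcphase == gcMark || gcphase == gcMarkTermination {
			if ptr != nil && inheap(ptr) {
				shade(ptr)
			}
		}
		*slot = ptr
	}

	func main() {
		var dst unsafe.Pointer
		x := new(int)
		writebarrier(&dst, unsafe.Pointer(x))
		_ = dst
	}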
-
-// The gp has been moved to a GC safepoint. GC phase specific
-// work is done here. 
-void
-runtime·gcphasework(G *gp)
-{
-	switch(runtime·gcphase) {
-	default:
-		runtime·throw("gcphasework in bad gcphase");
-	case GCoff:
-	case GCquiesce:
-	case GCstw:
-	case GCsweep:
-		// No work.
-		break;
-	case GCscan:
-		// scan the stack, mark the objects, put pointers in work buffers
-		// hanging off the P where this is being run.
-		scanstack(gp);
-		break;
-	case GCmark:
-		break;
-	case GCmarktermination:
-		scanstack(gp);
-		// All available mark work will be emptied before returning.
-		break;
-	}
-	gp->gcworkdone = true;
-}
-
-#pragma dataflag NOPTR
-static byte finalizer1[] = {
-	// Each Finalizer is 5 words, ptr ptr uintptr ptr ptr.
-	// Each byte describes 4 words.
-	// Need 4 Finalizers described by 5 bytes before pattern repeats:
-	//	ptr ptr uintptr ptr ptr
-	//	ptr ptr uintptr ptr ptr
-	//	ptr ptr uintptr ptr ptr
-	//	ptr ptr uintptr ptr ptr
-	// aka
-	//	ptr ptr uintptr ptr
-	//	ptr ptr ptr uintptr
-	//	ptr ptr ptr ptr
-	//	uintptr ptr ptr ptr
-	//	ptr uintptr ptr ptr
-	// Assumptions about Finalizer layout checked below.
-	BitsPointer | BitsPointer<<2 | BitsScalar<<4 | BitsPointer<<6,
-	BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsScalar<<6,
-	BitsPointer | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
-	BitsScalar | BitsPointer<<2 | BitsPointer<<4 | BitsPointer<<6,
-	BitsPointer | BitsScalar<<2 | BitsPointer<<4 | BitsPointer<<6,
-};
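
(To spell out the repetition arithmetic: each bitmap byte describes 4 words and each Finalizer is 5 words, so the pattern only lines up again after lcm(4,5) = 20 words, which is exactly the 4 Finalizers and 5 mask bytes listed above.)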
-
-void
-runtime·queuefinalizer(byte *p, FuncVal *fn, uintptr nret, Type *fint, PtrType *ot)
-{
-	FinBlock *block;
-	Finalizer *f;
-	int32 i;
-
-	runtime·lock(&runtime·finlock);
-	if(runtime·finq == nil || runtime·finq->cnt == runtime·finq->cap) {
-		if(runtime·finc == nil) {
-			runtime·finc = runtime·persistentalloc(FinBlockSize, 0, &mstats.gc_sys);
-			runtime·finc->cap = (FinBlockSize - sizeof(FinBlock)) / sizeof(Finalizer) + 1;
-			runtime·finc->alllink = runtime·allfin;
-			runtime·allfin = runtime·finc;
-			if(finptrmask[0] == 0) {
-				// Build pointer mask for Finalizer array in block.
-				// Check assumptions made in finalizer1 array above.
-				if(sizeof(Finalizer) != 5*PtrSize ||
-					offsetof(Finalizer, fn) != 0 ||
-					offsetof(Finalizer, arg) != PtrSize ||
-					offsetof(Finalizer, nret) != 2*PtrSize ||
-					offsetof(Finalizer, fint) != 3*PtrSize ||
-					offsetof(Finalizer, ot) != 4*PtrSize ||
-					BitsPerPointer != 2) {
-					runtime·throw("finalizer out of sync");
-				}
-				for(i=0; i<nelem(finptrmask); i++)
-					finptrmask[i] = finalizer1[i%nelem(finalizer1)];
-			}
-		}
-		block = runtime·finc;
-		runtime·finc = block->next;
-		block->next = runtime·finq;
-		runtime·finq = block;
-	}
-	f = &runtime·finq->fin[runtime·finq->cnt];
-	runtime·finq->cnt++;
-	f->fn = fn;
-	f->nret = nret;
-	f->fint = fint;
-	f->ot = ot;
-	f->arg = p;
-	runtime·fingwake = true;
-	runtime·unlock(&runtime·finlock);
-}
-
-void
-runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*))
-{
-	FinBlock *fb;
-	Finalizer *f;
-	uintptr i;
-
-	for(fb = runtime·allfin; fb; fb = fb->alllink) {
-		for(i = 0; i < fb->cnt; i++) {
-			f = &fb->fin[i];
-			callback(f->fn, f->arg, f->nret, f->fint, f->ot);
-		}
-	}
-}
-
-// Returns only when span s has been swept.
-void
-runtime·MSpan_EnsureSwept(MSpan *s)
-{
-	uint32 sg;
-
-	// Caller must disable preemption.
-	// Otherwise when this function returns the span can become unswept again
-	// (if GC is triggered on another goroutine).
-	if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
-		runtime·throw("MSpan_EnsureSwept: m is not locked");
-
-	sg = runtime·mheap.sweepgen;
-	if(runtime·atomicload(&s->sweepgen) == sg)
-		return;
-	// The caller must be sure that the span is a MSpanInUse span.
-	if(runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-		runtime·MSpan_Sweep(s, false);
-		return;
-	}
-	// unfortunate condition, and we don't have efficient means to wait
-	while(runtime·atomicload(&s->sweepgen) != sg)
-		runtime·osyield();
-}
-
-// Sweep frees or collects finalizers for blocks not marked in the mark phase.
-// It clears the mark bits in preparation for the next GC round.
-// Returns true if the span was returned to heap.
-// If preserve=true, don't return it to heap nor relink in MCentral lists;
-// caller takes care of it.
-bool
-runtime·MSpan_Sweep(MSpan *s, bool preserve)
-{
-	int32 cl, n, npages, nfree;
-	uintptr size, off, step;
-	uint32 sweepgen;
-	byte *p, *bitp, shift, xbits, bits;
-	MCache *c;
-	byte *arena_start;
-	MLink head, *end, *link;
-	Special *special, **specialp, *y;
-	bool res, sweepgenset;
-
-	if(checkmark)
-		runtime·throw("MSpan_Sweep: checkmark only runs in STW and after the sweep.");
-
-	// It's critical that we enter this function with preemption disabled,
-	// GC must not start while we are in the middle of this function.
-	if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
-		runtime·throw("MSpan_Sweep: m is not locked");
-	sweepgen = runtime·mheap.sweepgen;
-	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
-		runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
-			s->state, s->sweepgen, sweepgen);
-		runtime·throw("MSpan_Sweep: bad span state");
-	}
-	arena_start = runtime·mheap.arena_start;
-	cl = s->sizeclass;
-	size = s->elemsize;
-	if(cl == 0) {
-		n = 1;
-	} else {
-		// Chunk full of small blocks.
-		npages = runtime·class_to_allocnpages[cl];
-		n = (npages << PageShift) / size;
-	}
-	res = false;
-	nfree = 0;
-	end = &head;
-	c = g->m->mcache;
-	sweepgenset = false;
-
-	// Mark any free objects in this span so we don't collect them.
-	for(link = s->freelist; link != nil; link = link->next) {
-		off = (uintptr*)link - (uintptr*)arena_start;
-		bitp = arena_start - off/wordsPerBitmapByte - 1;
-		shift = (off % wordsPerBitmapByte) * gcBits;
-		*bitp |= bitMarked<<shift;
-	}
-
-	// Unlink & free special records for any objects we're about to free.
-	specialp = &s->specials;
-	special = *specialp;
-	while(special != nil) {
-		// A finalizer can be set for an inner byte of an object, find object beginning.
-		p = (byte*)(s->start << PageShift) + special->offset/size*size;
-		off = (uintptr*)p - (uintptr*)arena_start;
-		bitp = arena_start - off/wordsPerBitmapByte - 1;
-		shift = (off % wordsPerBitmapByte) * gcBits;
-		bits = (*bitp>>shift) & bitMask;
-		if((bits&bitMarked) == 0) {
-			// Find the exact byte for which the special was setup
-			// (as opposed to object beginning).
-			p = (byte*)(s->start << PageShift) + special->offset;
-			// about to free object: splice out special record
-			y = special;
-			special = special->next;
-			*specialp = special;
-			if(!runtime·freespecial(y, p, size, false)) {
-				// stop freeing of object if it has a finalizer
-				*bitp |= bitMarked << shift;
-			}
-		} else {
-			// object is still live: keep special record
-			specialp = &special->next;
-			special = *specialp;
-		}
-	}
-
-	// Sweep through n objects of given size starting at p.
-	// This thread owns the span now, so it can manipulate
-	// the block bitmap without atomic operations.
-	p = (byte*)(s->start << PageShift);
-	// Find bits for the beginning of the span.
-	off = (uintptr*)p - (uintptr*)arena_start;
-	bitp = arena_start - off/wordsPerBitmapByte - 1;
-	shift = 0;
-	step = size/(PtrSize*wordsPerBitmapByte);
-	// Rewind to the previous quadruple as we move to the next
-	// one at the beginning of the loop.
-	bitp += step;
-	if(step == 0) {
-		// 8-byte objects.
-		bitp++;
-		shift = gcBits;
-	}
-	for(; n > 0; n--, p += size) {
-		bitp -= step;
-		if(step == 0) {
-			if(shift != 0)
-				bitp--;
-			shift = gcBits - shift;
-		}
-
-		xbits = *bitp;
-		bits = (xbits>>shift) & bitMask;
-
-		// Allocated and marked object, reset bits to allocated.
-		if((bits&bitMarked) != 0) {
-			*bitp &= ~(bitMarked<<shift);
-			continue;
-		}
-		// At this point we know that we are looking at garbage object
-		// that needs to be collected.
-		if(runtime·debug.allocfreetrace)
-			runtime·tracefree(p, size);
-		// Reset to allocated+noscan.
-		*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
-		if(cl == 0) {
-			// Free large span.
-			if(preserve)
-				runtime·throw("can't preserve large span");
-			runtime·unmarkspan(p, s->npages<<PageShift);
-			s->needzero = 1;
-			// important to set sweepgen before returning it to heap
-			runtime·atomicstore(&s->sweepgen, sweepgen);
-			sweepgenset = true;
-			// NOTE(rsc,dvyukov): The original implementation of efence
-			// in CL 22060046 used SysFree instead of SysFault, so that
-			// the operating system would eventually give the memory
-			// back to us again, so that an efence program could run
-			// longer without running out of memory. Unfortunately,
-			// calling SysFree here without any kind of adjustment of the
-			// heap data structures means that when the memory does
-			// come back to us, we have the wrong metadata for it, either in
-			// the MSpan structures or in the garbage collection bitmap.
-			// Using SysFault here means that the program will run out of
-			// memory fairly quickly in efence mode, but at least it won't
-			// have mysterious crashes due to confused memory reuse.
-			// It should be possible to switch back to SysFree if we also
-			// implement and then call some kind of MHeap_DeleteSpan.
-			if(runtime·debug.efence) {
-				s->limit = nil;	// prevent mlookup from finding this span
-				runtime·SysFault(p, size);
-			} else
-				runtime·MHeap_Free(&runtime·mheap, s, 1);
-			c->local_nlargefree++;
-			c->local_largefree += size;
-			runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
-			res = true;
-		} else {
-			// Free small object.
-			if(size > 2*sizeof(uintptr))
-				((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
-			else if(size > sizeof(uintptr))
-				((uintptr*)p)[1] = 0;
-
-			end->next = (MLink*)p;
-			end = (MLink*)p;
-			nfree++;
-		}
-	}
-
-	// We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
-	// because of the potential for a concurrent free/SetFinalizer.
-	// But we need to set it before we make the span available for allocation
-	// (return it to heap or mcentral), because allocation code assumes that a
-	// span is already swept if available for allocation.
-
-	if(!sweepgenset && nfree == 0) {
-		// The span must be in our exclusive ownership until we update sweepgen,
-		// check for potential races.
-		if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
-			runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
-				s->state, s->sweepgen, sweepgen);
-			runtime·throw("MSpan_Sweep: bad span state after sweep");
-		}
-		runtime·atomicstore(&s->sweepgen, sweepgen);
-	}
-	if(nfree > 0) {
-		c->local_nsmallfree[cl] += nfree;
-		c->local_cachealloc -= nfree * size;
-		runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
-		res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
-		// MCentral_FreeSpan updates sweepgen
-	}
-	return res;
-}
-
-// State of background runtime·sweep.
-// Protected by runtime·gclock.
-typedef struct SweepData SweepData;
-struct SweepData
-{
-	G*	g;
-	bool	parked;
-
-	uint32	spanidx;	// background sweeper position
-
-	uint32	nbgsweep;
-	uint32	npausesweep;
-};
-SweepData runtime·sweep;
-
-// sweeps one span
-// returns number of pages returned to heap, or -1 if there is nothing to sweep
-uintptr
-runtime·sweepone(void)
-{
-	MSpan *s;
-	uint32 idx, sg;
-	uintptr npages;
-
-	// increment locks to ensure that the goroutine is not preempted
-	// in the middle of a sweep, thus leaving the span in an inconsistent state for the next GC
-	g->m->locks++;
-	sg = runtime·mheap.sweepgen;
-	for(;;) {
-		idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
-		if(idx >= runtime·work.nspan) {
-			runtime·mheap.sweepdone = true;
-			g->m->locks--;
-			return -1;
-		}
-		s = runtime·work.spans[idx];
-		if(s->state != MSpanInUse) {
-			s->sweepgen = sg;
-			continue;
-		}
-		if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
-			continue;
-		npages = s->npages;
-		if(!runtime·MSpan_Sweep(s, false))
-			npages = 0;
-		g->m->locks--;
-		return npages;
-	}
-}
-
-static void
-sweepone_m(void)
-{
-	g->m->scalararg[0] = runtime·sweepone();
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·gosweepone(void)
-{
-	void (*fn)(void);
-	
-	fn = sweepone_m;
-	runtime·onM(&fn);
-	return g->m->scalararg[0];
-}
-
-#pragma textflag NOSPLIT
-bool
-runtime·gosweepdone(void)
-{
-	return runtime·mheap.sweepdone;
-}
-
-
-void
-runtime·gchelper(void)
-{
-	uint32 nproc;
-
-	g->m->traceback = 2;
-	gchelperstart();
-
-	// parallel mark over GC roots
-	runtime·parfordo(runtime·work.markfor);
-	if(runtime·gcphase != GCscan) 
-		scanblock(nil, 0, nil); // blocks in getfull
-	nproc = runtime·work.nproc;  // work.nproc can change right after we increment work.ndone
-	if(runtime·xadd(&runtime·work.ndone, +1) == nproc-1)
-		runtime·notewakeup(&runtime·work.alldone);
-	g->m->traceback = 0;
-}
-
-static void
-cachestats(void)
-{
-	MCache *c;
-	P *p, **pp;
-
-	for(pp=runtime·allp; p=*pp; pp++) {
-		c = p->mcache;
-		if(c==nil)
-			continue;
-		runtime·purgecachedstats(c);
-	}
-}
-
-static void
-flushallmcaches(void)
-{
-	P *p, **pp;
-	MCache *c;
-
-	// Flush MCache's to MCentral.
-	for(pp=runtime·allp; p=*pp; pp++) {
-		c = p->mcache;
-		if(c==nil)
-			continue;
-		runtime·MCache_ReleaseAll(c);
-		runtime·stackcache_clear(c);
-	}
-}
-
-static void
-flushallmcaches_m(G *gp)
-{
-	flushallmcaches();
-	runtime·gogo(&gp->sched);
-}
-
-void
-runtime·updatememstats(GCStats *stats)
-{
-	M *mp;
-	MSpan *s;
-	int32 i;
-	uint64 smallfree;
-	uint64 *src, *dst;
-	void (*fn)(G*);
-
-	if(stats)
-		runtime·memclr((byte*)stats, sizeof(*stats));
-	for(mp=runtime·allm; mp; mp=mp->alllink) {
-		if(stats) {
-			src = (uint64*)&mp->gcstats;
-			dst = (uint64*)stats;
-			for(i=0; i<sizeof(*stats)/sizeof(uint64); i++)
-				dst[i] += src[i];
-			runtime·memclr((byte*)&mp->gcstats, sizeof(mp->gcstats));
-		}
-	}
-	mstats.mcache_inuse = runtime·mheap.cachealloc.inuse;
-	mstats.mspan_inuse = runtime·mheap.spanalloc.inuse;
-	mstats.sys = mstats.heap_sys + mstats.stacks_sys + mstats.mspan_sys +
-		mstats.mcache_sys + mstats.buckhash_sys + mstats.gc_sys + mstats.other_sys;
-	
-	// Calculate memory allocator stats.
-	// During program execution we only count number of frees and amount of freed memory.
-	// Current number of alive objects in the heap and amount of alive heap memory
-	// are calculated by scanning all spans.
-	// Total number of mallocs is calculated as number of frees plus number of alive objects.
-	// Similarly, total amount of allocated memory is calculated as amount of freed memory
-	// plus amount of alive heap memory.
-	mstats.alloc = 0;
-	mstats.total_alloc = 0;
-	mstats.nmalloc = 0;
-	mstats.nfree = 0;
-	for(i = 0; i < nelem(mstats.by_size); i++) {
-		mstats.by_size[i].nmalloc = 0;
-		mstats.by_size[i].nfree = 0;
-	}
-
-	// Flush MCache's to MCentral.
-	if(g == g->m->g0)
-		flushallmcaches();
-	else {
-		fn = flushallmcaches_m;
-		runtime·mcall(&fn);
-	}
-
-	// Aggregate local stats.
-	cachestats();
-
-	// Scan all spans and count number of alive objects.
-	runtime·lock(&runtime·mheap.lock);
-	for(i = 0; i < runtime·mheap.nspan; i++) {
-		s = runtime·mheap.allspans[i];
-		if(s->state != MSpanInUse)
-			continue;
-		if(s->sizeclass == 0) {
-			mstats.nmalloc++;
-			mstats.alloc += s->elemsize;
-		} else {
-			mstats.nmalloc += s->ref;
-			mstats.by_size[s->sizeclass].nmalloc += s->ref;
-			mstats.alloc += s->ref*s->elemsize;
-		}
-	}
-	runtime·unlock(&runtime·mheap.lock);
-
-	// Aggregate by size class.
-	smallfree = 0;
-	mstats.nfree = runtime·mheap.nlargefree;
-	for(i = 0; i < nelem(mstats.by_size); i++) {
-		mstats.nfree += runtime·mheap.nsmallfree[i];
-		mstats.by_size[i].nfree = runtime·mheap.nsmallfree[i];
-		mstats.by_size[i].nmalloc += runtime·mheap.nsmallfree[i];
-		smallfree += runtime·mheap.nsmallfree[i] * runtime·class_to_size[i];
-	}
-	mstats.nfree += mstats.tinyallocs;
-	mstats.nmalloc += mstats.nfree;
-
-	// Calculate derived stats.
-	mstats.total_alloc = mstats.alloc + runtime·mheap.largefree + smallfree;
-	mstats.heap_alloc = mstats.alloc;
-	mstats.heap_objects = mstats.nmalloc - mstats.nfree;
-}
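
To make the derivation above concrete with assumed numbers: if the span scan finds 700 live objects holding 10 MB and the free counters have recorded 300 frees totalling 4 MB, then nmalloc = 700 + 300 = 1000 and total_alloc = 10 MB + 4 MB = 14 MB.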
-
-// Structure of arguments passed to function gc().
-// This allows the arguments to be passed via runtime·mcall.
-struct gc_args
-{
-	int64 start_time; // start time of GC in ns (just before stoptheworld)
-	bool  eagersweep;
-};
-
-static void gc(struct gc_args *args);
-
-int32
-runtime·readgogc(void)
-{
-	byte *p;
-
-	p = runtime·getenv("GOGC");
-	if(p == nil || p[0] == '\0')
-		return 100;
-	if(runtime·strcmp(p, (byte*)"off") == 0)
-		return -1;
-	return runtime·atoi(p);
-}
-
-void
-runtime·gcinit(void)
-{
-	if(sizeof(Workbuf) != WorkbufSize)
-		runtime·throw("runtime: size of Workbuf is suboptimal");
-
-	runtime·work.markfor = runtime·parforalloc(MaxGcproc);
-	runtime·gcpercent = runtime·readgogc();
-	runtime·gcdatamask = unrollglobgcprog(runtime·gcdata, runtime·edata - runtime·data);
-	runtime·gcbssmask = unrollglobgcprog(runtime·gcbss, runtime·ebss - runtime·bss);
-}
-
-// Called from malloc.go using onM, stopping and starting the world handled in caller.
-void
-runtime·gc_m(void)
-{
-	struct gc_args a;
-	G *gp;
-
-	gp = g->m->curg;
-	runtime·casgstatus(gp, Grunning, Gwaiting);
-	gp->waitreason = runtime·gostringnocopy((byte*)"garbage collection");
-
-	a.start_time = (uint64)(g->m->scalararg[0]) | ((uint64)(g->m->scalararg[1]) << 32);
-	a.eagersweep = g->m->scalararg[2];
-	gc(&a);
-	runtime·casgstatus(gp, Gwaiting, Grunning);
-}
-
-// Similar to clearcheckmarkbits but works on a single span.
-// It performs two tasks.
-// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
-//    for nibbles with the BoundaryBit set.
-// 2. When used after the checkmark phase it converts BitsPointerMarked (11) to BitsPointer (10) and
-//    BitsScalarMarked (00) to BitsScalar (01), thus clearing the checkmark encoding.
-// For the second case it is possible to restore the BitsDead pattern, but since
-// clearmark is a debug tool, performance has a lower priority than simplicity.
-// The span is MSpanInUse and the world is stopped.
-static void
-clearcheckmarkbitsspan(MSpan *s)
-{
-	int32 cl, n, npages, i;
-	uintptr size, off, step;
-	byte *p, *bitp, *arena_start, b;
-
-	if(s->state != MSpanInUse) {
-		runtime·printf("runtime:clearcheckmarkbitsspan: state=%d\n",
-			s->state);
-		runtime·throw("clearcheckmarkbitsspan: bad span state");
-	}
-	arena_start = runtime·mheap.arena_start;
-	cl = s->sizeclass;
-	size = s->elemsize;
-	if(cl == 0) {
-		n = 1;
-	} else {
-		// Chunk full of small blocks.
-		npages = runtime·class_to_allocnpages[cl];
-		n = (npages << PageShift) / size;
-	}
-
-	// MSpan_Sweep has similar code but instead of overloading and 
-	// complicating that routine we do a simpler walk here.
-	// Sweep through n objects of given size starting at p.
-	// This thread owns the span now, so it can manipulate
-	// the block bitmap without atomic operations.
-	p = (byte*)(s->start << PageShift);
-	// Find bits for the beginning of the span.
-	off = (uintptr*)p - (uintptr*)arena_start;
-	bitp = arena_start - off/wordsPerBitmapByte - 1;
-	step = size/(PtrSize*wordsPerBitmapByte);
-
-	// The type bit values are:
-	//	00 - BitsDead, for us BitsScalarMarked
-	//	01 - BitsScalar
-	//	10 - BitsPointer
-	//	11 - unused, for us BitsPointerMarked
-	//
-	// When called to prepare for the checkmark phase (checkmark==1),
-	// we change BitsDead to BitsScalar, so that there are no BitsScalarMarked
-	// type bits anywhere.
-	//
-	// The checkmark phase marks by changing BitsScalar to BitsScalarMarked
-	// and BitsPointer to BitsPointerMarked.
-	//
-	// When called to clean up after the checkmark phase (checkmark==0),
-	// we unmark by changing BitsScalarMarked back to BitsScalar and
-	// BitsPointerMarked back to BitsPointer.
-	//
-	// There are two problems with the scheme as just described.
-	// First, the setup rewrites BitsDead to BitsScalar, but the type bits
-	// following a BitsDead are uninitialized and must not be used.
-	// Second, objects that are free are expected to have their type
-	// bits zeroed (BitsDead), so in the cleanup we need to restore
-	// any BitsDeads that were there originally.
-	//
-	// In a one-word object (8-byte allocation on 64-bit system),
-	// there is no difference between BitsScalar and BitsDead, because
-	// neither is a pointer and there are no more words in the object,
-	// so using BitsScalar during the checkmark is safe and mapping
-	// both back to BitsDead during cleanup is also safe.
-	//
-	// In a larger object, we need to be more careful. During setup,
-	// if the type of the first word is BitsDead, we change it to BitsScalar
-	// (as we must) but also initialize the type of the second
-	// word to BitsDead, so that a scan during the checkmark phase
-	// will still stop before seeing the uninitialized type bits in the
-	// rest of the object. The sequence 'BitsScalar BitsDead' never
-	// happens in real type bitmaps - BitsDead is always as early
-	// as possible, so immediately after the last BitsPointer.
-	// During cleanup, if we see a BitsScalar, we can check to see if it
-	// is followed by BitsDead. If so, it was originally BitsDead and
-	// we can change it back.
-
-	if(step == 0) {
-		// updating top and bottom nibbles, all boundaries
-		for(i=0; i<n/2; i++, bitp--) {
-			if((*bitp & bitBoundary) != bitBoundary)
-				runtime·throw("missing bitBoundary");      
-			b = (*bitp & bitPtrMask)>>2;
-			if(!checkmark && (b == BitsScalar || b == BitsScalarMarked))
-				*bitp &= ~0x0c; // convert to BitsDead
-			else if(b == BitsScalarMarked || b == BitsPointerMarked)
-				*bitp ^= BitsCheckMarkXor<<2;
- 			
-			if(((*bitp>>gcBits) & bitBoundary) != bitBoundary)
-				runtime·throw("missing bitBoundary");            
-			b = ((*bitp>>gcBits) & bitPtrMask)>>2;
-			if(!checkmark && (b == BitsScalar || b == BitsScalarMarked))
-				*bitp &= ~0xc0; // convert to BitsDead
-			else if(b == BitsScalarMarked || b == BitsPointerMarked)
-				*bitp ^= BitsCheckMarkXor<<(2+gcBits);
-		}
-	} else {
-		// updating bottom nibble for first word of each object
-		for(i=0; i<n; i++, bitp -= step) {
-			if((*bitp & bitBoundary) != bitBoundary)
-				runtime·throw("missing bitBoundary");            
-			b = (*bitp & bitPtrMask)>>2;
-			
-			if(checkmark && b == BitsDead) {
-				// move BitsDead into second word.
-				// set bits to BitsScalar in preparation for checkmark phase.
-				*bitp &= ~0xc0;
-				*bitp |= BitsScalar<<2;
-			} else if(!checkmark && (b == BitsScalar || b == BitsScalarMarked) && (*bitp & 0xc0) == 0) {
-				// Cleaning up after checkmark phase.
-				// First word is scalar or dead (we forgot)
-				// and second word is dead.
-				// First word might as well be dead too.
-				*bitp &= ~0x0c;
-			} else if(b == BitsScalarMarked || b == BitsPointerMarked)
-				*bitp ^= BitsCheckMarkXor<<2;
-		}
-	}
-}
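
A compact sketch of the per-nibble conversion performed above, operating only on the 2-bit type code of a boundary word and ignoring the second-word BitsDead bookkeeping; the constants are local stand-ins for the enum in mgc0.h:

	// convert maps a 2-bit type code for checkmark setup (checkmark=true)
	// or cleanup (checkmark=false), mirroring clearcheckmarkbitsspan.
	func convert(code byte, checkmark bool) byte {
		const (
			bitsDead          = 0
			bitsScalar        = 1
			bitsPointer       = 2
			bitsCheckMarkXor  = 1
			bitsScalarMarked  = bitsScalar ^ bitsCheckMarkXor  // 00
			bitsPointerMarked = bitsPointer ^ bitsCheckMarkXor // 11
		)
		switch {
		case checkmark && code == bitsDead:
			return bitsScalar // setup: make dead words scannable as scalars
		case !checkmark && (code == bitsScalar || code == bitsScalarMarked):
			return bitsDead // cleanup: fold scalars (and forgotten deads) back to dead
		case code == bitsScalarMarked || code == bitsPointerMarked:
			return code ^ bitsCheckMarkXor // undo the checkmark XOR
		}
		return code
	}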
-
-// clearcheckmarkbits performs two tasks.
-// 1. When used before the checkmark phase it converts BitsDead (00) to BitsScalar (01)
-//    for nibbles with the BoundaryBit set.
-// 2. When used after the checkmark phase it converts BitsPointerMarked (11) to BitsPointer (10) and
-//    BitsScalarMarked (00) to BitsScalar (01), thus clearing the checkmark encoding.
-// This is a bit expensive but preserves the BitsDead encoding during normal marking.
-// BitsDead remains valid for every nibble except the ones with BitsBoundary set.
-static void
-clearcheckmarkbits(void)
-{
-	uint32 idx;
-	MSpan *s;
-	for(idx=0; idx<runtime·work.nspan; idx++) {
-		s = runtime·work.spans[idx];
-		if(s->state == MSpanInUse) {
-			clearcheckmarkbitsspan(s);
-		}
-	}
-}
-
-// Called from malloc.go using onM.
-// The world is stopped. Rerun the scan and mark phases
-// using the bitMarkedCheck bit instead of the
-// bitMarked bit. If the marking encounters a
-// bitMarked bit that is not set, then we throw.
-void
-runtime·gccheckmark_m(void)
-{
-	if(!gccheckmarkenable)
-		return;
-
-	if(checkmark)
-		runtime·throw("gccheckmark_m, entered with checkmark already true.");
-
-	checkmark = true;
-	clearcheckmarkbits(); // Converts BitsDead to BitsScalar.
-	runtime·gc_m(); // turns off checkmark
-	// Work done; fix up the GC bitmap to remove the checkmark bits.
-	clearcheckmarkbits();
-}
-
-// checkmarkenable is initially false
-void
-runtime·gccheckmarkenable_m(void)
-{
-	gccheckmarkenable = true;
-}
-
-void
-runtime·gccheckmarkdisable_m(void)
-{
-	gccheckmarkenable = false;
-}
-
-void
-runtime·finishsweep_m(void)
-{
-	uint32 i, sg;
-	MSpan *s;
-
-	// The world is stopped so we should be able to complete the sweeps 
-	// quickly. 
-	while(runtime·sweepone() != -1)
-		runtime·sweep.npausesweep++;
-
-	// There may be some other spans being swept concurrently that 
-	// we need to wait for. If finishsweep_m is done with the world stopped
-	// this code is not required.
-	sg = runtime·mheap.sweepgen;
-	for(i=0; i<runtime·work.nspan; i++) {
-		s = runtime·work.spans[i];
-		if(s->sweepgen == sg) {
-			continue;
-		}
-		if(s->state != MSpanInUse) // Span is not part of the GCed heap so no need to ensure it is swept.
-			continue;
-		runtime·MSpan_EnsureSwept(s);
-	}	
-}
-
-// Scan all of the stacks, greying (or graying if in America) the referents
-// but not blackening them since the mark write barrier isn't installed.
-void
-runtime·gcscan_m(void)
-{
-	uint32 i, allglen, oldphase;
-	G *gp, *mastergp, **allg;
-
-	// Grab the g that called us and potentially allow rescheduling.
-	// This allows it to be scanned like other goroutines.
-	mastergp = g->m->curg;
-
-	runtime·casgstatus(mastergp, Grunning, Gwaiting);
-	mastergp->waitreason = runtime·gostringnocopy((byte*)"garbage collection scan");
-
-	// Span sweeping has been done by finishsweep_m.
-	// Long term we will want to make this goroutine runnable 
-	// by placing it onto a scanenqueue state and then calling 
-	// runtime·restartg(mastergp) to make it Grunnable.  
-	// At the bottom we will want to return this p back to the scheduler.
-
-	oldphase = runtime·gcphase;
-
-	runtime·lock(&runtime·allglock);
-	allglen = runtime·allglen;
-	allg = runtime·allg;
-	// Prepare flag indicating that the scan has not been completed.
-	for(i = 0; i < allglen; i++) {
-		gp = allg[i];
-		gp->gcworkdone = false;  // set to true in gcphasework
-	}
-	runtime·unlock(&runtime·allglock);
-
-	runtime·work.nwait = 0;
-	runtime·work.ndone = 0;
-	runtime·work.nproc = 1; // For now do not do this in parallel.
-	runtime·gcphase = GCscan;
-	//	ackgcphase is not needed since we are not scanning running goroutines.
-	runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + allglen, nil, false, markroot);
-	runtime·parfordo(runtime·work.markfor);
-	
-	runtime·lock(&runtime·allglock);	
-
-	allg = runtime·allg;
-	// Check that gc work is done. 
-	for(i = 0; i < allglen; i++) {
-		gp = allg[i];
-		if(!gp->gcworkdone) {
-			runtime·throw("scan missed a g");
-		}
-	}
-	runtime·unlock(&runtime·allglock);
-
-	runtime·gcphase = oldphase;
-	runtime·casgstatus(mastergp, Gwaiting, Grunning);
-	// Let the g that called us continue to run.
-}
-
-// Mark all objects that are known about.
-void
-runtime·gcmark_m(void)
-{
-	scanblock(nil, 0, nil);
-}
-
-// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
-// all goroutines see the new barrier.
-void
-runtime·gcinstallmarkwb_m(void)
-{
-	runtime·gcphase = GCmark;
-}
-
-// For now this must be bracketed with a stoptheworld and a starttheworld to ensure
-// all goroutines see the new barrier.
-void
-runtime·gcinstalloffwb_m(void)
-{
-	runtime·gcphase = GCoff;
-}
-
-static void
-gc(struct gc_args *args)
-{
-	int64 t0, t1, t2, t3, t4;
-	uint64 heap0, heap1, obj;
-	GCStats stats;
-	uint32 oldphase;
-	uint32 i;
-	G *gp;
-
-	if(runtime·debug.allocfreetrace)
-		runtime·tracegc();
-
-	g->m->traceback = 2;
-	t0 = args->start_time;
-	runtime·work.tstart = args->start_time; 
-
-	t1 = 0;
-	if(runtime·debug.gctrace)
-		t1 = runtime·nanotime();
-
-	if(!checkmark)
-		runtime·finishsweep_m(); // skip during checkmark debug phase.
-
-	// Cache runtime·mheap.allspans in work.spans to avoid conflicts with
-	// resizing/freeing allspans.
-	// New spans can be created while GC progresses, but they are not garbage for
-	// this round:
-	//  - new stack spans can be created even while the world is stopped.
-	//  - new malloc spans can be created during the concurrent sweep
-
-	// Even if this is stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
-	runtime·lock(&runtime·mheap.lock);
-	// Free the old cached sweep array if necessary.
-	if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
-		runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
-	// Cache the current array for marking.
-	runtime·mheap.gcspans = runtime·mheap.allspans;
-	runtime·work.spans = runtime·mheap.allspans;
-	runtime·work.nspan = runtime·mheap.nspan;
-	runtime·unlock(&runtime·mheap.lock);
-	oldphase = runtime·gcphase;
-
-	runtime·work.nwait = 0;
-	runtime·work.ndone = 0;
-	runtime·work.nproc = runtime·gcprocs(); 
-	runtime·gcphase = GCmarktermination;
-
-	// World is stopped so allglen will not change.
-	for(i = 0; i < runtime·allglen; i++) {
-		gp = runtime·allg[i];
-		gp->gcworkdone = false;  // set to true in gcphasework
-	}
-
-	runtime·parforsetup(runtime·work.markfor, runtime·work.nproc, RootCount + runtime·allglen, nil, false, markroot);
-	if(runtime·work.nproc > 1) {
-		runtime·noteclear(&runtime·work.alldone);
-		runtime·helpgc(runtime·work.nproc);
-	}
-
-	t2 = 0;
-	if(runtime·debug.gctrace)
-		t2 = runtime·nanotime();
-
-	gchelperstart();
-	runtime·parfordo(runtime·work.markfor);
-
-	scanblock(nil, 0, nil);
-
-	if(runtime·work.full)
-		runtime·throw("runtime·work.full != nil");
-	if(runtime·work.partial)
-		runtime·throw("runtime·work.partial != nil");
-
-	runtime·gcphase = oldphase;
-	t3 = 0;
-	if(runtime·debug.gctrace)
-		t3 = runtime·nanotime();
-
-	if(runtime·work.nproc > 1)
-		runtime·notesleep(&runtime·work.alldone);
-
-	runtime·shrinkfinish();
-
-	cachestats();
-	// next_gc calculation is tricky with concurrent sweep since we don't know size of live heap
-	// estimate what was live heap size after previous GC (for tracing only)
-	heap0 = mstats.next_gc*100/(runtime·gcpercent+100);
-	// conservatively set next_gc to high value assuming that everything is live
-	// concurrent/lazy sweep will reduce this number while discovering new garbage
-	mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*runtime·gcpercent/100;
-
-	t4 = runtime·nanotime();
-	runtime·atomicstore64(&mstats.last_gc, runtime·unixnanotime());  // must be Unix time to make sense to user
-	mstats.pause_ns[mstats.numgc%nelem(mstats.pause_ns)] = t4 - t0;
-	mstats.pause_end[mstats.numgc%nelem(mstats.pause_end)] = t4;
-	mstats.pause_total_ns += t4 - t0;
-	mstats.numgc++;
-	if(mstats.debuggc)
-		runtime·printf("pause %D\n", t4-t0);
-
-	if(runtime·debug.gctrace) {
-		heap1 = mstats.heap_alloc;
-		runtime·updatememstats(&stats);
-		if(heap1 != mstats.heap_alloc) {
-			runtime·printf("runtime: mstats skew: heap=%D/%D\n", heap1, mstats.heap_alloc);
-			runtime·throw("mstats skew");
-		}
-		obj = mstats.nmalloc - mstats.nfree;
-
-		stats.nprocyield += runtime·work.markfor->nprocyield;
-		stats.nosyield += runtime·work.markfor->nosyield;
-		stats.nsleep += runtime·work.markfor->nsleep;
-
-		runtime·printf("gc%d(%d): %D+%D+%D+%D us, %D -> %D MB, %D (%D-%D) objects,"
-				" %d goroutines,"
-				" %d/%d/%d sweeps,"
-				" %D(%D) handoff, %D(%D) steal, %D/%D/%D yields\n",
-			mstats.numgc, runtime·work.nproc, (t1-t0)/1000, (t2-t1)/1000, (t3-t2)/1000, (t4-t3)/1000,
-			heap0>>20, heap1>>20, obj,
-			mstats.nmalloc, mstats.nfree,
-			runtime·gcount(),
-			runtime·work.nspan, runtime·sweep.nbgsweep, runtime·sweep.npausesweep,
-			stats.nhandoff, stats.nhandoffcnt,
-			runtime·work.markfor->nsteal, runtime·work.markfor->nstealcnt,
-			stats.nprocyield, stats.nosyield, stats.nsleep);
-		runtime·sweep.nbgsweep = runtime·sweep.npausesweep = 0;
-	}
-
-	// See the comment in the beginning of this function as to why we need the following.
-	// Even if this is still stop-the-world, a concurrent exitsyscall can allocate a stack from heap.
-	runtime·lock(&runtime·mheap.lock);
-	// Free the old cached mark array if necessary.
-	if(runtime·work.spans != nil && runtime·work.spans != runtime·mheap.allspans)
-		runtime·SysFree(runtime·work.spans, runtime·work.nspan*sizeof(runtime·work.spans[0]), &mstats.other_sys);
-	
-	if(gccheckmarkenable) {
-		if(!checkmark) {
-			// first half of two-pass; don't set up sweep
-			runtime·unlock(&runtime·mheap.lock);
-			return;
-		}
-		checkmark = false; // done checking marks
-	}
-
-	// Cache the current array for sweeping.
-	runtime·mheap.gcspans = runtime·mheap.allspans;
-	runtime·mheap.sweepgen += 2;
-	runtime·mheap.sweepdone = false;
-	runtime·work.spans = runtime·mheap.allspans;
-	runtime·work.nspan = runtime·mheap.nspan;
-	runtime·sweep.spanidx = 0;
-	runtime·unlock(&runtime·mheap.lock);
-
-
-	if(ConcurrentSweep && !args->eagersweep) {
-		runtime·lock(&runtime·gclock);
-		if(runtime·sweep.g == nil)
-			runtime·sweep.g = runtime·newproc1(&bgsweepv, nil, 0, 0, gc);
-		else if(runtime·sweep.parked) {
-			runtime·sweep.parked = false;
-			runtime·ready(runtime·sweep.g);
-		}
-		runtime·unlock(&runtime·gclock);
-	} else {
-		// Sweep all spans eagerly.
-		while(runtime·sweepone() != -1)
-			runtime·sweep.npausesweep++;
-		// Do an additional mProf_GC, because all 'free' events are now real as well.
-		runtime·mProf_GC();
-	}
-
-	runtime·mProf_GC();
-	g->m->traceback = 0;
-}
-
-extern uintptr runtime·sizeof_C_MStats;
-
-static void readmemstats_m(void);
-
-void
-runtime·readmemstats_m(void)
-{
-	MStats *stats;
-	
-	stats = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-
-	runtime·updatememstats(nil);
-	// The size of the trailing by_size array differs between Go and C:
-	// NumSizeClasses was changed, but we cannot change the Go struct because of backward compatibility.
-	runtime·memmove(stats, &mstats, runtime·sizeof_C_MStats);
-
-	// Stack numbers are part of the heap numbers, separate those out for user consumption
-	stats->stacks_sys = stats->stacks_inuse;
-	stats->heap_inuse -= stats->stacks_inuse;
-	stats->heap_sys -= stats->stacks_inuse;
-}
-
-static void readgcstats_m(void);
-
-#pragma textflag NOSPLIT
-void
-runtime∕debug·readGCStats(Slice *pauses)
-{
-	void (*fn)(void);
-	
-	g->m->ptrarg[0] = pauses;
-	fn = readgcstats_m;
-	runtime·onM(&fn);
-}
-
-static void
-readgcstats_m(void)
-{
-	Slice *pauses;	
-	uint64 *p;
-	uint32 i, j, n;
-	
-	pauses = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-
-	// Calling code in runtime/debug should make the slice large enough.
-	if(pauses->cap < nelem(mstats.pause_ns)+3)
-		runtime·throw("runtime: short slice passed to readGCStats");
-
-	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
-	p = (uint64*)pauses->array;
-	runtime·lock(&runtime·mheap.lock);
-
-	n = mstats.numgc;
-	if(n > nelem(mstats.pause_ns))
-		n = nelem(mstats.pause_ns);
-
-	// The pause buffer is circular. The most recent pause is at
-	// pause_ns[(numgc-1)%nelem(pause_ns)]; walking backward from there
-	// goes farther back in time. We deliver the times
-	// most recent first (in p[0]).
-	for(i=0; i<n; i++) {
-		j = (mstats.numgc-1-i)%nelem(mstats.pause_ns);
-		p[i] = mstats.pause_ns[j];
-		p[n+i] = mstats.pause_end[j];
-	}
-
-	p[n+n] = mstats.last_gc;
-	p[n+n+1] = mstats.numgc;
-	p[n+n+2] = mstats.pause_total_ns;	
-	runtime·unlock(&runtime·mheap.lock);
-	pauses->len = n+n+3;
-}
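
For reference, the user-level view of the values packed here is runtime/debug.ReadGCStats; a minimal usage sketch (pauses come back most recent first, matching the backward walk above):

	package main

	import (
		"fmt"
		"runtime"
		"runtime/debug"
	)

	func main() {
		runtime.GC() // ensure at least one collection has happened
		var s debug.GCStats
		debug.ReadGCStats(&s)
		fmt.Println("collections:", s.NumGC, "total pause:", s.PauseTotal, "last GC:", s.LastGC)
		if len(s.Pause) > 0 {
			fmt.Println("most recent pause:", s.Pause[0])
		}
	}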
-
-void
-runtime·setgcpercent_m(void)
-{
-	int32 in;
-	int32 out;
-
-	in = (int32)(intptr)g->m->scalararg[0];
-
-	runtime·lock(&runtime·mheap.lock);
-	out = runtime·gcpercent;
-	if(in < 0)
-		in = -1;
-	runtime·gcpercent = in;
-	runtime·unlock(&runtime·mheap.lock);
-
-	g->m->scalararg[0] = (uintptr)(intptr)out;
-}
-
-static void
-gchelperstart(void)
-{
-	if(g->m->helpgc < 0 || g->m->helpgc >= MaxGcproc)
-		runtime·throw("gchelperstart: bad m->helpgc");
-	if(g != g->m->g0)
-		runtime·throw("gchelper not running on g0 stack");
-}
-
-G*
-runtime·wakefing(void)
-{
-	G *res;
-
-	res = nil;
-	runtime·lock(&runtime·finlock);
-	if(runtime·fingwait && runtime·fingwake) {
-		runtime·fingwait = false;
-		runtime·fingwake = false;
-		res = runtime·fing;
-	}
-	runtime·unlock(&runtime·finlock);
-	return res;
-}
-
-// Recursively unrolls the GC program in prog.
-// mask is where to store the result.
-// ppos is a pointer to the position in mask, in bits.
-// sparse says to generate a 4-bit-per-word mask for the heap (2 bits per word for data/bss otherwise).
-static byte*
-unrollgcprog1(byte *mask, byte *prog, uintptr *ppos, bool inplace, bool sparse)
-{
-	uintptr pos, siz, i, off;
-	byte *arena_start, *prog1, v, *bitp, shift;
-
-	arena_start = runtime·mheap.arena_start;
-	pos = *ppos;
-	for(;;) {
-		switch(prog[0]) {
-		case insData:
-			prog++;
-			siz = prog[0];
-			prog++;
-			for(i = 0; i < siz; i++) {
-				v = prog[i/PointersPerByte];
-				v >>= (i%PointersPerByte)*BitsPerPointer;
-				v &= BitsMask;
-				if(inplace) {
-					// Store directly into GC bitmap.
-					off = (uintptr*)(mask+pos) - (uintptr*)arena_start;
-					bitp = arena_start - off/wordsPerBitmapByte - 1;
-					shift = (off % wordsPerBitmapByte) * gcBits;
-					if(shift==0)
-						*bitp = 0;
-					*bitp |= v<<(shift+2);
-					pos += PtrSize;
-				} else if(sparse) {
-					// 4-bits per word
-					v <<= (pos%8)+2;
-					mask[pos/8] |= v;
-					pos += gcBits;
-				} else {
-					// 2-bits per word
-					v <<= pos%8;
-					mask[pos/8] |= v;
-					pos += BitsPerPointer;
-				}
-			}
-			prog += ROUND(siz*BitsPerPointer, 8)/8;
-			break;
-		case insArray:
-			prog++;
-			siz = 0;
-			for(i = 0; i < PtrSize; i++)
-				siz = (siz<<8) + prog[PtrSize-i-1];
-			prog += PtrSize;
-			prog1 = nil;
-			for(i = 0; i < siz; i++)
-				prog1 = unrollgcprog1(mask, prog, &pos, inplace, sparse);
-			if(prog1[0] != insArrayEnd)
-				runtime·throw("unrollgcprog: array does not end with insArrayEnd");
-			prog = prog1+1;
-			break;
-		case insArrayEnd:
-		case insEnd:
-			*ppos = pos;
-			return prog;
-		default:
-			runtime·throw("unrollgcprog: unknown instruction");
-		}
-	}
-}
-
-// Unrolls GC program prog for data/bss, returns dense GC mask.
-static BitVector
-unrollglobgcprog(byte *prog, uintptr size)
-{
-	byte *mask;
-	uintptr pos, masksize;
-
-	masksize = ROUND(ROUND(size, PtrSize)/PtrSize*BitsPerPointer, 8)/8;
-	mask = runtime·persistentalloc(masksize+1, 0, &mstats.gc_sys);
-	mask[masksize] = 0xa1;
-	pos = 0;
-	prog = unrollgcprog1(mask, prog, &pos, false, false);
-	if(pos != size/PtrSize*BitsPerPointer) {
-		runtime·printf("unrollglobgcprog: bad program size, got %D, expect %D\n",
-			(uint64)pos, (uint64)size/PtrSize*BitsPerPointer);
-		runtime·throw("unrollglobgcprog: bad program size");
-	}
-	if(prog[0] != insEnd)
-		runtime·throw("unrollglobgcprog: program does not end with insEnd");
-	if(mask[masksize] != 0xa1)
-		runtime·throw("unrollglobgcprog: overflow");
-	return (BitVector){masksize*8, mask};
-}
-
-void
-runtime·unrollgcproginplace_m(void)
-{
-	uintptr size, size0, pos, off;
-	byte *arena_start, *prog, *bitp, shift;
-	Type *typ;
-	void *v;
-
-	v = g->m->ptrarg[0];
-	typ = g->m->ptrarg[1];
-	size = g->m->scalararg[0];
-	size0 = g->m->scalararg[1];
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-
-	pos = 0;
-	prog = (byte*)typ->gc[1];
-	while(pos != size0)
-		unrollgcprog1(v, prog, &pos, true, true);
-	// Mark first word as bitAllocated.
-	arena_start = runtime·mheap.arena_start;
-	off = (uintptr*)v - (uintptr*)arena_start;
-	bitp = arena_start - off/wordsPerBitmapByte - 1;
-	shift = (off % wordsPerBitmapByte) * gcBits;
-	*bitp |= bitBoundary<<shift;
-	// Mark word after last as BitsDead.
-	if(size0 < size) {
-		off = (uintptr*)((byte*)v + size0) - (uintptr*)arena_start;
-		bitp = arena_start - off/wordsPerBitmapByte - 1;
-		shift = (off % wordsPerBitmapByte) * gcBits;
-		*bitp &= ~(bitPtrMask<<shift) | ((uintptr)BitsDead<<(shift+2));
-	}
-}
-
-// Unrolls GC program in typ->gc[1] into typ->gc[0]
-void
-runtime·unrollgcprog_m(void)
-{
-	static Mutex lock;
-	Type *typ;
-	byte *mask, *prog;
-	uintptr pos;
-	uintptr x;
-
-	typ = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-
-	runtime·lock(&lock);
-	mask = (byte*)typ->gc[0];
-	if(mask[0] == 0) {
-		pos = 8;  // skip the unroll flag
-		prog = (byte*)typ->gc[1];
-		prog = unrollgcprog1(mask, prog, &pos, false, true);
-		if(prog[0] != insEnd)
-			runtime·throw("unrollgcprog: program does not end with insEnd");
-		if(((typ->size/PtrSize)%2) != 0) {
-			// repeat the program twice
-			prog = (byte*)typ->gc[1];
-			unrollgcprog1(mask, prog, &pos, false, true);
-		}
-
-		// atomic way to say mask[0] = 1
-		x = *(uintptr*)mask;
-		((byte*)&x)[0] = 1;
-		runtime·atomicstorep((void**)mask, (void*)x);
-	}
-	runtime·unlock(&lock);
-}
-
-// mark the span of memory at v as having n blocks of the given size.
-// if leftover is true, there is left over space at the end of the span.
-void
-runtime·markspan(void *v, uintptr size, uintptr n, bool leftover)
-{
-	uintptr i, off, step;
-	byte *b;
-
-	if((byte*)v+size*n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
-		runtime·throw("markspan: bad pointer");
-
-	// Find bits of the beginning of the span.
-	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
-	b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-	if((off%wordsPerBitmapByte) != 0)
-		runtime·throw("markspan: unaligned length");
-
-	// Okay to use non-atomic ops here, because we control
-	// the entire span, and each bitmap byte has bits for only
-	// one span, so no other goroutines are changing these bitmap words.
-
-	if(size == PtrSize) {
-		// Possible only on 64-bits (minimal size class is 8 bytes).
-		// Poor man's memset(0x11).
-		if(0x11 != ((bitBoundary+BitsDead)<<gcBits) + (bitBoundary+BitsDead))
-			runtime·throw("markspan: bad bits");
-		if((n%(wordsPerBitmapByte*PtrSize)) != 0)
-			runtime·throw("markspan: unaligned length");
-		b = b - n/wordsPerBitmapByte + 1;	// find first byte
-		if(((uintptr)b%PtrSize) != 0)
-			runtime·throw("markspan: unaligned pointer");
-		for(i = 0; i != n; i += wordsPerBitmapByte*PtrSize, b += PtrSize)
-			*(uintptr*)b = (uintptr)0x1111111111111111ULL;  // bitBoundary+BitsDead
-		return;
-	}
-
-	if(leftover)
-		n++;	// mark a boundary just past end of last block too
-	step = size/(PtrSize*wordsPerBitmapByte);
-	for(i = 0; i != n; i++, b -= step)
-		*b = bitBoundary|(BitsDead<<2);
-}
-
-// unmark the span of memory at v of length n bytes.
-void
-runtime·unmarkspan(void *v, uintptr n)
-{
-	uintptr off;
-	byte *b;
-
-	if((byte*)v+n > (byte*)runtime·mheap.arena_used || (byte*)v < runtime·mheap.arena_start)
-		runtime·throw("markspan: bad pointer");
-
-	off = (uintptr*)v - (uintptr*)runtime·mheap.arena_start;  // word offset
-	if((off % (PtrSize*wordsPerBitmapByte)) != 0)
-		runtime·throw("markspan: unaligned pointer");
-	b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-	n /= PtrSize;
-	if(n%(PtrSize*wordsPerBitmapByte) != 0)
-		runtime·throw("unmarkspan: unaligned length");
-	// Okay to use non-atomic ops here, because we control
-	// the entire span, and each bitmap word has bits for only
-	// one span, so no other goroutines are changing these
-	// bitmap words.
-	n /= wordsPerBitmapByte;
-	runtime·memclr(b - n + 1, n);
-}
-
-void
-runtime·MHeap_MapBits(MHeap *h)
-{
-	// Caller has added extra mappings to the arena.
-	// Add extra mappings of bitmap words as needed.
-	// We allocate extra bitmap pieces in chunks of bitmapChunk.
-	enum {
-		bitmapChunk = 8192
-	};
-	uintptr n;
-
-	n = (h->arena_used - h->arena_start) / (PtrSize*wordsPerBitmapByte);
-	n = ROUND(n, bitmapChunk);
-	n = ROUND(n, PhysPageSize);
-	if(h->bitmap_mapped >= n)
-		return;
-
-	runtime·SysMap(h->arena_start - n, n - h->bitmap_mapped, h->arena_reserved, &mstats.gc_sys);
-	h->bitmap_mapped = n;
-}
-
-static bool
-getgcmaskcb(Stkframe *frame, void *ctxt)
-{
-	Stkframe *frame0;
-
-	frame0 = ctxt;
-	if(frame->sp <= frame0->sp && frame0->sp < frame->varp) {
-		*frame0 = *frame;
-		return false;
-	}
-	return true;
-}
-
-// Returns GC type info for object p for testing.
-void
-runtime·getgcmask(byte *p, Type *t, byte **mask, uintptr *len)
-{
-	Stkframe frame;
-	uintptr i, n, off;
-	byte *base, bits, shift, *b;
-	bool (*cb)(Stkframe*, void*);
-
-	*mask = nil;
-	*len = 0;
-
-	// data
-	if(p >= runtime·data && p < runtime·edata) {
-		n = ((PtrType*)t)->elem->size;
-		*len = n/PtrSize;
-		*mask = runtime·mallocgc(*len, nil, FlagNoScan);
-		for(i = 0; i < n; i += PtrSize) {
-			off = (p+i-runtime·data)/PtrSize;
-			bits = (runtime·gcdatamask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
-			(*mask)[i/PtrSize] = bits;
-		}
-		return;
-	}
-	// bss
-	if(p >= runtime·bss && p < runtime·ebss) {
-		n = ((PtrType*)t)->elem->size;
-		*len = n/PtrSize;
-		*mask = runtime·mallocgc(*len, nil, FlagNoScan);
-		for(i = 0; i < n; i += PtrSize) {
-			off = (p+i-runtime·bss)/PtrSize;
-			bits = (runtime·gcbssmask.bytedata[off/PointersPerByte] >> ((off%PointersPerByte)*BitsPerPointer))&BitsMask;
-			(*mask)[i/PtrSize] = bits;
-		}
-		return;
-	}
-	// heap
-	if(runtime·mlookup(p, &base, &n, nil)) {
-		*len = n/PtrSize;
-		*mask = runtime·mallocgc(*len, nil, FlagNoScan);
-		for(i = 0; i < n; i += PtrSize) {
-			off = (uintptr*)(base+i) - (uintptr*)runtime·mheap.arena_start;
-			b = runtime·mheap.arena_start - off/wordsPerBitmapByte - 1;
-			shift = (off % wordsPerBitmapByte) * gcBits;
-			bits = (*b >> (shift+2))&BitsMask;
-			(*mask)[i/PtrSize] = bits;
-		}
-		return;
-	}
-	// stack
-	frame.fn = nil;
-	frame.sp = (uintptr)p;
-	cb = getgcmaskcb;
-	runtime·gentraceback(g->m->curg->sched.pc, g->m->curg->sched.sp, 0, g->m->curg, 0, nil, 1000, &cb, &frame, 0);
-	if(frame.fn != nil) {
-		Func *f;
-		StackMap *stackmap;
-		BitVector bv;
-		uintptr size;
-		uintptr targetpc;
-		int32 pcdata;
-
-		f = frame.fn;
-		targetpc = frame.continpc;
-		if(targetpc == 0)
-			return;
-		if(targetpc != f->entry)
-			targetpc--;
-		pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
-		if(pcdata == -1)
-			return;
-		stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-		if(stackmap == nil || stackmap->n <= 0)
-			return;
-		bv = runtime·stackmapdata(stackmap, pcdata);
-		size = bv.n/BitsPerPointer*PtrSize;
-		n = ((PtrType*)t)->elem->size;
-		*len = n/PtrSize;
-		*mask = runtime·mallocgc(*len, nil, FlagNoScan);
-		for(i = 0; i < n; i += PtrSize) {
-			off = (p+i-(byte*)frame.varp+size)/PtrSize;
-			bits = (bv.bytedata[off*BitsPerPointer/8] >> ((off*BitsPerPointer)%8))&BitsMask;
-			(*mask)[i/PtrSize] = bits;
-		}
-	}
-}
-
-void runtime·gc_unixnanotime(int64 *now);
-
-int64
-runtime·unixnanotime(void)
-{
-	int64 now;
-
-	runtime·gc_unixnanotime(&now);
-	return now;
-}
diff --git a/src/runtime/mgc0.go b/src/runtime/mgc0.go
index dc4eec5..00e64c0 100644
--- a/src/runtime/mgc0.go
+++ b/src/runtime/mgc0.go
@@ -28,7 +28,7 @@
 
 func freeOSMemory() {
 	gogc(2) // force GC and do eager sweep
-	onM(scavenge_m)
+	systemstack(scavenge_m)
 }
 
 var poolcleanup func()
@@ -60,10 +60,8 @@
 	}
 }
 
-func gosweepone() uintptr
-func gosweepdone() bool
-
 func bgsweep() {
+	sweep.g = getg()
 	getg().issystem = true
 	for {
 		for gosweepone() != ^uintptr(0) {
@@ -105,7 +103,7 @@
 	}
 
 	if src != 0 && (src < _PageSize || src == _PoisonGC || src == _PoisonStack) {
-		onM(func() { gothrow("bad pointer in write barrier") })
+		systemstack(func() { gothrow("bad pointer in write barrier") })
 	}
 
 	mp := acquirem()
@@ -114,13 +112,9 @@
 		return
 	}
 	mp.inwb = true
-	oldscalar0 := mp.scalararg[0]
-	oldscalar1 := mp.scalararg[1]
-	mp.scalararg[0] = uintptr(unsafe.Pointer(dst))
-	mp.scalararg[1] = src
-	onM_signalok(gcmarkwb_m)
-	mp.scalararg[0] = oldscalar0
-	mp.scalararg[1] = oldscalar1
+	systemstack(func() {
+		gcmarkwb_m(dst, src)
+	})
 	mp.inwb = false
 	releasem(mp)
 }
diff --git a/src/runtime/mgc0.h b/src/runtime/mgc0.h
index 519d720..dd0c460 100644
--- a/src/runtime/mgc0.h
+++ b/src/runtime/mgc0.h
@@ -2,81 +2,21 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Garbage collector (GC)
+// Used by cmd/gc.
 
 enum {
-	// Four bits per word (see #defines below).
 	gcBits = 4,
-	wordsPerBitmapByte = 8/gcBits,
-
-	// GC type info programs.
-	// The programs allow storing the type info required for GC in a compact form.
-	// Most importantly arrays take O(1) space instead of O(n).
-	// The program grammar is:
-	//
-	// Program = {Block} "insEnd"
-	// Block = Data | Array
-	// Data = "insData" DataSize DataBlock
-	// DataSize = int // size of the DataBlock in bit pairs, 1 byte
-	// DataBlock = binary // dense GC mask (2 bits per word) of size ]DataSize/4[ bytes
-	// Array = "insArray" ArrayLen Block "insArrayEnd"
-	// ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch)
-	//
-	// Each instruction (insData, insArray, etc) is 1 byte.
-	// For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
-	// the program looks like:
-	//
-	// insData 3 (BitsMultiWord BitsSlice BitsScalar)
-	//	insArray 20 insData 2 (BitsScalar BitsPointer) insArrayEnd insEnd
-	//
-	// Total size of the program is 17 bytes (13 bytes on 32-bits).
-	// The corresponding GC mask would take 43 bytes (it would be repeated
-	// because the type has an odd number of words).
+	BitsPerPointer = 2,
+	BitsDead = 0,
+	BitsScalar = 1,
+	BitsPointer = 2,
+	BitsMask = 3,
+	PointersPerByte = 8/BitsPerPointer,
 	insData = 1,
 	insArray,
 	insArrayEnd,
 	insEnd,
 
-	// Pointer map
-	BitsPerPointer	= 2,
-	BitsMask	= (1<<BitsPerPointer)-1,
-	PointersPerByte	= 8/BitsPerPointer,
-
-	// If you change these, also change scanblock.
-	// scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)".
-	BitsDead	= 0,
-	BitsScalar	= 1,                                // 01
-	BitsPointer	= 2,                                // 10
-	BitsCheckMarkXor = 1,                               // 10
-	BitsScalarMarked = BitsScalar ^ BitsCheckMarkXor,   // 00
-	BitsPointerMarked = BitsPointer ^ BitsCheckMarkXor, // 11
-
-	BitsMultiWord	= 3,
-	// BitsMultiWord will be set for the first word of a multi-word item.
-	// When it is set, one of the following will be set for the second word.
-	// NOT USED ANYMORE: BitsString	= 0,
-	// NOT USED ANYMORE: BitsSlice	= 1,
-	BitsIface	= 2,
-	BitsEface	= 3,
-
 	// 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
 	MaxGCMask	= 65536, // TODO(rsc): change back to 64
 };
-
-// Bits in per-word bitmap.
-// #defines because we shift the values beyond 32 bits.
-//
-// Each word in the bitmap describes wordsPerBitmapWord words
-// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
-// so on a 64-bit system there is one bitmap word per 16 heap words.
-//
-// The bitmap starts at mheap.arena_start and extends *backward* from
-// there.  On a 64-bit system the off'th word in the arena is tracked by
-// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
-// the only difference is that the divisor is 8.)
-enum {
-	bitBoundary = 1, // boundary of an object
-	bitMarked = 2, // marked object
-	bitMask = bitBoundary | bitMarked,
-	bitPtrMask = BitsMask<<2,
-};
diff --git a/src/runtime/mgc1.go b/src/runtime/mgc1.go
new file mode 100644
index 0000000..04a5207
--- /dev/null
+++ b/src/runtime/mgc1.go
@@ -0,0 +1,80 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Garbage collector (GC)
+
+package runtime
+
+const (
+	// Four bits per word (see #defines below).
+	gcBits             = 4
+	wordsPerBitmapByte = 8 / gcBits
+)
+
+const (
+	// GC type info programs.
+	// The programs allow storing the type info required for GC in a compact form.
+	// Most importantly arrays take O(1) space instead of O(n).
+	// The program grammar is:
+	//
+	// Program = {Block} "insEnd"
+	// Block = Data | Array
+	// Data = "insData" DataSize DataBlock
+	// DataSize = int // size of the DataBlock in bit pairs, 1 byte
+	// DataBlock = binary // dense GC mask (2 bits per word) of size ]DataSize/4[ bytes
+	// Array = "insArray" ArrayLen Block "insArrayEnd"
+	// ArrayLen = int // length of the array, 8 bytes (4 bytes for 32-bit arch)
+	//
+	// Each instruction (insData, insArray, etc) is 1 byte.
+	// For example, for type struct { x []byte; y [20]struct{ z int; w *byte }; }
+	// the program looks like:
+	//
+	// insData 3 (BitsPointer BitsScalar BitsScalar)
+	//	insArray 20 insData 2 (BitsScalar BitsPointer) insArrayEnd insEnd
+	//
+	// Total size of the program is 17 bytes (13 bytes on 32-bits).
+	// The corresponding GC mask would take 43 bytes (it would be repeated
+	// because the type has an odd number of words).
+	insData = 1 + iota
+	insArray
+	insArrayEnd
+	insEnd
+)
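
As an illustration only (not the runtime's unrollgcprog1), the following sketch expands a program into the dense 2-bit-per-word mask. It reuses the ins* opcodes defined above and assumes an 8-byte word and at least one element per insArray:

	func unrollSketch(mask, prog []byte, pos *int) []byte {
		const ptrSize = 8 // assumption: 64-bit words
		for {
			switch prog[0] {
			case insData:
				n := int(prog[1]) // number of words described
				data := prog[2:]
				for i := 0; i < n; i++ {
					v := (data[i/4] >> (uint(i%4) * 2)) & 3 // 2-bit code for word i
					mask[*pos/8] |= v << uint(*pos%8)
					*pos += 2
				}
				prog = data[(n+3)/4:] // skip ceil(n/4) data bytes
			case insArray:
				count := 0
				for i := ptrSize; i > 0; i-- {
					count = count<<8 + int(prog[i]) // little-endian element count
				}
				body := prog[1+ptrSize:]
				var rest []byte
				for i := 0; i < count; i++ {
					rest = unrollSketch(mask, body, pos) // replay the body per element
				}
				prog = rest[1:] // rest[0] is insArrayEnd
			case insArrayEnd, insEnd:
				return prog
			default:
				panic("unrollSketch: unknown instruction")
			}
		}
	}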
+
+const (
+	// Pointer map
+	_BitsPerPointer  = 2
+	_BitsMask        = (1 << _BitsPerPointer) - 1
+	_PointersPerByte = 8 / _BitsPerPointer
+
+	// If you change these, also change scanblock.
+	// scanblock does "if(bits == BitsScalar || bits == BitsDead)" as "if(bits <= BitsScalar)".
+	_BitsDead          = 0
+	_BitsScalar        = 1                                // 01
+	_BitsPointer       = 2                                // 10
+	_BitsCheckMarkXor  = 1                                // 10
+	_BitsScalarMarked  = _BitsScalar ^ _BitsCheckMarkXor  // 00
+	_BitsPointerMarked = _BitsPointer ^ _BitsCheckMarkXor // 11
+
+	// 64 bytes cover objects of size 1024/512 on 64/32 bits, respectively.
+	_MaxGCMask = 65536 // TODO(rsc): change back to 64
+)
+
+// Bits in per-word bitmap.
+// #defines because we shift the values beyond 32 bits.
+//
+// Each word in the bitmap describes wordsPerBitmapWord words
+// of heap memory.  There are 4 bitmap bits dedicated to each heap word,
+// so on a 64-bit system there is one bitmap word per 16 heap words.
+//
+// The bitmap starts at mheap.arena_start and extends *backward* from
+// there.  On a 64-bit system the off'th word in the arena is tracked by
+// the off/16+1'th word before mheap.arena_start.  (On a 32-bit system,
+// the only difference is that the divisor is 8.)
+const (
+	bitBoundary = 1 // boundary of an object
+	bitMarked   = 2 // marked object
+	bitMask     = bitBoundary | bitMarked
+	bitPtrMask  = _BitsMask << 2
+)
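
A small sketch (names local to the example, 64-bit word size assumed) of the backward addressing this layout implies; it is the same off/bitp/shift computation that recurs throughout the C sweep and mark code above:

	// heapBitsFor returns the address of the bitmap byte and the in-byte
	// shift holding the 4 bits for the heap word at p.
	func heapBitsFor(arenaStart, p uintptr) (bitp uintptr, shift uint) {
		const (
			ptrSize            = 8 // assumed 64-bit word size
			gcBits             = 4
			wordsPerBitmapByte = 8 / gcBits
		)
		off := (p - arenaStart) / ptrSize              // word index within the arena
		bitp = arenaStart - off/wordsPerBitmapByte - 1 // the bitmap grows backward from arena_start
		shift = uint(off%wordsPerBitmapByte) * gcBits  // 0 or 4 within that byte
		return bitp, shift
	}
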
diff --git a/src/runtime/mheap.c b/src/runtime/mheap.c
deleted file mode 100644
index bb203d5..0000000
--- a/src/runtime/mheap.c
+++ /dev/null
@@ -1,889 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Page heap.
-//
-// See malloc.h for overview.
-//
-// When a MSpan is in the heap free list, state == MSpanFree
-// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
-//
-// When a MSpan is allocated, state == MSpanInUse or MSpanStack
-// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-static MSpan *MHeap_AllocSpanLocked(MHeap*, uintptr);
-static void MHeap_FreeSpanLocked(MHeap*, MSpan*, bool, bool);
-static bool MHeap_Grow(MHeap*, uintptr);
-static MSpan *MHeap_AllocLarge(MHeap*, uintptr);
-static MSpan *BestFit(MSpan*, uintptr, MSpan*);
-
-static void
-RecordSpan(void *vh, byte *p)
-{
-	MHeap *h;
-	MSpan *s;
-	MSpan **all;
-	uint32 cap;
-
-	h = vh;
-	s = (MSpan*)p;
-	if(h->nspan >= h->nspancap) {
-		cap = 64*1024/sizeof(all[0]);
-		if(cap < h->nspancap*3/2)
-			cap = h->nspancap*3/2;
-		all = (MSpan**)runtime·sysAlloc(cap*sizeof(all[0]), &mstats.other_sys);
-		if(all == nil)
-			runtime·throw("runtime: cannot allocate memory");
-		if(h->allspans) {
-			runtime·memmove(all, h->allspans, h->nspancap*sizeof(all[0]));
-			// Don't free the old array if it's referenced by sweep.
-			// See the comment in mgc0.c.
-			if(h->allspans != runtime·mheap.gcspans)
-				runtime·SysFree(h->allspans, h->nspancap*sizeof(all[0]), &mstats.other_sys);
-		}
-		h->allspans = all;
-		h->nspancap = cap;
-	}
-	h->allspans[h->nspan++] = s;
-}
-
-// Initialize the heap; fetch memory using alloc.
-void
-runtime·MHeap_Init(MHeap *h)
-{
-	uint32 i;
-
-	runtime·FixAlloc_Init(&h->spanalloc, sizeof(MSpan), RecordSpan, h, &mstats.mspan_sys);
-	runtime·FixAlloc_Init(&h->cachealloc, sizeof(MCache), nil, nil, &mstats.mcache_sys);
-	runtime·FixAlloc_Init(&h->specialfinalizeralloc, sizeof(SpecialFinalizer), nil, nil, &mstats.other_sys);
-	runtime·FixAlloc_Init(&h->specialprofilealloc, sizeof(SpecialProfile), nil, nil, &mstats.other_sys);
-	// h->mapcache needs no init
-	for(i=0; i<nelem(h->free); i++) {
-		runtime·MSpanList_Init(&h->free[i]);
-		runtime·MSpanList_Init(&h->busy[i]);
-	}
-	runtime·MSpanList_Init(&h->freelarge);
-	runtime·MSpanList_Init(&h->busylarge);
-	for(i=0; i<nelem(h->central); i++)
-		runtime·MCentral_Init(&h->central[i].mcentral, i);
-}
-
-void
-runtime·MHeap_MapSpans(MHeap *h)
-{
-	uintptr n;
-
-	// Map spans array, PageSize at a time.
-	n = (uintptr)h->arena_used;
-	n -= (uintptr)h->arena_start;
-	n = n / PageSize * sizeof(h->spans[0]);
-	n = ROUND(n, PhysPageSize);
-	if(h->spans_mapped >= n)
-		return;
-	runtime·SysMap((byte*)h->spans + h->spans_mapped, n - h->spans_mapped, h->arena_reserved, &mstats.other_sys);
-	h->spans_mapped = n;
-}
-
-// Sweeps spans in list until reclaims at least npages into heap.
-// Returns the actual number of pages reclaimed.
-static uintptr
-MHeap_ReclaimList(MHeap *h, MSpan *list, uintptr npages)
-{
-	MSpan *s;
-	uintptr n;
-	uint32 sg;
-
-	n = 0;
-	sg = runtime·mheap.sweepgen;
-retry:
-	for(s = list->next; s != list; s = s->next) {
-		if(s->sweepgen == sg-2 && runtime·cas(&s->sweepgen, sg-2, sg-1)) {
-			runtime·MSpanList_Remove(s);
-			// swept spans are at the end of the list
-			runtime·MSpanList_InsertBack(list, s);
-			runtime·unlock(&h->lock);
-			n += runtime·MSpan_Sweep(s, false);
-			runtime·lock(&h->lock);
-			if(n >= npages)
-				return n;
-			// the span could have been moved elsewhere
-			goto retry;
-		}
-		if(s->sweepgen == sg-1) {
-			// the span is being sweept by background sweeper, skip
-			continue;
-		}
-		// already swept empty span,
-		// all subsequent ones must also be either swept or in process of sweeping
-		break;
-	}
-	return n;
-}
-
-// Sweeps and reclaims at least npage pages into heap.
-// Called before allocating npage pages.
-static void
-MHeap_Reclaim(MHeap *h, uintptr npage)
-{
-	uintptr reclaimed, n;
-
-	// First try to sweep busy spans with large objects of size >= npage,
-	// this has good chances of reclaiming the necessary space.
-	for(n=npage; n < nelem(h->busy); n++) {
-		if(MHeap_ReclaimList(h, &h->busy[n], npage))
-			return;  // Bingo!
-	}
-
-	// Then -- even larger objects.
-	if(MHeap_ReclaimList(h, &h->busylarge, npage))
-		return;  // Bingo!
-
-	// Now try smaller objects.
-	// One such object is not enough, so we need to reclaim several of them.
-	reclaimed = 0;
-	for(n=0; n < npage && n < nelem(h->busy); n++) {
-		reclaimed += MHeap_ReclaimList(h, &h->busy[n], npage-reclaimed);
-		if(reclaimed >= npage)
-			return;
-	}
-
-	// Now sweep everything that is not yet swept.
-	runtime·unlock(&h->lock);
-	for(;;) {
-		n = runtime·sweepone();
-		if(n == -1)  // all spans are swept
-			break;
-		reclaimed += n;
-		if(reclaimed >= npage)
-			break;
-	}
-	runtime·lock(&h->lock);
-}
-
-// Allocate a new span of npage pages from the heap for GC'd memory
-// and record its size class in the HeapMap and HeapMapCache.
-static MSpan*
-mheap_alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large)
-{
-	MSpan *s;
-
-	if(g != g->m->g0)
-		runtime·throw("mheap_alloc not on M stack");
-	runtime·lock(&h->lock);
-
-	// To prevent excessive heap growth, before allocating n pages
-	// we need to sweep and reclaim at least n pages.
-	if(!h->sweepdone)
-		MHeap_Reclaim(h, npage);
-
-	// transfer stats from cache to global
-	mstats.heap_alloc += g->m->mcache->local_cachealloc;
-	g->m->mcache->local_cachealloc = 0;
-	mstats.tinyallocs += g->m->mcache->local_tinyallocs;
-	g->m->mcache->local_tinyallocs = 0;
-
-	s = MHeap_AllocSpanLocked(h, npage);
-	if(s != nil) {
-		// Record span info, because gc needs to be
-		// able to map interior pointer to containing span.
-		runtime·atomicstore(&s->sweepgen, h->sweepgen);
-		s->state = MSpanInUse;
-		s->freelist = nil;
-		s->ref = 0;
-		s->sizeclass = sizeclass;
-		s->elemsize = (sizeclass==0 ? s->npages<<PageShift : runtime·class_to_size[sizeclass]);
-
-		// update stats, sweep lists
-		if(large) {
-			mstats.heap_objects++;
-			mstats.heap_alloc += npage<<PageShift;
-			// Swept spans are at the end of lists.
-			if(s->npages < nelem(h->free))
-				runtime·MSpanList_InsertBack(&h->busy[s->npages], s);
-			else
-				runtime·MSpanList_InsertBack(&h->busylarge, s);
-		}
-	}
-	runtime·unlock(&h->lock);
-	return s;
-}
-
-static void
-mheap_alloc_m(G *gp)
-{
-	MHeap *h;
-	MSpan *s;
-
-	h = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	s = mheap_alloc(h, g->m->scalararg[0], g->m->scalararg[1], g->m->scalararg[2]);
-	g->m->ptrarg[0] = s;
-
-	runtime·gogo(&gp->sched);
-}
-
-MSpan*
-runtime·MHeap_Alloc(MHeap *h, uintptr npage, int32 sizeclass, bool large, bool needzero)
-{
-	MSpan *s;
-	void (*fn)(G*);
-
-	// Don't do any operations that lock the heap on the G stack.
-	// It might trigger stack growth, and the stack growth code needs
-	// to be able to allocate heap.
-	if(g == g->m->g0) {
-		s = mheap_alloc(h, npage, sizeclass, large);
-	} else {
-		g->m->ptrarg[0] = h;
-		g->m->scalararg[0] = npage;
-		g->m->scalararg[1] = sizeclass;
-		g->m->scalararg[2] = large;
-		fn = mheap_alloc_m;
-		runtime·mcall(&fn);
-		s = g->m->ptrarg[0];
-		g->m->ptrarg[0] = nil;
-	}
-	if(s != nil) {
-		if(needzero && s->needzero)
-			runtime·memclr((byte*)(s->start<<PageShift), s->npages<<PageShift);
-		s->needzero = 0;
-	}
-	return s;
-}
-
-MSpan*
-runtime·MHeap_AllocStack(MHeap *h, uintptr npage)
-{
-	MSpan *s;
-
-	if(g != g->m->g0)
-		runtime·throw("mheap_allocstack not on M stack");
-	runtime·lock(&h->lock);
-	s = MHeap_AllocSpanLocked(h, npage);
-	if(s != nil) {
-		s->state = MSpanStack;
-		s->freelist = nil;
-		s->ref = 0;
-		mstats.stacks_inuse += s->npages<<PageShift;
-	}
-	runtime·unlock(&h->lock);
-	return s;
-}
-
-// Allocates a span of the given size.  h must be locked.
-// The returned span has been removed from the
-// free list, but its state is still MSpanFree.
-static MSpan*
-MHeap_AllocSpanLocked(MHeap *h, uintptr npage)
-{
-	uintptr n;
-	MSpan *s, *t;
-	pageID p;
-
-	// Try in fixed-size lists up to max.
-	for(n=npage; n < nelem(h->free); n++) {
-		if(!runtime·MSpanList_IsEmpty(&h->free[n])) {
-			s = h->free[n].next;
-			goto HaveSpan;
-		}
-	}
-
-	// Best fit in list of large spans.
-	if((s = MHeap_AllocLarge(h, npage)) == nil) {
-		if(!MHeap_Grow(h, npage))
-			return nil;
-		if((s = MHeap_AllocLarge(h, npage)) == nil)
-			return nil;
-	}
-
-HaveSpan:
-	// Mark span in use.
-	if(s->state != MSpanFree)
-		runtime·throw("MHeap_AllocLocked - MSpan not free");
-	if(s->npages < npage)
-		runtime·throw("MHeap_AllocLocked - bad npages");
-	runtime·MSpanList_Remove(s);
-	if(s->next != nil || s->prev != nil)
-		runtime·throw("still in list");
-	if(s->npreleased > 0) {
-		runtime·SysUsed((void*)(s->start<<PageShift), s->npages<<PageShift);
-		mstats.heap_released -= s->npreleased<<PageShift;
-		s->npreleased = 0;
-	}
-
-	if(s->npages > npage) {
-		// Trim extra and put it back in the heap.
-		t = runtime·FixAlloc_Alloc(&h->spanalloc);
-		runtime·MSpan_Init(t, s->start + npage, s->npages - npage);
-		s->npages = npage;
-		p = t->start;
-		p -= ((uintptr)h->arena_start>>PageShift);
-		if(p > 0)
-			h->spans[p-1] = s;
-		h->spans[p] = t;
-		h->spans[p+t->npages-1] = t;
-		t->needzero = s->needzero;
-		s->state = MSpanStack; // prevent coalescing with s
-		t->state = MSpanStack;
-		MHeap_FreeSpanLocked(h, t, false, false);
-		t->unusedsince = s->unusedsince; // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
-		s->state = MSpanFree;
-	}
-	s->unusedsince = 0;
-
-	p = s->start;
-	p -= ((uintptr)h->arena_start>>PageShift);
-	for(n=0; n<npage; n++)
-		h->spans[p+n] = s;
-
-	mstats.heap_inuse += npage<<PageShift;
-	mstats.heap_idle -= npage<<PageShift;
-
-	//runtime·printf("spanalloc %p\n", s->start << PageShift);
-	if(s->next != nil || s->prev != nil)
-		runtime·throw("still in list");
-	return s;
-}
-
-// Allocate a span of exactly npage pages from the list of large spans.
-static MSpan*
-MHeap_AllocLarge(MHeap *h, uintptr npage)
-{
-	return BestFit(&h->freelarge, npage, nil);
-}
-
-// Search list for smallest span with >= npage pages.
-// If there are multiple smallest spans, take the one
-// with the earliest starting address.
-static MSpan*
-BestFit(MSpan *list, uintptr npage, MSpan *best)
-{
-	MSpan *s;
-
-	for(s=list->next; s != list; s=s->next) {
-		if(s->npages < npage)
-			continue;
-		if(best == nil
-		|| s->npages < best->npages
-		|| (s->npages == best->npages && s->start < best->start))
-			best = s;
-	}
-	return best;
-}
-
-// Try to add at least npage pages of memory to the heap,
-// returning whether it worked.
-static bool
-MHeap_Grow(MHeap *h, uintptr npage)
-{
-	uintptr ask;
-	void *v;
-	MSpan *s;
-	pageID p;
-
-	// Ask for a big chunk, to reduce the number of mappings
-	// the operating system needs to track; also amortizes
-	// the overhead of an operating system mapping.
-	// Allocate a multiple of 64kB.
-	npage = ROUND(npage, (64<<10)/PageSize);
-	ask = npage<<PageShift;
-	if(ask < HeapAllocChunk)
-		ask = HeapAllocChunk;
-
-	v = runtime·MHeap_SysAlloc(h, ask);
-	if(v == nil) {
-		if(ask > (npage<<PageShift)) {
-			ask = npage<<PageShift;
-			v = runtime·MHeap_SysAlloc(h, ask);
-		}
-		if(v == nil) {
-			runtime·printf("runtime: out of memory: cannot allocate %D-byte block (%D in use)\n", (uint64)ask, mstats.heap_sys);
-			return false;
-		}
-	}
-
-	// Create a fake "in use" span and free it, so that the
-	// right coalescing happens.
-	s = runtime·FixAlloc_Alloc(&h->spanalloc);
-	runtime·MSpan_Init(s, (uintptr)v>>PageShift, ask>>PageShift);
-	p = s->start;
-	p -= ((uintptr)h->arena_start>>PageShift);
-	h->spans[p] = s;
-	h->spans[p + s->npages - 1] = s;
-	runtime·atomicstore(&s->sweepgen, h->sweepgen);
-	s->state = MSpanInUse;
-	MHeap_FreeSpanLocked(h, s, false, true);
-	return true;
-}
-
-// Look up the span at the given address.
-// Address is guaranteed to be in map
-// and is guaranteed to be start or end of span.
-MSpan*
-runtime·MHeap_Lookup(MHeap *h, void *v)
-{
-	uintptr p;
-	
-	p = (uintptr)v;
-	p -= (uintptr)h->arena_start;
-	return h->spans[p >> PageShift];
-}
-
-// Look up the span at the given address.
-// Address is *not* guaranteed to be in map
-// and may be anywhere in the span.
-// Map entries for the middle of a span are only
-// valid for allocated spans.  Free spans may have
-// other garbage in their middles, so we have to
-// check for that.
-MSpan*
-runtime·MHeap_LookupMaybe(MHeap *h, void *v)
-{
-	MSpan *s;
-	pageID p, q;
-
-	if((byte*)v < h->arena_start || (byte*)v >= h->arena_used)
-		return nil;
-	p = (uintptr)v>>PageShift;
-	q = p;
-	q -= (uintptr)h->arena_start >> PageShift;
-	s = h->spans[q];
-	if(s == nil || p < s->start || v >= s->limit || s->state != MSpanInUse)
-		return nil;
-	return s;
-}
-
-// Free the span back into the heap.
-static void
-mheap_free(MHeap *h, MSpan *s, int32 acct)
-{
-	if(g != g->m->g0)
-		runtime·throw("mheap_free not on M stack");
-	runtime·lock(&h->lock);
-	mstats.heap_alloc += g->m->mcache->local_cachealloc;
-	g->m->mcache->local_cachealloc = 0;
-	mstats.tinyallocs += g->m->mcache->local_tinyallocs;
-	g->m->mcache->local_tinyallocs = 0;
-	if(acct) {
-		mstats.heap_alloc -= s->npages<<PageShift;
-		mstats.heap_objects--;
-	}
-	MHeap_FreeSpanLocked(h, s, true, true);
-	runtime·unlock(&h->lock);
-}
-
-static void
-mheap_free_m(G *gp)
-{
-	MHeap *h;
-	MSpan *s;
-	
-	h = g->m->ptrarg[0];
-	s = g->m->ptrarg[1];
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-	mheap_free(h, s, g->m->scalararg[0]);
-	runtime·gogo(&gp->sched);
-}
-
-void
-runtime·MHeap_Free(MHeap *h, MSpan *s, int32 acct)
-{
-	void (*fn)(G*);
-
-	if(g == g->m->g0) {
-		mheap_free(h, s, acct);
-	} else {
-		g->m->ptrarg[0] = h;
-		g->m->ptrarg[1] = s;
-		g->m->scalararg[0] = acct;
-		fn = mheap_free_m;
-		runtime·mcall(&fn);
-	}
-}
-
-void
-runtime·MHeap_FreeStack(MHeap *h, MSpan *s)
-{
-	if(g != g->m->g0)
-		runtime·throw("mheap_freestack not on M stack");
-	s->needzero = 1;
-	runtime·lock(&h->lock);
-	mstats.stacks_inuse -= s->npages<<PageShift;
-	MHeap_FreeSpanLocked(h, s, true, true);
-	runtime·unlock(&h->lock);
-}
-
-static void
-MHeap_FreeSpanLocked(MHeap *h, MSpan *s, bool acctinuse, bool acctidle)
-{
-	MSpan *t;
-	pageID p;
-
-	switch(s->state) {
-	case MSpanStack:
-		if(s->ref != 0)
-			runtime·throw("MHeap_FreeSpanLocked - invalid stack free");
-		break;
-	case MSpanInUse:
-		if(s->ref != 0 || s->sweepgen != h->sweepgen) {
-			runtime·printf("MHeap_FreeSpanLocked - span %p ptr %p ref %d sweepgen %d/%d\n",
-				       s, s->start<<PageShift, s->ref, s->sweepgen, h->sweepgen);
-			runtime·throw("MHeap_FreeSpanLocked - invalid free");
-		}
-		break;
-	default:
-		runtime·throw("MHeap_FreeSpanLocked - invalid span state");
-		break;
-	}
-	if(acctinuse)
-		mstats.heap_inuse -= s->npages<<PageShift;
-	if(acctidle)
-		mstats.heap_idle += s->npages<<PageShift;
-	s->state = MSpanFree;
-	runtime·MSpanList_Remove(s);
-	// Stamp newly unused spans. The scavenger will use that
-	// info to potentially give back some pages to the OS.
-	s->unusedsince = runtime·nanotime();
-	s->npreleased = 0;
-
-	// Coalesce with earlier, later spans.
-	p = s->start;
-	p -= (uintptr)h->arena_start >> PageShift;
-	if(p > 0 && (t = h->spans[p-1]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
-		s->start = t->start;
-		s->npages += t->npages;
-		s->npreleased = t->npreleased; // absorb released pages
-		s->needzero |= t->needzero;
-		p -= t->npages;
-		h->spans[p] = s;
-		runtime·MSpanList_Remove(t);
-		t->state = MSpanDead;
-		runtime·FixAlloc_Free(&h->spanalloc, t);
-	}
-	if((p+s->npages)*sizeof(h->spans[0]) < h->spans_mapped && (t = h->spans[p+s->npages]) != nil && t->state != MSpanInUse && t->state != MSpanStack) {
-		s->npages += t->npages;
-		s->npreleased += t->npreleased;
-		s->needzero |= t->needzero;
-		h->spans[p + s->npages - 1] = s;
-		runtime·MSpanList_Remove(t);
-		t->state = MSpanDead;
-		runtime·FixAlloc_Free(&h->spanalloc, t);
-	}
-
-	// Insert s into appropriate list.
-	if(s->npages < nelem(h->free))
-		runtime·MSpanList_Insert(&h->free[s->npages], s);
-	else
-		runtime·MSpanList_Insert(&h->freelarge, s);
-}
-
-static uintptr
-scavengelist(MSpan *list, uint64 now, uint64 limit)
-{
-	uintptr released, sumreleased;
-	MSpan *s;
-
-	if(runtime·MSpanList_IsEmpty(list))
-		return 0;
-
-	sumreleased = 0;
-	for(s=list->next; s != list; s=s->next) {
-		if((now - s->unusedsince) > limit && s->npreleased != s->npages) {
-			released = (s->npages - s->npreleased) << PageShift;
-			mstats.heap_released += released;
-			sumreleased += released;
-			s->npreleased = s->npages;
-			runtime·SysUnused((void*)(s->start << PageShift), s->npages << PageShift);
-		}
-	}
-	return sumreleased;
-}
-
-void
-runtime·MHeap_Scavenge(int32 k, uint64 now, uint64 limit)
-{
-	uint32 i;
-	uintptr sumreleased;
-	MHeap *h;
-	
-	h = &runtime·mheap;
-	runtime·lock(&h->lock);
-	sumreleased = 0;
-	for(i=0; i < nelem(h->free); i++)
-		sumreleased += scavengelist(&h->free[i], now, limit);
-	sumreleased += scavengelist(&h->freelarge, now, limit);
-	runtime·unlock(&h->lock);
-
-	if(runtime·debug.gctrace > 0) {
-		if(sumreleased > 0)
-			runtime·printf("scvg%d: %D MB released\n", k, (uint64)sumreleased>>20);
-		// TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
-		// But we can't call ReadMemStats on g0 holding locks.
-		runtime·printf("scvg%d: inuse: %D, idle: %D, sys: %D, released: %D, consumed: %D (MB)\n",
-			k, mstats.heap_inuse>>20, mstats.heap_idle>>20, mstats.heap_sys>>20,
-			mstats.heap_released>>20, (mstats.heap_sys - mstats.heap_released)>>20);
-	}
-}
-
-void
-runtime·scavenge_m(void)
-{
-	runtime·MHeap_Scavenge(-1, ~(uintptr)0, 0);
-}
-
-// Initialize a new span with the given start and npages.
-void
-runtime·MSpan_Init(MSpan *span, pageID start, uintptr npages)
-{
-	span->next = nil;
-	span->prev = nil;
-	span->start = start;
-	span->npages = npages;
-	span->freelist = nil;
-	span->ref = 0;
-	span->sizeclass = 0;
-	span->incache = false;
-	span->elemsize = 0;
-	span->state = MSpanDead;
-	span->unusedsince = 0;
-	span->npreleased = 0;
-	span->specialLock.key = 0;
-	span->specials = nil;
-	span->needzero = 0;
-}
-
-// Initialize an empty doubly-linked list.
-void
-runtime·MSpanList_Init(MSpan *list)
-{
-	list->state = MSpanListHead;
-	list->next = list;
-	list->prev = list;
-}
-
-void
-runtime·MSpanList_Remove(MSpan *span)
-{
-	if(span->prev == nil && span->next == nil)
-		return;
-	span->prev->next = span->next;
-	span->next->prev = span->prev;
-	span->prev = nil;
-	span->next = nil;
-}
-
-bool
-runtime·MSpanList_IsEmpty(MSpan *list)
-{
-	return list->next == list;
-}
-
-void
-runtime·MSpanList_Insert(MSpan *list, MSpan *span)
-{
-	if(span->next != nil || span->prev != nil) {
-		runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
-		runtime·throw("MSpanList_Insert");
-	}
-	span->next = list->next;
-	span->prev = list;
-	span->next->prev = span;
-	span->prev->next = span;
-}
-
-void
-runtime·MSpanList_InsertBack(MSpan *list, MSpan *span)
-{
-	if(span->next != nil || span->prev != nil) {
-		runtime·printf("failed MSpanList_Insert %p %p %p\n", span, span->next, span->prev);
-		runtime·throw("MSpanList_Insert");
-	}
-	span->next = list;
-	span->prev = list->prev;
-	span->next->prev = span;
-	span->prev->next = span;
-}
-
-// Adds the special record s to the list of special records for
-// the object p.  All fields of s should be filled in except for
-// offset & next, which this routine will fill in.
-// Returns true if the special was successfully added, false otherwise.
-// (The add will fail only if a record with the same p and s->kind
-//  already exists.)
-static bool
-addspecial(void *p, Special *s)
-{
-	MSpan *span;
-	Special **t, *x;
-	uintptr offset;
-	byte kind;
-
-	span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
-	if(span == nil)
-		runtime·throw("addspecial on invalid pointer");
-
-	// Ensure that the span is swept.
-	// GC accesses specials list w/o locks. And it's just much safer.
-	g->m->locks++;
-	runtime·MSpan_EnsureSwept(span);
-
-	offset = (uintptr)p - (span->start << PageShift);
-	kind = s->kind;
-
-	runtime·lock(&span->specialLock);
-
-	// Find splice point, check for existing record.
-	t = &span->specials;
-	while((x = *t) != nil) {
-		if(offset == x->offset && kind == x->kind) {
-			runtime·unlock(&span->specialLock);
-			g->m->locks--;
-			return false; // already exists
-		}
-		if(offset < x->offset || (offset == x->offset && kind < x->kind))
-			break;
-		t = &x->next;
-	}
-	// Splice in record, fill in offset.
-	s->offset = offset;
-	s->next = x;
-	*t = s;
-	runtime·unlock(&span->specialLock);
-	g->m->locks--;
-	return true;
-}
-
-// Removes the Special record of the given kind for the object p.
-// Returns the record if the record existed, nil otherwise.
-// The caller must FixAlloc_Free the result.
-static Special*
-removespecial(void *p, byte kind)
-{
-	MSpan *span;
-	Special *s, **t;
-	uintptr offset;
-
-	span = runtime·MHeap_LookupMaybe(&runtime·mheap, p);
-	if(span == nil)
-		runtime·throw("removespecial on invalid pointer");
-
-	// Ensure that the span is swept.
-	// GC accesses specials list w/o locks. And it's just much safer.
-	g->m->locks++;
-	runtime·MSpan_EnsureSwept(span);
-
-	offset = (uintptr)p - (span->start << PageShift);
-
-	runtime·lock(&span->specialLock);
-	t = &span->specials;
-	while((s = *t) != nil) {
-		// This function is used for finalizers only, so we don't check for
-		// "interior" specials (p must be exactly equal to s->offset).
-		if(offset == s->offset && kind == s->kind) {
-			*t = s->next;
-			runtime·unlock(&span->specialLock);
-			g->m->locks--;
-			return s;
-		}
-		t = &s->next;
-	}
-	runtime·unlock(&span->specialLock);
-	g->m->locks--;
-	return nil;
-}
-
-// Adds a finalizer to the object p.  Returns true if it succeeded.
-bool
-runtime·addfinalizer(void *p, FuncVal *f, uintptr nret, Type *fint, PtrType *ot)
-{
-	SpecialFinalizer *s;
-
-	runtime·lock(&runtime·mheap.speciallock);
-	s = runtime·FixAlloc_Alloc(&runtime·mheap.specialfinalizeralloc);
-	runtime·unlock(&runtime·mheap.speciallock);
-	s->special.kind = KindSpecialFinalizer;
-	s->fn = f;
-	s->nret = nret;
-	s->fint = fint;
-	s->ot = ot;
-	if(addspecial(p, &s->special))
-		return true;
-
-	// There was an old finalizer
-	runtime·lock(&runtime·mheap.speciallock);
-	runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
-	runtime·unlock(&runtime·mheap.speciallock);
-	return false;
-}
-
-// Removes the finalizer (if any) from the object p.
-void
-runtime·removefinalizer(void *p)
-{
-	SpecialFinalizer *s;
-
-	s = (SpecialFinalizer*)removespecial(p, KindSpecialFinalizer);
-	if(s == nil)
-		return; // there wasn't a finalizer to remove
-	runtime·lock(&runtime·mheap.speciallock);
-	runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, s);
-	runtime·unlock(&runtime·mheap.speciallock);
-}
-
-// Set the heap profile bucket associated with addr to b.
-void
-runtime·setprofilebucket_m(void)
-{	
-	void *p;
-	Bucket *b;
-	SpecialProfile *s;
-	
-	p = g->m->ptrarg[0];
-	b = g->m->ptrarg[1];
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-
-	runtime·lock(&runtime·mheap.speciallock);
-	s = runtime·FixAlloc_Alloc(&runtime·mheap.specialprofilealloc);
-	runtime·unlock(&runtime·mheap.speciallock);
-	s->special.kind = KindSpecialProfile;
-	s->b = b;
-	if(!addspecial(p, &s->special))
-		runtime·throw("setprofilebucket: profile already set");
-}
-
-// Do whatever cleanup needs to be done to deallocate s.  It has
-// already been unlinked from the MSpan specials list.
-// Returns true if we should keep working on deallocating p.
-bool
-runtime·freespecial(Special *s, void *p, uintptr size, bool freed)
-{
-	SpecialFinalizer *sf;
-	SpecialProfile *sp;
-
-	switch(s->kind) {
-	case KindSpecialFinalizer:
-		sf = (SpecialFinalizer*)s;
-		runtime·queuefinalizer(p, sf->fn, sf->nret, sf->fint, sf->ot);
-		runtime·lock(&runtime·mheap.speciallock);
-		runtime·FixAlloc_Free(&runtime·mheap.specialfinalizeralloc, sf);
-		runtime·unlock(&runtime·mheap.speciallock);
-		return false; // don't free p until finalizer is done
-	case KindSpecialProfile:
-		sp = (SpecialProfile*)s;
-		runtime·mProf_Free(sp->b, size, freed);
-		runtime·lock(&runtime·mheap.speciallock);
-		runtime·FixAlloc_Free(&runtime·mheap.specialprofilealloc, sp);
-		runtime·unlock(&runtime·mheap.speciallock);
-		return true;
-	default:
-		runtime·throw("bad special kind");
-		return true;
-	}
-}
diff --git a/src/runtime/mheap.go b/src/runtime/mheap.go
new file mode 100644
index 0000000..fedcd69
--- /dev/null
+++ b/src/runtime/mheap.go
@@ -0,0 +1,785 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Page heap.
+//
+// See malloc.h for overview.
+//
+// When a MSpan is in the heap free list, state == MSpanFree
+// and heapmap(s->start) == span, heapmap(s->start+s->npages-1) == span.
+//
+// When a MSpan is allocated, state == MSpanInUse or MSpanStack
+// and heapmap(i) == span for all s->start <= i < s->start+s->npages.
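The two invariants above can be phrased as a predicate over the spans map; the sketch below is illustrative only (written against the h_spans slice declared further down) and is not part of the change itself.

	// spanMapConsistent reports whether s satisfies the heapmap invariants above:
	// an allocated span (MSpanInUse or MSpanStack) is recorded at every page it
	// covers, while a free span only needs its first and last pages recorded.
	func spanMapConsistent(spans []*mspan, s *mspan, arenaStartPage uintptr) bool {
		p := uintptr(s.start) - arenaStartPage // index of the span's first page
		if s.state == _MSpanInUse || s.state == _MSpanStack {
			for i := uintptr(0); i < s.npages; i++ {
				if spans[p+i] != s {
					return false
				}
			}
			return true
		}
		return spans[p] == s && spans[p+s.npages-1] == s
	}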
+
+package runtime
+
+import "unsafe"
+
+var h_allspans []*mspan // TODO: make this h.allspans once mheap can be defined in Go
+var h_spans []*mspan    // TODO: make this h.spans once mheap can be defined in Go
+
+func recordspan(vh unsafe.Pointer, p unsafe.Pointer) {
+	h := (*mheap)(vh)
+	s := (*mspan)(p)
+	if len(h_allspans) >= cap(h_allspans) {
+		n := 64 * 1024 / ptrSize
+		if n < cap(h_allspans)*3/2 {
+			n = cap(h_allspans) * 3 / 2
+		}
+		var new []*mspan
+		sp := (*slice)(unsafe.Pointer(&new))
+		sp.array = (*byte)(sysAlloc(uintptr(n)*ptrSize, &memstats.other_sys))
+		if sp.array == nil {
+			gothrow("runtime: cannot allocate memory")
+		}
+		sp.len = uint(len(h_allspans))
+		sp.cap = uint(n)
+		if len(h_allspans) > 0 {
+			copy(new, h_allspans)
+			// Don't free the old array if it's referenced by sweep.
+			// See the comment in mgc0.c.
+			if h.allspans != mheap_.gcspans {
+				sysFree(unsafe.Pointer(h.allspans), uintptr(cap(h_allspans))*ptrSize, &memstats.other_sys)
+			}
+		}
+		h_allspans = new
+		h.allspans = (**mspan)(unsafe.Pointer(sp.array))
+	}
+	h_allspans = append(h_allspans, s)
+	h.nspan = uint32(len(h_allspans))
+}
+
+// Initialize the heap.
+func mHeap_Init(h *mheap, spans_size uintptr) {
+	fixAlloc_Init(&h.spanalloc, unsafe.Sizeof(mspan{}), recordspan, unsafe.Pointer(h), &memstats.mspan_sys)
+	fixAlloc_Init(&h.cachealloc, unsafe.Sizeof(mcache{}), nil, nil, &memstats.mcache_sys)
+	fixAlloc_Init(&h.specialfinalizeralloc, unsafe.Sizeof(specialfinalizer{}), nil, nil, &memstats.other_sys)
+	fixAlloc_Init(&h.specialprofilealloc, unsafe.Sizeof(specialprofile{}), nil, nil, &memstats.other_sys)
+
+	// h->mapcache needs no init
+	for i := range h.free {
+		mSpanList_Init(&h.free[i])
+		mSpanList_Init(&h.busy[i])
+	}
+
+	mSpanList_Init(&h.freelarge)
+	mSpanList_Init(&h.busylarge)
+	for i := range h.central {
+		mCentral_Init(&h.central[i].mcentral, int32(i))
+	}
+
+	sp := (*slice)(unsafe.Pointer(&h_spans))
+	sp.array = (*byte)(unsafe.Pointer(h.spans))
+	sp.len = uint(spans_size / ptrSize)
+	sp.cap = uint(spans_size / ptrSize)
+}
+
+func mHeap_MapSpans(h *mheap) {
+	// Map spans array, PageSize at a time.
+	n := uintptr(unsafe.Pointer(h.arena_used))
+	n -= uintptr(unsafe.Pointer(h.arena_start))
+	n = n / _PageSize * ptrSize
+	n = round(n, _PhysPageSize)
+	if h.spans_mapped >= n {
+		return
+	}
+	sysMap(add(unsafe.Pointer(h.spans), h.spans_mapped), n-h.spans_mapped, h.arena_reserved, &memstats.other_sys)
+	h.spans_mapped = n
+}
+
+// Sweeps spans in list until it reclaims at least npages into the heap.
+// Returns the actual number of pages reclaimed.
+func mHeap_ReclaimList(h *mheap, list *mspan, npages uintptr) uintptr {
+	n := uintptr(0)
+	sg := mheap_.sweepgen
+retry:
+	for s := list.next; s != list; s = s.next {
+		if s.sweepgen == sg-2 && cas(&s.sweepgen, sg-2, sg-1) {
+			mSpanList_Remove(s)
+			// swept spans are at the end of the list
+			mSpanList_InsertBack(list, s)
+			unlock(&h.lock)
+			if mSpan_Sweep(s, false) {
+				// TODO(rsc,dvyukov): This is probably wrong.
+				// It is undercounting the number of pages reclaimed.
+				// See golang.org/issue/9048.
+				// Note that if we want to add the true count of s's pages,
+				// we must record that before calling mSpan_Sweep,
+				// because if mSpan_Sweep returns true the span has
+				// been freed and may already be reused, so it is no
+				// longer safe to examine it here.
+				n++
+			}
+			lock(&h.lock)
+			if n >= npages {
+				return n
+			}
+			// the span could have been moved elsewhere
+			goto retry
+		}
+		if s.sweepgen == sg-1 {
+			// the span is being swept by the background sweeper, skip it
+			continue
+		}
+		// already swept empty span,
+		// all subsequent ones must also be either swept or in process of sweeping
+		break
+	}
+	return n
+}
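The sg-2/sg-1 tests above follow the runtime's sweep-generation convention for spans, which is easy to lose track of when reading this loop. The helper below only restates that convention (as documented on MSpan elsewhere in the runtime); it is not part of the change.

	// Relative to sg = mheap_.sweepgen:
	//   s.sweepgen == sg-2: the span needs sweeping
	//   s.sweepgen == sg-1: the span is being swept by someone else
	//   s.sweepgen == sg:   the span has been swept and is ready to use
	// mHeap_ReclaimList only claims spans in the first state, via the cas above.
	func spanSweepState(sweepgen, sg uint32) string {
		switch sweepgen {
		case sg - 2:
			return "needs sweeping"
		case sg - 1:
			return "being swept"
		case sg:
			return "swept"
		}
		return "unknown"
	}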
+
+// Sweeps and reclaims at least npage pages into heap.
+// Called before allocating npage pages.
+func mHeap_Reclaim(h *mheap, npage uintptr) {
+	// First try to sweep busy spans with large objects of size >= npage;
+	// this has a good chance of reclaiming the necessary space.
+	for i := int(npage); i < len(h.busy); i++ {
+		if mHeap_ReclaimList(h, &h.busy[i], npage) != 0 {
+			return // Bingo!
+		}
+	}
+
+	// Then -- even larger objects.
+	if mHeap_ReclaimList(h, &h.busylarge, npage) != 0 {
+		return // Bingo!
+	}
+
+	// Now try smaller objects.
+	// One such object is not enough, so we need to reclaim several of them.
+	reclaimed := uintptr(0)
+	for i := 0; i < int(npage) && i < len(h.busy); i++ {
+		reclaimed += mHeap_ReclaimList(h, &h.busy[i], npage-reclaimed)
+		if reclaimed >= npage {
+			return
+		}
+	}
+
+	// Now sweep everything that is not yet swept.
+	unlock(&h.lock)
+	for {
+		n := sweepone()
+		if n == ^uintptr(0) { // all spans are swept
+			break
+		}
+		reclaimed += n
+		if reclaimed >= npage {
+			break
+		}
+	}
+	lock(&h.lock)
+}
+
+// Allocate a new span of npage pages from the heap for GC'd memory
+// and record its size class in the HeapMap and HeapMapCache.
+func mHeap_Alloc_m(h *mheap, npage uintptr, sizeclass int32, large bool) *mspan {
+	_g_ := getg()
+	if _g_ != _g_.m.g0 {
+		gothrow("_mheap_alloc not on g0 stack")
+	}
+	lock(&h.lock)
+
+	// To prevent excessive heap growth, before allocating n pages
+	// we need to sweep and reclaim at least n pages.
+	if h.sweepdone == 0 {
+		mHeap_Reclaim(h, npage)
+	}
+
+	// transfer stats from cache to global
+	memstats.heap_alloc += uint64(_g_.m.mcache.local_cachealloc)
+	_g_.m.mcache.local_cachealloc = 0
+	memstats.tinyallocs += uint64(_g_.m.mcache.local_tinyallocs)
+	_g_.m.mcache.local_tinyallocs = 0
+
+	s := mHeap_AllocSpanLocked(h, npage)
+	if s != nil {
+		// Record span info, because gc needs to be
+		// able to map interior pointer to containing span.
+		atomicstore(&s.sweepgen, h.sweepgen)
+		s.state = _MSpanInUse
+		s.freelist = nil
+		s.ref = 0
+		s.sizeclass = uint8(sizeclass)
+		if sizeclass == 0 {
+			s.elemsize = s.npages << _PageShift
+		} else {
+			s.elemsize = uintptr(class_to_size[sizeclass])
+		}
+
+		// update stats, sweep lists
+		if large {
+			memstats.heap_objects++
+			memstats.heap_alloc += uint64(npage << _PageShift)
+			// Swept spans are at the end of lists.
+			if s.npages < uintptr(len(h.free)) {
+				mSpanList_InsertBack(&h.busy[s.npages], s)
+			} else {
+				mSpanList_InsertBack(&h.busylarge, s)
+			}
+		}
+	}
+	unlock(&h.lock)
+	return s
+}
+
+func mHeap_Alloc(h *mheap, npage uintptr, sizeclass int32, large bool, needzero bool) *mspan {
+	// Don't do any operations that lock the heap on the G stack.
+	// It might trigger stack growth, and the stack growth code needs
+	// to be able to allocate heap.
+	var s *mspan
+	systemstack(func() {
+		s = mHeap_Alloc_m(h, npage, sizeclass, large)
+	})
+
+	if s != nil {
+		if needzero && s.needzero != 0 {
+			memclr(unsafe.Pointer(s.start<<_PageShift), s.npages<<_PageShift)
+		}
+		s.needzero = 0
+	}
+	return s
+}
+
+func mHeap_AllocStack(h *mheap, npage uintptr) *mspan {
+	_g_ := getg()
+	if _g_ != _g_.m.g0 {
+		gothrow("mheap_allocstack not on g0 stack")
+	}
+	lock(&h.lock)
+	s := mHeap_AllocSpanLocked(h, npage)
+	if s != nil {
+		s.state = _MSpanStack
+		s.freelist = nil
+		s.ref = 0
+		memstats.stacks_inuse += uint64(s.npages << _PageShift)
+	}
+	unlock(&h.lock)
+	return s
+}
+
+// Allocates a span of the given size.  h must be locked.
+// The returned span has been removed from the
+// free list, but its state is still MSpanFree.
+func mHeap_AllocSpanLocked(h *mheap, npage uintptr) *mspan {
+	var s *mspan
+
+	// Try in fixed-size lists up to max.
+	for i := int(npage); i < len(h.free); i++ {
+		if !mSpanList_IsEmpty(&h.free[i]) {
+			s = h.free[i].next
+			goto HaveSpan
+		}
+	}
+
+	// Best fit in list of large spans.
+	s = mHeap_AllocLarge(h, npage)
+	if s == nil {
+		if !mHeap_Grow(h, npage) {
+			return nil
+		}
+		s = mHeap_AllocLarge(h, npage)
+		if s == nil {
+			return nil
+		}
+	}
+
+HaveSpan:
+	// Mark span in use.
+	if s.state != _MSpanFree {
+		gothrow("MHeap_AllocLocked - MSpan not free")
+	}
+	if s.npages < npage {
+		gothrow("MHeap_AllocLocked - bad npages")
+	}
+	mSpanList_Remove(s)
+	if s.next != nil || s.prev != nil {
+		gothrow("still in list")
+	}
+	if s.npreleased > 0 {
+		sysUsed((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
+		memstats.heap_released -= uint64(s.npreleased << _PageShift)
+		s.npreleased = 0
+	}
+
+	if s.npages > npage {
+		// Trim extra and put it back in the heap.
+		t := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
+		mSpan_Init(t, s.start+pageID(npage), s.npages-npage)
+		s.npages = npage
+		p := uintptr(t.start)
+		p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
+		if p > 0 {
+			h_spans[p-1] = s
+		}
+		h_spans[p] = t
+		h_spans[p+t.npages-1] = t
+		t.needzero = s.needzero
+		s.state = _MSpanStack // prevent coalescing with s
+		t.state = _MSpanStack
+		mHeap_FreeSpanLocked(h, t, false, false)
+		t.unusedsince = s.unusedsince // preserve age (TODO: wrong: t is possibly merged and/or deallocated at this point)
+		s.state = _MSpanFree
+	}
+	s.unusedsince = 0
+
+	p := uintptr(s.start)
+	p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
+	for n := uintptr(0); n < npage; n++ {
+		h_spans[p+n] = s
+	}
+
+	memstats.heap_inuse += uint64(npage << _PageShift)
+	memstats.heap_idle -= uint64(npage << _PageShift)
+
+	//println("spanalloc", hex(s.start<<_PageShift))
+	if s.next != nil || s.prev != nil {
+		gothrow("still in list")
+	}
+	return s
+}
+
+// Allocate a span of exactly npage pages from the list of large spans.
+func mHeap_AllocLarge(h *mheap, npage uintptr) *mspan {
+	return bestFit(&h.freelarge, npage, nil)
+}
+
+// Search list for smallest span with >= npage pages.
+// If there are multiple smallest spans, take the one
+// with the earliest starting address.
+func bestFit(list *mspan, npage uintptr, best *mspan) *mspan {
+	for s := list.next; s != list; s = s.next {
+		if s.npages < npage {
+			continue
+		}
+		if best == nil || s.npages < best.npages || (s.npages == best.npages && s.start < best.start) {
+			best = s
+		}
+	}
+	return best
+}
+
+// Try to add at least npage pages of memory to the heap,
+// returning whether it worked.
+func mHeap_Grow(h *mheap, npage uintptr) bool {
+	// Ask for a big chunk, to reduce the number of mappings
+	// the operating system needs to track; also amortizes
+	// the overhead of an operating system mapping.
+	// Allocate a multiple of 64kB.
+	npage = round(npage, (64<<10)/_PageSize)
+	ask := npage << _PageShift
+	if ask < _HeapAllocChunk {
+		ask = _HeapAllocChunk
+	}
+
+	v := mHeap_SysAlloc(h, ask)
+	if v == nil {
+		if ask > npage<<_PageShift {
+			ask = npage << _PageShift
+			v = mHeap_SysAlloc(h, ask)
+		}
+		if v == nil {
+			print("runtime: out of memory: cannot allocate ", ask, "-byte block (", memstats.heap_sys, " in use)\n")
+			return false
+		}
+	}
+
+	// Create a fake "in use" span and free it, so that the
+	// right coalescing happens.
+	s := (*mspan)(fixAlloc_Alloc(&h.spanalloc))
+	mSpan_Init(s, pageID(uintptr(v)>>_PageShift), ask>>_PageShift)
+	p := uintptr(s.start)
+	p -= (uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift)
+	h_spans[p] = s
+	h_spans[p+s.npages-1] = s
+	atomicstore(&s.sweepgen, h.sweepgen)
+	s.state = _MSpanInUse
+	mHeap_FreeSpanLocked(h, s, false, true)
+	return true
+}
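A worked example of the sizing at the top of mHeap_Grow: only the round-to-64 kB rule is taken from the code; the 8 kB page (_PageShift = 13) and 1 MB _HeapAllocChunk below are assumed values for illustration.

	// growAsk mirrors the arithmetic at the top of mHeap_Grow with assumed
	// constants: growAsk(3) rounds 3 pages up to 8 (a 64 kB multiple of 8 kB
	// pages), giving 64 kB, which is then raised to the 1 MB chunk minimum.
	func growAsk(npage uintptr) uintptr {
		const pageSize = 8 << 10       // assumed runtime page size
		const pageShift = 13           // log2(pageSize)
		const heapAllocChunk = 1 << 20 // assumed minimum growth
		per64k := uintptr((64 << 10) / pageSize)
		npage = (npage + per64k - 1) &^ (per64k - 1) // round(npage, per64k)
		ask := npage << pageShift
		if ask < heapAllocChunk {
			ask = heapAllocChunk
		}
		return ask
	}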
+
+// Look up the span at the given address.
+// Address is guaranteed to be in map
+// and is guaranteed to be start or end of span.
+func mHeap_Lookup(h *mheap, v unsafe.Pointer) *mspan {
+	p := uintptr(v)
+	p -= uintptr(unsafe.Pointer(h.arena_start))
+	return h_spans[p>>_PageShift]
+}
+
+// Look up the span at the given address.
+// Address is *not* guaranteed to be in map
+// and may be anywhere in the span.
+// Map entries for the middle of a span are only
+// valid for allocated spans.  Free spans may have
+// other garbage in their middles, so we have to
+// check for that.
+func mHeap_LookupMaybe(h *mheap, v unsafe.Pointer) *mspan {
+	if uintptr(v) < uintptr(unsafe.Pointer(h.arena_start)) || uintptr(v) >= uintptr(unsafe.Pointer(h.arena_used)) {
+		return nil
+	}
+	p := uintptr(v) >> _PageShift
+	q := p
+	q -= uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift
+	s := h_spans[q]
+	if s == nil || p < uintptr(s.start) || uintptr(v) >= uintptr(unsafe.Pointer(s.limit)) || s.state != _MSpanInUse {
+		return nil
+	}
+	return s
+}
+
+// Free the span back into the heap.
+func mHeap_Free(h *mheap, s *mspan, acct int32) {
+	systemstack(func() {
+		mp := getg().m
+		lock(&h.lock)
+		memstats.heap_alloc += uint64(mp.mcache.local_cachealloc)
+		mp.mcache.local_cachealloc = 0
+		memstats.tinyallocs += uint64(mp.mcache.local_tinyallocs)
+		mp.mcache.local_tinyallocs = 0
+		if acct != 0 {
+			memstats.heap_alloc -= uint64(s.npages << _PageShift)
+			memstats.heap_objects--
+		}
+		mHeap_FreeSpanLocked(h, s, true, true)
+		unlock(&h.lock)
+	})
+}
+
+func mHeap_FreeStack(h *mheap, s *mspan) {
+	_g_ := getg()
+	if _g_ != _g_.m.g0 {
+		gothrow("mheap_freestack not on g0 stack")
+	}
+	s.needzero = 1
+	lock(&h.lock)
+	memstats.stacks_inuse -= uint64(s.npages << _PageShift)
+	mHeap_FreeSpanLocked(h, s, true, true)
+	unlock(&h.lock)
+}
+
+func mHeap_FreeSpanLocked(h *mheap, s *mspan, acctinuse, acctidle bool) {
+	switch s.state {
+	case _MSpanStack:
+		if s.ref != 0 {
+			gothrow("MHeap_FreeSpanLocked - invalid stack free")
+		}
+	case _MSpanInUse:
+		if s.ref != 0 || s.sweepgen != h.sweepgen {
+			print("MHeap_FreeSpanLocked - span ", s, " ptr ", hex(s.start<<_PageShift), " ref ", s.ref, " sweepgen ", s.sweepgen, "/", h.sweepgen, "\n")
+			gothrow("MHeap_FreeSpanLocked - invalid free")
+		}
+	default:
+		gothrow("MHeap_FreeSpanLocked - invalid span state")
+	}
+
+	if acctinuse {
+		memstats.heap_inuse -= uint64(s.npages << _PageShift)
+	}
+	if acctidle {
+		memstats.heap_idle += uint64(s.npages << _PageShift)
+	}
+	s.state = _MSpanFree
+	mSpanList_Remove(s)
+
+	// Stamp newly unused spans. The scavenger will use that
+	// info to potentially give back some pages to the OS.
+	s.unusedsince = nanotime()
+	s.npreleased = 0
+
+	// Coalesce with earlier, later spans.
+	p := uintptr(s.start)
+	p -= uintptr(unsafe.Pointer(h.arena_start)) >> _PageShift
+	if p > 0 {
+		t := h_spans[p-1]
+		if t != nil && t.state != _MSpanInUse && t.state != _MSpanStack {
+			s.start = t.start
+			s.npages += t.npages
+			s.npreleased = t.npreleased // absorb released pages
+			s.needzero |= t.needzero
+			p -= t.npages
+			h_spans[p] = s
+			mSpanList_Remove(t)
+			t.state = _MSpanDead
+			fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
+		}
+	}
+	if (p+s.npages)*ptrSize < h.spans_mapped {
+		t := h_spans[p+s.npages]
+		if t != nil && t.state != _MSpanInUse && t.state != _MSpanStack {
+			s.npages += t.npages
+			s.npreleased += t.npreleased
+			s.needzero |= t.needzero
+			h_spans[p+s.npages-1] = s
+			mSpanList_Remove(t)
+			t.state = _MSpanDead
+			fixAlloc_Free(&h.spanalloc, (unsafe.Pointer)(t))
+		}
+	}
+
+	// Insert s into appropriate list.
+	if s.npages < uintptr(len(h.free)) {
+		mSpanList_Insert(&h.free[s.npages], s)
+	} else {
+		mSpanList_Insert(&h.freelarge, s)
+	}
+}
+
+func scavengelist(list *mspan, now, limit uint64) uintptr {
+	if mSpanList_IsEmpty(list) {
+		return 0
+	}
+
+	var sumreleased uintptr
+	for s := list.next; s != list; s = s.next {
+		if (now-uint64(s.unusedsince)) > limit && s.npreleased != s.npages {
+			released := (s.npages - s.npreleased) << _PageShift
+			memstats.heap_released += uint64(released)
+			sumreleased += released
+			s.npreleased = s.npages
+			sysUnused((unsafe.Pointer)(s.start<<_PageShift), s.npages<<_PageShift)
+		}
+	}
+	return sumreleased
+}
+
+func mHeap_Scavenge(k int32, now, limit uint64) {
+	h := &mheap_
+	lock(&h.lock)
+	var sumreleased uintptr
+	for i := 0; i < len(h.free); i++ {
+		sumreleased += scavengelist(&h.free[i], now, limit)
+	}
+	sumreleased += scavengelist(&h.freelarge, now, limit)
+	unlock(&h.lock)
+
+	if debug.gctrace > 0 {
+		if sumreleased > 0 {
+			print("scvg", k, ": ", sumreleased>>20, " MB released\n")
+		}
+		// TODO(dvyukov): these stats are incorrect as we don't subtract stack usage from heap.
+		// But we can't call ReadMemStats on g0 holding locks.
+		print("scvg", k, ": inuse: ", memstats.heap_inuse>>20, ", idle: ", memstats.heap_idle>>20, ", sys: ", memstats.heap_sys>>20, ", released: ", memstats.heap_released>>20, ", consumed: ", (memstats.heap_sys-memstats.heap_released)>>20, " (MB)\n")
+	}
+}
+
+func scavenge_m() {
+	mHeap_Scavenge(-1, ^uint64(0), 0)
+}
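scavenge_m forces a full release: with now = ^uint64(0) and limit = 0, the (now-unusedsince) > limit test in scavengelist is true for any free span that still has unreleased pages. A periodic caller would instead pass the current time and an idleness threshold; the threshold below is purely illustrative.

	// Illustrative calling patterns for mHeap_Scavenge (not part of the change):
	func scavengeExamples() {
		// Release everything that can be released, as scavenge_m does above.
		mHeap_Scavenge(-1, ^uint64(0), 0)

		// Release only pages in spans that have sat unused for at least five
		// minutes (threshold chosen here purely for illustration).
		const fiveMinutes = 5 * 60 * 1000 * 1000 * 1000 // nanoseconds
		mHeap_Scavenge(0, uint64(nanotime()), fiveMinutes)
	}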
+
+// Initialize a new span with the given start and npages.
+func mSpan_Init(span *mspan, start pageID, npages uintptr) {
+	span.next = nil
+	span.prev = nil
+	span.start = start
+	span.npages = npages
+	span.freelist = nil
+	span.ref = 0
+	span.sizeclass = 0
+	span.incache = false
+	span.elemsize = 0
+	span.state = _MSpanDead
+	span.unusedsince = 0
+	span.npreleased = 0
+	span.speciallock.key = 0
+	span.specials = nil
+	span.needzero = 0
+}
+
+// Initialize an empty doubly-linked list.
+func mSpanList_Init(list *mspan) {
+	list.state = _MSpanListHead
+	list.next = list
+	list.prev = list
+}
+
+func mSpanList_Remove(span *mspan) {
+	if span.prev == nil && span.next == nil {
+		return
+	}
+	span.prev.next = span.next
+	span.next.prev = span.prev
+	span.prev = nil
+	span.next = nil
+}
+
+func mSpanList_IsEmpty(list *mspan) bool {
+	return list.next == list
+}
+
+func mSpanList_Insert(list *mspan, span *mspan) {
+	if span.next != nil || span.prev != nil {
+		println("failed MSpanList_Insert", span, span.next, span.prev)
+		gothrow("MSpanList_Insert")
+	}
+	span.next = list.next
+	span.prev = list
+	span.next.prev = span
+	span.prev.next = span
+}
+
+func mSpanList_InsertBack(list *mspan, span *mspan) {
+	if span.next != nil || span.prev != nil {
+		println("failed MSpanList_InsertBack", span, span.next, span.prev)
+		gothrow("MSpanList_InsertBack")
+	}
+	span.next = list
+	span.prev = list.prev
+	span.next.prev = span
+	span.prev.next = span
+}
+
+// Adds the special record s to the list of special records for
+// the object p.  All fields of s should be filled in except for
+// offset & next, which this routine will fill in.
+// Returns true if the special was successfully added, false otherwise.
+// (The add will fail only if a record with the same p and s->kind
+//  already exists.)
+func addspecial(p unsafe.Pointer, s *special) bool {
+	span := mHeap_LookupMaybe(&mheap_, p)
+	if span == nil {
+		gothrow("addspecial on invalid pointer")
+	}
+
+	// Ensure that the span is swept.
+	// The GC accesses the specials list without locks, and it's just much safer.
+	mp := acquirem()
+	mSpan_EnsureSwept(span)
+
+	offset := uintptr(p) - uintptr(span.start<<_PageShift)
+	kind := s.kind
+
+	lock(&span.speciallock)
+
+	// Find splice point, check for existing record.
+	t := &span.specials
+	for {
+		x := *t
+		if x == nil {
+			break
+		}
+		if offset == uintptr(x.offset) && kind == x.kind {
+			unlock(&span.speciallock)
+			releasem(mp)
+			return false // already exists
+		}
+		if offset < uintptr(x.offset) || (offset == uintptr(x.offset) && kind < x.kind) {
+			break
+		}
+		t = &x.next
+	}
+
+	// Splice in record, fill in offset.
+	s.offset = uint16(offset)
+	s.next = *t
+	*t = s
+	unlock(&span.speciallock)
+	releasem(mp)
+
+	return true
+}
+
+// Removes the Special record of the given kind for the object p.
+// Returns the record if the record existed, nil otherwise.
+// The caller must FixAlloc_Free the result.
+func removespecial(p unsafe.Pointer, kind uint8) *special {
+	span := mHeap_LookupMaybe(&mheap_, p)
+	if span == nil {
+		gothrow("removespecial on invalid pointer")
+	}
+
+	// Ensure that the span is swept.
+	// The GC accesses the specials list without locks, and it's just much safer.
+	mp := acquirem()
+	mSpan_EnsureSwept(span)
+
+	offset := uintptr(p) - uintptr(span.start<<_PageShift)
+
+	lock(&span.speciallock)
+	t := &span.specials
+	for {
+		s := *t
+		if s == nil {
+			break
+		}
+		// This function is used for finalizers only, so we don't check for
+		// "interior" specials (p must be exactly equal to s->offset).
+		if offset == uintptr(s.offset) && kind == s.kind {
+			*t = s.next
+			unlock(&span.speciallock)
+			releasem(mp)
+			return s
+		}
+		t = &s.next
+	}
+	unlock(&span.speciallock)
+	releasem(mp)
+	return nil
+}
+
+// Adds a finalizer to the object p.  Returns true if it succeeded.
+func addfinalizer(p unsafe.Pointer, f *funcval, nret uintptr, fint *_type, ot *ptrtype) bool {
+	lock(&mheap_.speciallock)
+	s := (*specialfinalizer)(fixAlloc_Alloc(&mheap_.specialfinalizeralloc))
+	unlock(&mheap_.speciallock)
+	s.special.kind = _KindSpecialFinalizer
+	s.fn = f
+	s.nret = nret
+	s.fint = fint
+	s.ot = ot
+	if addspecial(p, &s.special) {
+		return true
+	}
+
+	// There was an old finalizer
+	lock(&mheap_.speciallock)
+	fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
+	unlock(&mheap_.speciallock)
+	return false
+}
+
+// Removes the finalizer (if any) from the object p.
+func removefinalizer(p unsafe.Pointer) {
+	s := (*specialfinalizer)(unsafe.Pointer(removespecial(p, _KindSpecialFinalizer)))
+	if s == nil {
+		return // there wasn't a finalizer to remove
+	}
+	lock(&mheap_.speciallock)
+	fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(s))
+	unlock(&mheap_.speciallock)
+}
+
+// Set the heap profile bucket associated with addr to b.
+func setprofilebucket(p unsafe.Pointer, b *bucket) {
+	lock(&mheap_.speciallock)
+	s := (*specialprofile)(fixAlloc_Alloc(&mheap_.specialprofilealloc))
+	unlock(&mheap_.speciallock)
+	s.special.kind = _KindSpecialProfile
+	s.b = b
+	if !addspecial(p, &s.special) {
+		gothrow("setprofilebucket: profile already set")
+	}
+}
+
+// Do whatever cleanup needs to be done to deallocate s.  It has
+// already been unlinked from the MSpan specials list.
+// Returns true if we should keep working on deallocating p.
+func freespecial(s *special, p unsafe.Pointer, size uintptr, freed bool) bool {
+	switch s.kind {
+	case _KindSpecialFinalizer:
+		sf := (*specialfinalizer)(unsafe.Pointer(s))
+		queuefinalizer(p, sf.fn, sf.nret, sf.fint, sf.ot)
+		lock(&mheap_.speciallock)
+		fixAlloc_Free(&mheap_.specialfinalizeralloc, (unsafe.Pointer)(sf))
+		unlock(&mheap_.speciallock)
+		return false // don't free p until finalizer is done
+	case _KindSpecialProfile:
+		sp := (*specialprofile)(unsafe.Pointer(s))
+		mProf_Free(sp.b, size, freed)
+		lock(&mheap_.speciallock)
+		fixAlloc_Free(&mheap_.specialprofilealloc, (unsafe.Pointer)(sp))
+		unlock(&mheap_.speciallock)
+		return true
+	default:
+		gothrow("bad special kind")
+		panic("not reached")
+	}
+}
diff --git a/src/runtime/mprof.go b/src/runtime/mprof.go
index d409c6c..ba989b1 100644
--- a/src/runtime/mprof.go
+++ b/src/runtime/mprof.go
@@ -190,8 +190,6 @@
 	return b
 }
 
-func sysAlloc(n uintptr, stat *uint64) unsafe.Pointer
-
 func eqslice(x, y []uintptr) bool {
 	if len(x) != len(y) {
 		return false
@@ -246,16 +244,9 @@
 	// This reduces potential contention and chances of deadlocks.
 	// Since the object must be alive during call to mProf_Malloc,
 	// it's fine to do this non-atomically.
-	setprofilebucket(p, b)
-}
-
-func setprofilebucket_m() // mheap.c
-
-func setprofilebucket(p unsafe.Pointer, b *bucket) {
-	g := getg()
-	g.m.ptrarg[0] = p
-	g.m.ptrarg[1] = unsafe.Pointer(b)
-	onM(setprofilebucket_m)
+	systemstack(func() {
+		setprofilebucket(p, b)
+	})
 }
 
 // Called when freeing a profiled block.
@@ -519,8 +510,6 @@
 	return
 }
 
-var allgs []*g // proc.c
-
 // GoroutineProfile returns n, the number of records in the active goroutine stack profile.
 // If len(p) >= n, GoroutineProfile copies the profile into p and returns n, true.
 // If len(p) < n, GoroutineProfile does not change p and returns n, false.
@@ -534,7 +523,7 @@
 		gp := getg()
 		semacquire(&worldsema, false)
 		gp.m.gcing = 1
-		onM(stoptheworld)
+		systemstack(stoptheworld)
 
 		n = NumGoroutine()
 		if n <= len(p) {
@@ -542,7 +531,7 @@
 			r := p
 			sp := getcallersp(unsafe.Pointer(&p))
 			pc := getcallerpc(unsafe.Pointer(&p))
-			onM(func() {
+			systemstack(func() {
 				saveg(pc, sp, gp, &r[0])
 			})
 			r = r[1:]
@@ -557,7 +546,7 @@
 
 		gp.m.gcing = 0
 		semrelease(&worldsema)
-		onM(starttheworld)
+		systemstack(starttheworld)
 	}
 
 	return n, ok
@@ -581,7 +570,7 @@
 		semacquire(&worldsema, false)
 		mp.gcing = 1
 		releasem(mp)
-		onM(stoptheworld)
+		systemstack(stoptheworld)
 		if mp != acquirem() {
 			gothrow("Stack: rescheduled")
 		}
@@ -591,7 +580,7 @@
 	if len(buf) > 0 {
 		sp := getcallersp(unsafe.Pointer(&buf))
 		pc := getcallerpc(unsafe.Pointer(&buf))
-		onM(func() {
+		systemstack(func() {
 			g0 := getg()
 			g0.writebuf = buf[0:0:len(buf)]
 			goroutineheader(gp)
@@ -607,7 +596,7 @@
 	if all {
 		mp.gcing = 0
 		semrelease(&worldsema)
-		onM(starttheworld)
+		systemstack(starttheworld)
 	}
 	releasem(mp)
 	return n
@@ -630,7 +619,7 @@
 		goroutineheader(gp)
 		pc := getcallerpc(unsafe.Pointer(&p))
 		sp := getcallersp(unsafe.Pointer(&p))
-		onM(func() {
+		systemstack(func() {
 			traceback(pc, sp, 0, gp)
 		})
 	} else {
@@ -650,7 +639,7 @@
 	goroutineheader(gp)
 	pc := getcallerpc(unsafe.Pointer(&p))
 	sp := getcallersp(unsafe.Pointer(&p))
-	onM(func() {
+	systemstack(func() {
 		traceback(pc, sp, 0, gp)
 	})
 	print("\n")
diff --git a/src/runtime/msize.c b/src/runtime/msize.c
deleted file mode 100644
index 7cb65da..0000000
--- a/src/runtime/msize.c
+++ /dev/null
@@ -1,184 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Malloc small size classes.
-//
-// See malloc.h for overview.
-//
-// The size classes are chosen so that rounding an allocation
-// request up to the next size class wastes at most 12.5% (1.125x).
-//
-// Each size class has its own page count that gets allocated
-// and chopped up when new objects of the size class are needed.
-// That page count is chosen so that chopping up the run of
-// pages into objects of the given size wastes at most 12.5% (1.125x)
-// of the memory.  It is not necessary that the cutoff here be
-// the same as above.
-//
-// The two sources of waste multiply, so the worst possible case
-// for the above constraints would be that allocations of some
-// size might have a 26.6% (1.266x) overhead.
-// In practice, only one of the wastes comes into play for a
-// given size (sizes < 512 waste mainly on the round-up,
-// sizes > 512 waste mainly on the page chopping).
-//
-// TODO(rsc): Compute max waste for any given size.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "textflag.h"
-
-#pragma dataflag NOPTR
-int32 runtime·class_to_size[NumSizeClasses];
-#pragma dataflag NOPTR
-int32 runtime·class_to_allocnpages[NumSizeClasses];
-
-// The SizeToClass lookup is implemented using two arrays,
-// one mapping sizes <= 1024 to their class and one mapping
-// sizes >= 1024 and <= MaxSmallSize to their class.
-// All objects are 8-aligned, so the first array is indexed by
-// the size divided by 8 (rounded up).  Objects >= 1024 bytes
-// are 128-aligned, so the second array is indexed by the
-// size divided by 128 (rounded up).  The arrays are filled in
-// by InitSizes.
-
-#pragma dataflag NOPTR
-int8 runtime·size_to_class8[1024/8 + 1];
-#pragma dataflag NOPTR
-int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
-
-void runtime·testdefersizes(void);
-
-int32
-runtime·SizeToClass(int32 size)
-{
-	if(size > MaxSmallSize)
-		runtime·throw("SizeToClass - invalid size");
-	if(size > 1024-8)
-		return runtime·size_to_class128[(size-1024+127) >> 7];
-	return runtime·size_to_class8[(size+7)>>3];
-}
-
-void
-runtime·InitSizes(void)
-{
-	int32 align, sizeclass, size, nextsize, n;
-	uint32 i;
-	uintptr allocsize, npages;
-
-	// Initialize the runtime·class_to_size table (and choose class sizes in the process).
-	runtime·class_to_size[0] = 0;
-	sizeclass = 1;	// 0 means no class
-	align = 8;
-	for(size = align; size <= MaxSmallSize; size += align) {
-		if((size&(size-1)) == 0) {	// bump alignment once in a while
-			if(size >= 2048)
-				align = 256;
-			else if(size >= 128)
-				align = size / 8;
-			else if(size >= 16)
-				align = 16;	// required for x86 SSE instructions, if we want to use them
-		}
-		if((align&(align-1)) != 0)
-			runtime·throw("InitSizes - bug");
-
-		// Make the allocnpages big enough that
-		// the leftover is less than 1/8 of the total,
-		// so wasted space is at most 12.5%.
-		allocsize = PageSize;
-		while(allocsize%size > allocsize/8)
-			allocsize += PageSize;
-		npages = allocsize >> PageShift;
-
-		// If the previous sizeclass chose the same
-		// allocation size and fit the same number of
-		// objects into the page, we might as well
-		// use just this size instead of having two
-		// different sizes.
-		if(sizeclass > 1 &&
-			npages == runtime·class_to_allocnpages[sizeclass-1] &&
-			allocsize/size == allocsize/runtime·class_to_size[sizeclass-1]) {
-			runtime·class_to_size[sizeclass-1] = size;
-			continue;
-		}
-
-		runtime·class_to_allocnpages[sizeclass] = npages;
-		runtime·class_to_size[sizeclass] = size;
-		sizeclass++;
-	}
-	if(sizeclass != NumSizeClasses) {
-		runtime·printf("sizeclass=%d NumSizeClasses=%d\n", sizeclass, NumSizeClasses);
-		runtime·throw("InitSizes - bad NumSizeClasses");
-	}
-
-	// Initialize the size_to_class tables.
-	nextsize = 0;
-	for (sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
-		for(; nextsize < 1024 && nextsize <= runtime·class_to_size[sizeclass]; nextsize+=8)
-			runtime·size_to_class8[nextsize/8] = sizeclass;
-		if(nextsize >= 1024)
-			for(; nextsize <= runtime·class_to_size[sizeclass]; nextsize += 128)
-				runtime·size_to_class128[(nextsize-1024)/128] = sizeclass;
-	}
-
-	// Double-check SizeToClass.
-	if(0) {
-		for(n=0; n < MaxSmallSize; n++) {
-			sizeclass = runtime·SizeToClass(n);
-			if(sizeclass < 1 || sizeclass >= NumSizeClasses || runtime·class_to_size[sizeclass] < n) {
-				runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
-				runtime·printf("incorrect SizeToClass");
-				goto dump;
-			}
-			if(sizeclass > 1 && runtime·class_to_size[sizeclass-1] >= n) {
-				runtime·printf("size=%d sizeclass=%d runtime·class_to_size=%d\n", n, sizeclass, runtime·class_to_size[sizeclass]);
-				runtime·printf("SizeToClass too big");
-				goto dump;
-			}
-		}
-	}
-
-	runtime·testdefersizes();
-
-	// Copy out for statistics table.
-	for(i=0; i<nelem(runtime·class_to_size); i++)
-		mstats.by_size[i].size = runtime·class_to_size[i];
-	return;
-
-dump:
-	if(1){
-		runtime·printf("NumSizeClasses=%d\n", NumSizeClasses);
-		runtime·printf("runtime·class_to_size:");
-		for(sizeclass=0; sizeclass<NumSizeClasses; sizeclass++)
-			runtime·printf(" %d", runtime·class_to_size[sizeclass]);
-		runtime·printf("\n\n");
-		runtime·printf("size_to_class8:");
-		for(i=0; i<nelem(runtime·size_to_class8); i++)
-			runtime·printf(" %d=>%d(%d)\n", i*8, runtime·size_to_class8[i],
-				runtime·class_to_size[runtime·size_to_class8[i]]);
-		runtime·printf("\n");
-		runtime·printf("size_to_class128:");
-		for(i=0; i<nelem(runtime·size_to_class128); i++)
-			runtime·printf(" %d=>%d(%d)\n", i*128, runtime·size_to_class128[i],
-				runtime·class_to_size[runtime·size_to_class128[i]]);
-		runtime·printf("\n");
-	}
-	runtime·throw("InitSizes failed");
-}
-
-// Returns size of the memory block that mallocgc will allocate if you ask for the size.
-uintptr
-runtime·roundupsize(uintptr size)
-{
-	if(size < MaxSmallSize) {
-		if(size <= 1024-8)
-			return runtime·class_to_size[runtime·size_to_class8[(size+7)>>3]];
-		else
-			return runtime·class_to_size[runtime·size_to_class128[(size-1024+127) >> 7]];
-	}
-	if(size + PageSize < size)
-		return size;
-	return ROUND(size, PageSize);
-}
diff --git a/src/runtime/msize.go b/src/runtime/msize.go
new file mode 100644
index 0000000..aa2b43e
--- /dev/null
+++ b/src/runtime/msize.go
@@ -0,0 +1,174 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Malloc small size classes.
+//
+// See malloc.h for overview.
+//
+// The size classes are chosen so that rounding an allocation
+// request up to the next size class wastes at most 12.5% (1.125x).
+//
+// Each size class has its own page count that gets allocated
+// and chopped up when new objects of the size class are needed.
+// That page count is chosen so that chopping up the run of
+// pages into objects of the given size wastes at most 12.5% (1.125x)
+// of the memory.  It is not necessary that the cutoff here be
+// the same as above.
+//
+// The two sources of waste multiply, so the worst possible case
+// for the above constraints would be that allocations of some
+// size might have a 26.6% (1.266x) overhead.
+// In practice, only one of the wastes comes into play for a
+// given size (sizes < 512 waste mainly on the round-up,
+// sizes > 512 waste mainly on the page chopping).
+//
+// TODO(rsc): Compute max waste for any given size.
+
+package runtime
+
+//var class_to_size [_NumSizeClasses]int32
+//var class_to_allocnpages [_NumSizeClasses]int32
+
+// The SizeToClass lookup is implemented using two arrays,
+// one mapping sizes <= 1024 to their class and one mapping
+// sizes >= 1024 and <= MaxSmallSize to their class.
+// All objects are 8-aligned, so the first array is indexed by
+// the size divided by 8 (rounded up).  Objects >= 1024 bytes
+// are 128-aligned, so the second array is indexed by the
+// size divided by 128 (rounded up).  The arrays are filled in
+// by InitSizes.
+//var size_to_class8 [1024/8 + 1]int8
+//var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
+
+func sizeToClass(size int32) int32 {
+	if size > _MaxSmallSize {
+		gothrow("SizeToClass - invalid size")
+	}
+	if size > 1024-8 {
+		return int32(size_to_class128[(size-1024+127)>>7])
+	}
+	return int32(size_to_class8[(size+7)>>3])
+}
+
+func initSizes() {
+	// Initialize the runtime·class_to_size table (and choose class sizes in the process).
+	class_to_size[0] = 0
+	sizeclass := 1 // 0 means no class
+	align := 8
+	for size := align; size <= _MaxSmallSize; size += align {
+		if size&(size-1) == 0 { // bump alignment once in a while
+			if size >= 2048 {
+				align = 256
+			} else if size >= 128 {
+				align = size / 8
+			} else if size >= 16 {
+				align = 16 // required for x86 SSE instructions, if we want to use them
+			}
+		}
+		if align&(align-1) != 0 {
+			gothrow("InitSizes - bug")
+		}
+
+		// Make the allocnpages big enough that
+		// the leftover is less than 1/8 of the total,
+		// so wasted space is at most 12.5%.
+		allocsize := _PageSize
+		for allocsize%size > allocsize/8 {
+			allocsize += _PageSize
+		}
+		npages := allocsize >> _PageShift
+
+		// If the previous sizeclass chose the same
+		// allocation size and fit the same number of
+		// objects into the page, we might as well
+		// use just this size instead of having two
+		// different sizes.
+		if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
+			class_to_size[sizeclass-1] = int32(size)
+			continue
+		}
+
+		class_to_allocnpages[sizeclass] = int32(npages)
+		class_to_size[sizeclass] = int32(size)
+		sizeclass++
+	}
+	if sizeclass != _NumSizeClasses {
+		print("sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
+		gothrow("InitSizes - bad NumSizeClasses")
+	}
+
+	// Initialize the size_to_class tables.
+	nextsize := 0
+	for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
+		for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
+			size_to_class8[nextsize/8] = int8(sizeclass)
+		}
+		if nextsize >= 1024 {
+			for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
+				size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
+			}
+		}
+	}
+
+	// Double-check SizeToClass.
+	if false {
+		for n := int32(0); n < _MaxSmallSize; n++ {
+			sizeclass := sizeToClass(n)
+			if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
+				print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
+				print("incorrect SizeToClass\n")
+				goto dump
+			}
+			if sizeclass > 1 && class_to_size[sizeclass-1] >= n {
+				print("size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
+				print("SizeToClass too big\n")
+				goto dump
+			}
+		}
+	}
+
+	testdefersizes()
+
+	// Copy out for statistics table.
+	for i := 0; i < len(class_to_size); i++ {
+		memstats.by_size[i].size = uint32(class_to_size[i])
+	}
+	return
+
+dump:
+	if true {
+		print("NumSizeClasses=", _NumSizeClasses, "\n")
+		print("runtime·class_to_size:")
+		for sizeclass = 0; sizeclass < _NumSizeClasses; sizeclass++ {
+			print(" ", class_to_size[sizeclass], "")
+		}
+		print("\n\n")
+		print("size_to_class8:")
+		for i := 0; i < len(size_to_class8); i++ {
+			print(" ", i*8, "=>", size_to_class8[i], "(", class_to_size[size_to_class8[i]], ")\n")
+		}
+		print("\n")
+		print("size_to_class128:")
+		for i := 0; i < len(size_to_class128); i++ {
+			print(" ", i*128, "=>", size_to_class128[i], "(", class_to_size[size_to_class128[i]], ")\n")
+		}
+		print("\n")
+	}
+	gothrow("InitSizes failed")
+}
+
+// Returns size of the memory block that mallocgc will allocate if you ask for the size.
+func roundupsize(size uintptr) uintptr {
+	if size < _MaxSmallSize {
+		if size <= 1024-8 {
+			return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
+		} else {
+			return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
+		}
+	}
+	if size+_PageSize < size {
+		return size
+	}
+	return round(size, _PageSize)
+}
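The index arithmetic in sizeToClass and roundupsize above can be checked in isolation. The following standalone sketch is illustrative only, not runtime code; the example sizes are arbitrary, and it simply re-derives the two lookup indices and the combined worst-case waste quoted in the file comment.

package main

import "fmt"

// class8Index mirrors the small-size path: sizes <= 1024-8 are rounded up to a
// multiple of 8 and indexed by size/8.
func class8Index(size int) int { return (size + 7) >> 3 }

// class128Index mirrors the large-size path: sizes above 1024-8 are rounded up
// to a multiple of 128 and indexed relative to 1024.
func class128Index(size int) int { return (size - 1024 + 127) >> 7 }

func main() {
	for _, size := range []int{1, 8, 9, 1016, 1017, 2048} {
		if size <= 1024-8 {
			fmt.Println(size, "-> size_to_class8 index", class8Index(size))
		} else {
			fmt.Println(size, "-> size_to_class128 index", class128Index(size))
		}
	}
	// The two waste sources multiply: 1.125 * 1.125 = 1.265625, matching the
	// 26.6% worst case quoted in the file comment.
	fmt.Println("worst case overhead:", 1.125*1.125)
}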
diff --git a/src/runtime/netpoll.go b/src/runtime/netpoll.go
index 3456e02..7a99f18 100644
--- a/src/runtime/netpoll.go
+++ b/src/runtime/netpoll.go
@@ -49,14 +49,14 @@
 	lock    mutex // protectes the following fields
 	fd      uintptr
 	closing bool
-	seq     uintptr        // protects from stale timers and ready notifications
-	rg      uintptr        // pdReady, pdWait, G waiting for read or nil
-	rt      timer          // read deadline timer (set if rt.f != nil)
-	rd      int64          // read deadline
-	wg      uintptr        // pdReady, pdWait, G waiting for write or nil
-	wt      timer          // write deadline timer
-	wd      int64          // write deadline
-	user    unsafe.Pointer // user settable cookie
+	seq     uintptr // protects from stale timers and ready notifications
+	rg      uintptr // pdReady, pdWait, G waiting for read or nil
+	rt      timer   // read deadline timer (set if rt.f != nil)
+	rd      int64   // read deadline
+	wg      uintptr // pdReady, pdWait, G waiting for write or nil
+	wt      timer   // write deadline timer
+	wd      int64   // write deadline
+	user    uint32  // user settable cookie
 }
 
 type pollCache struct {
@@ -72,7 +72,7 @@
 var pollcache pollCache
 
 func netpollServerInit() {
-	onM(netpollinit)
+	netpollinit()
 }
 
 func netpollOpen(fd uintptr) (*pollDesc, int) {
@@ -94,9 +94,7 @@
 	unlock(&pd.lock)
 
 	var errno int32
-	onM(func() {
-		errno = netpollopen(fd, pd)
-	})
+	errno = netpollopen(fd, pd)
 	return pd, int(errno)
 }
 
@@ -110,9 +108,7 @@
 	if pd.rg != 0 && pd.rg != pdReady {
 		gothrow("netpollClose: blocked read on closing descriptor")
 	}
-	onM(func() {
-		netpollclose(uintptr(pd.fd))
-	})
+	netpollclose(uintptr(pd.fd))
 	pollcache.free(pd)
 }
 
@@ -143,9 +139,7 @@
 	}
 	// As for now only Solaris uses level-triggered IO.
 	if GOOS == "solaris" {
-		onM(func() {
-			netpollarm(pd, mode)
-		})
+		netpollarm(pd, mode)
 	}
 	for !netpollblock(pd, int32(mode), false) {
 		err = netpollcheckerr(pd, int32(mode))
@@ -263,26 +257,6 @@
 	}
 }
 
-func netpollfd(pd *pollDesc) uintptr {
-	return pd.fd
-}
-
-func netpolluser(pd *pollDesc) *unsafe.Pointer {
-	return &pd.user
-}
-
-func netpollclosing(pd *pollDesc) bool {
-	return pd.closing
-}
-
-func netpolllock(pd *pollDesc) {
-	lock(&pd.lock)
-}
-
-func netpollunlock(pd *pollDesc) {
-	unlock(&pd.lock)
-}
-
 // make pd ready, newly runnable goroutines (if any) are returned in rg/wg
 func netpollready(gpp **g, pd *pollDesc, mode int32) {
 	var rg, wg *g
@@ -343,8 +317,7 @@
 	// this is necessary because runtime_pollUnblock/runtime_pollSetDeadline/deadlineimpl
 	// do the opposite: store to closing/rd/wd, membarrier, load of rg/wg
 	if waitio || netpollcheckerr(pd, mode) == 0 {
-		f := netpollblockcommit
-		gopark(**(**unsafe.Pointer)(unsafe.Pointer(&f)), unsafe.Pointer(gpp), "IO wait")
+		gopark(netpollblockcommit, unsafe.Pointer(gpp), "IO wait")
 	}
 	// be careful to not lose concurrent READY notification
 	old := xchguintptr(gpp, 0)
diff --git a/src/runtime/netpoll_solaris.c b/src/runtime/netpoll_solaris.c
deleted file mode 100644
index d422719..0000000
--- a/src/runtime/netpoll_solaris.c
+++ /dev/null
@@ -1,264 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-
-// Solaris runtime-integrated network poller.
-// 
-// Solaris uses event ports for scalable network I/O. Event
-// ports are level-triggered, unlike epoll and kqueue which
-// can be configured in both level-triggered and edge-triggered
-// mode. Level triggering means we have to keep track of a few things
-// ourselves. After we receive an event for a file descriptor,
-// it's our responsibility to ask again to be notified for future
-// events for that descriptor. When doing this we must keep track of
-// what kind of events the goroutines are currently interested in,
-// for example a fd may be open both for reading and writing.
-// 
-// A description of the high level operation of this code
-// follows. Networking code will get a file descriptor by some means
-// and will register it with the netpolling mechanism by a code path
-// that eventually calls runtime·netpollopen. runtime·netpollopen
-// calls port_associate with an empty event set. That means that we
-// will not receive any events at this point. The association needs
-// to be done at this early point because we need to process the I/O
-// readiness notification at some point in the future. If I/O becomes
-// ready when nobody is listening, when we finally care about it,
-// nobody will tell us anymore.
-// 
-// Beside calling runtime·netpollopen, the networking code paths
-// will call runtime·netpollarm each time goroutines are interested
-// in doing network I/O. Because now we know what kind of I/O we
-// are interested in (reading/writting), we can call port_associate
-// passing the correct type of event set (POLLIN/POLLOUT). As we made
-// sure to have already associated the file descriptor with the port,
-// when we now call port_associate, we will unblock the main poller
-// loop (in runtime·netpoll) right away if the socket is actually
-// ready for I/O.
-// 
-// The main poller loop runs in its own thread waiting for events
-// using port_getn. When an event happens, it will tell the scheduler
-// about it using runtime·netpollready. Besides doing this, it must
-// also re-associate the events that were not part of this current
-// notification with the file descriptor. Failing to do this would
-// mean each notification will prevent concurrent code using the
-// same file descriptor in parallel.
-// 
-// The logic dealing with re-associations is encapsulated in
-// runtime·netpollupdate. This function takes care to associate the
-// descriptor only with the subset of events that were previously
-// part of the association, except the one that just happened. We
-// can't re-associate with that right away, because event ports
-// are level triggered so it would cause a busy loop. Instead, that
-// association is effected only by the runtime·netpollarm code path,
-// when Go code actually asks for I/O.
-// 
-// The open and arming mechanisms are serialized using the lock
-// inside PollDesc. This is required because the netpoll loop runs
-// asynchonously in respect to other Go code and by the time we get
-// to call port_associate to update the association in the loop, the
-// file descriptor might have been closed and reopened already. The
-// lock allows runtime·netpollupdate to be called synchronously from
-// the loop thread while preventing other threads operating to the
-// same PollDesc, so once we unblock in the main loop, until we loop
-// again we know for sure we are always talking about the same file
-// descriptor and can safely access the data we want (the event set).
-
-#pragma dynimport libc·fcntl fcntl "libc.so"
-#pragma dynimport libc·port_create port_create "libc.so"
-#pragma dynimport libc·port_associate port_associate "libc.so"
-#pragma dynimport libc·port_dissociate port_dissociate "libc.so"
-#pragma dynimport libc·port_getn port_getn "libc.so"
-extern uintptr libc·fcntl;
-extern uintptr libc·port_create;
-extern uintptr libc·port_associate;
-extern uintptr libc·port_dissociate;
-extern uintptr libc·port_getn;
-
-#define errno (*g->m->perrno)
-
-int32
-runtime·fcntl(int32 fd, int32 cmd, uintptr arg)
-{
-	return runtime·sysvicall3(libc·fcntl, (uintptr)fd, (uintptr)cmd, (uintptr)arg);
-}
-
-int32
-runtime·port_create(void)
-{
-	return runtime·sysvicall0(libc·port_create);
-}
-
-int32
-runtime·port_associate(int32 port, int32 source, uintptr object, int32 events, uintptr user)
-{
-	return runtime·sysvicall5(libc·port_associate, (uintptr)port, (uintptr)source, object, (uintptr)events, user);
-}
-
-int32
-runtime·port_dissociate(int32 port, int32 source, uintptr object)
-{
-	return runtime·sysvicall3(libc·port_dissociate, (uintptr)port, (uintptr)source, object);
-}
-
-int32
-runtime·port_getn(int32 port, PortEvent *evs, uint32 max, uint32 *nget, Timespec *timeout)
-{
-	return runtime·sysvicall5(libc·port_getn, (uintptr)port, (uintptr)evs, (uintptr)max, (uintptr)nget, (uintptr)timeout);
-}
-
-static int32 portfd = -1;
-
-void
-runtime·netpollinit(void)
-{
-	if((portfd = runtime·port_create()) >= 0) {
-		runtime·fcntl(portfd, F_SETFD, FD_CLOEXEC);
-		return;
-	}
-
-	runtime·printf("netpollinit: failed to create port (%d)\n", errno);
-	runtime·throw("netpollinit: failed to create port");
-}
-
-int32
-runtime·netpollopen(uintptr fd, PollDesc *pd)
-{
-	int32 r;
-
-	runtime·netpolllock(pd);
-	// We don't register for any specific type of events yet, that's
-	// netpollarm's job. We merely ensure we call port_associate before
-	// asynchonous connect/accept completes, so when we actually want
-	// to do any I/O, the call to port_associate (from netpollarm,
-	// with the interested event set) will unblock port_getn right away
-	// because of the I/O readiness notification.
-	*runtime·netpolluser(pd) = 0;
-	r = runtime·port_associate(portfd, PORT_SOURCE_FD, fd, 0, (uintptr)pd);
-	runtime·netpollunlock(pd);
-	return r;
-}
-
-int32
-runtime·netpollclose(uintptr fd)
-{
-	return runtime·port_dissociate(portfd, PORT_SOURCE_FD, fd);
-}
-
-// Updates the association with a new set of interested events. After
-// this call, port_getn will return one and only one event for that
-// particular descriptor, so this function needs to be called again.
-void
-runtime·netpollupdate(PollDesc* pd, uint32 set, uint32 clear)
-{
-	uint32 *ep, old, events;
-	uintptr fd = runtime·netpollfd(pd);
-	ep = (uint32*)runtime·netpolluser(pd);
-
-	if(runtime·netpollclosing(pd))
-		return;
-
-	old = *ep;
-	events = (old & ~clear) | set;
-	if(old == events)
-		return;
-
-	if(events && runtime·port_associate(portfd, PORT_SOURCE_FD, fd, events, (uintptr)pd) != 0) {
-		runtime·printf("netpollupdate: failed to associate (%d)\n", errno);
-		runtime·throw("netpollupdate: failed to associate");
-	} 
-	*ep = events;
-}
-
-// subscribe the fd to the port such that port_getn will return one event.
-void
-runtime·netpollarm(PollDesc* pd, int32 mode)
-{
-	runtime·netpolllock(pd);
-	switch(mode) {
-	case 'r':
-		runtime·netpollupdate(pd, POLLIN, 0);
-		break;
-	case 'w':
-		runtime·netpollupdate(pd, POLLOUT, 0);
-		break;
-	default:
-		runtime·throw("netpollarm: bad mode");
-	}
-	runtime·netpollunlock(pd);
-}
-
-// polls for ready network connections
-// returns list of goroutines that become runnable
-G*
-runtime·netpoll(bool block)
-{
-	static int32 lasterr;
-	PortEvent events[128], *ev;
-	PollDesc *pd;
-	int32 i, mode, clear;
-	uint32 n;
-	Timespec *wait = nil, zero;
-	G *gp;
-
-	if(portfd == -1)
-		return (nil);
-
-	if(!block) {
-		zero.tv_sec = 0;
-		zero.tv_nsec = 0;
-		wait = &zero;
-	}
-
-retry:
-	n = 1;
-	if(runtime·port_getn(portfd, events, nelem(events), &n, wait) < 0) {
-		if(errno != EINTR && errno != lasterr) {
-			lasterr = errno;
-			runtime·printf("runtime: port_getn on fd %d failed with %d\n", portfd, errno);
-		}
-		goto retry;
-	}
-
-	gp = nil;
-	for(i = 0; i < n; i++) {
-		ev = &events[i];
-
-		if(ev->portev_events == 0)
-			continue;
-		pd = (PollDesc *)ev->portev_user;
-
-		mode = 0;
-		clear = 0;
-		if(ev->portev_events & (POLLIN|POLLHUP|POLLERR)) {
-			mode += 'r';
-			clear |= POLLIN;
-		}
-		if(ev->portev_events & (POLLOUT|POLLHUP|POLLERR)) {
-			mode += 'w';
-			clear |= POLLOUT;
-		}
-		// To effect edge-triggered events, we need to be sure to
-		// update our association with whatever events were not
-		// set with the event. For example if we are registered
-		// for POLLIN|POLLOUT, and we get POLLIN, besides waking
-		// the goroutine interested in POLLIN we have to not forget
-		// about the one interested in POLLOUT.
-		if(clear != 0) {
-			runtime·netpolllock(pd);
-			runtime·netpollupdate(pd, 0, clear);
-			runtime·netpollunlock(pd);
-		}
-
-		if(mode)
-			runtime·netpollready(&gp, pd, mode);
-	}
-
-	if(block && gp == nil)
-		goto retry;
-	return gp;
-}
diff --git a/src/runtime/netpoll_solaris.go b/src/runtime/netpoll_solaris.go
new file mode 100644
index 0000000..40e8a1a
--- /dev/null
+++ b/src/runtime/netpoll_solaris.go
@@ -0,0 +1,243 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Solaris runtime-integrated network poller.
+//
+// Solaris uses event ports for scalable network I/O. Event
+// ports are level-triggered, unlike epoll and kqueue which
+// can be configured in both level-triggered and edge-triggered
+// mode. Level triggering means we have to keep track of a few things
+// ourselves. After we receive an event for a file descriptor,
+// it's our responsibility to ask again to be notified for future
+// events for that descriptor. When doing this we must keep track of
+// what kind of events the goroutines are currently interested in,
+// for example a fd may be open both for reading and writing.
+//
+// A description of the high level operation of this code
+// follows. Networking code will get a file descriptor by some means
+// and will register it with the netpolling mechanism by a code path
+// that eventually calls runtime·netpollopen. runtime·netpollopen
+// calls port_associate with an empty event set. That means that we
+// will not receive any events at this point. The association needs
+// to be done at this early point because we need to process the I/O
+// readiness notification at some point in the future. If I/O becomes
+// ready when nobody is listening, when we finally care about it,
+// nobody will tell us anymore.
+//
+// Beside calling runtime·netpollopen, the networking code paths
+// will call runtime·netpollarm each time goroutines are interested
+// in doing network I/O. Because now we know what kind of I/O we
+// are interested in (reading/writing), we can call port_associate
+// passing the correct type of event set (POLLIN/POLLOUT). As we made
+// sure to have already associated the file descriptor with the port,
+// when we now call port_associate, we will unblock the main poller
+// loop (in runtime·netpoll) right away if the socket is actually
+// ready for I/O.
+//
+// The main poller loop runs in its own thread waiting for events
+// using port_getn. When an event happens, it will tell the scheduler
+// about it using runtime·netpollready. Besides doing this, it must
+// also re-associate the events that were not part of this current
+// notification with the file descriptor. Failing to do this would
+// mean each notification will prevent concurrent code using the
+// same file descriptor in parallel.
+//
+// The logic dealing with re-associations is encapsulated in
+// runtime·netpollupdate. This function takes care to associate the
+// descriptor only with the subset of events that were previously
+// part of the association, except the one that just happened. We
+// can't re-associate with that right away, because event ports
+// are level triggered so it would cause a busy loop. Instead, that
+// association is effected only by the runtime·netpollarm code path,
+// when Go code actually asks for I/O.
+//
+// The open and arming mechanisms are serialized using the lock
+// inside PollDesc. This is required because the netpoll loop runs
+// asynchronously with respect to other Go code and by the time we get
+// to call port_associate to update the association in the loop, the
+// file descriptor might have been closed and reopened already. The
+// lock allows runtime·netpollupdate to be called synchronously from
+// the loop thread while preventing other threads operating to the
+// same PollDesc, so once we unblock in the main loop, until we loop
+// again we know for sure we are always talking about the same file
+// descriptor and can safely access the data we want (the event set).
+
+//go:cgo_import_dynamic libc_port_create port_create "libc.so"
+//go:cgo_import_dynamic libc_port_associate port_associate "libc.so"
+//go:cgo_import_dynamic libc_port_dissociate port_dissociate "libc.so"
+//go:cgo_import_dynamic libc_port_getn port_getn "libc.so"
+
+//go:linkname libc_port_create libc_port_create
+//go:linkname libc_port_associate libc_port_associate
+//go:linkname libc_port_dissociate libc_port_dissociate
+//go:linkname libc_port_getn libc_port_getn
+
+var (
+	libc_port_create,
+	libc_port_associate,
+	libc_port_dissociate,
+	libc_port_getn libcFunc
+)
+
+func errno() int32 {
+	return *getg().m.perrno
+}
+
+func fcntl(fd, cmd int32, arg uintptr) int32 {
+	return int32(sysvicall3(libc_fcntl, uintptr(fd), uintptr(cmd), arg))
+}
+
+func port_create() int32 {
+	return int32(sysvicall0(libc_port_create))
+}
+
+func port_associate(port, source int32, object uintptr, events uint32, user uintptr) int32 {
+	return int32(sysvicall5(libc_port_associate, uintptr(port), uintptr(source), object, uintptr(events), user))
+}
+
+func port_dissociate(port, source int32, object uintptr) int32 {
+	return int32(sysvicall3(libc_port_dissociate, uintptr(port), uintptr(source), object))
+}
+
+func port_getn(port int32, evs *portevent, max uint32, nget *uint32, timeout *timespec) int32 {
+	return int32(sysvicall5(libc_port_getn, uintptr(port), uintptr(unsafe.Pointer(evs)), uintptr(max), uintptr(unsafe.Pointer(nget)), uintptr(unsafe.Pointer(timeout))))
+}
+
+var portfd int32 = -1
+
+func netpollinit() {
+	portfd = port_create()
+	if portfd >= 0 {
+		fcntl(portfd, _F_SETFD, _FD_CLOEXEC)
+		return
+	}
+
+	print("netpollinit: failed to create port (", errno(), ")\n")
+	gothrow("netpollinit: failed to create port")
+}
+
+func netpollopen(fd uintptr, pd *pollDesc) int32 {
+	lock(&pd.lock)
+	// We don't register for any specific type of events yet, that's
+	// netpollarm's job. We merely ensure we call port_associate before
+// asynchronous connect/accept completes, so when we actually want
+	// to do any I/O, the call to port_associate (from netpollarm,
+	// with the interested event set) will unblock port_getn right away
+	// because of the I/O readiness notification.
+	pd.user = 0
+	r := port_associate(portfd, _PORT_SOURCE_FD, fd, 0, uintptr(unsafe.Pointer(pd)))
+	unlock(&pd.lock)
+	return r
+}
+
+func netpollclose(fd uintptr) int32 {
+	return port_dissociate(portfd, _PORT_SOURCE_FD, fd)
+}
+
+// Updates the association with a new set of interested events. After
+// this call, port_getn will return one and only one event for that
+// particular descriptor, so this function needs to be called again.
+func netpollupdate(pd *pollDesc, set, clear uint32) {
+	if pd.closing {
+		return
+	}
+
+	old := pd.user
+	events := (old & ^clear) | set
+	if old == events {
+		return
+	}
+
+	if events != 0 && port_associate(portfd, _PORT_SOURCE_FD, pd.fd, events, uintptr(unsafe.Pointer(pd))) != 0 {
+		print("netpollupdate: failed to associate (", errno(), ")\n")
+		gothrow("netpollupdate: failed to associate")
+	}
+	pd.user = events
+}
+
+// subscribe the fd to the port such that port_getn will return one event.
+func netpollarm(pd *pollDesc, mode int) {
+	lock(&pd.lock)
+	switch mode {
+	case 'r':
+		netpollupdate(pd, _POLLIN, 0)
+	case 'w':
+		netpollupdate(pd, _POLLOUT, 0)
+	default:
+		gothrow("netpollarm: bad mode")
+	}
+	unlock(&pd.lock)
+}
+
+// netpolllasterr holds the last error code returned by port_getn to prevent log spamming
+var netpolllasterr int32
+
+// polls for ready network connections
+// returns list of goroutines that become runnable
+func netpoll(block bool) (gp *g) {
+	if portfd == -1 {
+		return
+	}
+
+	var wait *timespec
+	var zero timespec
+	if !block {
+		wait = &zero
+	}
+
+	var events [128]portevent
+retry:
+	var n uint32 = 1
+	if port_getn(portfd, &events[0], uint32(len(events)), &n, wait) < 0 {
+		if e := errno(); e != _EINTR && e != netpolllasterr {
+			netpolllasterr = e
+			print("runtime: port_getn on fd ", portfd, " failed with ", e, "\n")
+		}
+		goto retry
+	}
+
+	gp = nil
+	for i := 0; i < int(n); i++ {
+		ev := &events[i]
+
+		if ev.portev_events == 0 {
+			continue
+		}
+		pd := (*pollDesc)(unsafe.Pointer(ev.portev_user))
+
+		var mode, clear int32
+		if (ev.portev_events & (_POLLIN | _POLLHUP | _POLLERR)) != 0 {
+			mode += 'r'
+			clear |= _POLLIN
+		}
+		if (ev.portev_events & (_POLLOUT | _POLLHUP | _POLLERR)) != 0 {
+			mode += 'w'
+			clear |= _POLLOUT
+		}
+		// To effect edge-triggered events, we need to be sure to
+		// update our association with whatever events were not
+		// set with the event. For example if we are registered
+		// for POLLIN|POLLOUT, and we get POLLIN, besides waking
+		// the goroutine interested in POLLIN we have to not forget
+		// about the one interested in POLLOUT.
+		if clear != 0 {
+			lock(&pd.lock)
+			netpollupdate(pd, 0, uint32(clear))
+			unlock(&pd.lock)
+		}
+
+		if mode != 0 {
+			netpollready((**g)(noescape(unsafe.Pointer(&gp))), pd, mode)
+		}
+	}
+
+	if block && gp == nil {
+		goto retry
+	}
+	return gp
+}
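The core of the level-triggered bookkeeping above is the event-set merge in netpollupdate: keep what was previously armed, drop the events that just fired, and add the newly requested ones. A minimal standalone sketch of that merge follows; the event values are made up for illustration and are not the real _POLLIN/_POLLOUT constants.

package main

import "fmt"

const (
	pollIn  uint32 = 0x1 // illustrative values, not the Solaris constants
	pollOut uint32 = 0x4
)

// merge mimics the update in netpollupdate: (old &^ clear) | set.
func merge(old, set, clear uint32) uint32 {
	return (old &^ clear) | set
}

func main() {
	// Armed for read+write, a read event fires: the poller clears POLLIN and
	// must re-associate with POLLOUT only, or the writer would be starved.
	armed := pollIn | pollOut
	after := merge(armed, 0, pollIn)
	fmt.Printf("armed=%#x after read event=%#x\n", armed, after)

	// Later, Go code asks for read again (netpollarm): POLLIN is added back.
	fmt.Printf("re-armed=%#x\n", merge(after, pollIn, 0))
}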
diff --git a/src/runtime/netpoll_windows.c b/src/runtime/netpoll_windows.c
deleted file mode 100644
index 64da41a..0000000
--- a/src/runtime/netpoll_windows.c
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-
-#define DWORD_MAX 0xffffffff
-
-#pragma dynimport runtime·CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
-#pragma dynimport runtime·GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
-#pragma dynimport runtime·WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
-
-extern void *runtime·CreateIoCompletionPort;
-extern void *runtime·GetQueuedCompletionStatus;
-extern void *runtime·WSAGetOverlappedResult;
-
-#define INVALID_HANDLE_VALUE ((uintptr)-1)
-
-// net_op must be the same as beginning of net.operation. Keep these in sync.
-typedef struct net_op net_op;
-struct net_op
-{
-	// used by windows
-	Overlapped	o;
-	// used by netpoll
-	PollDesc*	pd;
-	int32	mode;
-	int32	errno;
-	uint32	qty;
-};
-
-typedef struct OverlappedEntry OverlappedEntry;
-struct OverlappedEntry
-{
-	uintptr	key;
-	net_op*	op;  // In reality it's Overlapped*, but we cast it to net_op* anyway.
-	uintptr	internal;
-	uint32	qty;
-};
-
-static void handlecompletion(G **gpp, net_op *o, int32 errno, uint32 qty);
-
-static uintptr iocphandle = INVALID_HANDLE_VALUE;  // completion port io handle
-
-void
-runtime·netpollinit(void)
-{
-	iocphandle = (uintptr)runtime·stdcall4(runtime·CreateIoCompletionPort, INVALID_HANDLE_VALUE, 0, 0, DWORD_MAX);
-	if(iocphandle == 0) {
-		runtime·printf("netpoll: failed to create iocp handle (errno=%d)\n", runtime·getlasterror());
-		runtime·throw("netpoll: failed to create iocp handle");
-	}
-	return;
-}
-
-int32
-runtime·netpollopen(uintptr fd, PollDesc *pd)
-{
-	USED(pd);
-	if(runtime·stdcall4(runtime·CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0)
-		return -runtime·getlasterror();
-	return 0;
-}
-
-int32
-runtime·netpollclose(uintptr fd)
-{
-	// nothing to do
-	USED(fd);
-	return 0;
-}
-
-void
-runtime·netpollarm(PollDesc* pd, int32 mode)
-{
-	USED(pd, mode);
-	runtime·throw("unused");
-}
-
-// Polls for completed network IO.
-// Returns list of goroutines that become runnable.
-G*
-runtime·netpoll(bool block)
-{
-	OverlappedEntry entries[64];
-	uint32 wait, qty, key, flags, n, i;
-	int32 errno;
-	net_op *op;
-	G *gp;
-
-	if(iocphandle == INVALID_HANDLE_VALUE)
-		return nil;
-	gp = nil;
-	wait = 0;
-	if(block)
-		wait = INFINITE;
-retry:
-	if(runtime·GetQueuedCompletionStatusEx != nil) {
-		n = nelem(entries) / runtime·gomaxprocs;
-		if(n < 8)
-			n = 8;
-		if(block)
-			g->m->blocked = true;
-		if(runtime·stdcall6(runtime·GetQueuedCompletionStatusEx, iocphandle, (uintptr)entries, n, (uintptr)&n, wait, 0) == 0) {
-			g->m->blocked = false;
-			errno = runtime·getlasterror();
-			if(!block && errno == WAIT_TIMEOUT)
-				return nil;
-			runtime·printf("netpoll: GetQueuedCompletionStatusEx failed (errno=%d)\n", errno);
-			runtime·throw("netpoll: GetQueuedCompletionStatusEx failed");
-		}
-		g->m->blocked = false;
-		for(i = 0; i < n; i++) {
-			op = entries[i].op;
-			errno = 0;
-			qty = 0;
-			if(runtime·stdcall5(runtime·WSAGetOverlappedResult, runtime·netpollfd(op->pd), (uintptr)op, (uintptr)&qty, 0, (uintptr)&flags) == 0)
-				errno = runtime·getlasterror();
-			handlecompletion(&gp, op, errno, qty);
-		}
-	} else {
-		op = nil;
-		errno = 0;
-		qty = 0;
-		if(block)
-			g->m->blocked = true;
-		if(runtime·stdcall5(runtime·GetQueuedCompletionStatus, iocphandle, (uintptr)&qty, (uintptr)&key, (uintptr)&op, wait) == 0) {
-			g->m->blocked = false;
-			errno = runtime·getlasterror();
-			if(!block && errno == WAIT_TIMEOUT)
-				return nil;
-			if(op == nil) {
-				runtime·printf("netpoll: GetQueuedCompletionStatus failed (errno=%d)\n", errno);
-				runtime·throw("netpoll: GetQueuedCompletionStatus failed");
-			}
-			// dequeued failed IO packet, so report that
-		}
-		g->m->blocked = false;
-		handlecompletion(&gp, op, errno, qty);
-	}
-	if(block && gp == nil)
-		goto retry;
-	return gp;
-}
-
-static void
-handlecompletion(G **gpp, net_op *op, int32 errno, uint32 qty)
-{
-	int32 mode;
-
-	if(op == nil)
-		runtime·throw("netpoll: GetQueuedCompletionStatus returned op == nil");
-	mode = op->mode;
-	if(mode != 'r' && mode != 'w') {
-		runtime·printf("netpoll: GetQueuedCompletionStatus returned invalid mode=%d\n", mode);
-		runtime·throw("netpoll: GetQueuedCompletionStatus returned invalid mode");
-	}
-	op->errno = errno;
-	op->qty = qty;
-	runtime·netpollready(gpp, op->pd, mode);
-}
diff --git a/src/runtime/netpoll_windows.go b/src/runtime/netpoll_windows.go
new file mode 100644
index 0000000..88e8781
--- /dev/null
+++ b/src/runtime/netpoll_windows.go
@@ -0,0 +1,156 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"unsafe"
+)
+
+const _DWORD_MAX = 0xffffffff
+
+//go:cgo_import_dynamic runtime._CreateIoCompletionPort CreateIoCompletionPort "kernel32.dll"
+//go:cgo_import_dynamic runtime._GetQueuedCompletionStatus GetQueuedCompletionStatus "kernel32.dll"
+//go:cgo_import_dynamic runtime._WSAGetOverlappedResult WSAGetOverlappedResult "ws2_32.dll"
+
+var (
+	_CreateIoCompletionPort,
+	_GetQueuedCompletionStatus,
+	_WSAGetOverlappedResult stdFunction
+)
+
+const _INVALID_HANDLE_VALUE = ^uintptr(0)
+
+// net_op must be the same as beginning of net.operation. Keep these in sync.
+type net_op struct {
+	// used by windows
+	o overlapped
+	// used by netpoll
+	pd    *pollDesc
+	mode  int32
+	errno int32
+	qty   uint32
+}
+
+type overlappedEntry struct {
+	key      uintptr
+	op       *net_op // In reality it's *overlapped, but we cast it to *net_op anyway.
+	internal uintptr
+	qty      uint32
+}
+
+var iocphandle uintptr = _INVALID_HANDLE_VALUE // completion port io handle
+
+func netpollinit() {
+	iocphandle = uintptr(stdcall4(_CreateIoCompletionPort, _INVALID_HANDLE_VALUE, 0, 0, _DWORD_MAX))
+	if iocphandle == 0 {
+		println("netpoll: failed to create iocp handle (errno=", getlasterror(), ")")
+		gothrow("netpoll: failed to create iocp handle")
+	}
+}
+
+func netpollopen(fd uintptr, pd *pollDesc) int32 {
+	if stdcall4(_CreateIoCompletionPort, fd, iocphandle, 0, 0) == 0 {
+		return -int32(getlasterror())
+	}
+	return 0
+}
+
+func netpollclose(fd uintptr) int32 {
+	// nothing to do
+	return 0
+}
+
+func netpollarm(pd *pollDesc, mode int) {
+	gothrow("unused")
+}
+
+// Polls for completed network IO.
+// Returns list of goroutines that become runnable.
+func netpoll(block bool) *g {
+	var entries [64]overlappedEntry
+	var wait, qty, key, flags, n, i uint32
+	var errno int32
+	var op *net_op
+	var gp *g
+
+	mp := getg().m
+
+	if iocphandle == _INVALID_HANDLE_VALUE {
+		return nil
+	}
+	gp = nil
+	wait = 0
+	if block {
+		wait = _INFINITE
+	}
+retry:
+	if _GetQueuedCompletionStatusEx != nil {
+		n = uint32(len(entries) / int(gomaxprocs))
+		if n < 8 {
+			n = 8
+		}
+		if block {
+			mp.blocked = true
+		}
+		if stdcall6(_GetQueuedCompletionStatusEx, iocphandle, uintptr(unsafe.Pointer(&entries[0])), uintptr(n), uintptr(unsafe.Pointer(&n)), uintptr(wait), 0) == 0 {
+			mp.blocked = false
+			errno = int32(getlasterror())
+			if !block && errno == _WAIT_TIMEOUT {
+				return nil
+			}
+			println("netpoll: GetQueuedCompletionStatusEx failed (errno=", errno, ")")
+			gothrow("netpoll: GetQueuedCompletionStatusEx failed")
+		}
+		mp.blocked = false
+		for i = 0; i < n; i++ {
+			op = entries[i].op
+			errno = 0
+			qty = 0
+			if stdcall5(_WSAGetOverlappedResult, netpollfd(op.pd), uintptr(unsafe.Pointer(op)), uintptr(unsafe.Pointer(&qty)), 0, uintptr(unsafe.Pointer(&flags))) == 0 {
+				errno = int32(getlasterror())
+			}
+			handlecompletion(&gp, op, errno, qty)
+		}
+	} else {
+		op = nil
+		errno = 0
+		qty = 0
+		if block {
+			mp.blocked = true
+		}
+		if stdcall5(_GetQueuedCompletionStatus, iocphandle, uintptr(unsafe.Pointer(&qty)), uintptr(unsafe.Pointer(&key)), uintptr(unsafe.Pointer(&op)), uintptr(wait)) == 0 {
+			mp.blocked = false
+			errno = int32(getlasterror())
+			if !block && errno == _WAIT_TIMEOUT {
+				return nil
+			}
+			if op == nil {
+				println("netpoll: GetQueuedCompletionStatus failed (errno=", errno, ")")
+				gothrow("netpoll: GetQueuedCompletionStatus failed")
+			}
+			// dequeued failed IO packet, so report that
+		}
+		mp.blocked = false
+		handlecompletion(&gp, op, errno, qty)
+	}
+	if block && gp == nil {
+		goto retry
+	}
+	return gp
+}
+
+func handlecompletion(gpp **g, op *net_op, errno int32, qty uint32) {
+	if op == nil {
+		gothrow("netpoll: GetQueuedCompletionStatus returned op == nil")
+	}
+	mode := op.mode
+	if mode != 'r' && mode != 'w' {
+		println("netpoll: GetQueuedCompletionStatus returned invalid mode=", mode)
+		gothrow("netpoll: GetQueuedCompletionStatus returned invalid mode")
+	}
+	op.errno = errno
+	op.qty = qty
+	netpollready(gpp, op.pd, mode)
+}
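When GetQueuedCompletionStatusEx is available, netpoll above sizes each dequeue batch as len(entries)/gomaxprocs, clamped to a minimum of 8, so one polling thread does not drain every completion on a machine with many Ps. A small standalone sketch of that sizing rule; the 64-entry buffer and the minimum of 8 come from the code above, while the GOMAXPROCS values are just examples.

package main

import "fmt"

// batchSize mirrors the sizing logic in netpoll: divide the entry buffer among
// the Ps but never ask for fewer than 8 completions at a time.
func batchSize(entries, gomaxprocs int) int {
	n := entries / gomaxprocs
	if n < 8 {
		n = 8
	}
	return n
}

func main() {
	for _, procs := range []int{1, 4, 16, 64} {
		fmt.Println("GOMAXPROCS =", procs, "-> batch =", batchSize(64, procs))
	}
}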
diff --git a/src/runtime/norace_test.go b/src/runtime/norace_test.go
index 3b17187..3681bf1 100644
--- a/src/runtime/norace_test.go
+++ b/src/runtime/norace_test.go
@@ -34,12 +34,12 @@
 	b.RunParallel(func(pb *testing.PB) {
 		foo := 42
 		for pb.Next() {
-			runtime.Entersyscall()
+			runtime.Entersyscall(0)
 			for i := 0; i < work; i++ {
 				foo *= 2
 				foo /= 2
 			}
-			runtime.Exitsyscall()
+			runtime.Exitsyscall(0)
 		}
 		_ = foo
 	})
diff --git a/src/runtime/os1_darwin.go b/src/runtime/os1_darwin.go
new file mode 100644
index 0000000..2fbf2ca
--- /dev/null
+++ b/src/runtime/os1_darwin.go
@@ -0,0 +1,423 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//extern SigTabTT runtime·sigtab[];
+
+var sigset_none = uint32(0)
+var sigset_all = ^uint32(0)
+
+func unimplemented(name string) {
+	println(name, "not implemented")
+	*(*int)(unsafe.Pointer(uintptr(1231))) = 1231
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+	mach_semrelease(uint32(mp.waitsema))
+}
+
+//go:nosplit
+func semacreate() uintptr {
+	var x uintptr
+	systemstack(func() {
+		x = uintptr(mach_semcreate())
+	})
+	return x
+}
+
+// BSD interface for threading.
+func osinit() {
+	// bsdthread_register delayed until end of goenvs so that we
+	// can look at the environment first.
+
+	// Use sysctl to fetch hw.ncpu.
+	mib := [2]uint32{6, 3}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 {
+		ncpu = int32(out)
+	}
+}
+
+var urandom_data [_HashRandomBytes]byte
+var urandom_dev = []byte("/dev/random\x00")
+
+//go:nosplit
+func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes {
+		*rnd = unsafe.Pointer(&urandom_data[0])
+		*rnd_len = _HashRandomBytes
+	} else {
+		*rnd = nil
+		*rnd_len = 0
+	}
+	close(fd)
+}
+
+func goenvs() {
+	goenvs_unix()
+
+	// Register our thread-creation callback (see sys_darwin_{amd64,386}.s)
+	// but only if we're not using cgo.  If we are using cgo we need
+	// to let the C pthread library install its own thread-creation callback.
+	if !iscgo {
+		if bsdthread_register() != 0 {
+			if gogetenv("DYLD_INSERT_LIBRARIES") != "" {
+				gothrow("runtime: bsdthread_register error (unset DYLD_INSERT_LIBRARIES)")
+			}
+			gothrow("runtime: bsdthread_register error")
+		}
+	}
+}
+
+func newosproc(mp *m, stk unsafe.Pointer) {
+	mp.tls[0] = uintptr(mp.id) // so 386 asm can find it
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, "/", int(mp.tls[0]), " ostk=", &mp, "\n")
+	}
+
+	var oset uint32
+	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
+	errno := bsdthread_create(stk, mp, mp.g0, funcPC(mstart))
+	sigprocmask(_SIG_SETMASK, &oset, nil)
+
+	if errno < 0 {
+		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -errno, ")\n")
+		gothrow("runtime.newosproc")
+	}
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024) // OS X wants >= 8K
+	mp.gsignal.m = mp
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+func minit() {
+	// Initialize signal handling.
+	_g_ := getg()
+	signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
+	sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+func unminit() {
+	signalstack(nil, 0)
+}
+
+// Mach IPC, to get at semaphores
+// Definitions are in /usr/include/mach on a Mac.
+
+func macherror(r int32, fn string) {
+	print("mach error ", fn, ": ", r, "\n")
+	gothrow("mach error")
+}
+
+const _DebugMach = false
+
+var zerondr machndr
+
+func mach_msgh_bits(a, b uint32) uint32 {
+	return a | b<<8
+}
+
+func mach_msg(h *machheader, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32 {
+	// TODO: Loop on interrupt.
+	return mach_msg_trap(unsafe.Pointer(h), op, send_size, rcv_size, rcv_name, timeout, notify)
+}
+
+// Mach RPC (MIG)
+const (
+	_MinMachMsg = 48
+	_MachReply  = 100
+)
+
+type codemsg struct {
+	h    machheader
+	ndr  machndr
+	code int32
+}
+
+func machcall(h *machheader, maxsize int32, rxsize int32) int32 {
+	_g_ := getg()
+	port := _g_.m.machport
+	if port == 0 {
+		port = mach_reply_port()
+		_g_.m.machport = port
+	}
+
+	h.msgh_bits |= mach_msgh_bits(_MACH_MSG_TYPE_COPY_SEND, _MACH_MSG_TYPE_MAKE_SEND_ONCE)
+	h.msgh_local_port = port
+	h.msgh_reserved = 0
+	id := h.msgh_id
+
+	if _DebugMach {
+		p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h))
+		print("send:\t")
+		var i uint32
+		for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ {
+			print(" ", p[i])
+			if i%8 == 7 {
+				print("\n\t")
+			}
+		}
+		if i%8 != 0 {
+			print("\n")
+		}
+	}
+	ret := mach_msg(h, _MACH_SEND_MSG|_MACH_RCV_MSG, h.msgh_size, uint32(maxsize), port, 0, 0)
+	if ret != 0 {
+		if _DebugMach {
+			print("mach_msg error ", ret, "\n")
+		}
+		return ret
+	}
+	if _DebugMach {
+		p := (*[10000]unsafe.Pointer)(unsafe.Pointer(h))
+		var i uint32
+		for i = 0; i < h.msgh_size/uint32(unsafe.Sizeof(p[0])); i++ {
+			print(" ", p[i])
+			if i%8 == 7 {
+				print("\n\t")
+			}
+		}
+		if i%8 != 0 {
+			print("\n")
+		}
+	}
+	if h.msgh_id != id+_MachReply {
+		if _DebugMach {
+			print("mach_msg _MachReply id mismatch ", h.msgh_id, " != ", id+_MachReply, "\n")
+		}
+		return -303 // MIG_REPLY_MISMATCH
+	}
+	// Look for a response giving the return value.
+	// Any call can send this back with an error,
+	// and some calls only have return values so they
+	// send it back on success too.  I don't quite see how
+	// you know it's one of these and not the full response
+	// format, so just look if the message is right.
+	c := (*codemsg)(unsafe.Pointer(h))
+	if uintptr(h.msgh_size) == unsafe.Sizeof(*c) && h.msgh_bits&_MACH_MSGH_BITS_COMPLEX == 0 {
+		if _DebugMach {
+			print("mig result ", c.code, "\n")
+		}
+		return c.code
+	}
+	if h.msgh_size != uint32(rxsize) {
+		if _DebugMach {
+			print("mach_msg _MachReply size mismatch ", h.msgh_size, " != ", rxsize, "\n")
+		}
+		return -307 // MIG_ARRAY_TOO_LARGE
+	}
+	return 0
+}
+
+// Semaphores!
+
+const (
+	tmach_semcreate = 3418
+	rmach_semcreate = tmach_semcreate + _MachReply
+
+	tmach_semdestroy = 3419
+	rmach_semdestroy = tmach_semdestroy + _MachReply
+
+	_KERN_ABORTED             = 14
+	_KERN_OPERATION_TIMED_OUT = 49
+)
+
+type tmach_semcreatemsg struct {
+	h      machheader
+	ndr    machndr
+	policy int32
+	value  int32
+}
+
+type rmach_semcreatemsg struct {
+	h         machheader
+	body      machbody
+	semaphore machport
+}
+
+type tmach_semdestroymsg struct {
+	h         machheader
+	body      machbody
+	semaphore machport
+}
+
+func mach_semcreate() uint32 {
+	var m [256]uint8
+	tx := (*tmach_semcreatemsg)(unsafe.Pointer(&m))
+	rx := (*rmach_semcreatemsg)(unsafe.Pointer(&m))
+
+	tx.h.msgh_bits = 0
+	tx.h.msgh_size = uint32(unsafe.Sizeof(*tx))
+	tx.h.msgh_remote_port = mach_task_self()
+	tx.h.msgh_id = tmach_semcreate
+	tx.ndr = zerondr
+
+	tx.policy = 0 // 0 = SYNC_POLICY_FIFO
+	tx.value = 0
+
+	for {
+		r := machcall(&tx.h, int32(unsafe.Sizeof(m)), int32(unsafe.Sizeof(*rx)))
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+		macherror(r, "semaphore_create")
+	}
+	if rx.body.msgh_descriptor_count != 1 {
+		unimplemented("mach_semcreate desc count")
+	}
+	return rx.semaphore.name
+}
+
+func mach_semdestroy(sem uint32) {
+	var m [256]uint8
+	tx := (*tmach_semdestroymsg)(unsafe.Pointer(&m))
+
+	tx.h.msgh_bits = _MACH_MSGH_BITS_COMPLEX
+	tx.h.msgh_size = uint32(unsafe.Sizeof(*tx))
+	tx.h.msgh_remote_port = mach_task_self()
+	tx.h.msgh_id = tmach_semdestroy
+	tx.body.msgh_descriptor_count = 1
+	tx.semaphore.name = sem
+	tx.semaphore.disposition = _MACH_MSG_TYPE_MOVE_SEND
+	tx.semaphore._type = 0
+
+	for {
+		r := machcall(&tx.h, int32(unsafe.Sizeof(m)), 0)
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+		macherror(r, "semaphore_destroy")
+	}
+}
+
+// The other calls have simple system call traps in sys_darwin_{amd64,386}.s
+
+func mach_semaphore_wait(sema uint32) int32
+func mach_semaphore_timedwait(sema, sec, nsec uint32) int32
+func mach_semaphore_signal(sema uint32) int32
+func mach_semaphore_signal_all(sema uint32) int32
+
+func semasleep1(ns int64) int32 {
+	_g_ := getg()
+
+	if ns >= 0 {
+		var nsecs int32
+		secs := timediv(ns, 1000000000, &nsecs)
+		r := mach_semaphore_timedwait(uint32(_g_.m.waitsema), uint32(secs), uint32(nsecs))
+		if r == _KERN_ABORTED || r == _KERN_OPERATION_TIMED_OUT {
+			return -1
+		}
+		if r != 0 {
+			macherror(r, "semaphore_wait")
+		}
+		return 0
+	}
+
+	for {
+		r := mach_semaphore_wait(uint32(_g_.m.waitsema))
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+		macherror(r, "semaphore_wait")
+	}
+	return 0
+}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+	var r int32
+	systemstack(func() {
+		r = semasleep1(ns)
+	})
+	return r
+}
+
+//go:nosplit
+func mach_semrelease(sem uint32) {
+	for {
+		r := mach_semaphore_signal(sem)
+		if r == 0 {
+			break
+		}
+		if r == _KERN_ABORTED { // interrupted
+			continue
+		}
+
+		// mach_semrelease must be completely nosplit,
+		// because it is called from Go code.
+		// If we're going to die, start that process on the system stack
+		// to avoid a Go stack split.
+		systemstack(func() { macherror(r, "semaphore_signal") })
+	}
+}
+
+//go:nosplit
+func osyield() {
+	usleep(1)
+}
+
+func memlimit() uintptr {
+	// NOTE(rsc): Could use getrlimit here,
+	// like on FreeBSD or Linux, but Darwin doesn't enforce
+	// ulimit -v, so it's unclear why we'd try to stay within
+	// the limit.
+	return 0
+}
+
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = ^uint32(0)
+	sa.sa_tramp = unsafe.Pointer(funcPC(sigtramp)) // runtime·sigtramp's job is to call into real handler
+	*(*uintptr)(unsafe.Pointer(&sa.__sigaction_u)) = fn
+	sigaction(uint32(i), &sa, nil)
+}
+
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
+	sigaction(uint32(i), nil, &sa)
+	return *(*uintptr)(unsafe.Pointer(&sa.__sigaction_u))
+}
+
+func signalstack(p *byte, n int32) {
+	var st stackt
+	st.ss_sp = p
+	st.ss_size = uintptr(n)
+	st.ss_flags = 0
+	if p == nil {
+		st.ss_flags = _SS_DISABLE
+	}
+	sigaltstack(&st, nil)
+}
+
+func unblocksignals() {
+	sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+}
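Every Mach semaphore call above follows the same retry shape: loop, treat _KERN_ABORTED as an interrupt and try again, and report anything else through macherror. A compact standalone sketch of that idiom; retryMach and the simulated call below are illustrative helpers, not runtime functions.

package main

import "fmt"

const kernAborted = 14 // _KERN_ABORTED in the file above

// retryMach keeps invoking fn until it either succeeds or fails with
// something other than an interrupt.
func retryMach(name string, fn func() int32) {
	for {
		r := fn()
		if r == 0 {
			return
		}
		if r == kernAborted { // interrupted: retry, as the runtime does
			continue
		}
		fmt.Println("mach error", name, ":", r) // macherror would throw here
		return
	}
}

func main() {
	calls := 0
	retryMach("semaphore_signal", func() int32 {
		calls++
		if calls < 3 {
			return kernAborted // pretend the first two attempts were interrupted
		}
		return 0
	})
	fmt.Println("succeeded after", calls, "attempts")
}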
diff --git a/src/runtime/os1_dragonfly.go b/src/runtime/os1_dragonfly.go
new file mode 100644
index 0000000..82bb45b
--- /dev/null
+++ b/src/runtime/os1_dragonfly.go
@@ -0,0 +1,220 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// From DragonFly's <sys/sysctl.h>
+const (
+	_CTL_HW  = 6
+	_HW_NCPU = 3
+)
+
+var sigset_none = sigset{}
+var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
+
+func getncpu() int32 {
+	mib := [2]uint32{_CTL_HW, _HW_NCPU}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 {
+		return int32(out)
+	}
+	return 1
+}
+
+//go:nosplit
+func futexsleep(addr *uint32, val uint32, ns int64) {
+	systemstack(func() {
+		futexsleep1(addr, val, ns)
+	})
+}
+
+func futexsleep1(addr *uint32, val uint32, ns int64) {
+	var timeout int32
+	if ns >= 0 {
+		// The timeout is specified in microseconds - ensure that we
+		// do not end up dividing to zero, which would put us to sleep
+		// indefinitely...
+		timeout = timediv(ns, 1000, nil)
+		if timeout == 0 {
+			timeout = 1
+		}
+	}
+
+	// sys_umtx_sleep will return EWOULDBLOCK (EAGAIN) when the timeout
+	// expires or EBUSY if the mutex value does not match.
+	ret := sys_umtx_sleep(addr, int32(val), timeout)
+	if ret >= 0 || ret == -_EINTR || ret == -_EAGAIN || ret == -_EBUSY {
+		return
+	}
+
+	print("umtx_sleep addr=", addr, " val=", val, " ret=", ret, "\n")
+	*(*int32)(unsafe.Pointer(uintptr(0x1005))) = 0x1005
+}
+
+//go:nosplit
+func futexwakeup(addr *uint32, cnt uint32) {
+	ret := sys_umtx_wakeup(addr, int32(cnt))
+	if ret >= 0 {
+		return
+	}
+
+	systemstack(func() {
+		print("umtx_wake_addr=", addr, " ret=", ret, "\n")
+		*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
+	})
+}
+
+func lwp_start(uintptr)
+
+func newosproc(mp *m, stk unsafe.Pointer) {
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " lwp_start=", funcPC(lwp_start), " id=", mp.id, "/", mp.tls[0], " ostk=", &mp, "\n")
+	}
+
+	var oset sigset
+	sigprocmask(&sigset_all, &oset)
+
+	params := lwpparams{
+		start_func: funcPC(lwp_start),
+		arg:        unsafe.Pointer(mp),
+		stack:      uintptr(stk),
+		tid1:       unsafe.Pointer(&mp.procid),
+		tid2:       nil,
+	}
+
+	mp.tls[0] = uintptr(mp.id) // so 386 asm can find it
+
+	lwp_create(&params)
+	sigprocmask(&oset, nil)
+}
+
+func osinit() {
+	ncpu = getncpu()
+}
+
+var urandom_data [_HashRandomBytes]byte
+var urandom_dev = []byte("/dev/urandom\x00")
+
+//go:nosplit
+func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes {
+		*rnd = unsafe.Pointer(&urandom_data[0])
+		*rnd_len = _HashRandomBytes
+	} else {
+		*rnd = nil
+		*rnd_len = 0
+	}
+	close(fd)
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024)
+	mp.gsignal.m = mp
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+func minit() {
+	_g_ := getg()
+
+	// m.procid is a uint64, but lwp_start writes an int32. Fix it up.
+	_g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid)))
+
+	// Initialize signal handling
+	signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
+	sigprocmask(&sigset_none, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+func unminit() {
+	signalstack(nil, 0)
+}
+
+func memlimit() uintptr {
+	/*
+		                TODO: Convert to Go when something actually uses the result.
+
+				Rlimit rl;
+				extern byte runtime·text[], runtime·end[];
+				uintptr used;
+
+				if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+					return 0;
+				if(rl.rlim_cur >= 0x7fffffff)
+					return 0;
+
+				// Estimate our VM footprint excluding the heap.
+				// Not an exact science: use size of binary plus
+				// some room for thread stacks.
+				used = runtime·end - runtime·text + (64<<20);
+				if(used >= rl.rlim_cur)
+					return 0;
+
+				// If there's not at least 16 MB left, we're probably
+				// not going to be able to do much.  Treat as no limit.
+				rl.rlim_cur -= used;
+				if(rl.rlim_cur < (16<<20))
+					return 0;
+
+				return rl.rlim_cur - used;
+	*/
+	return 0
+}
+
+func sigtramp()
+
+type sigactiont struct {
+	sa_sigaction uintptr
+	sa_flags     int32
+	sa_mask      sigset
+}
+
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = sigset_all
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	sa.sa_sigaction = fn
+	sigaction(i, &sa, nil)
+}
+
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	sigaction(i, nil, &sa)
+	if sa.sa_sigaction == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_sigaction
+}
+
+func signalstack(p *byte, n int32) {
+	var st sigaltstackt
+	st.ss_sp = uintptr(unsafe.Pointer(p))
+	st.ss_size = uintptr(n)
+	st.ss_flags = 0
+	if p == nil {
+		st.ss_flags = _SS_DISABLE
+	}
+	sigaltstack(&st, nil)
+}
+
+func unblocksignals() {
+	sigprocmask(&sigset_none, nil)
+}
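futexsleep1 above converts a nanosecond deadline into the microseconds sys_umtx_sleep expects and clamps the result to at least 1, so a sub-microsecond deadline is never passed as 0, which would mean an indefinite sleep. A hedged standalone sketch of just that conversion, with plain division standing in for the runtime's timediv helper:

package main

import "fmt"

// umtxTimeout converts ns to microseconds for sys_umtx_sleep, clamping to 1
// so a tiny positive deadline never turns into an unbounded sleep.
// ns < 0 keeps the "no timeout" value of 0, matching the code above.
func umtxTimeout(ns int64) int32 {
	if ns < 0 {
		return 0
	}
	t := int32(ns / 1000)
	if t == 0 {
		t = 1
	}
	return t
}

func main() {
	for _, ns := range []int64{-1, 250, 1500, 2000000} {
		fmt.Println(ns, "ns ->", umtxTimeout(ns), "us")
	}
}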
diff --git a/src/runtime/os1_freebsd.go b/src/runtime/os1_freebsd.go
new file mode 100644
index 0000000..2cacfba
--- /dev/null
+++ b/src/runtime/os1_freebsd.go
@@ -0,0 +1,221 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// From FreeBSD's <sys/sysctl.h>
+const (
+	_CTL_HW  = 6
+	_HW_NCPU = 3
+)
+
+var sigset_none = sigset{}
+var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
+
+func getncpu() int32 {
+	mib := [2]uint32{_CTL_HW, _HW_NCPU}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 {
+		return int32(out)
+	}
+	return 1
+}
+
+// FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and
+// thus the code is largely similar. See Linux implementation
+// and lock_futex.c for comments.
+
+//go:nosplit
+func futexsleep(addr *uint32, val uint32, ns int64) {
+	systemstack(func() {
+		futexsleep1(addr, val, ns)
+	})
+}
+
+func futexsleep1(addr *uint32, val uint32, ns int64) {
+	var tsp *timespec
+	if ns >= 0 {
+		var ts timespec
+		ts.tv_nsec = 0
+		ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec)))))
+		tsp = &ts
+	}
+	ret := sys_umtx_op(addr, _UMTX_OP_WAIT_UINT_PRIVATE, val, nil, tsp)
+	if ret >= 0 || ret == -_EINTR {
+		return
+	}
+	print("umtx_wait addr=", addr, " val=", val, " ret=", ret, "\n")
+	*(*int32)(unsafe.Pointer(uintptr(0x1005))) = 0x1005
+}
+
+//go:nosplit
+func futexwakeup(addr *uint32, cnt uint32) {
+	ret := sys_umtx_op(addr, _UMTX_OP_WAKE_PRIVATE, cnt, nil, nil)
+	if ret >= 0 {
+		return
+	}
+
+	systemstack(func() {
+		print("umtx_wake_addr=", addr, " ret=", ret, "\n")
+	})
+}
+
+func thr_start()
+
+func newosproc(mp *m, stk unsafe.Pointer) {
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " thr_start=", funcPC(thr_start), " id=", mp.id, "/", mp.tls[0], " ostk=", &mp, "\n")
+	}
+
+	// NOTE(rsc): This code is confused. stackbase is the top of the stack
+	// and is equal to stk. However, it's working, so I'm not changing it.
+	param := thrparam{
+		start_func: funcPC(thr_start),
+		arg:        unsafe.Pointer(mp),
+		stack_base: mp.g0.stack.hi,
+		stack_size: uintptr(stk) - mp.g0.stack.hi,
+		child_tid:  unsafe.Pointer(&mp.procid),
+		parent_tid: nil,
+		tls_base:   unsafe.Pointer(&mp.tls[0]),
+		tls_size:   unsafe.Sizeof(mp.tls),
+	}
+	mp.tls[0] = uintptr(mp.id) // so 386 asm can find it
+
+	var oset sigset
+	sigprocmask(&sigset_all, &oset)
+	thr_new(&param, int32(unsafe.Sizeof(param)))
+	sigprocmask(&oset, nil)
+}
+
+func osinit() {
+	ncpu = getncpu()
+}
+
+var urandom_data [_HashRandomBytes]byte
+var urandom_dev = []byte("/dev/random\x00")
+
+//go:nosplit
+func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes {
+		*rnd = unsafe.Pointer(&urandom_data[0])
+		*rnd_len = _HashRandomBytes
+	} else {
+		*rnd = nil
+		*rnd_len = 0
+	}
+	close(fd)
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024)
+	mp.gsignal.m = mp
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+func minit() {
+	_g_ := getg()
+
+	// m.procid is a uint64, but thr_new writes a uint32 on 32-bit systems.
+	// Fix it up. (Only matters on big-endian, but be clean anyway.)
+	if ptrSize == 4 {
+		_g_.m.procid = uint64(*(*uint32)(unsafe.Pointer(&_g_.m.procid)))
+	}
+
+	// Initialize signal handling.
+	signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
+	sigprocmask(&sigset_none, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+func unminit() {
+	signalstack(nil, 0)
+}
+
+func memlimit() uintptr {
+	/*
+		TODO: Convert to Go when something actually uses the result.
+		Rlimit rl;
+		extern byte runtime·text[], runtime·end[];
+		uintptr used;
+
+		if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+			return 0;
+		if(rl.rlim_cur >= 0x7fffffff)
+			return 0;
+
+		// Estimate our VM footprint excluding the heap.
+		// Not an exact science: use size of binary plus
+		// some room for thread stacks.
+		used = runtime·end - runtime·text + (64<<20);
+		if(used >= rl.rlim_cur)
+			return 0;
+
+		// If there's not at least 16 MB left, we're probably
+		// not going to be able to do much.  Treat as no limit.
+		rl.rlim_cur -= used;
+		if(rl.rlim_cur < (16<<20))
+			return 0;
+
+		return rl.rlim_cur - used;
+	*/
+
+	return 0
+}
+
+func sigtramp()
+
+type sigactiont struct {
+	sa_handler uintptr
+	sa_flags   int32
+	sa_mask    sigset
+}
+
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = sigset_all
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	sa.sa_handler = fn
+	sigaction(i, &sa, nil)
+}
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	sigaction(i, nil, &sa)
+	if sa.sa_handler == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_handler
+}
+
+func signalstack(p *byte, n int32) {
+	var st stackt
+	st.ss_sp = uintptr(unsafe.Pointer(p))
+	st.ss_size = uintptr(n)
+	st.ss_flags = 0
+	if p == nil {
+		st.ss_flags = _SS_DISABLE
+	}
+	sigaltstack(&st, nil)
+}
+
+func unblocksignals() {
+	sigprocmask(&sigset_none, nil)
+}
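The get_random_data fallback above reads the random device through raw file descriptors because it runs nosplit and must not allocate. Outside the runtime the same fallback is just a short read from the device; a minimal sketch in ordinary Go, with the helper name readRandom made up for illustration and not part of this patch:

	package main

	import (
		"fmt"
		"io"
		"os"
	)

	// readRandom mirrors the fallback path of get_random_data: read n bytes
	// from the random device, reporting failure as a nil slice.
	func readRandom(n int) []byte {
		f, err := os.Open("/dev/random")
		if err != nil {
			return nil
		}
		defer f.Close()
		buf := make([]byte, n)
		if _, err := io.ReadFull(f, buf); err != nil {
			return nil
		}
		return buf
	}

	func main() {
		fmt.Printf("%x\n", readRandom(32))
	}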
diff --git a/src/runtime/os1_linux.go b/src/runtime/os1_linux.go
new file mode 100644
index 0000000..67fa639
--- /dev/null
+++ b/src/runtime/os1_linux.go
@@ -0,0 +1,287 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+var sigset_none sigset
+var sigset_all sigset = sigset{^uint32(0), ^uint32(0)}
+
+// Linux futex.
+//
+//	futexsleep(uint32 *addr, uint32 val)
+//	futexwakeup(uint32 *addr)
+//
+// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
+// Futexwakeup wakes up threads sleeping on addr.
+// Futexsleep is allowed to wake up spuriously.
+
+const (
+	_FUTEX_WAIT = 0
+	_FUTEX_WAKE = 1
+)
+
+// Atomically,
+//	if(*addr == val) sleep
+// Might be woken up spuriously; that's allowed.
+// Don't sleep longer than ns; ns < 0 means forever.
+//go:nosplit
+func futexsleep(addr *uint32, val uint32, ns int64) {
+	var ts timespec
+
+	// Some Linux kernels have a bug where futex of
+	// FUTEX_WAIT returns an internal error code
+	// as an errno.  Libpthread ignores the return value
+	// here, and so can we: as it says a few lines up,
+	// spurious wakeups are allowed.
+	if ns < 0 {
+		futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, nil, nil, 0)
+		return
+	}
+
+	// It's difficult to live within the no-split stack limits here.
+	// On ARM and 386, a 64-bit divide invokes a general software routine
+	// that needs more stack than we can afford. So we use timediv instead.
+	// But on real 64-bit systems, where words are larger but the stack limit
+	// is not, even timediv is too heavy, and we really need to use just an
+	// ordinary machine instruction.
+	if ptrSize == 8 {
+		ts.set_sec(ns / 1000000000)
+		ts.set_nsec(int32(ns % 1000000000))
+	} else {
+		ts.tv_nsec = 0
+		ts.set_sec(int64(timediv(ns, 1000000000, (*int32)(unsafe.Pointer(&ts.tv_nsec)))))
+	}
+	futex(unsafe.Pointer(addr), _FUTEX_WAIT, val, unsafe.Pointer(&ts), nil, 0)
+}
+
+// If any procs are sleeping on addr, wake up at most cnt.
+//go:nosplit
+func futexwakeup(addr *uint32, cnt uint32) {
+	ret := futex(unsafe.Pointer(addr), _FUTEX_WAKE, cnt, nil, nil, 0)
+	if ret >= 0 {
+		return
+	}
+
+	// I don't know that futex wakeup can return
+	// EAGAIN or EINTR, but if it does, it would be
+	// safe to loop and call futex again.
+	systemstack(func() {
+		print("futexwakeup addr=", addr, " returned ", ret, "\n")
+	})
+
+	*(*int32)(unsafe.Pointer(uintptr(0x1006))) = 0x1006
+}
+
+func getproccount() int32 {
+	var buf [16]uintptr
+	r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
+	n := int32(0)
+	for _, v := range buf[:r/ptrSize] {
+		for i := 0; i < 64; i++ {
+			n += int32(v & 1)
+			v >>= 1
+		}
+	}
+	if n == 0 {
+		n = 1
+	}
+	return n
+}
+
+// Clone, the Linux rfork.
+const (
+	_CLONE_VM             = 0x100
+	_CLONE_FS             = 0x200
+	_CLONE_FILES          = 0x400
+	_CLONE_SIGHAND        = 0x800
+	_CLONE_PTRACE         = 0x2000
+	_CLONE_VFORK          = 0x4000
+	_CLONE_PARENT         = 0x8000
+	_CLONE_THREAD         = 0x10000
+	_CLONE_NEWNS          = 0x20000
+	_CLONE_SYSVSEM        = 0x40000
+	_CLONE_SETTLS         = 0x80000
+	_CLONE_PARENT_SETTID  = 0x100000
+	_CLONE_CHILD_CLEARTID = 0x200000
+	_CLONE_UNTRACED       = 0x800000
+	_CLONE_CHILD_SETTID   = 0x1000000
+	_CLONE_STOPPED        = 0x2000000
+	_CLONE_NEWUTS         = 0x4000000
+	_CLONE_NEWIPC         = 0x8000000
+)
+
+func newosproc(mp *m, stk unsafe.Pointer) {
+	/*
+	 * note: strace gets confused if we use CLONE_PTRACE here.
+	 */
+	var flags int32 = _CLONE_VM | /* share memory */
+		_CLONE_FS | /* share cwd, etc */
+		_CLONE_FILES | /* share fd table */
+		_CLONE_SIGHAND | /* share sig handler table */
+		_CLONE_THREAD /* revisit - okay for now */
+
+	mp.tls[0] = uintptr(mp.id) // so 386 asm can find it
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " clone=", funcPC(clone), " id=", mp.id, "/", mp.tls[0], " ostk=", &mp, "\n")
+	}
+
+	// Disable signals during clone, so that the new thread starts
+	// with signals disabled.  It will enable them in minit.
+	var oset sigset
+	rtsigprocmask(_SIG_SETMASK, &sigset_all, &oset, int32(unsafe.Sizeof(oset)))
+	ret := clone(flags, stk, unsafe.Pointer(mp), unsafe.Pointer(mp.g0), unsafe.Pointer(funcPC(mstart)))
+	rtsigprocmask(_SIG_SETMASK, &oset, nil, int32(unsafe.Sizeof(oset)))
+
+	if ret < 0 {
+		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", -ret, ")\n")
+		gothrow("newosproc")
+	}
+}
+
+func osinit() {
+	ncpu = getproccount()
+}
+
+// Random bytes initialized at startup.  These come
+// from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c).
+// byte*	runtime·startup_random_data;
+// uint32	runtime·startup_random_data_len;
+
+var urandom_data [_HashRandomBytes]byte
+var urandom_dev = []byte("/dev/random\x00")
+
+//go:nosplit
+func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) {
+	if startup_random_data != nil {
+		*rnd = unsafe.Pointer(startup_random_data)
+		*rnd_len = int32(startup_random_data_len)
+		return
+	}
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes {
+		*rnd = unsafe.Pointer(&urandom_data[0])
+		*rnd_len = _HashRandomBytes
+	} else {
+		*rnd = nil
+		*rnd_len = 0
+	}
+	close(fd)
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024) // Linux wants >= 2K
+	mp.gsignal.m = mp
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+func minit() {
+	// Initialize signal handling.
+	_g_ := getg()
+	signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
+	rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none)))
+}
+
+// Called from dropm to undo the effect of an minit.
+func unminit() {
+	signalstack(nil, 0)
+}
+
+func memlimit() uintptr {
+	/*
+		TODO: Convert to Go when something actually uses the result.
+
+		Rlimit rl;
+		extern byte runtime·text[], runtime·end[];
+		uintptr used;
+
+		if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+			return 0;
+		if(rl.rlim_cur >= 0x7fffffff)
+			return 0;
+
+		// Estimate our VM footprint excluding the heap.
+		// Not an exact science: use size of binary plus
+		// some room for thread stacks.
+		used = runtime·end - runtime·text + (64<<20);
+		if(used >= rl.rlim_cur)
+			return 0;
+
+		// If there's not at least 16 MB left, we're probably
+		// not going to be able to do much.  Treat as no limit.
+		rl.rlim_cur -= used;
+		if(rl.rlim_cur < (16<<20))
+			return 0;
+
+		return rl.rlim_cur - used;
+	*/
+
+	return 0
+}
+
+//#ifdef GOARCH_386
+//#define sa_handler k_sa_handler
+//#endif
+
+func sigreturn()
+func sigtramp()
+
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK | _SA_RESTORER
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = ^uint64(0)
+	// Although the Linux manpage says the "sa_restorer element is obsolete and
+	// should not be used", the x86_64 kernel requires it. Only use it on
+	// x86.
+	if GOARCH == "386" || GOARCH == "amd64" {
+		sa.sa_restorer = funcPC(sigreturn)
+	}
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	sa.sa_handler = fn
+	if rt_sigaction(uintptr(i), &sa, nil, unsafe.Sizeof(sa.sa_mask)) != 0 {
+		gothrow("rt_sigaction failure")
+	}
+}
+
+func getsig(i int32) uintptr {
+	var sa sigactiont
+
+	memclr(unsafe.Pointer(&sa), unsafe.Sizeof(sa))
+	if rt_sigaction(uintptr(i), nil, &sa, unsafe.Sizeof(sa.sa_mask)) != 0 {
+		gothrow("rt_sigaction read failure")
+	}
+	if sa.sa_handler == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_handler
+}
+
+func signalstack(p *byte, n int32) {
+	var st sigaltstackt
+	st.ss_sp = p
+	st.ss_size = uintptr(n)
+	st.ss_flags = 0
+	if p == nil {
+		st.ss_flags = _SS_DISABLE
+	}
+	sigaltstack(&st, nil)
+}
+
+func unblocksignals() {
+	rtsigprocmask(_SIG_SETMASK, &sigset_none, nil, int32(unsafe.Sizeof(sigset_none)))
+}
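The comment block at the top of this file spells out the futexsleep/futexwakeup contract: sleep only while *addr still holds val, tolerate spurious wakeups, and wake sleepers when the word changes. A portable sketch of that contract, using sync.Cond as a stand-in for the futex syscall (the fakeFutex type is invented for illustration and is not how the runtime implements it):

	package main

	import (
		"fmt"
		"sync"
		"time"
	)

	// fakeFutex emulates the futexsleep/futexwakeup contract with a
	// mutex and condition variable instead of the futex syscall.
	type fakeFutex struct {
		mu   sync.Mutex
		cond *sync.Cond
		word uint32
	}

	func newFakeFutex() *fakeFutex {
		f := &fakeFutex{}
		f.cond = sync.NewCond(&f.mu)
		return f
	}

	// sleep blocks while the word still holds val; like futexsleep it is
	// allowed to return spuriously, so callers must re-check their condition.
	func (f *fakeFutex) sleep(val uint32) {
		f.mu.Lock()
		if f.word == val {
			f.cond.Wait()
		}
		f.mu.Unlock()
	}

	// wakeup stores a new value and wakes any sleepers, like futexwakeup.
	func (f *fakeFutex) wakeup(val uint32) {
		f.mu.Lock()
		f.word = val
		f.mu.Unlock()
		f.cond.Broadcast()
	}

	func main() {
		f := newFakeFutex()
		go func() {
			time.Sleep(10 * time.Millisecond)
			f.wakeup(1)
		}()
		f.sleep(0)
		fmt.Println("woken; word changed to 1")
	}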
diff --git a/src/runtime/os1_openbsd.go b/src/runtime/os1_openbsd.go
new file mode 100644
index 0000000..d5ffe10
--- /dev/null
+++ b/src/runtime/os1_openbsd.go
@@ -0,0 +1,235 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+	ESRCH       = 3
+	EAGAIN      = 35
+	EWOULDBLOCK = EAGAIN
+	ENOTSUP     = 91
+
+	// From OpenBSD's sys/time.h
+	CLOCK_REALTIME  = 0
+	CLOCK_VIRTUAL   = 1
+	CLOCK_PROF      = 2
+	CLOCK_MONOTONIC = 3
+)
+
+var sigset_none = uint32(0)
+var sigset_all = ^sigset_none
+
+// From OpenBSD's <sys/sysctl.h>
+const (
+	CTL_HW  = 6
+	HW_NCPU = 3
+)
+
+func getncpu() int32 {
+	mib := [2]uint32{CTL_HW, HW_NCPU}
+	out := uint32(0)
+	nout := unsafe.Sizeof(out)
+
+	// Fetch hw.ncpu via sysctl.
+	ret := sysctl(&mib[0], 2, (*byte)(unsafe.Pointer(&out)), &nout, nil, 0)
+	if ret >= 0 {
+		return int32(out)
+	}
+	return 1
+}
+
+//go:nosplit
+func semacreate() uintptr {
+	return 1
+}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+	_g_ := getg()
+
+	// Compute sleep deadline.
+	var tsp *timespec
+	if ns >= 0 {
+		var ts timespec
+		var nsec int32
+		ns += nanotime()
+		ts.set_sec(int64(timediv(ns, 1000000000, &nsec)))
+		ts.set_nsec(nsec)
+		tsp = &ts
+	}
+
+	for {
+		// spin-mutex lock
+		for {
+			if xchg(&_g_.m.waitsemalock, 1) == 0 {
+				break
+			}
+			osyield()
+		}
+
+		if _g_.m.waitsemacount != 0 {
+			// semaphore is available.
+			_g_.m.waitsemacount--
+			// spin-mutex unlock
+			atomicstore(&_g_.m.waitsemalock, 0)
+			return 0 // semaphore acquired
+		}
+
+		// sleep until semaphore != 0 or timeout.
+		// thrsleep unlocks m.waitsemalock.
+		ret := thrsleep((uintptr)(unsafe.Pointer(&_g_.m.waitsemacount)), CLOCK_MONOTONIC, tsp, (uintptr)(unsafe.Pointer(&_g_.m.waitsemalock)), (*int32)(unsafe.Pointer(&_g_.m.waitsemacount)))
+		if ret == EWOULDBLOCK {
+			return -1
+		}
+	}
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+	// spin-mutex lock
+	for {
+		if xchg(&mp.waitsemalock, 1) == 0 {
+			break
+		}
+		osyield()
+	}
+	mp.waitsemacount++
+	ret := thrwakeup(uintptr(unsafe.Pointer(&mp.waitsemacount)), 1)
+	if ret != 0 && ret != ESRCH {
+		// semawakeup can be called on signal stack.
+		systemstack(func() {
+			print("thrwakeup addr=", &mp.waitsemacount, " sem=", mp.waitsemacount, " ret=", ret, "\n")
+		})
+	}
+	// spin-mutex unlock
+	atomicstore(&mp.waitsemalock, 0)
+}
+
+func newosproc(mp *m, stk unsafe.Pointer) {
+	if false {
+		print("newosproc stk=", stk, " m=", mp, " g=", mp.g0, " id=", mp.id, "/", int32(mp.tls[0]), " ostk=", &mp, "\n")
+	}
+
+	mp.tls[0] = uintptr(mp.id) // so 386 asm can find it
+
+	param := tforkt{
+		tf_tcb:   unsafe.Pointer(&mp.tls[0]),
+		tf_tid:   (*int32)(unsafe.Pointer(&mp.procid)),
+		tf_stack: uintptr(stk),
+	}
+
+	oset := sigprocmask(_SIG_SETMASK, sigset_all)
+	ret := tfork(&param, unsafe.Sizeof(param), mp, mp.g0, funcPC(mstart))
+	sigprocmask(_SIG_SETMASK, oset)
+
+	if ret < 0 {
+		print("runtime: failed to create new OS thread (have ", mcount()-1, " already; errno=", -ret, ")\n")
+		if ret == -ENOTSUP {
+			print("runtime: is kern.rthreads disabled?\n")
+		}
+		gothrow("runtime.newosproc")
+	}
+}
+
+func osinit() {
+	ncpu = getncpu()
+}
+
+var urandom_data [_HashRandomBytes]byte
+var urandom_dev = []byte("/dev/urandom\x00")
+
+//go:nosplit
+func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes {
+		*rnd = unsafe.Pointer(&urandom_data[0])
+		*rnd_len = _HashRandomBytes
+	} else {
+		*rnd = nil
+		*rnd_len = 0
+	}
+	close(fd)
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024)
+	mp.gsignal.m = mp
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+func minit() {
+	_g_ := getg()
+
+	// m.procid is a uint64, but tfork writes an int32. Fix it up.
+	_g_.m.procid = uint64(*(*int32)(unsafe.Pointer(&_g_.m.procid)))
+
+	// Initialize signal handling
+	signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
+	sigprocmask(_SIG_SETMASK, sigset_none)
+}
+
+// Called from dropm to undo the effect of an minit.
+func unminit() {
+	signalstack(nil, 0)
+}
+
+func memlimit() uintptr {
+	return 0
+}
+
+func sigtramp()
+
+type sigactiont struct {
+	sa_sigaction uintptr
+	sa_mask      uint32
+	sa_flags     int32
+}
+
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = sigset_all
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	sa.sa_sigaction = fn
+	sigaction(i, &sa, nil)
+}
+
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	sigaction(i, nil, &sa)
+	if sa.sa_sigaction == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return sa.sa_sigaction
+}
+
+func signalstack(p *byte, n int32) {
+	var st stackt
+
+	st.ss_sp = uintptr(unsafe.Pointer(p))
+	st.ss_size = uintptr(n)
+	st.ss_flags = 0
+	if p == nil {
+		st.ss_flags = _SS_DISABLE
+	}
+	sigaltstack(&st, nil)
+}
+
+func unblocksignals() {
+	sigprocmask(_SIG_SETMASK, sigset_none)
+}
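semasleep and semawakeup above protect the per-m counter with a hand-rolled spin mutex: xchg to acquire, atomicstore to release, and osyield between failed attempts. The same shape in portable Go, with sync/atomic and runtime.Gosched standing in for xchg and osyield (spinLock is an illustrative name, not runtime API):

	package main

	import (
		"fmt"
		"runtime"
		"sync"
		"sync/atomic"
	)

	// spinLock mirrors the waitsemalock protocol: swap 1 in until the old
	// value was 0, yielding the processor between failed attempts.
	type spinLock uint32

	func (l *spinLock) lock() {
		for atomic.SwapUint32((*uint32)(l), 1) != 0 {
			runtime.Gosched() // stand-in for osyield
		}
	}

	func (l *spinLock) unlock() {
		atomic.StoreUint32((*uint32)(l), 0)
	}

	func main() {
		var l spinLock
		var n int
		var wg sync.WaitGroup
		for i := 0; i < 4; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				for j := 0; j < 1000; j++ {
					l.lock()
					n++
					l.unlock()
				}
			}()
		}
		wg.Wait()
		fmt.Println(n) // 4000
	}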
diff --git a/src/runtime/os2_darwin.go b/src/runtime/os2_darwin.go
new file mode 100644
index 0000000..542bd74
--- /dev/null
+++ b/src/runtime/os2_darwin.go
@@ -0,0 +1,14 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_NSIG        = 32
+	_SI_USER     = 0 /* empirically true, but not what headers say */
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+	_SS_DISABLE  = 4
+)
diff --git a/src/runtime/os2_dragonfly.go b/src/runtime/os2_dragonfly.go
new file mode 100644
index 0000000..0a20ed4
--- /dev/null
+++ b/src/runtime/os2_dragonfly.go
@@ -0,0 +1,12 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_NSIG       = 33
+	_SI_USER    = 0x10001
+	_SS_DISABLE = 4
+	_RLIMIT_AS  = 10
+)
diff --git a/src/runtime/os2_freebsd.go b/src/runtime/os2_freebsd.go
new file mode 100644
index 0000000..f67211f
--- /dev/null
+++ b/src/runtime/os2_freebsd.go
@@ -0,0 +1,12 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_SS_DISABLE = 4
+	_NSIG       = 33
+	_SI_USER    = 0x10001
+	_RLIMIT_AS  = 10
+)
diff --git a/src/runtime/os2_linux.go b/src/runtime/os2_linux.go
new file mode 100644
index 0000000..eaa9f0e8
--- /dev/null
+++ b/src/runtime/os2_linux.go
@@ -0,0 +1,23 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_SS_DISABLE  = 2
+	_NSIG        = 65
+	_SI_USER     = 0
+	_SIG_SETMASK = 2
+	_RLIMIT_AS   = 9
+)
+
+// It's hard to tease out exactly how big a Sigset is, but
+// rt_sigprocmask crashes if we get it wrong, so if binaries
+// are running, this is right.
+type sigset [2]uint32
+
+type rlimit struct {
+	rlim_cur uintptr
+	rlim_max uintptr
+}
diff --git a/src/runtime/os2_openbsd.go b/src/runtime/os2_openbsd.go
new file mode 100644
index 0000000..1e785ad
--- /dev/null
+++ b/src/runtime/os2_openbsd.go
@@ -0,0 +1,14 @@
+// Copyright 2010 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_SS_DISABLE  = 4
+	_SIG_BLOCK   = 1
+	_SIG_UNBLOCK = 2
+	_SIG_SETMASK = 3
+	_NSIG        = 33
+	_SI_USER     = 0
+)
diff --git a/src/runtime/os2_solaris.go b/src/runtime/os2_solaris.go
new file mode 100644
index 0000000..26ca15f
--- /dev/null
+++ b/src/runtime/os2_solaris.go
@@ -0,0 +1,13 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_SS_DISABLE  = 2
+	_SIG_SETMASK = 3
+	_NSIG        = 73 /* number of signals in sigtable array */
+	_SI_USER     = 0
+	_RLIMIT_AS   = 10
+)
diff --git a/src/runtime/os3_solaris.go b/src/runtime/os3_solaris.go
new file mode 100644
index 0000000..1df74fa
--- /dev/null
+++ b/src/runtime/os3_solaris.go
@@ -0,0 +1,492 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//go:cgo_export_dynamic runtime.end _end
+//go:cgo_export_dynamic runtime.etext _etext
+//go:cgo_export_dynamic runtime.edata _edata
+
+//go:cgo_import_dynamic libc____errno ___errno "libc.so"
+//go:cgo_import_dynamic libc_clock_gettime clock_gettime "libc.so"
+//go:cgo_import_dynamic libc_close close "libc.so"
+//go:cgo_import_dynamic libc_exit exit "libc.so"
+//go:cgo_import_dynamic libc_fstat fstat "libc.so"
+//go:cgo_import_dynamic libc_getcontext getcontext "libc.so"
+//go:cgo_import_dynamic libc_getrlimit getrlimit "libc.so"
+//go:cgo_import_dynamic libc_madvise madvise "libc.so"
+//go:cgo_import_dynamic libc_malloc malloc "libc.so"
+//go:cgo_import_dynamic libc_mmap mmap "libc.so"
+//go:cgo_import_dynamic libc_munmap munmap "libc.so"
+//go:cgo_import_dynamic libc_open open "libc.so"
+//go:cgo_import_dynamic libc_pthread_attr_destroy pthread_attr_destroy "libc.so"
+//go:cgo_import_dynamic libc_pthread_attr_getstack pthread_attr_getstack "libc.so"
+//go:cgo_import_dynamic libc_pthread_attr_init pthread_attr_init "libc.so"
+//go:cgo_import_dynamic libc_pthread_attr_setdetachstate pthread_attr_setdetachstate "libc.so"
+//go:cgo_import_dynamic libc_pthread_attr_setstack pthread_attr_setstack "libc.so"
+//go:cgo_import_dynamic libc_pthread_create pthread_create "libc.so"
+//go:cgo_import_dynamic libc_raise raise "libc.so"
+//go:cgo_import_dynamic libc_read read "libc.so"
+//go:cgo_import_dynamic libc_select select "libc.so"
+//go:cgo_import_dynamic libc_sched_yield sched_yield "libc.so"
+//go:cgo_import_dynamic libc_sem_init sem_init "libc.so"
+//go:cgo_import_dynamic libc_sem_post sem_post "libc.so"
+//go:cgo_import_dynamic libc_sem_reltimedwait_np sem_reltimedwait_np "libc.so"
+//go:cgo_import_dynamic libc_sem_wait sem_wait "libc.so"
+//go:cgo_import_dynamic libc_setitimer setitimer "libc.so"
+//go:cgo_import_dynamic libc_sigaction sigaction "libc.so"
+//go:cgo_import_dynamic libc_sigaltstack sigaltstack "libc.so"
+//go:cgo_import_dynamic libc_sigprocmask sigprocmask "libc.so"
+//go:cgo_import_dynamic libc_sysconf sysconf "libc.so"
+//go:cgo_import_dynamic libc_usleep usleep "libc.so"
+//go:cgo_import_dynamic libc_write write "libc.so"
+
+//go:linkname libc____errno libc____errno
+//go:linkname libc_clock_gettime libc_clock_gettime
+//go:linkname libc_close libc_close
+//go:linkname libc_exit libc_exit
+//go:linkname libc_fstat libc_fstat
+//go:linkname libc_getcontext libc_getcontext
+//go:linkname libc_getrlimit libc_getrlimit
+//go:linkname libc_madvise libc_madvise
+//go:linkname libc_malloc libc_malloc
+//go:linkname libc_mmap libc_mmap
+//go:linkname libc_munmap libc_munmap
+//go:linkname libc_open libc_open
+//go:linkname libc_pthread_attr_destroy libc_pthread_attr_destroy
+//go:linkname libc_pthread_attr_getstack libc_pthread_attr_getstack
+//go:linkname libc_pthread_attr_init libc_pthread_attr_init
+//go:linkname libc_pthread_attr_setdetachstate libc_pthread_attr_setdetachstate
+//go:linkname libc_pthread_attr_setstack libc_pthread_attr_setstack
+//go:linkname libc_pthread_create libc_pthread_create
+//go:linkname libc_raise libc_raise
+//go:linkname libc_read libc_read
+//go:linkname libc_select libc_select
+//go:linkname libc_sched_yield libc_sched_yield
+//go:linkname libc_sem_init libc_sem_init
+//go:linkname libc_sem_post libc_sem_post
+//go:linkname libc_sem_reltimedwait_np libc_sem_reltimedwait_np
+//go:linkname libc_sem_wait libc_sem_wait
+//go:linkname libc_setitimer libc_setitimer
+//go:linkname libc_sigaction libc_sigaction
+//go:linkname libc_sigaltstack libc_sigaltstack
+//go:linkname libc_sigprocmask libc_sigprocmask
+//go:linkname libc_sysconf libc_sysconf
+//go:linkname libc_usleep libc_usleep
+//go:linkname libc_write libc_write
+
+var (
+	libc____errno,
+	libc_clock_gettime,
+	libc_close,
+	libc_exit,
+	libc_fstat,
+	libc_getcontext,
+	libc_getrlimit,
+	libc_madvise,
+	libc_malloc,
+	libc_mmap,
+	libc_munmap,
+	libc_open,
+	libc_pthread_attr_destroy,
+	libc_pthread_attr_getstack,
+	libc_pthread_attr_init,
+	libc_pthread_attr_setdetachstate,
+	libc_pthread_attr_setstack,
+	libc_pthread_create,
+	libc_raise,
+	libc_read,
+	libc_sched_yield,
+	libc_select,
+	libc_sem_init,
+	libc_sem_post,
+	libc_sem_reltimedwait_np,
+	libc_sem_wait,
+	libc_setitimer,
+	libc_sigaction,
+	libc_sigaltstack,
+	libc_sigprocmask,
+	libc_sysconf,
+	libc_usleep,
+	libc_write libcFunc
+)
+
+var sigset_none = sigset{}
+var sigset_all = sigset{[4]uint32{^uint32(0), ^uint32(0), ^uint32(0), ^uint32(0)}}
+
+func getncpu() int32 {
+	n := int32(sysconf(__SC_NPROCESSORS_ONLN))
+	if n < 1 {
+		return 1
+	}
+	return n
+}
+
+func osinit() {
+	ncpu = getncpu()
+}
+
+func tstart_sysvicall()
+
+func newosproc(mp *m, _ unsafe.Pointer) {
+	var (
+		attr pthreadattr
+		oset sigset
+		tid  pthread
+		ret  int32
+		size uint64
+	)
+
+	if pthread_attr_init(&attr) != 0 {
+		gothrow("pthread_attr_init")
+	}
+	if pthread_attr_setstack(&attr, 0, 0x200000) != 0 {
+		gothrow("pthread_attr_setstack")
+	}
+	if pthread_attr_getstack(&attr, unsafe.Pointer(&mp.g0.stack.hi), &size) != 0 {
+		gothrow("pthread_attr_getstack")
+	}
+	mp.g0.stack.lo = mp.g0.stack.hi - uintptr(size)
+	if pthread_attr_setdetachstate(&attr, _PTHREAD_CREATE_DETACHED) != 0 {
+		gothrow("pthread_attr_setdetachstate")
+	}
+
+	// Disable signals during create, so that the new thread starts
+	// with signals disabled.  It will enable them in minit.
+	sigprocmask(_SIG_SETMASK, &sigset_all, &oset)
+	ret = pthread_create(&tid, &attr, funcPC(tstart_sysvicall), unsafe.Pointer(mp))
+	sigprocmask(_SIG_SETMASK, &oset, nil)
+	if ret != 0 {
+		print("runtime: failed to create new OS thread (have ", mcount(), " already; errno=", ret, ")\n")
+		gothrow("newosproc")
+	}
+}
+
+var urandom_data [_HashRandomBytes]byte
+var urandom_dev = []byte("/dev/random\x00")
+
+//go:nosplit
+func get_random_data(rnd *unsafe.Pointer, rnd_len *int32) {
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	if read(fd, unsafe.Pointer(&urandom_data), _HashRandomBytes) == _HashRandomBytes {
+		*rnd = unsafe.Pointer(&urandom_data[0])
+		*rnd_len = _HashRandomBytes
+	} else {
+		*rnd = nil
+		*rnd_len = 0
+	}
+	close(fd)
+}
+
+func goenvs() {
+	goenvs_unix()
+}
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
+func mpreinit(mp *m) {
+	mp.gsignal = malg(32 * 1024)
+	mp.gsignal.m = mp
+}
+
+func miniterrno()
+
+// Called to initialize a new m (including the bootstrap m).
+// Called on the new thread, can not allocate memory.
+func minit() {
+	_g_ := getg()
+	asmcgocall(unsafe.Pointer(funcPC(miniterrno)), unsafe.Pointer(libc____errno))
+	// Initialize signal handling
+	signalstack((*byte)(unsafe.Pointer(_g_.m.gsignal.stack.lo)), 32*1024)
+	sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+}
+
+// Called from dropm to undo the effect of an minit.
+func unminit() {
+	signalstack(nil, 0)
+}
+
+func memlimit() uintptr {
+	/*
+		TODO: Convert to Go when something actually uses the result.
+		Rlimit rl;
+		extern byte runtime·text[], runtime·end[];
+		uintptr used;
+
+		if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+			return 0;
+		if(rl.rlim_cur >= 0x7fffffff)
+			return 0;
+
+		// Estimate our VM footprint excluding the heap.
+		// Not an exact science: use size of binary plus
+		// some room for thread stacks.
+		used = runtime·end - runtime·text + (64<<20);
+		if(used >= rl.rlim_cur)
+			return 0;
+
+		// If there's not at least 16 MB left, we're probably
+		// not going to be able to do much.  Treat as no limit.
+		rl.rlim_cur -= used;
+		if(rl.rlim_cur < (16<<20))
+			return 0;
+
+		return rl.rlim_cur - used;
+	*/
+
+	return 0
+}
+
+func sigtramp()
+
+func setsig(i int32, fn uintptr, restart bool) {
+	var sa sigactiont
+
+	sa.sa_flags = _SA_SIGINFO | _SA_ONSTACK
+	if restart {
+		sa.sa_flags |= _SA_RESTART
+	}
+	sa.sa_mask = sigset_all
+	if fn == funcPC(sighandler) {
+		fn = funcPC(sigtramp)
+	}
+	*((*uintptr)(unsafe.Pointer(&sa._funcptr))) = fn
+	sigaction(i, &sa, nil)
+}
+
+func getsig(i int32) uintptr {
+	var sa sigactiont
+	sigaction(i, nil, &sa)
+	if *((*uintptr)(unsafe.Pointer(&sa._funcptr))) == funcPC(sigtramp) {
+		return funcPC(sighandler)
+	}
+	return *((*uintptr)(unsafe.Pointer(&sa._funcptr)))
+}
+
+func signalstack(p *byte, n int32) {
+	var st sigaltstackt
+	st.ss_sp = (*byte)(unsafe.Pointer(p))
+	st.ss_size = uint64(n)
+	st.ss_flags = 0
+	if p == nil {
+		st.ss_flags = _SS_DISABLE
+	}
+	sigaltstack(&st, nil)
+}
+
+func unblocksignals() {
+	sigprocmask(_SIG_SETMASK, &sigset_none, nil)
+}
+
+//go:nosplit
+func semacreate() uintptr {
+	var sem *semt
+	_g_ := getg()
+
+	// Call libc's malloc rather than the runtime's malloc.  This will
+	// allocate space on the C heap.  We can't call the runtime's malloc
+	// here because it could cause a deadlock.
+	_g_.m.libcall.fn = uintptr(libc_malloc)
+	_g_.m.libcall.n = 1
+	memclr(unsafe.Pointer(&_g_.m.scratch), uintptr(len(_g_.m.scratch.v)))
+	_g_.m.scratch.v[0] = unsafe.Sizeof(*sem)
+	_g_.m.libcall.args = uintptr(unsafe.Pointer(&_g_.m.scratch))
+	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&_g_.m.libcall))
+	sem = (*semt)(unsafe.Pointer(_g_.m.libcall.r1))
+	if sem_init(sem, 0, 0) != 0 {
+		gothrow("sem_init")
+	}
+	return uintptr(unsafe.Pointer(sem))
+}
+
+//go:nosplit
+func semasleep(ns int64) int32 {
+	_m_ := getg().m
+	if ns >= 0 {
+		_m_.ts.tv_sec = ns / 1000000000
+		_m_.ts.tv_nsec = ns % 1000000000
+
+		_m_.libcall.fn = uintptr(unsafe.Pointer(libc_sem_reltimedwait_np))
+		_m_.libcall.n = 2
+		memclr(unsafe.Pointer(&_m_.scratch), uintptr(len(_m_.scratch.v)))
+		_m_.scratch.v[0] = _m_.waitsema
+		_m_.scratch.v[1] = uintptr(unsafe.Pointer(&_m_.ts))
+		_m_.libcall.args = uintptr(unsafe.Pointer(&_m_.scratch))
+		asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&_m_.libcall))
+		if *_m_.perrno != 0 {
+			if *_m_.perrno == _ETIMEDOUT || *_m_.perrno == _EAGAIN || *_m_.perrno == _EINTR {
+				return -1
+			}
+			gothrow("sem_reltimedwait_np")
+		}
+		return 0
+	}
+	for {
+		_m_.libcall.fn = uintptr(unsafe.Pointer(libc_sem_wait))
+		_m_.libcall.n = 1
+		memclr(unsafe.Pointer(&_m_.scratch), uintptr(len(_m_.scratch.v)))
+		_m_.scratch.v[0] = _m_.waitsema
+		_m_.libcall.args = uintptr(unsafe.Pointer(&_m_.scratch))
+		asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&_m_.libcall))
+		if _m_.libcall.r1 == 0 {
+			break
+		}
+		if *_m_.perrno == _EINTR {
+			continue
+		}
+		gothrow("sem_wait")
+	}
+	return 0
+}
+
+//go:nosplit
+func semawakeup(mp *m) {
+	if sem_post((*semt)(unsafe.Pointer(mp.waitsema))) != 0 {
+		gothrow("sem_post")
+	}
+}
+
+//go:nosplit
+func close(fd int32) int32 {
+	return int32(sysvicall1(libc_close, uintptr(fd)))
+}
+
+//go:nosplit
+func exit(r int32) {
+	sysvicall1(libc_exit, uintptr(r))
+}
+
+//go:nosplit
+func getcontext(context *ucontext) /* int32 */ {
+	sysvicall1(libc_getcontext, uintptr(unsafe.Pointer(context)))
+}
+
+//go:nosplit
+func madvise(addr unsafe.Pointer, n uintptr, flags int32) {
+	sysvicall3(libc_madvise, uintptr(addr), uintptr(n), uintptr(flags))
+}
+
+//go:nosplit
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer {
+	return unsafe.Pointer(sysvicall6(libc_mmap, uintptr(addr), uintptr(n), uintptr(prot), uintptr(flags), uintptr(fd), uintptr(off)))
+}
+
+//go:nosplit
+func munmap(addr unsafe.Pointer, n uintptr) {
+	sysvicall2(libc_munmap, uintptr(addr), uintptr(n))
+}
+
+func nanotime1()
+
+//go:nosplit
+func nanotime() int64 {
+	return int64(sysvicall0(libcFunc(funcPC(nanotime1))))
+}
+
+//go:nosplit
+func open(path *byte, mode, perm int32) int32 {
+	return int32(sysvicall3(libc_open, uintptr(unsafe.Pointer(path)), uintptr(mode), uintptr(perm)))
+}
+
+func pthread_attr_destroy(attr *pthreadattr) int32 {
+	return int32(sysvicall1(libc_pthread_attr_destroy, uintptr(unsafe.Pointer(attr))))
+}
+
+func pthread_attr_getstack(attr *pthreadattr, addr unsafe.Pointer, size *uint64) int32 {
+	return int32(sysvicall3(libc_pthread_attr_getstack, uintptr(unsafe.Pointer(attr)), uintptr(addr), uintptr(unsafe.Pointer(size))))
+}
+
+func pthread_attr_init(attr *pthreadattr) int32 {
+	return int32(sysvicall1(libc_pthread_attr_init, uintptr(unsafe.Pointer(attr))))
+}
+
+func pthread_attr_setdetachstate(attr *pthreadattr, state int32) int32 {
+	return int32(sysvicall2(libc_pthread_attr_setdetachstate, uintptr(unsafe.Pointer(attr)), uintptr(state)))
+}
+
+func pthread_attr_setstack(attr *pthreadattr, addr uintptr, size uint64) int32 {
+	return int32(sysvicall3(libc_pthread_attr_setstack, uintptr(unsafe.Pointer(attr)), uintptr(addr), uintptr(size)))
+}
+
+func pthread_create(thread *pthread, attr *pthreadattr, fn uintptr, arg unsafe.Pointer) int32 {
+	return int32(sysvicall4(libc_pthread_create, uintptr(unsafe.Pointer(thread)), uintptr(unsafe.Pointer(attr)), uintptr(fn), uintptr(arg)))
+}
+
+func raise(sig int32) /* int32 */ {
+	sysvicall1(libc_raise, uintptr(sig))
+}
+
+//go:nosplit
+func read(fd int32, buf unsafe.Pointer, nbyte int32) int32 {
+	return int32(sysvicall3(libc_read, uintptr(fd), uintptr(buf), uintptr(nbyte)))
+}
+
+//go:nosplit
+func sem_init(sem *semt, pshared int32, value uint32) int32 {
+	return int32(sysvicall3(libc_sem_init, uintptr(unsafe.Pointer(sem)), uintptr(pshared), uintptr(value)))
+}
+
+//go:nosplit
+func sem_post(sem *semt) int32 {
+	return int32(sysvicall1(libc_sem_post, uintptr(unsafe.Pointer(sem))))
+}
+
+//go:nosplit
+func sem_reltimedwait_np(sem *semt, timeout *timespec) int32 {
+	return int32(sysvicall2(libc_sem_reltimedwait_np, uintptr(unsafe.Pointer(sem)), uintptr(unsafe.Pointer(timeout))))
+}
+
+//go:nosplit
+func sem_wait(sem *semt) int32 {
+	return int32(sysvicall1(libc_sem_wait, uintptr(unsafe.Pointer(sem))))
+}
+
+func setitimer(which int32, value *itimerval, ovalue *itimerval) /* int32 */ {
+	sysvicall3(libc_setitimer, uintptr(which), uintptr(unsafe.Pointer(value)), uintptr(unsafe.Pointer(ovalue)))
+}
+
+func sigaction(sig int32, act *sigactiont, oact *sigactiont) /* int32 */ {
+	sysvicall3(libc_sigaction, uintptr(sig), uintptr(unsafe.Pointer(act)), uintptr(unsafe.Pointer(oact)))
+}
+
+func sigaltstack(ss *sigaltstackt, oss *sigaltstackt) /* int32 */ {
+	sysvicall2(libc_sigaltstack, uintptr(unsafe.Pointer(ss)), uintptr(unsafe.Pointer(oss)))
+}
+
+func sigprocmask(how int32, set *sigset, oset *sigset) /* int32 */ {
+	sysvicall3(libc_sigprocmask, uintptr(how), uintptr(unsafe.Pointer(set)), uintptr(unsafe.Pointer(oset)))
+}
+
+func sysconf(name int32) int64 {
+	return int64(sysvicall1(libc_sysconf, uintptr(name)))
+}
+
+func usleep1(uint32)
+
+//go:nosplit
+func usleep(µs uint32) {
+	usleep1(µs)
+}
+
+//go:nosplit
+func write(fd uintptr, buf unsafe.Pointer, nbyte int32) int32 {
+	return int32(sysvicall3(libc_write, uintptr(fd), uintptr(buf), uintptr(nbyte)))
+}
+
+func osyield1()
+
+//go:nosplit
+func osyield() {
+	_g_ := getg()
+
+	// Check the validity of m because we might be called on a cgo callback
+	// path early enough that there isn't an m available yet.
+	if _g_ != nil && _g_.m != nil {
+		sysvicall0(libc_sched_yield)
+		return
+	}
+	osyield1()
+}
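semasleep above blocks on a POSIX semaphore through sem_reltimedwait_np, reporting -1 on timeout and 0 on acquisition, while semawakeup posts the semaphore with sem_post. A sketch of just that contract in portable Go, using a buffered channel rather than libc (the sema type is invented for illustration):

	package main

	import (
		"fmt"
		"time"
	)

	// sema models the semasleep/semawakeup contract: acquire returns 0
	// once a token is available, or -1 if ns >= 0 and the wait times out;
	// wakeup posts one token.
	type sema chan struct{}

	func (s sema) acquire(ns int64) int32 {
		if ns < 0 {
			<-s
			return 0
		}
		select {
		case <-s:
			return 0
		case <-time.After(time.Duration(ns)):
			return -1
		}
	}

	func (s sema) wakeup() {
		s <- struct{}{}
	}

	func main() {
		s := make(sema, 1)
		go func() {
			time.Sleep(5 * time.Millisecond)
			s.wakeup()
		}()
		fmt.Println(s.acquire(int64(time.Second)))            // 0: woken before the deadline
		fmt.Println(s.acquire(int64(10 * time.Millisecond))) // -1: times out
	}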
diff --git a/src/runtime/os_darwin.c b/src/runtime/os_darwin.c
deleted file mode 100644
index b866863..0000000
--- a/src/runtime/os_darwin.c
+++ /dev/null
@@ -1,570 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-#include "stack.h"
-#include "textflag.h"
-
-extern SigTab runtime·sigtab[];
-
-static Sigset sigset_none;
-static Sigset sigset_all = ~(Sigset)0;
-
-static void
-unimplemented(int8 *name)
-{
-	runtime·prints(name);
-	runtime·prints(" not implemented\n");
-	*(int32*)1231 = 1231;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·semawakeup(M *mp)
-{
-	runtime·mach_semrelease(mp->waitsema);
-}
-
-static void
-semacreate(void)
-{
-	g->m->scalararg[0] = runtime·mach_semcreate();
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·semacreate(void)
-{
-	uintptr x;
-	void (*fn)(void);
-	
-	fn = semacreate;
-	runtime·onM(&fn);
-	x = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-	return x;
-}
-
-// BSD interface for threading.
-void
-runtime·osinit(void)
-{
-	// bsdthread_register delayed until end of goenvs so that we
-	// can look at the environment first.
-
-	// Use sysctl to fetch hw.ncpu.
-	uint32 mib[2];
-	uint32 out;
-	int32 ret;
-	uintptr nout;
-
-	mib[0] = 6;
-	mib[1] = 3;
-	nout = sizeof out;
-	out = 0;
-	ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
-	if(ret >= 0)
-		runtime·ncpu = out;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·get_random_data(byte **rnd, int32 *rnd_len)
-{
-	#pragma dataflag NOPTR
-	static byte urandom_data[HashRandomBytes];
-	int32 fd;
-	fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
-	if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
-		*rnd = urandom_data;
-		*rnd_len = HashRandomBytes;
-	} else {
-		*rnd = nil;
-		*rnd_len = 0;
-	}
-	runtime·close(fd);
-}
-
-void
-runtime·goenvs(void)
-{
-	runtime·goenvs_unix();
-
-	// Register our thread-creation callback (see sys_darwin_{amd64,386}.s)
-	// but only if we're not using cgo.  If we are using cgo we need
-	// to let the C pthread library install its own thread-creation callback.
-	if(!runtime·iscgo) {
-		if(runtime·bsdthread_register() != 0) {
-			if(runtime·getenv("DYLD_INSERT_LIBRARIES"))
-				runtime·throw("runtime: bsdthread_register error (unset DYLD_INSERT_LIBRARIES)");
-			runtime·throw("runtime: bsdthread_register error");
-		}
-	}
-
-}
-
-void
-runtime·newosproc(M *mp, void *stk)
-{
-	int32 errno;
-	Sigset oset;
-
-	mp->tls[0] = mp->id;	// so 386 asm can find it
-	if(0){
-		runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
-			stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
-	}
-
-	runtime·sigprocmask(SIG_SETMASK, &sigset_all, &oset);
-	errno = runtime·bsdthread_create(stk, mp, mp->g0, runtime·mstart);
-	runtime·sigprocmask(SIG_SETMASK, &oset, nil);
-
-	if(errno < 0) {
-		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -errno);
-		runtime·throw("runtime.newosproc");
-	}
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime·mpreinit(M *mp)
-{
-	mp->gsignal = runtime·malg(32*1024);	// OS X wants >=8K, Linux >=2K
-	runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
-
-	mp->gsignal->m = mp;
-	runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime·minit(void)
-{
-	// Initialize signal handling.
-	runtime·signalstack((byte*)g->m->gsignal->stack.lo, 32*1024);
-
-	runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime·unminit(void)
-{
-	runtime·signalstack(nil, 0);
-}
-
-// Mach IPC, to get at semaphores
-// Definitions are in /usr/include/mach on a Mac.
-
-static void
-macherror(int32 r, int8 *fn)
-{
-	runtime·prints("mach error ");
-	runtime·prints(fn);
-	runtime·prints(": ");
-	runtime·printint(r);
-	runtime·prints("\n");
-	runtime·throw("mach error");
-}
-
-enum
-{
-	DebugMach = 0
-};
-
-static MachNDR zerondr;
-
-#define MACH_MSGH_BITS(a, b) ((a) | ((b)<<8))
-
-static int32
-mach_msg(MachHeader *h,
-	int32 op,
-	uint32 send_size,
-	uint32 rcv_size,
-	uint32 rcv_name,
-	uint32 timeout,
-	uint32 notify)
-{
-	// TODO: Loop on interrupt.
-	return runtime·mach_msg_trap(h, op, send_size, rcv_size, rcv_name, timeout, notify);
-}
-
-// Mach RPC (MIG)
-
-enum
-{
-	MinMachMsg = 48,
-	Reply = 100,
-};
-
-#pragma pack on
-typedef struct CodeMsg CodeMsg;
-struct CodeMsg
-{
-	MachHeader h;
-	MachNDR NDR;
-	int32 code;
-};
-#pragma pack off
-
-static int32
-machcall(MachHeader *h, int32 maxsize, int32 rxsize)
-{
-	uint32 *p;
-	int32 i, ret, id;
-	uint32 port;
-	CodeMsg *c;
-
-	if((port = g->m->machport) == 0){
-		port = runtime·mach_reply_port();
-		g->m->machport = port;
-	}
-
-	h->msgh_bits |= MACH_MSGH_BITS(MACH_MSG_TYPE_COPY_SEND, MACH_MSG_TYPE_MAKE_SEND_ONCE);
-	h->msgh_local_port = port;
-	h->msgh_reserved = 0;
-	id = h->msgh_id;
-
-	if(DebugMach){
-		p = (uint32*)h;
-		runtime·prints("send:\t");
-		for(i=0; i<h->msgh_size/sizeof(p[0]); i++){
-			runtime·prints(" ");
-			runtime·printpointer((void*)p[i]);
-			if(i%8 == 7)
-				runtime·prints("\n\t");
-		}
-		if(i%8)
-			runtime·prints("\n");
-	}
-
-	ret = mach_msg(h, MACH_SEND_MSG|MACH_RCV_MSG,
-		h->msgh_size, maxsize, port, 0, 0);
-	if(ret != 0){
-		if(DebugMach){
-			runtime·prints("mach_msg error ");
-			runtime·printint(ret);
-			runtime·prints("\n");
-		}
-		return ret;
-	}
-
-	if(DebugMach){
-		p = (uint32*)h;
-		runtime·prints("recv:\t");
-		for(i=0; i<h->msgh_size/sizeof(p[0]); i++){
-			runtime·prints(" ");
-			runtime·printpointer((void*)p[i]);
-			if(i%8 == 7)
-				runtime·prints("\n\t");
-		}
-		if(i%8)
-			runtime·prints("\n");
-	}
-
-	if(h->msgh_id != id+Reply){
-		if(DebugMach){
-			runtime·prints("mach_msg reply id mismatch ");
-			runtime·printint(h->msgh_id);
-			runtime·prints(" != ");
-			runtime·printint(id+Reply);
-			runtime·prints("\n");
-		}
-		return -303;	// MIG_REPLY_MISMATCH
-	}
-
-	// Look for a response giving the return value.
-	// Any call can send this back with an error,
-	// and some calls only have return values so they
-	// send it back on success too.  I don't quite see how
-	// you know it's one of these and not the full response
-	// format, so just look if the message is right.
-	c = (CodeMsg*)h;
-	if(h->msgh_size == sizeof(CodeMsg)
-	&& !(h->msgh_bits & MACH_MSGH_BITS_COMPLEX)){
-		if(DebugMach){
-			runtime·prints("mig result ");
-			runtime·printint(c->code);
-			runtime·prints("\n");
-		}
-		return c->code;
-	}
-
-	if(h->msgh_size != rxsize){
-		if(DebugMach){
-			runtime·prints("mach_msg reply size mismatch ");
-			runtime·printint(h->msgh_size);
-			runtime·prints(" != ");
-			runtime·printint(rxsize);
-			runtime·prints("\n");
-		}
-		return -307;	// MIG_ARRAY_TOO_LARGE
-	}
-
-	return 0;
-}
-
-
-// Semaphores!
-
-enum
-{
-	Tmach_semcreate = 3418,
-	Rmach_semcreate = Tmach_semcreate + Reply,
-
-	Tmach_semdestroy = 3419,
-	Rmach_semdestroy = Tmach_semdestroy + Reply,
-
-	// Mach calls that get interrupted by Unix signals
-	// return this error code.  We retry them.
-	KERN_ABORTED = 14,
-	KERN_OPERATION_TIMED_OUT = 49,
-};
-
-typedef struct Tmach_semcreateMsg Tmach_semcreateMsg;
-typedef struct Rmach_semcreateMsg Rmach_semcreateMsg;
-typedef struct Tmach_semdestroyMsg Tmach_semdestroyMsg;
-// Rmach_semdestroyMsg = CodeMsg
-
-#pragma pack on
-struct Tmach_semcreateMsg
-{
-	MachHeader h;
-	MachNDR ndr;
-	int32 policy;
-	int32 value;
-};
-
-struct Rmach_semcreateMsg
-{
-	MachHeader h;
-	MachBody body;
-	MachPort semaphore;
-};
-
-struct Tmach_semdestroyMsg
-{
-	MachHeader h;
-	MachBody body;
-	MachPort semaphore;
-};
-#pragma pack off
-
-uint32
-runtime·mach_semcreate(void)
-{
-	union {
-		Tmach_semcreateMsg tx;
-		Rmach_semcreateMsg rx;
-		uint8 pad[MinMachMsg];
-	} m;
-	int32 r;
-
-	m.tx.h.msgh_bits = 0;
-	m.tx.h.msgh_size = sizeof(m.tx);
-	m.tx.h.msgh_remote_port = runtime·mach_task_self();
-	m.tx.h.msgh_id = Tmach_semcreate;
-	m.tx.ndr = zerondr;
-
-	m.tx.policy = 0;	// 0 = SYNC_POLICY_FIFO
-	m.tx.value = 0;
-
-	while((r = machcall(&m.tx.h, sizeof m, sizeof(m.rx))) != 0){
-		if(r == KERN_ABORTED)	// interrupted
-			continue;
-		macherror(r, "semaphore_create");
-	}
-	if(m.rx.body.msgh_descriptor_count != 1)
-		unimplemented("mach_semcreate desc count");
-	return m.rx.semaphore.name;
-}
-
-void
-runtime·mach_semdestroy(uint32 sem)
-{
-	union {
-		Tmach_semdestroyMsg tx;
-		uint8 pad[MinMachMsg];
-	} m;
-	int32 r;
-
-	m.tx.h.msgh_bits = MACH_MSGH_BITS_COMPLEX;
-	m.tx.h.msgh_size = sizeof(m.tx);
-	m.tx.h.msgh_remote_port = runtime·mach_task_self();
-	m.tx.h.msgh_id = Tmach_semdestroy;
-	m.tx.body.msgh_descriptor_count = 1;
-	m.tx.semaphore.name = sem;
-	m.tx.semaphore.disposition = MACH_MSG_TYPE_MOVE_SEND;
-	m.tx.semaphore.type = 0;
-
-	while((r = machcall(&m.tx.h, sizeof m, 0)) != 0){
-		if(r == KERN_ABORTED)	// interrupted
-			continue;
-		macherror(r, "semaphore_destroy");
-	}
-}
-
-// The other calls have simple system call traps in sys_darwin_{amd64,386}.s
-int32 runtime·mach_semaphore_wait(uint32 sema);
-int32 runtime·mach_semaphore_timedwait(uint32 sema, uint32 sec, uint32 nsec);
-int32 runtime·mach_semaphore_signal(uint32 sema);
-int32 runtime·mach_semaphore_signal_all(uint32 sema);
-
-static void
-semasleep(void)
-{
-	int32 r, secs, nsecs;
-	int64 ns;
-	
-	ns = (int64)(uint32)g->m->scalararg[0] | (int64)(uint32)g->m->scalararg[1]<<32;
-	g->m->scalararg[0] = 0;
-	g->m->scalararg[1] = 0;
-
-	if(ns >= 0) {
-		secs = runtime·timediv(ns, 1000000000, &nsecs);
-		r = runtime·mach_semaphore_timedwait(g->m->waitsema, secs, nsecs);
-		if(r == KERN_ABORTED || r == KERN_OPERATION_TIMED_OUT) {
-			g->m->scalararg[0] = -1;
-			return;
-		}
-		if(r != 0)
-			macherror(r, "semaphore_wait");
-		g->m->scalararg[0] = 0;
-		return;
-	}
-	while((r = runtime·mach_semaphore_wait(g->m->waitsema)) != 0) {
-		if(r == KERN_ABORTED)	// interrupted
-			continue;
-		macherror(r, "semaphore_wait");
-	}
-	g->m->scalararg[0] = 0;
-	return;
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·semasleep(int64 ns)
-{
-	int32 r;
-	void (*fn)(void);
-
-	g->m->scalararg[0] = (uint32)ns;
-	g->m->scalararg[1] = (uint32)(ns>>32);
-	fn = semasleep;
-	runtime·onM(&fn);
-	r = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-	return r;
-}
-
-static int32 mach_semrelease_errno;
-
-static void
-mach_semrelease_fail(void)
-{
-	macherror(mach_semrelease_errno, "semaphore_signal");
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·mach_semrelease(uint32 sem)
-{
-	int32 r;
-	void (*fn)(void);
-
-	while((r = runtime·mach_semaphore_signal(sem)) != 0) {
-		if(r == KERN_ABORTED)	// interrupted
-			continue;
-		
-		// mach_semrelease must be completely nosplit,
-		// because it is called from Go code.
-		// If we're going to die, start that process on the m stack
-		// to avoid a Go stack split.
-		// Only do that if we're actually running on the g stack.
-		// We might be on the gsignal stack, and if so, onM will abort.
-		// We use the global variable instead of scalararg because
-		// we might be on the gsignal stack, having interrupted a
-		// normal call to onM. It doesn't quite matter, since the
-		// program is about to die, but better to be clean.
-		mach_semrelease_errno = r;
-		fn = mach_semrelease_fail;
-		if(g == g->m->curg)
-			runtime·onM(&fn);
-		else
-			fn();
-	}
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·osyield(void)
-{
-	runtime·usleep(1);
-}
-
-uintptr
-runtime·memlimit(void)
-{
-	// NOTE(rsc): Could use getrlimit here,
-	// like on FreeBSD or Linux, but Darwin doesn't enforce
-	// ulimit -v, so it's unclear why we'd try to stay within
-	// the limit.
-	return 0;
-}
-
-void
-runtime·setsig(int32 i, GoSighandler *fn, bool restart)
-{
-	SigactionT sa;
-		
-	runtime·memclr((byte*)&sa, sizeof sa);
-	sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
-	if(restart)
-		sa.sa_flags |= SA_RESTART;
-	sa.sa_mask = ~(uintptr)0;
-	sa.sa_tramp = (void*)runtime·sigtramp;	// runtime·sigtramp's job is to call into real handler
-	*(uintptr*)sa.__sigaction_u = (uintptr)fn;
-	runtime·sigaction(i, &sa, nil);
-}
-
-GoSighandler*
-runtime·getsig(int32 i)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	runtime·sigaction(i, nil, &sa);
-	return *(void**)sa.__sigaction_u;
-}
-
-void
-runtime·signalstack(byte *p, int32 n)
-{
-	StackT st;
-
-	st.ss_sp = (void*)p;
-	st.ss_size = n;
-	st.ss_flags = 0;
-	if(p == nil)
-		st.ss_flags = SS_DISABLE;
-	runtime·sigaltstack(&st, nil);
-}
-
-void
-runtime·unblocksignals(void)
-{
-	runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
-}
-
-#pragma textflag NOSPLIT
-int8*
-runtime·signame(int32 sig)
-{
-	return runtime·sigtab[sig].name;
-}
diff --git a/src/runtime/os_darwin.go b/src/runtime/os_darwin.go
index 4327ced..d8296e0 100644
--- a/src/runtime/os_darwin.go
+++ b/src/runtime/os_darwin.go
@@ -6,19 +6,31 @@
 
 import "unsafe"
 
-func bsdthread_create(stk, mm, gg, fn unsafe.Pointer) int32
+func bsdthread_create(stk unsafe.Pointer, mm *m, gg *g, fn uintptr) int32
 func bsdthread_register() int32
+
+//go:noescape
 func mach_msg_trap(h unsafe.Pointer, op int32, send_size, rcv_size, rcv_name, timeout, notify uint32) int32
+
 func mach_reply_port() uint32
 func mach_task_self() uint32
 func mach_thread_self() uint32
+
+//go:noescape
 func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-func sigprocmask(sig int32, new, old unsafe.Pointer)
-func sigaction(mode uint32, new, old unsafe.Pointer)
-func sigaltstack(new, old unsafe.Pointer)
+
+//go:noescape
+func sigprocmask(sig uint32, new, old *uint32)
+
+//go:noescape
+func sigaction(mode uint32, new, old *sigactiont)
+
+//go:noescape
+func sigaltstack(new, old *stackt)
+
 func sigtramp()
-func setitimer(mode int32, new, old unsafe.Pointer)
-func mach_semaphore_wait(sema uint32) int32
-func mach_semaphore_timedwait(sema, sec, nsec uint32) int32
-func mach_semaphore_signal(sema uint32) int32
-func mach_semaphore_signal_all(sema uint32) int32
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+func raise(int32)
diff --git a/src/runtime/os_darwin.h b/src/runtime/os_darwin.h
deleted file mode 100644
index e8bb45d..0000000
--- a/src/runtime/os_darwin.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-typedef byte* kevent_udata;
-
-int32	runtime·bsdthread_create(void*, M*, G*, void(*)(void));
-int32	runtime·bsdthread_register(void);
-int32	runtime·mach_msg_trap(MachHeader*, int32, uint32, uint32, uint32, uint32, uint32);
-uint32	runtime·mach_reply_port(void);
-int32	runtime·mach_semacquire(uint32, int64);
-uint32	runtime·mach_semcreate(void);
-void	runtime·mach_semdestroy(uint32);
-void	runtime·mach_semrelease(uint32);
-void	runtime·mach_semreset(uint32);
-uint32	runtime·mach_task_self(void);
-uint32	runtime·mach_task_self(void);
-uint32	runtime·mach_thread_self(void);
-uint32	runtime·mach_thread_self(void);
-int32	runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
-
-typedef uint32 Sigset;
-void	runtime·sigprocmask(int32, Sigset*, Sigset*);
-void	runtime·unblocksignals(void);
-
-struct SigactionT;
-void	runtime·sigaction(uintptr, struct SigactionT*, struct SigactionT*);
-
-struct StackT;
-void	runtime·sigaltstack(struct StackT*, struct StackT*);
-void	runtime·sigtramp(void);
-void	runtime·sigpanic(void);
-void	runtime·setitimer(int32, Itimerval*, Itimerval*);
-
-
-enum {
-	NSIG = 32,
-	SI_USER = 0, /* empirically true, but not what headers say */
-	SIG_BLOCK = 1,
-	SIG_UNBLOCK = 2,
-	SIG_SETMASK = 3,
-	SS_DISABLE = 4,
-};
diff --git a/src/runtime/os_dragonfly.c b/src/runtime/os_dragonfly.c
deleted file mode 100644
index 051192a..0000000
--- a/src/runtime/os_dragonfly.c
+++ /dev/null
@@ -1,315 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-#include "stack.h"
-#include "textflag.h"
-
-extern SigTab runtime·sigtab[];
-extern int32 runtime·sys_umtx_sleep(uint32*, int32, int32);
-extern int32 runtime·sys_umtx_wakeup(uint32*, int32);
-
-// From DragonFly's <sys/sysctl.h>
-#define	CTL_HW	6
-#define	HW_NCPU	3
-
-static Sigset sigset_none;
-static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
-
-static int32
-getncpu(void)
-{
-	uint32 mib[2];
-	uint32 out;
-	int32 ret;
-	uintptr nout;
-
-	// Fetch hw.ncpu via sysctl.
-	mib[0] = CTL_HW;
-	mib[1] = HW_NCPU;
-	nout = sizeof out;
-	out = 0;
-	ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
-	if(ret >= 0)
-		return out;
-	else
-		return 1;
-}
-
-static void futexsleep(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
-{
-	void (*fn)(void);
-
-	g->m->ptrarg[0] = addr;
-	g->m->scalararg[0] = val;
-	g->m->ptrarg[1] = &ns;
-
-	fn = futexsleep;
-	runtime·onM(&fn);
-}
-
-static void
-futexsleep(void)
-{
-	uint32 *addr;
-	uint32 val;
-	int64 ns;
-	int32 timeout = 0;
-	int32 ret;
-
-	addr = g->m->ptrarg[0];
-	val = g->m->scalararg[0];
-	ns = *(int64*)g->m->ptrarg[1];
-	g->m->ptrarg[0] = nil;
-	g->m->scalararg[0] = 0;
-	g->m->ptrarg[1] = nil;
-
-	if(ns >= 0) {
-		// The timeout is specified in microseconds - ensure that we
-		// do not end up dividing to zero, which would put us to sleep
-		// indefinitely...
-		timeout = runtime·timediv(ns, 1000, nil);
-		if(timeout == 0)
-			timeout = 1;
-	}
-
-	// sys_umtx_sleep will return EWOULDBLOCK (EAGAIN) when the timeout
-	// expires or EBUSY if the mutex value does not match. 
-	ret = runtime·sys_umtx_sleep(addr, val, timeout);
-	if(ret >= 0 || ret == -EINTR || ret == -EAGAIN || ret == -EBUSY)
-		return;
-
-	runtime·prints("umtx_wait addr=");
-	runtime·printpointer(addr);
-	runtime·prints(" val=");
-	runtime·printint(val);
-	runtime·prints(" ret=");
-	runtime·printint(ret);
-	runtime·prints("\n");
-	*(int32*)0x1005 = 0x1005;
-}
-
-static void badfutexwakeup(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·futexwakeup(uint32 *addr, uint32 cnt)
-{
-	int32 ret;
-	void (*fn)(void);
-
-	ret = runtime·sys_umtx_wakeup(addr, cnt);
-	if(ret >= 0)
-		return;
-
-	g->m->ptrarg[0] = addr;
-	g->m->scalararg[0] = ret;
-	fn = badfutexwakeup;
-	if(g == g->m->gsignal)
-		fn();
-	else
-		runtime·onM(&fn);
-	*(int32*)0x1006 = 0x1006;
-}
-
-static void
-badfutexwakeup(void)
-{
-	void *addr;
-	int32 ret;
-	
-	addr = g->m->ptrarg[0];
-	ret = g->m->scalararg[0];
-	runtime·printf("umtx_wake addr=%p ret=%d\n", addr, ret);
-}
-
-void runtime·lwp_start(void*);
-
-void
-runtime·newosproc(M *mp, void *stk)
-{
-	Lwpparams params;
-	Sigset oset;
-
-	if(0){
-		runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
-			stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
-	}
-
-	runtime·sigprocmask(&sigset_all, &oset);
-	runtime·memclr((byte*)&params, sizeof params);
-
-	params.func = runtime·lwp_start;
-	params.arg = (byte*)mp;
-	params.stack = (byte*)stk;
-	params.tid1 = (int32*)&mp->procid;
-	params.tid2 = nil;
-
-	mp->tls[0] = mp->id;	// so 386 asm can find it
-
-	runtime·lwp_create(&params);
-	runtime·sigprocmask(&oset, nil);
-}
-
-void
-runtime·osinit(void)
-{
-	runtime·ncpu = getncpu();
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·get_random_data(byte **rnd, int32 *rnd_len)
-{
-	#pragma dataflag NOPTR
-	static byte urandom_data[HashRandomBytes];
-	int32 fd;
-	fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
-	if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
-		*rnd = urandom_data;
-		*rnd_len = HashRandomBytes;
-	} else {
-		*rnd = nil;
-		*rnd_len = 0;
-	}
-	runtime·close(fd);
-}
-
-void
-runtime·goenvs(void)
-{
-	runtime·goenvs_unix();
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime·mpreinit(M *mp)
-{
-	mp->gsignal = runtime·malg(32*1024);
-	runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
-
-	mp->gsignal->m = mp;
-	runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime·minit(void)
-{
-	// Initialize signal handling
-	runtime·signalstack((byte*)g->m->gsignal->stack.lo, 32*1024);
-	runtime·sigprocmask(&sigset_none, nil);
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime·unminit(void)
-{
-	runtime·signalstack(nil, 0);
-}
-
-uintptr
-runtime·memlimit(void)
-{
-	Rlimit rl;
-	extern byte runtime·text[], runtime·end[];
-	uintptr used;
-	
-	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
-		return 0;
-	if(rl.rlim_cur >= 0x7fffffff)
-		return 0;
-
-	// Estimate our VM footprint excluding the heap.
-	// Not an exact science: use size of binary plus
-	// some room for thread stacks.
-	used = runtime·end - runtime·text + (64<<20);
-	if(used >= rl.rlim_cur)
-		return 0;
-
-	// If there's not at least 16 MB left, we're probably
-	// not going to be able to do much.  Treat as no limit.
-	rl.rlim_cur -= used;
-	if(rl.rlim_cur < (16<<20))
-		return 0;
-
-	return rl.rlim_cur - used;
-}
-
-extern void runtime·sigtramp(void);
-
-typedef struct sigaction {
-	union {
-		void    (*__sa_handler)(int32);
-		void    (*__sa_sigaction)(int32, Siginfo*, void *);
-	} __sigaction_u;		/* signal handler */
-	int32	sa_flags;		/* see signal options below */
-	Sigset	sa_mask;		/* signal mask to apply */
-} SigactionT;
-
-void
-runtime·setsig(int32 i, GoSighandler *fn, bool restart)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
-	if(restart)
-		sa.sa_flags |= SA_RESTART;
-	sa.sa_mask.__bits[0] = ~(uint32)0;
-	sa.sa_mask.__bits[1] = ~(uint32)0;
-	sa.sa_mask.__bits[2] = ~(uint32)0;
-	sa.sa_mask.__bits[3] = ~(uint32)0;
-	if(fn == runtime·sighandler)
-		fn = (void*)runtime·sigtramp;
-	sa.__sigaction_u.__sa_sigaction = (void*)fn;
-	runtime·sigaction(i, &sa, nil);
-}
-
-GoSighandler*
-runtime·getsig(int32 i)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	runtime·sigaction(i, nil, &sa);
-	if((void*)sa.__sigaction_u.__sa_sigaction == runtime·sigtramp)
-		return runtime·sighandler;
-	return (void*)sa.__sigaction_u.__sa_sigaction;
-}
-
-void
-runtime·signalstack(byte *p, int32 n)
-{
-	StackT st;
-
-	st.ss_sp = (void*)p;
-	st.ss_size = n;
-	st.ss_flags = 0;
-	if(p == nil)
-		st.ss_flags = SS_DISABLE;
-	runtime·sigaltstack(&st, nil);
-}
-
-void
-runtime·unblocksignals(void)
-{
-	runtime·sigprocmask(&sigset_none, nil);
-}
-
-#pragma textflag NOSPLIT
-int8*
-runtime·signame(int32 sig)
-{
-	return runtime·sigtab[sig].name;
-}
diff --git a/src/runtime/os_dragonfly.go b/src/runtime/os_dragonfly.go
index cdaa069..0e00f87 100644
--- a/src/runtime/os_dragonfly.go
+++ b/src/runtime/os_dragonfly.go
@@ -6,15 +6,35 @@
 
 import "unsafe"
 
-func lwp_create(param unsafe.Pointer) int32
-func sigaltstack(new, old unsafe.Pointer)
-func sigaction(sig int32, new, old unsafe.Pointer)
-func sigprocmask(new, old unsafe.Pointer)
-func setitimer(mode int32, new, old unsafe.Pointer)
+//go:noescape
+func lwp_create(param *lwpparams) int32
+
+//go:noescape
+func sigaltstack(new, old *sigaltstackt)
+
+//go:noescape
+func sigaction(sig int32, new, old *sigactiont)
+
+//go:noescape
+func sigprocmask(new, old *sigset)
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+//go:noescape
 func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+
+//go:noescape
 func getrlimit(kind int32, limit unsafe.Pointer) int32
+
 func raise(sig int32)
-func sys_umtx_sleep(addr unsafe.Pointer, val, timeout int32) int32
-func sys_umtx_wakeup(addr unsafe.Pointer, val int32) int32
+
+//go:noescape
+func sys_umtx_sleep(addr *uint32, val, timeout int32) int32
+
+//go:noescape
+func sys_umtx_wakeup(addr *uint32, val int32) int32
+
+func osyield()
 
 const stackSystem = 0
diff --git a/src/runtime/os_dragonfly.h b/src/runtime/os_dragonfly.h
deleted file mode 100644
index 389736a..0000000
--- a/src/runtime/os_dragonfly.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-
-typedef byte* kevent_udata;
-
-int32	runtime·lwp_create(Lwpparams*);
-void	runtime·sigpanic(void);
-void	runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
-struct	sigaction;
-void	runtime·sigaction(int32, struct sigaction*, struct sigaction*);
-void	runtime·sigprocmask(Sigset *, Sigset *);
-void	runtime·unblocksignals(void);
-void	runtime·setitimer(int32, Itimerval*, Itimerval*);
-int32	runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
-
-enum {
-	NSIG = 33,
-	SI_USER = 0x10001,
-	SS_DISABLE = 4,
-	RLIMIT_AS = 10,
-};
-
-typedef struct Rlimit Rlimit;
-struct Rlimit {
-	int64	rlim_cur;
-	int64	rlim_max;
-};
-int32	runtime·getrlimit(int32, Rlimit*);
diff --git a/src/runtime/os_freebsd.c b/src/runtime/os_freebsd.c
deleted file mode 100644
index 1c12654..0000000
--- a/src/runtime/os_freebsd.c
+++ /dev/null
@@ -1,323 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-#include "stack.h"
-#include "textflag.h"
-
-extern SigTab runtime·sigtab[];
-extern int32 runtime·sys_umtx_op(uint32*, int32, uint32, void*, void*);
-
-// From FreeBSD's <sys/sysctl.h>
-#define	CTL_HW	6
-#define	HW_NCPU	3
-
-static Sigset sigset_none;
-static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
-
-static int32
-getncpu(void)
-{
-	uint32 mib[2];
-	uint32 out;
-	int32 ret;
-	uintptr nout;
-
-	// Fetch hw.ncpu via sysctl.
-	mib[0] = CTL_HW;
-	mib[1] = HW_NCPU;
-	nout = sizeof out;
-	out = 0;
-	ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
-	if(ret >= 0)
-		return out;
-	else
-		return 1;
-}
-
-// FreeBSD's umtx_op syscall is effectively the same as Linux's futex, and
-// thus the code is largely similar. See linux/thread.c and lock_futex.c for comments.
-
-static void futexsleep(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
-{
-	void (*fn)(void);
-
-	g->m->ptrarg[0] = addr;
-	g->m->scalararg[0] = val;
-	g->m->ptrarg[1] = &ns;
-
-	fn = futexsleep;
-	runtime·onM(&fn);
-}
-
-static void
-futexsleep(void)
-{
-	uint32 *addr;
-	uint32 val;
-	int64 ns;
-	int32 ret;
-	Timespec ts;
-	
-	addr = g->m->ptrarg[0];
-	val = g->m->scalararg[0];
-	ns = *(int64*)g->m->ptrarg[1];
-	g->m->ptrarg[0] = nil;
-	g->m->scalararg[0] = 0;
-	g->m->ptrarg[1] = nil;
-
-	if(ns < 0) {
-		ret = runtime·sys_umtx_op(addr, UMTX_OP_WAIT_UINT_PRIVATE, val, nil, nil);
-		if(ret >= 0 || ret == -EINTR)
-			return;
-		goto fail;
-	}
-	// NOTE: tv_nsec is int64 on amd64, so this assumes a little-endian system.
-	ts.tv_nsec = 0;
-	ts.tv_sec = runtime·timediv(ns, 1000000000, (int32*)&ts.tv_nsec);
-	ret = runtime·sys_umtx_op(addr, UMTX_OP_WAIT_UINT_PRIVATE, val, nil, &ts);
-	if(ret >= 0 || ret == -EINTR)
-		return;
-
-fail:
-	runtime·prints("umtx_wait addr=");
-	runtime·printpointer(addr);
-	runtime·prints(" val=");
-	runtime·printint(val);
-	runtime·prints(" ret=");
-	runtime·printint(ret);
-	runtime·prints("\n");
-	*(int32*)0x1005 = 0x1005;
-}
-
-static void badfutexwakeup(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·futexwakeup(uint32 *addr, uint32 cnt)
-{
-	int32 ret;
-	void (*fn)(void);
-
-	ret = runtime·sys_umtx_op(addr, UMTX_OP_WAKE_PRIVATE, cnt, nil, nil);
-	if(ret >= 0)
-		return;
-
-	g->m->ptrarg[0] = addr;
-	g->m->scalararg[0] = ret;
-	fn = badfutexwakeup;
-	if(g == g->m->gsignal)
-		fn();
-	else
-		runtime·onM(&fn);
-	*(int32*)0x1006 = 0x1006;
-}
-
-static void
-badfutexwakeup(void)
-{
-	void *addr;
-	int32 ret;
-	
-	addr = g->m->ptrarg[0];
-	ret = g->m->scalararg[0];
-	runtime·printf("umtx_wake addr=%p ret=%d\n", addr, ret);
-}
-
-void runtime·thr_start(void*);
-
-void
-runtime·newosproc(M *mp, void *stk)
-{
-	ThrParam param;
-	Sigset oset;
-
-	if(0){
-		runtime·printf("newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
-			stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
-	}
-
-	runtime·sigprocmask(&sigset_all, &oset);
-	runtime·memclr((byte*)&param, sizeof param);
-
-	param.start_func = runtime·thr_start;
-	param.arg = (byte*)mp;
-	
-	// NOTE(rsc): This code is confused. stackbase is the top of the stack
-	// and is equal to stk. However, it's working, so I'm not changing it.
-	param.stack_base = (void*)mp->g0->stack.hi;
-	param.stack_size = (byte*)stk - (byte*)mp->g0->stack.hi;
-
-	param.child_tid = (void*)&mp->procid;
-	param.parent_tid = nil;
-	param.tls_base = (void*)&mp->tls[0];
-	param.tls_size = sizeof mp->tls;
-
-	mp->tls[0] = mp->id;	// so 386 asm can find it
-
-	runtime·thr_new(&param, sizeof param);
-	runtime·sigprocmask(&oset, nil);
-}
-
-void
-runtime·osinit(void)
-{
-	runtime·ncpu = getncpu();
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·get_random_data(byte **rnd, int32 *rnd_len)
-{
-	#pragma dataflag NOPTR
-	static byte urandom_data[HashRandomBytes];
-	int32 fd;
-	fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
-	if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
-		*rnd = urandom_data;
-		*rnd_len = HashRandomBytes;
-	} else {
-		*rnd = nil;
-		*rnd_len = 0;
-	}
-	runtime·close(fd);
-}
-
-void
-runtime·goenvs(void)
-{
-	runtime·goenvs_unix();
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime·mpreinit(M *mp)
-{
-	mp->gsignal = runtime·malg(32*1024);
-	runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
-
-	mp->gsignal->m = mp;
-	runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime·minit(void)
-{
-	// Initialize signal handling
-	runtime·signalstack((byte*)g->m->gsignal->stack.lo, 32*1024);
-	runtime·sigprocmask(&sigset_none, nil);
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime·unminit(void)
-{
-	runtime·signalstack(nil, 0);
-}
-
-uintptr
-runtime·memlimit(void)
-{
-	Rlimit rl;
-	extern byte runtime·text[], runtime·end[];
-	uintptr used;
-	
-	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
-		return 0;
-	if(rl.rlim_cur >= 0x7fffffff)
-		return 0;
-
-	// Estimate our VM footprint excluding the heap.
-	// Not an exact science: use size of binary plus
-	// some room for thread stacks.
-	used = runtime·end - runtime·text + (64<<20);
-	if(used >= rl.rlim_cur)
-		return 0;
-
-	// If there's not at least 16 MB left, we're probably
-	// not going to be able to do much.  Treat as no limit.
-	rl.rlim_cur -= used;
-	if(rl.rlim_cur < (16<<20))
-		return 0;
-
-	return rl.rlim_cur - used;
-}
-
-extern void runtime·sigtramp(void);
-
-typedef struct sigaction {
-	union {
-		void    (*__sa_handler)(int32);
-		void    (*__sa_sigaction)(int32, Siginfo*, void *);
-	} __sigaction_u;		/* signal handler */
-	int32	sa_flags;		/* see signal options below */
-	Sigset	sa_mask;		/* signal mask to apply */
-} SigactionT;
-
-void
-runtime·setsig(int32 i, GoSighandler *fn, bool restart)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
-	if(restart)
-		sa.sa_flags |= SA_RESTART;
-	sa.sa_mask.__bits[0] = ~(uint32)0;
-	sa.sa_mask.__bits[1] = ~(uint32)0;
-	sa.sa_mask.__bits[2] = ~(uint32)0;
-	sa.sa_mask.__bits[3] = ~(uint32)0;
-	if(fn == runtime·sighandler)
-		fn = (void*)runtime·sigtramp;
-	sa.__sigaction_u.__sa_sigaction = (void*)fn;
-	runtime·sigaction(i, &sa, nil);
-}
-
-GoSighandler*
-runtime·getsig(int32 i)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	runtime·sigaction(i, nil, &sa);
-	if((void*)sa.__sigaction_u.__sa_sigaction == runtime·sigtramp)
-		return runtime·sighandler;
-	return (void*)sa.__sigaction_u.__sa_sigaction;
-}
-
-void
-runtime·signalstack(byte *p, int32 n)
-{
-	StackT st;
-
-	st.ss_sp = (void*)p;
-	st.ss_size = n;
-	st.ss_flags = 0;
-	if(p == nil)
-		st.ss_flags = SS_DISABLE;
-	runtime·sigaltstack(&st, nil);
-}
-
-void
-runtime·unblocksignals(void)
-{
-	runtime·sigprocmask(&sigset_none, nil);
-}
-
-#pragma textflag NOSPLIT
-int8*
-runtime·signame(int32 sig)
-{
-	return runtime·sigtab[sig].name;
-}
diff --git a/src/runtime/os_freebsd.go b/src/runtime/os_freebsd.go
index 5970804..998fbca 100644
--- a/src/runtime/os_freebsd.go
+++ b/src/runtime/os_freebsd.go
@@ -6,12 +6,29 @@
 
 import "unsafe"
 
-func thr_new(param unsafe.Pointer, size int32)
-func sigaltstack(new, old unsafe.Pointer)
-func sigaction(sig int32, new, old unsafe.Pointer)
-func sigprocmask(new, old unsafe.Pointer)
-func setitimer(mode int32, new, old unsafe.Pointer)
+//go:noescape
+func thr_new(param *thrparam, size int32)
+
+//go:noescape
+func sigaltstack(new, old *stackt)
+
+//go:noescape
+func sigaction(sig int32, new, old *sigactiont)
+
+//go:noescape
+func sigprocmask(new, old *sigset)
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+//go:noescape
 func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+
+//go:noescape
 func getrlimit(kind int32, limit unsafe.Pointer) int32
 func raise(sig int32)
-func sys_umtx_op(addr unsafe.Pointer, mode int32, val uint32, ptr2, ts unsafe.Pointer) int32
+
+//go:noescape
+func sys_umtx_op(addr *uint32, mode int32, val uint32, ptr2, ts *timespec) int32
+
+func osyield()
diff --git a/src/runtime/os_freebsd.h b/src/runtime/os_freebsd.h
deleted file mode 100644
index b86bb39..0000000
--- a/src/runtime/os_freebsd.h
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-typedef byte* kevent_udata;
-
-int32	runtime·thr_new(ThrParam*, int32);
-void	runtime·sigpanic(void);
-void	runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
-struct	sigaction;
-void	runtime·sigaction(int32, struct sigaction*, struct sigaction*);
-void	runtime·sigprocmask(Sigset *, Sigset *);
-void	runtime·unblocksignals(void);
-void	runtime·setitimer(int32, Itimerval*, Itimerval*);
-int32	runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
-
-enum {
-	SS_DISABLE = 4,
-	NSIG = 33,
-	SI_USER = 0x10001,
-	RLIMIT_AS = 10,
-};
-
-typedef struct Rlimit Rlimit;
-struct Rlimit {
-	int64	rlim_cur;
-	int64	rlim_max;
-};
-int32	runtime·getrlimit(int32, Rlimit*);
diff --git a/src/runtime/os_freebsd_arm.c b/src/runtime/os_freebsd_arm.go
similarity index 66%
rename from src/runtime/os_freebsd_arm.c
rename to src/runtime/os_freebsd_arm.go
index 2f2d776..e049cbf 100644
--- a/src/runtime/os_freebsd_arm.c
+++ b/src/runtime/os_freebsd_arm.go
@@ -2,23 +2,16 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "textflag.h"
+package runtime
 
-void
-runtime·checkgoarm(void)
-{
+func checkgoarm() {
 	// TODO(minux)
 }
 
-#pragma textflag NOSPLIT
-int64
-runtime·cputicks(void)
-{
+//go:nosplit
+func cputicks() int64 {
 	// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
 	// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
 	// TODO: need more entropy to better seed fastrand1.
-	return runtime·nanotime();
+	return nanotime()
 }
diff --git a/src/runtime/os_linux.c b/src/runtime/os_linux.c
deleted file mode 100644
index cc23774..0000000
--- a/src/runtime/os_linux.c
+++ /dev/null
@@ -1,362 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-#include "stack.h"
-#include "textflag.h"
-
-extern SigTab runtime·sigtab[];
-
-static Sigset sigset_none;
-static Sigset sigset_all = { ~(uint32)0, ~(uint32)0 };
-
-// Linux futex.
-//
-//	futexsleep(uint32 *addr, uint32 val)
-//	futexwakeup(uint32 *addr)
-//
-// Futexsleep atomically checks if *addr == val and if so, sleeps on addr.
-// Futexwakeup wakes up threads sleeping on addr.
-// Futexsleep is allowed to wake up spuriously.
-
-enum
-{
-	FUTEX_WAIT = 0,
-	FUTEX_WAKE = 1,
-};
-
-// Atomically,
-//	if(*addr == val) sleep
-// Might be woken up spuriously; that's allowed.
-// Don't sleep longer than ns; ns < 0 means forever.
-#pragma textflag NOSPLIT
-void
-runtime·futexsleep(uint32 *addr, uint32 val, int64 ns)
-{
-	Timespec ts;
-
-	// Some Linux kernels have a bug where futex of
-	// FUTEX_WAIT returns an internal error code
-	// as an errno.  Libpthread ignores the return value
-	// here, and so can we: as it says a few lines up,
-	// spurious wakeups are allowed.
-
-	if(ns < 0) {
-		runtime·futex(addr, FUTEX_WAIT, val, nil, nil, 0);
-		return;
-	}
-
-	// It's difficult to live within the no-split stack limits here.
-	// On ARM and 386, a 64-bit divide invokes a general software routine
-	// that needs more stack than we can afford. So we use timediv instead.
-	// But on real 64-bit systems, where words are larger but the stack limit
-	// is not, even timediv is too heavy, and we really need to use just an
-	// ordinary machine instruction.
-	// Sorry for the #ifdef.
-	// For what it's worth, the #ifdef eliminated an implicit little-endian assumption.
-#ifdef _64BIT
-	ts.tv_sec = ns / 1000000000LL;
-	ts.tv_nsec = ns % 1000000000LL;
-#else
-	ts.tv_nsec = 0;
-	ts.tv_sec = runtime·timediv(ns, 1000000000LL, (int32*)&ts.tv_nsec);
-#endif
-	runtime·futex(addr, FUTEX_WAIT, val, &ts, nil, 0);
-}
-
-static void badfutexwakeup(void);
-
-// If any procs are sleeping on addr, wake up at most cnt.
-#pragma textflag NOSPLIT
-void
-runtime·futexwakeup(uint32 *addr, uint32 cnt)
-{
-	int64 ret;
-	void (*fn)(void);
-
-	ret = runtime·futex(addr, FUTEX_WAKE, cnt, nil, nil, 0);
-	if(ret >= 0)
-		return;
-
-	// I don't know that futex wakeup can return
-	// EAGAIN or EINTR, but if it does, it would be
-	// safe to loop and call futex again.
-	g->m->ptrarg[0] = addr;
-	g->m->scalararg[0] = (int32)ret; // truncated but fine
-	fn = badfutexwakeup;
-	if(g == g->m->gsignal)
-		fn();
-	else
-		runtime·onM(&fn);
-	*(int32*)0x1006 = 0x1006;
-}
-
-static void
-badfutexwakeup(void)
-{
-	void *addr;
-	int64 ret;
-	
-	addr = g->m->ptrarg[0];
-	ret = (int32)g->m->scalararg[0];
-	runtime·printf("futexwakeup addr=%p returned %D\n", addr, ret);
-}
-
-extern runtime·sched_getaffinity(uintptr pid, uintptr len, uintptr *buf);
-static int32
-getproccount(void)
-{
-	uintptr buf[16], t;
-	int32 r, n, i;
-
-	r = runtime·sched_getaffinity(0, sizeof(buf), buf);
-	if(r <= 0)
-		return 1;
-	n = 0;
-	for(i = 0; i < r/sizeof(buf[0]); i++) {
-		t = buf[i];
-		while(t != 0) {
-			n += t&1;
-			t >>= 1;
-		}
-	}
-	if(n < 1)
-		n = 1;
-	return n;
-}
-
-// Clone, the Linux rfork.
-enum
-{
-	CLONE_VM = 0x100,
-	CLONE_FS = 0x200,
-	CLONE_FILES = 0x400,
-	CLONE_SIGHAND = 0x800,
-	CLONE_PTRACE = 0x2000,
-	CLONE_VFORK = 0x4000,
-	CLONE_PARENT = 0x8000,
-	CLONE_THREAD = 0x10000,
-	CLONE_NEWNS = 0x20000,
-	CLONE_SYSVSEM = 0x40000,
-	CLONE_SETTLS = 0x80000,
-	CLONE_PARENT_SETTID = 0x100000,
-	CLONE_CHILD_CLEARTID = 0x200000,
-	CLONE_UNTRACED = 0x800000,
-	CLONE_CHILD_SETTID = 0x1000000,
-	CLONE_STOPPED = 0x2000000,
-	CLONE_NEWUTS = 0x4000000,
-	CLONE_NEWIPC = 0x8000000,
-};
-
-void
-runtime·newosproc(M *mp, void *stk)
-{
-	int32 ret;
-	int32 flags;
-	Sigset oset;
-
-	/*
-	 * note: strace gets confused if we use CLONE_PTRACE here.
-	 */
-	flags = CLONE_VM	/* share memory */
-		| CLONE_FS	/* share cwd, etc */
-		| CLONE_FILES	/* share fd table */
-		| CLONE_SIGHAND	/* share sig handler table */
-		| CLONE_THREAD	/* revisit - okay for now */
-		;
-
-	mp->tls[0] = mp->id;	// so 386 asm can find it
-	if(0){
-		runtime·printf("newosproc stk=%p m=%p g=%p clone=%p id=%d/%d ostk=%p\n",
-			stk, mp, mp->g0, runtime·clone, mp->id, (int32)mp->tls[0], &mp);
-	}
-
-	// Disable signals during clone, so that the new thread starts
-	// with signals disabled.  It will enable them in minit.
-	runtime·rtsigprocmask(SIG_SETMASK, &sigset_all, &oset, sizeof oset);
-	ret = runtime·clone(flags, stk, mp, mp->g0, runtime·mstart);
-	runtime·rtsigprocmask(SIG_SETMASK, &oset, nil, sizeof oset);
-
-	if(ret < 0) {
-		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), -ret);
-		runtime·throw("runtime.newosproc");
-	}
-}
-
-void
-runtime·osinit(void)
-{
-	runtime·ncpu = getproccount();
-}
-
-// Random bytes initialized at startup.  These come
-// from the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.c).
-byte*	runtime·startup_random_data;
-uint32	runtime·startup_random_data_len;
-
-#pragma textflag NOSPLIT
-void
-runtime·get_random_data(byte **rnd, int32 *rnd_len)
-{
-	if(runtime·startup_random_data != nil) {
-		*rnd = runtime·startup_random_data;
-		*rnd_len = runtime·startup_random_data_len;
-	} else {
-		#pragma dataflag NOPTR
-		static byte urandom_data[HashRandomBytes];
-		int32 fd;
-		fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
-		if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
-			*rnd = urandom_data;
-			*rnd_len = HashRandomBytes;
-		} else {
-			*rnd = nil;
-			*rnd_len = 0;
-		}
-		runtime·close(fd);
-	}
-}
-
-void
-runtime·goenvs(void)
-{
-	runtime·goenvs_unix();
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime·mpreinit(M *mp)
-{
-	mp->gsignal = runtime·malg(32*1024);	// OS X wants >=8K, Linux >=2K
-	runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
-
-	mp->gsignal->m = mp;
-	runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime·minit(void)
-{
-	// Initialize signal handling.
-	runtime·signalstack((byte*)g->m->gsignal->stack.lo, 32*1024);
-	runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof(Sigset));
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime·unminit(void)
-{
-	runtime·signalstack(nil, 0);
-}
-
-uintptr
-runtime·memlimit(void)
-{
-	Rlimit rl;
-	extern byte runtime·text[], runtime·end[];
-	uintptr used;
-
-	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
-		return 0;
-	if(rl.rlim_cur >= 0x7fffffff)
-		return 0;
-
-	// Estimate our VM footprint excluding the heap.
-	// Not an exact science: use size of binary plus
-	// some room for thread stacks.
-	used = runtime·end - runtime·text + (64<<20);
-	if(used >= rl.rlim_cur)
-		return 0;
-
-	// If there's not at least 16 MB left, we're probably
-	// not going to be able to do much.  Treat as no limit.
-	rl.rlim_cur -= used;
-	if(rl.rlim_cur < (16<<20))
-		return 0;
-
-	return rl.rlim_cur - used;
-}
-
-#ifdef GOARCH_386
-#define sa_handler k_sa_handler
-#endif
-
-/*
- * This assembler routine takes the args from registers, puts them on the stack,
- * and calls sighandler().
- */
-extern void runtime·sigtramp(void);
-extern void runtime·sigreturn(void);	// calls rt_sigreturn, only used with SA_RESTORER
-
-void
-runtime·setsig(int32 i, GoSighandler *fn, bool restart)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	sa.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTORER;
-	if(restart)
-		sa.sa_flags |= SA_RESTART;
-	sa.sa_mask = ~0ULL;
-	// Although Linux manpage says "sa_restorer element is obsolete and
-	// should not be used". x86_64 kernel requires it. Only use it on
-	// x86.
-#ifdef GOARCH_386
-	sa.sa_restorer = (void*)runtime·sigreturn;
-#endif
-#ifdef GOARCH_amd64
-	sa.sa_restorer = (void*)runtime·sigreturn;
-#endif
-	if(fn == runtime·sighandler)
-		fn = (void*)runtime·sigtramp;
-	sa.sa_handler = fn;
-	// Qemu rejects rt_sigaction of SIGRTMAX (64).
-	if(runtime·rt_sigaction(i, &sa, nil, sizeof(sa.sa_mask)) != 0 && i != 64)
-		runtime·throw("rt_sigaction failure");
-}
-
-GoSighandler*
-runtime·getsig(int32 i)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	if(runtime·rt_sigaction(i, nil, &sa, sizeof(sa.sa_mask)) != 0)
-		runtime·throw("rt_sigaction read failure");
-	if((void*)sa.sa_handler == runtime·sigtramp)
-		return runtime·sighandler;
-	return (void*)sa.sa_handler;
-}
-
-void
-runtime·signalstack(byte *p, int32 n)
-{
-	SigaltstackT st;
-
-	st.ss_sp = p;
-	st.ss_size = n;
-	st.ss_flags = 0;
-	if(p == nil)
-		st.ss_flags = SS_DISABLE;
-	runtime·sigaltstack(&st, nil);
-}
-
-void
-runtime·unblocksignals(void)
-{
-	runtime·rtsigprocmask(SIG_SETMASK, &sigset_none, nil, sizeof sigset_none);
-}
-
-#pragma textflag NOSPLIT
-int8*
-runtime·signame(int32 sig)
-{
-	return runtime·sigtab[sig].name;
-}
diff --git a/src/runtime/os_linux.go b/src/runtime/os_linux.go
index 41123ad..113219a 100644
--- a/src/runtime/os_linux.go
+++ b/src/runtime/os_linux.go
@@ -6,12 +6,28 @@
 
 import "unsafe"
 
+//go:noescape
 func futex(addr unsafe.Pointer, op int32, val uint32, ts, addr2 unsafe.Pointer, val3 uint32) int32
+
+//go:noescape
 func clone(flags int32, stk, mm, gg, fn unsafe.Pointer) int32
-func rt_sigaction(sig uintptr, new, old unsafe.Pointer, size uintptr) int32
-func sigaltstack(new, old unsafe.Pointer)
-func setitimer(mode int32, new, old unsafe.Pointer)
-func rtsigprocmask(sig int32, new, old unsafe.Pointer, size int32)
+
+//go:noescape
+func rt_sigaction(sig uintptr, new, old *sigactiont, size uintptr) int32
+
+//go:noescape
+func sigaltstack(new, old *sigaltstackt)
+
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
+
+//go:noescape
+func rtsigprocmask(sig uint32, new, old *sigset, size int32)
+
+//go:noescape
 func getrlimit(kind int32, limit unsafe.Pointer) int32
-func raise(sig int32)
+func raise(sig uint32)
+
+//go:noescape
 func sched_getaffinity(pid, len uintptr, buf *uintptr) int32
+func osyield()
diff --git a/src/runtime/os_linux.h b/src/runtime/os_linux.h
deleted file mode 100644
index 75606d6..0000000
--- a/src/runtime/os_linux.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-
-// Linux-specific system calls
-int32	runtime·futex(uint32*, int32, uint32, Timespec*, uint32*, uint32);
-int32	runtime·clone(int32, void*, M*, G*, void(*)(void));
-
-struct SigactionT;
-int32	runtime·rt_sigaction(uintptr, struct SigactionT*, void*, uintptr);
-
-void	runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
-void	runtime·sigpanic(void);
-void runtime·setitimer(int32, Itimerval*, Itimerval*);
-
-enum {
-	SS_DISABLE = 2,
-	NSIG = 65,
-	SI_USER = 0,
-	SIG_SETMASK = 2,
-	RLIMIT_AS = 9,
-};
-
-// It's hard to tease out exactly how big a Sigset is, but
-// rt_sigprocmask crashes if we get it wrong, so if binaries
-// are running, this is right.
-typedef struct Sigset Sigset;
-struct Sigset
-{
-	uint32 mask[2];
-};
-void	runtime·rtsigprocmask(int32, Sigset*, Sigset*, int32);
-void	runtime·unblocksignals(void);
-
-typedef struct Rlimit Rlimit;
-struct Rlimit {
-	uintptr	rlim_cur;
-	uintptr	rlim_max;
-};
-int32	runtime·getrlimit(int32, Rlimit*);
diff --git a/src/runtime/os_linux_386.c b/src/runtime/os_linux_386.c
deleted file mode 100644
index dc89d04..0000000
--- a/src/runtime/os_linux_386.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "textflag.h"
-
-#define AT_NULL		0
-#define AT_RANDOM	25
-#define AT_SYSINFO	32
-extern uint32 runtime·_vdso;
-
-#pragma textflag NOSPLIT
-void
-runtime·linux_setup_vdso(int32 argc, byte **argv)
-{
-	byte **envp;
-	uint32 *auxv;
-
-	// skip envp to get to ELF auxiliary vector.
-	for(envp = &argv[argc+1]; *envp != nil; envp++)
-		;
-	envp++;
-	
-	for(auxv=(uint32*)envp; auxv[0] != AT_NULL; auxv += 2) {
-		if(auxv[0] == AT_SYSINFO) {
-			runtime·_vdso = auxv[1];
-			continue;
-		}
-		if(auxv[0] == AT_RANDOM) {
-			runtime·startup_random_data = (byte*)auxv[1];
-			runtime·startup_random_data_len = 16;
-			continue;
-		}
-	}
-}
diff --git a/src/runtime/os_linux_386.go b/src/runtime/os_linux_386.go
new file mode 100644
index 0000000..adcd5a1
--- /dev/null
+++ b/src/runtime/os_linux_386.go
@@ -0,0 +1,36 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+	_AT_NULL    = 0
+	_AT_RANDOM  = 25
+	_AT_SYSINFO = 32
+)
+
+var _vdso uint32
+
+func sysargs(argc int32, argv **byte) {
+	// skip over argv, envv to get to auxv
+	n := argc + 1
+	for argv_index(argv, n) != nil {
+		n++
+	}
+	n++
+	auxv := (*[1 << 28]uint32)(add(unsafe.Pointer(argv), uintptr(n)*ptrSize))
+
+	for i := 0; auxv[i] != _AT_NULL; i += 2 {
+		switch auxv[i] {
+		case _AT_SYSINFO:
+			_vdso = auxv[i+1]
+
+		case _AT_RANDOM:
+			startup_random_data = (*byte)(unsafe.Pointer(uintptr(auxv[i+1])))
+			startup_random_data_len = 16
+		}
+	}
+}
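
As a rough standalone illustration of what sysargs is walking (not part of this change; it assumes a little-endian 64-bit Linux host and a current Go toolchain, so word size and byte order differ on 386), the same ELF auxiliary vector is visible from ordinary user code via /proc/self/auxv:

	package main

	import (
		"encoding/binary"
		"fmt"
		"os"
	)

	const (
		_AT_NULL    = 0  // terminates the vector
		_AT_RANDOM  = 25 // pointer to 16 kernel-supplied random bytes
		_AT_SYSINFO = 32 // vsyscall entry point (386 only)
	)

	func main() {
		raw, err := os.ReadFile("/proc/self/auxv") // pairs of native-endian words: tag, value
		if err != nil {
			fmt.Println("auxv not readable:", err)
			return
		}
		const word = 8 // bytes per auxv word on a 64-bit kernel; 4 on 386
		for i := 0; i+2*word <= len(raw); i += 2 * word {
			tag := binary.LittleEndian.Uint64(raw[i:])
			val := binary.LittleEndian.Uint64(raw[i+word:])
			if tag == _AT_NULL {
				break
			}
			if tag == _AT_RANDOM || tag == _AT_SYSINFO {
				fmt.Printf("auxv tag=%d value=%#x\n", tag, val)
			}
		}
	}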
diff --git a/src/runtime/os_linux_arm.c b/src/runtime/os_linux_arm.c
deleted file mode 100644
index e3eda7c..0000000
--- a/src/runtime/os_linux_arm.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "textflag.h"
-
-#define AT_NULL		0
-#define AT_PLATFORM	15 // introduced in at least 2.6.11
-#define AT_HWCAP	16 // introduced in at least 2.6.11
-#define AT_RANDOM	25 // introduced in 2.6.29
-#define HWCAP_VFP	(1 << 6) // introduced in at least 2.6.11
-#define HWCAP_VFPv3	(1 << 13) // introduced in 2.6.30
-static uint32 runtime·randomNumber;
-uint8  runtime·armArch = 6;	// we default to ARMv6
-uint32 runtime·hwcap;	// set by setup_auxv
-extern uint8  runtime·goarm;	// set by 5l
-
-void
-runtime·checkgoarm(void)
-{
-	if(runtime·goarm > 5 && !(runtime·hwcap & HWCAP_VFP)) {
-		runtime·printf("runtime: this CPU has no floating point hardware, so it cannot run\n");
-		runtime·printf("this GOARM=%d binary. Recompile using GOARM=5.\n", runtime·goarm);
-		runtime·exit(1);
-	}
-	if(runtime·goarm > 6 && !(runtime·hwcap & HWCAP_VFPv3)) {
-		runtime·printf("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n");
-		runtime·printf("this GOARM=%d binary. Recompile using GOARM=6.\n", runtime·goarm);
-		runtime·exit(1);
-	}
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·setup_auxv(int32 argc, byte **argv)
-{
-	byte **envp;
-	byte *rnd;
-	uint32 *auxv;
-	uint32 t;
-
-	// skip envp to get to ELF auxiliary vector.
-	for(envp = &argv[argc+1]; *envp != nil; envp++)
-		;
-	envp++;
-	
-	for(auxv=(uint32*)envp; auxv[0] != AT_NULL; auxv += 2) {
-		switch(auxv[0]) {
-		case AT_RANDOM: // kernel provided 16-byte worth of random data
-			if(auxv[1]) {
-				rnd = (byte*)auxv[1];
-				runtime·randomNumber = rnd[4] | rnd[5]<<8 | rnd[6]<<16 | rnd[7]<<24;
-			}
-			break;
-		case AT_PLATFORM: // v5l, v6l, v7l
-			if(auxv[1]) {
-				t = *(uint8*)(auxv[1]+1);
-				if(t >= '5' && t <= '7')
-					runtime·armArch = t - '0';
-			}
-			break;
-		case AT_HWCAP: // CPU capability bit flags
-			runtime·hwcap = auxv[1];
-			break;
-		}
-	}
-}
-
-#pragma textflag NOSPLIT
-int64
-runtime·cputicks(void)
-{
-	// Currently cputicks() is used in blocking profiler and to seed runtime·fastrand1().
-	// runtime·nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
-	// runtime·randomNumber provides better seeding of fastrand1.
-	return runtime·nanotime() + runtime·randomNumber;
-}
diff --git a/src/runtime/os_linux_arm.go b/src/runtime/os_linux_arm.go
new file mode 100644
index 0000000..9b0ade6
--- /dev/null
+++ b/src/runtime/os_linux_arm.go
@@ -0,0 +1,71 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+	_AT_NULL     = 0
+	_AT_PLATFORM = 15 // introduced in at least 2.6.11
+	_AT_HWCAP    = 16 // introduced in at least 2.6.11
+	_AT_RANDOM   = 25 // introduced in 2.6.29
+
+	_HWCAP_VFP   = 1 << 6  // introduced in at least 2.6.11
+	_HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30
+)
+
+var randomNumber uint32
+var armArch uint8 = 6 // we default to ARMv6
+var hwcap uint32      // set by setup_auxv
+var goarm uint8       // set by 5l
+
+func checkgoarm() {
+	if goarm > 5 && hwcap&_HWCAP_VFP == 0 {
+		print("runtime: this CPU has no floating point hardware, so it cannot run\n")
+		print("this GOARM=", goarm, " binary. Recompile using GOARM=5.\n")
+		exit(1)
+	}
+	if goarm > 6 && hwcap&_HWCAP_VFPv3 == 0 {
+		print("runtime: this CPU has no VFPv3 floating point hardware, so it cannot run\n")
+		print("this GOARM=", goarm, " binary. Recompile using GOARM=5.\n")
+		exit(1)
+	}
+}
+
+//go:nosplit
+func setup_auxv(argc int32, argv **byte) {
+	// skip over argv, envv to get to auxv
+	n := argc + 1
+	for argv_index(argv, n) != nil {
+		n++
+	}
+	n++
+	auxv := (*[1 << 28]uint32)(add(unsafe.Pointer(argv), uintptr(n)*ptrSize))
+
+	for i := 0; auxv[i] != _AT_NULL; i += 2 {
+		switch auxv[i] {
+		case _AT_RANDOM: // kernel provided 16-byte worth of random data
+			if auxv[i+1] != 0 {
+				randomNumber = *(*uint32)(unsafe.Pointer(uintptr(auxv[i+1])))
+			}
+
+		case _AT_PLATFORM: // v5l, v6l, v7l
+			t := *(*uint8)(unsafe.Pointer(uintptr(auxv[i+1] + 1)))
+			if '5' <= t && t <= '7' {
+				armArch = t - '0'
+			}
+
+		case _AT_HWCAP: // CPU capability bit flags
+			hwcap = auxv[i+1]
+		}
+	}
+}
+
+func cputicks() int64 {
+	// Currently cputicks() is used in blocking profiler and to seed fastrand1().
+	// nanotime() is a poor approximation of CPU ticks that is enough for the profiler.
+	// randomNumber provides better seeding of fastrand1.
+	return nanotime() + int64(randomNumber)
+}
diff --git a/src/runtime/os_openbsd.c b/src/runtime/os_openbsd.c
deleted file mode 100644
index 960aaff..0000000
--- a/src/runtime/os_openbsd.c
+++ /dev/null
@@ -1,312 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-#include "stack.h"
-#include "textflag.h"
-
-enum
-{
-	ESRCH = 3,
-	EAGAIN = 35,
-	EWOULDBLOCK = EAGAIN,
-	ENOTSUP = 91,
-
-	// From OpenBSD's sys/time.h
-	CLOCK_REALTIME = 0,
-	CLOCK_VIRTUAL = 1,
-	CLOCK_PROF = 2,
-	CLOCK_MONOTONIC = 3
-};
-
-extern SigTab runtime·sigtab[];
-
-static Sigset sigset_none;
-static Sigset sigset_all = ~(Sigset)0;
-
-extern int32 runtime·tfork(TforkT *param, uintptr psize, M *mp, G *gp, void (*fn)(void));
-extern int32 runtime·thrsleep(void *ident, int32 clock_id, void *tsp, void *lock, const int32 *abort);
-extern int32 runtime·thrwakeup(void *ident, int32 n);
-
-// From OpenBSD's <sys/sysctl.h>
-#define	CTL_HW	6
-#define	HW_NCPU	3
-
-static int32
-getncpu(void)
-{
-	uint32 mib[2];
-	uint32 out;
-	int32 ret;
-	uintptr nout;
-
-	// Fetch hw.ncpu via sysctl.
-	mib[0] = CTL_HW;
-	mib[1] = HW_NCPU;
-	nout = sizeof out;
-	out = 0;
-	ret = runtime·sysctl(mib, 2, (byte*)&out, &nout, nil, 0);
-	if(ret >= 0)
-		return out;
-	else
-		return 1;
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·semacreate(void)
-{
-	return 1;
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·semasleep(int64 ns)
-{
-	Timespec ts, *tsp = nil;
-
-	// Compute sleep deadline.
-	if(ns >= 0) {
-		int32 nsec;
-		ns += runtime·nanotime();
-		ts.tv_sec = runtime·timediv(ns, 1000000000, &nsec);
-		ts.tv_nsec = nsec; // tv_nsec is int64 on amd64
-		tsp = &ts;
-	}
-
-	for(;;) {
-		int32 ret;
-
-		// spin-mutex lock
-		while(runtime·xchg(&g->m->waitsemalock, 1))
-			runtime·osyield();
-
-		if(g->m->waitsemacount != 0) {
-			// semaphore is available.
-			g->m->waitsemacount--;
-			// spin-mutex unlock
-			runtime·atomicstore(&g->m->waitsemalock, 0);
-			return 0;  // semaphore acquired
-		}
-
-		// sleep until semaphore != 0 or timeout.
-		// thrsleep unlocks m->waitsemalock.
-		ret = runtime·thrsleep(&g->m->waitsemacount, CLOCK_MONOTONIC, tsp, &g->m->waitsemalock, (int32 *)&g->m->waitsemacount);
-		if(ret == EWOULDBLOCK)
-			return -1;
-	}
-}
-
-static void badsemawakeup(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·semawakeup(M *mp)
-{
-	uint32 ret;
-	void *oldptr;
-	uint32 oldscalar;
-	void (*fn)(void);
-
-	// spin-mutex lock
-	while(runtime·xchg(&mp->waitsemalock, 1))
-		runtime·osyield();
-	mp->waitsemacount++;
-	ret = runtime·thrwakeup(&mp->waitsemacount, 1);
-	if(ret != 0 && ret != ESRCH) {
-		// semawakeup can be called on signal stack.
-		// Save old ptrarg/scalararg so we can restore them.
-		oldptr = g->m->ptrarg[0];
-		oldscalar = g->m->scalararg[0];
-		g->m->ptrarg[0] = mp;
-		g->m->scalararg[0] = ret;
-		fn = badsemawakeup;
-		if(g == g->m->gsignal)
-			fn();
-		else
-			runtime·onM(&fn);
-		g->m->ptrarg[0] = oldptr;
-		g->m->scalararg[0] = oldscalar;
-	}
-	// spin-mutex unlock
-	runtime·atomicstore(&mp->waitsemalock, 0);
-}
-
-static void
-badsemawakeup(void)
-{
-	M *mp;
-	int32 ret;
-
-	mp = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	ret = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-
-	runtime·printf("thrwakeup addr=%p sem=%d ret=%d\n", &mp->waitsemacount, mp->waitsemacount, ret);
-}
-
-void
-runtime·newosproc(M *mp, void *stk)
-{
-	TforkT param;
-	Sigset oset;
-	int32 ret;
-
-	if(0) {
-		runtime·printf(
-			"newosproc stk=%p m=%p g=%p id=%d/%d ostk=%p\n",
-			stk, mp, mp->g0, mp->id, (int32)mp->tls[0], &mp);
-	}
-
-	mp->tls[0] = mp->id;	// so 386 asm can find it
-
-	param.tf_tcb = (byte*)&mp->tls[0];
-	param.tf_tid = (int32*)&mp->procid;
-	param.tf_stack = stk;
-
-	oset = runtime·sigprocmask(SIG_SETMASK, sigset_all);
-	ret = runtime·tfork(&param, sizeof(param), mp, mp->g0, runtime·mstart);
-	runtime·sigprocmask(SIG_SETMASK, oset);
-
-	if(ret < 0) {
-		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount() - 1, -ret);
-		if (ret == -ENOTSUP)
-			runtime·printf("runtime: is kern.rthreads disabled?\n");
-		runtime·throw("runtime.newosproc");
-	}
-}
-
-void
-runtime·osinit(void)
-{
-	runtime·ncpu = getncpu();
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·get_random_data(byte **rnd, int32 *rnd_len)
-{
-	#pragma dataflag NOPTR
-	static byte urandom_data[HashRandomBytes];
-	int32 fd;
-	fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
-	if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
-		*rnd = urandom_data;
-		*rnd_len = HashRandomBytes;
-	} else {
-		*rnd = nil;
-		*rnd_len = 0;
-	}
-	runtime·close(fd);
-}
-
-void
-runtime·goenvs(void)
-{
-	runtime·goenvs_unix();
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime·mpreinit(M *mp)
-{
-	mp->gsignal = runtime·malg(32*1024);
-	runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
-
-	mp->gsignal->m = mp;
-	runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime·minit(void)
-{
-	// Initialize signal handling
-	runtime·signalstack((byte*)g->m->gsignal->stack.lo, 32*1024);
-	runtime·sigprocmask(SIG_SETMASK, sigset_none);
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime·unminit(void)
-{
-	runtime·signalstack(nil, 0);
-}
-
-uintptr
-runtime·memlimit(void)
-{
-	return 0;
-}
-
-extern void runtime·sigtramp(void);
-
-typedef struct sigaction {
-	union {
-		void    (*__sa_handler)(int32);
-		void    (*__sa_sigaction)(int32, Siginfo*, void *);
-	} __sigaction_u;		/* signal handler */
-	uint32	sa_mask;		/* signal mask to apply */
-	int32	sa_flags;		/* see signal options below */
-} SigactionT;
-
-void
-runtime·setsig(int32 i, GoSighandler *fn, bool restart)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
-	if(restart)
-		sa.sa_flags |= SA_RESTART;
-	sa.sa_mask = ~0U;
-	if(fn == runtime·sighandler)
-		fn = (void*)runtime·sigtramp;
-	sa.__sigaction_u.__sa_sigaction = (void*)fn;
-	runtime·sigaction(i, &sa, nil);
-}
-
-GoSighandler*
-runtime·getsig(int32 i)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	runtime·sigaction(i, nil, &sa);
-	if((void*)sa.__sigaction_u.__sa_sigaction == runtime·sigtramp)
-		return runtime·sighandler;
-	return (void*)sa.__sigaction_u.__sa_sigaction;
-}
-
-void
-runtime·signalstack(byte *p, int32 n)
-{
-	StackT st;
-
-	st.ss_sp = (void*)p;
-	st.ss_size = n;
-	st.ss_flags = 0;
-	if(p == nil)
-		st.ss_flags = SS_DISABLE;
-	runtime·sigaltstack(&st, nil);
-}
-
-void
-runtime·unblocksignals(void)
-{
-	runtime·sigprocmask(SIG_SETMASK, sigset_none);
-}
-
-#pragma textflag NOSPLIT
-int8*
-runtime·signame(int32 sig)
-{
-	return runtime·sigtab[sig].name;
-}
diff --git a/src/runtime/os_openbsd.go b/src/runtime/os_openbsd.go
index a000f96..9e5adcd 100644
--- a/src/runtime/os_openbsd.go
+++ b/src/runtime/os_openbsd.go
@@ -4,14 +4,30 @@
 
 package runtime
 
-import "unsafe"
+//go:noescape
+func setitimer(mode int32, new, old *itimerval)
 
-func setitimer(mode int32, new, old unsafe.Pointer)
-func sigaction(sig int32, new, old unsafe.Pointer)
-func sigaltstack(new, old unsafe.Pointer)
+//go:noescape
+func sigaction(sig int32, new, old *sigactiont)
+
+//go:noescape
+func sigaltstack(new, old *stackt)
+
+//go:noescape
 func sigprocmask(mode int32, new uint32) uint32
+
+//go:noescape
 func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
+
 func raise(sig int32)
-func tfork(param unsafe.Pointer, psize uintptr, mm, gg, fn unsafe.Pointer) int32
-func thrsleep(ident unsafe.Pointer, clock_id int32, tsp, lock, abort unsafe.Pointer) int32
-func thrwakeup(ident unsafe.Pointer, n int32) int32
+
+//go:noescape
+func tfork(param *tforkt, psize uintptr, mm *m, gg *g, fn uintptr) int32
+
+//go:noescape
+func thrsleep(ident uintptr, clock_id int32, tsp *timespec, lock uintptr, abort *int32) int32
+
+//go:noescape
+func thrwakeup(ident uintptr, n int32) int32
+
+func osyield()
diff --git a/src/runtime/os_openbsd.h b/src/runtime/os_openbsd.h
deleted file mode 100644
index 6ad9810..0000000
--- a/src/runtime/os_openbsd.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright 2010 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-
-typedef byte* kevent_udata;
-
-struct sigaction;
-
-void	runtime·sigpanic(void);
-
-void	runtime·setitimer(int32, Itimerval*, Itimerval*);
-void	runtime·sigaction(int32, struct sigaction*, struct sigaction*);
-void	runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
-Sigset	runtime·sigprocmask(int32, Sigset);
-void	runtime·unblocksignals(void);
-int32	runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
-
-enum {
-	SS_DISABLE = 4,
-	SIG_BLOCK = 1,
-	SIG_UNBLOCK = 2,
-	SIG_SETMASK = 3,
-	NSIG = 33,
-	SI_USER = 0,
-};
diff --git a/src/runtime/os_solaris.c b/src/runtime/os_solaris.c
deleted file mode 100644
index bee91d8..0000000
--- a/src/runtime/os_solaris.c
+++ /dev/null
@@ -1,560 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-#include "stack.h"
-#include "textflag.h"
-
-#pragma dynexport runtime·end _end
-#pragma dynexport runtime·etext _etext
-#pragma dynexport runtime·edata _edata
-
-#pragma dynimport libc·___errno ___errno "libc.so"
-#pragma dynimport libc·clock_gettime clock_gettime "libc.so"
-#pragma dynimport libc·close close "libc.so"
-#pragma dynimport libc·exit exit "libc.so"
-#pragma dynimport libc·fstat fstat "libc.so"
-#pragma dynimport libc·getcontext getcontext "libc.so"
-#pragma dynimport libc·getrlimit getrlimit "libc.so"
-#pragma dynimport libc·malloc malloc "libc.so"
-#pragma dynimport libc·mmap mmap "libc.so"
-#pragma dynimport libc·munmap munmap "libc.so"
-#pragma dynimport libc·open open "libc.so"
-#pragma dynimport libc·pthread_attr_destroy pthread_attr_destroy "libc.so"
-#pragma dynimport libc·pthread_attr_getstack pthread_attr_getstack "libc.so"
-#pragma dynimport libc·pthread_attr_init pthread_attr_init "libc.so"
-#pragma dynimport libc·pthread_attr_setdetachstate pthread_attr_setdetachstate "libc.so"
-#pragma dynimport libc·pthread_attr_setstack pthread_attr_setstack "libc.so"
-#pragma dynimport libc·pthread_create pthread_create "libc.so"
-#pragma dynimport libc·raise raise "libc.so"
-#pragma dynimport libc·read read "libc.so"
-#pragma dynimport libc·select select "libc.so"
-#pragma dynimport libc·sched_yield sched_yield "libc.so"
-#pragma dynimport libc·sem_init sem_init "libc.so"
-#pragma dynimport libc·sem_post sem_post "libc.so"
-#pragma dynimport libc·sem_reltimedwait_np sem_reltimedwait_np "libc.so"
-#pragma dynimport libc·sem_wait sem_wait "libc.so"
-#pragma dynimport libc·setitimer setitimer "libc.so"
-#pragma dynimport libc·sigaction sigaction "libc.so"
-#pragma dynimport libc·sigaltstack sigaltstack "libc.so"
-#pragma dynimport libc·sigprocmask sigprocmask "libc.so"
-#pragma dynimport libc·sysconf sysconf "libc.so"
-#pragma dynimport libc·usleep usleep "libc.so"
-#pragma dynimport libc·write write "libc.so"
-
-extern uintptr libc·___errno;
-extern uintptr libc·clock_gettime;
-extern uintptr libc·close;
-extern uintptr libc·exit;
-extern uintptr libc·fstat;
-extern uintptr libc·getcontext;
-extern uintptr libc·getrlimit;
-extern uintptr libc·malloc;
-extern uintptr libc·mmap;
-extern uintptr libc·munmap;
-extern uintptr libc·open;
-extern uintptr libc·pthread_attr_destroy;
-extern uintptr libc·pthread_attr_getstack;
-extern uintptr libc·pthread_attr_init;
-extern uintptr libc·pthread_attr_setdetachstate;
-extern uintptr libc·pthread_attr_setstack;
-extern uintptr libc·pthread_create;
-extern uintptr libc·raise;
-extern uintptr libc·read;
-extern uintptr libc·sched_yield;
-extern uintptr libc·select;
-extern uintptr libc·sem_init;
-extern uintptr libc·sem_post;
-extern uintptr libc·sem_reltimedwait_np;
-extern uintptr libc·sem_wait;
-extern uintptr libc·setitimer;
-extern uintptr libc·sigaction;
-extern uintptr libc·sigaltstack;
-extern uintptr libc·sigprocmask;
-extern uintptr libc·sysconf;
-extern uintptr libc·usleep;
-extern uintptr libc·write;
-
-void	runtime·getcontext(Ucontext *context);
-int32	runtime·pthread_attr_destroy(PthreadAttr* attr);
-int32	runtime·pthread_attr_init(PthreadAttr* attr);
-int32	runtime·pthread_attr_getstack(PthreadAttr* attr, void** addr, uint64* size);
-int32	runtime·pthread_attr_setdetachstate(PthreadAttr* attr, int32 state);
-int32	runtime·pthread_attr_setstack(PthreadAttr* attr, void* addr, uint64 size);
-int32	runtime·pthread_create(Pthread* thread, PthreadAttr* attr, void(*fn)(void), void *arg);
-uint32	runtime·tstart_sysvicall(M *newm);
-int32	runtime·sem_init(SemT* sem, int32 pshared, uint32 value);
-int32	runtime·sem_post(SemT* sem);
-int32	runtime·sem_reltimedwait_np(SemT* sem, Timespec* timeout);
-int32	runtime·sem_wait(SemT* sem);
-int64	runtime·sysconf(int32 name);
-
-extern SigTab runtime·sigtab[];
-static Sigset sigset_none;
-static Sigset sigset_all = { ~(uint32)0, ~(uint32)0, ~(uint32)0, ~(uint32)0, };
-
-static int32
-getncpu(void) 
-{
-	int32 n;
-	
-	n = (int32)runtime·sysconf(_SC_NPROCESSORS_ONLN);
-	if(n < 1)
-		return 1;
-	return n;
-}
-
-void
-runtime·osinit(void)
-{
-	runtime·ncpu = getncpu(); 
-}
-
-void
-runtime·newosproc(M *mp, void *stk)
-{
-	PthreadAttr attr;
-	Sigset oset;
-	Pthread tid;
-	int32 ret;
-	uint64 size;
-
-	USED(stk);
-	if(runtime·pthread_attr_init(&attr) != 0)
-		runtime·throw("pthread_attr_init");
-	if(runtime·pthread_attr_setstack(&attr, 0, 0x200000) != 0)
-		runtime·throw("pthread_attr_setstack");
-	size = 0;
-	if(runtime·pthread_attr_getstack(&attr, (void**)&mp->g0->stack.hi, &size) != 0)
-		runtime·throw("pthread_attr_getstack");	
-	mp->g0->stack.lo = mp->g0->stack.hi - size;
-	if(runtime·pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
-		runtime·throw("pthread_attr_setdetachstate");
-
-	// Disable signals during create, so that the new thread starts
-	// with signals disabled.  It will enable them in minit.
-	runtime·sigprocmask(SIG_SETMASK, &sigset_all, &oset);
-	ret = runtime·pthread_create(&tid, &attr, (void (*)(void))runtime·tstart_sysvicall, mp);
-	runtime·sigprocmask(SIG_SETMASK, &oset, nil);
-	if(ret != 0) {
-		runtime·printf("runtime: failed to create new OS thread (have %d already; errno=%d)\n", runtime·mcount(), ret);
-		runtime·throw("runtime.newosproc");
-	}
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·get_random_data(byte **rnd, int32 *rnd_len)
-{
-	#pragma dataflag NOPTR
-	static byte urandom_data[HashRandomBytes];
-	int32 fd;
-	fd = runtime·open("/dev/urandom", 0 /* O_RDONLY */, 0);
-	if(runtime·read(fd, urandom_data, HashRandomBytes) == HashRandomBytes) {
-		*rnd = urandom_data;
-		*rnd_len = HashRandomBytes;
-	} else {
-		*rnd = nil;
-		*rnd_len = 0;
-	}
-	runtime·close(fd);
-}
-
-void
-runtime·goenvs(void)
-{
-	runtime·goenvs_unix();
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the parent thread (main thread in case of bootstrap), can allocate memory.
-void
-runtime·mpreinit(M *mp)
-{
-	mp->gsignal = runtime·malg(32*1024);
-	runtime·writebarrierptr_nostore(&mp->gsignal, mp->gsignal);
-
-	mp->gsignal->m = mp;
-	runtime·writebarrierptr_nostore(&mp->gsignal->m, mp->gsignal->m);
-}
-
-// Called to initialize a new m (including the bootstrap m).
-// Called on the new thread, can not allocate memory.
-void
-runtime·minit(void)
-{
-	runtime·asmcgocall(runtime·miniterrno, (void *)libc·___errno);
-	// Initialize signal handling
-	runtime·signalstack((byte*)g->m->gsignal->stack.lo, 32*1024);
-	runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
-}
-
-// Called from dropm to undo the effect of an minit.
-void
-runtime·unminit(void)
-{
-	runtime·signalstack(nil, 0);
-}
-
-uintptr
-runtime·memlimit(void)
-{
-	Rlimit rl;
-	extern byte runtime·text[], runtime·end[];
-	uintptr used;
-	
-	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
-		return 0;
-	if(rl.rlim_cur >= 0x7fffffff)
-		return 0;
-
-	// Estimate our VM footprint excluding the heap.
-	// Not an exact science: use size of binary plus
-	// some room for thread stacks.
-	used = runtime·end - runtime·text + (64<<20);
-	if(used >= rl.rlim_cur)
-		return 0;
-
-	// If there's not at least 16 MB left, we're probably
-	// not going to be able to do much.  Treat as no limit.
-	rl.rlim_cur -= used;
-	if(rl.rlim_cur < (16<<20))
-		return 0;
-
-	return rl.rlim_cur - used;
-}
-
-void
-runtime·setprof(bool on)
-{
-	USED(on);
-}
-
-extern void runtime·sigtramp(void);
-
-void
-runtime·setsig(int32 i, GoSighandler *fn, bool restart)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	sa.sa_flags = SA_SIGINFO|SA_ONSTACK;
-	if(restart)
-		sa.sa_flags |= SA_RESTART;
-	sa.sa_mask.__sigbits[0] = ~(uint32)0;
-	sa.sa_mask.__sigbits[1] = ~(uint32)0;
-	sa.sa_mask.__sigbits[2] = ~(uint32)0;
-	sa.sa_mask.__sigbits[3] = ~(uint32)0;
-	if(fn == runtime·sighandler)
-		fn = (void*)runtime·sigtramp;
-	*((void**)&sa._funcptr[0]) = (void*)fn;
-	runtime·sigaction(i, &sa, nil);
-}
-
-GoSighandler*
-runtime·getsig(int32 i)
-{
-	SigactionT sa;
-
-	runtime·memclr((byte*)&sa, sizeof sa);
-	runtime·sigaction(i, nil, &sa);
-	if(*((void**)&sa._funcptr[0]) == runtime·sigtramp)
-		return runtime·sighandler;
-	return *((void**)&sa._funcptr[0]);
-}
-
-void
-runtime·signalstack(byte *p, int32 n)
-{
-	StackT st;
-
-	st.ss_sp = (void*)p;
-	st.ss_size = n;
-	st.ss_flags = 0;
-	if(p == nil)
-		st.ss_flags = SS_DISABLE;
-	runtime·sigaltstack(&st, nil);
-}
-
-void
-runtime·unblocksignals(void)
-{
-	runtime·sigprocmask(SIG_SETMASK, &sigset_none, nil);
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·semacreate(void)
-{
-	SemT* sem;
-
-	// Call libc's malloc rather than runtime·malloc.  This will
-	// allocate space on the C heap.  We can't call runtime·malloc
-	// here because it could cause a deadlock.
-	g->m->libcall.fn = (uintptr)(void*)libc·malloc;
-	g->m->libcall.n = 1;
-	runtime·memclr((byte*)&g->m->scratch, sizeof(g->m->scratch));
-	g->m->scratch.v[0] = (uintptr)sizeof(*sem);
-	g->m->libcall.args = (uintptr)(uintptr*)&g->m->scratch;
-	runtime·asmcgocall(runtime·asmsysvicall6, &g->m->libcall);
-	sem = (void*)g->m->libcall.r1;
-	if(runtime·sem_init(sem, 0, 0) != 0)
-		runtime·throw("sem_init");
-	return (uintptr)sem;
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·semasleep(int64 ns)
-{
-	M *m;
-
-	m = g->m;
-	if(ns >= 0) {
-		m->ts.tv_sec = ns / 1000000000LL;
-		m->ts.tv_nsec = ns % 1000000000LL;
-
-		m->libcall.fn = (uintptr)(void*)libc·sem_reltimedwait_np;
-		m->libcall.n = 2;
-		runtime·memclr((byte*)&m->scratch, sizeof(m->scratch));
-		m->scratch.v[0] = m->waitsema;
-		m->scratch.v[1] = (uintptr)&m->ts;
-		m->libcall.args = (uintptr)(uintptr*)&m->scratch;
-		runtime·asmcgocall(runtime·asmsysvicall6, &m->libcall);
-		if(*m->perrno != 0) {
-			if(*m->perrno == ETIMEDOUT || *m->perrno == EAGAIN || *m->perrno == EINTR)
-				return -1;
-			runtime·throw("sem_reltimedwait_np");
-		}
-		return 0;
-	}
-	for(;;) {
-		m->libcall.fn = (uintptr)(void*)libc·sem_wait;
-		m->libcall.n = 1;
-		runtime·memclr((byte*)&m->scratch, sizeof(m->scratch));
-		m->scratch.v[0] = m->waitsema;
-		m->libcall.args = (uintptr)(uintptr*)&m->scratch;
-		runtime·asmcgocall(runtime·asmsysvicall6, &m->libcall);
-		if(m->libcall.r1 == 0)
-			break;
-		if(*m->perrno == EINTR) 
-			continue;
-		runtime·throw("sem_wait");
-	}
-	return 0;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·semawakeup(M *mp)
-{
-	SemT* sem = (SemT*)mp->waitsema;
-	if(runtime·sem_post(sem) != 0)
-		runtime·throw("sem_post");
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·close(int32 fd)
-{
-	return runtime·sysvicall1(libc·close, (uintptr)fd);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·exit(int32 r)
-{
-	runtime·sysvicall1(libc·exit, (uintptr)r);
-}
-
-#pragma textflag NOSPLIT
-/* int32 */ void
-runtime·getcontext(Ucontext* context)
-{
-	runtime·sysvicall1(libc·getcontext, (uintptr)context);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·getrlimit(int32 res, Rlimit* rlp)
-{
-	return runtime·sysvicall2(libc·getrlimit, (uintptr)res, (uintptr)rlp);
-}
-
-#pragma textflag NOSPLIT
-uint8*
-runtime·mmap(byte* addr, uintptr len, int32 prot, int32 flags, int32 fildes, uint32 off)
-{
-	return (uint8*)runtime·sysvicall6(libc·mmap, (uintptr)addr, (uintptr)len, (uintptr)prot, (uintptr)flags, (uintptr)fildes, (uintptr)off);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·munmap(byte* addr, uintptr len)
-{
-	runtime·sysvicall2(libc·munmap, (uintptr)addr, (uintptr)len);
-}
-
-extern int64 runtime·nanotime1(void);
-#pragma textflag NOSPLIT
-int64
-runtime·nanotime(void)
-{
-	return runtime·sysvicall0((uintptr)runtime·nanotime1);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·open(int8* path, int32 oflag, int32 mode)
-{
-	return runtime·sysvicall3(libc·open, (uintptr)path, (uintptr)oflag, (uintptr)mode);
-}
-
-int32
-runtime·pthread_attr_destroy(PthreadAttr* attr)
-{
-	return runtime·sysvicall1(libc·pthread_attr_destroy, (uintptr)attr);
-}
-
-int32
-runtime·pthread_attr_getstack(PthreadAttr* attr, void** addr, uint64* size)
-{
-	return runtime·sysvicall3(libc·pthread_attr_getstack, (uintptr)attr, (uintptr)addr, (uintptr)size);
-}
-
-int32
-runtime·pthread_attr_init(PthreadAttr* attr)
-{
-	return runtime·sysvicall1(libc·pthread_attr_init, (uintptr)attr);
-}
-
-int32
-runtime·pthread_attr_setdetachstate(PthreadAttr* attr, int32 state)
-{
-	return runtime·sysvicall2(libc·pthread_attr_setdetachstate, (uintptr)attr, (uintptr)state);
-}
-
-int32
-runtime·pthread_attr_setstack(PthreadAttr* attr, void* addr, uint64 size)
-{
-	return runtime·sysvicall3(libc·pthread_attr_setstack, (uintptr)attr, (uintptr)addr, (uintptr)size);
-}
-
-int32
-runtime·pthread_create(Pthread* thread, PthreadAttr* attr, void(*fn)(void), void *arg)
-{
-	return runtime·sysvicall4(libc·pthread_create, (uintptr)thread, (uintptr)attr, (uintptr)fn, (uintptr)arg);
-}
-
-/* int32 */ void
-runtime·raise(int32 sig)
-{
-	runtime·sysvicall1(libc·raise, (uintptr)sig);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·read(int32 fd, void* buf, int32 nbyte)
-{
-	return runtime·sysvicall3(libc·read, (uintptr)fd, (uintptr)buf, (uintptr)nbyte);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·sem_init(SemT* sem, int32 pshared, uint32 value)
-{
-	return runtime·sysvicall3(libc·sem_init, (uintptr)sem, (uintptr)pshared, (uintptr)value);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·sem_post(SemT* sem)
-{
-	return runtime·sysvicall1(libc·sem_post, (uintptr)sem);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·sem_reltimedwait_np(SemT* sem, Timespec* timeout)
-{
-	return runtime·sysvicall2(libc·sem_reltimedwait_np, (uintptr)sem, (uintptr)timeout);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·sem_wait(SemT* sem)
-{
-	return runtime·sysvicall1(libc·sem_wait, (uintptr)sem);
-}
-
-/* int32 */ void
-runtime·setitimer(int32 which, Itimerval* value, Itimerval* ovalue)
-{
-	runtime·sysvicall3(libc·setitimer, (uintptr)which, (uintptr)value, (uintptr)ovalue);
-}
-
-/* int32 */ void
-runtime·sigaction(int32 sig, struct SigactionT* act, struct SigactionT* oact)
-{
-	runtime·sysvicall3(libc·sigaction, (uintptr)sig, (uintptr)act, (uintptr)oact);
-}
-
-/* int32 */ void
-runtime·sigaltstack(SigaltstackT* ss, SigaltstackT* oss)
-{
-	runtime·sysvicall2(libc·sigaltstack, (uintptr)ss, (uintptr)oss);
-}
-
-/* int32 */ void
-runtime·sigprocmask(int32 how, Sigset* set, Sigset* oset)
-{
-	runtime·sysvicall3(libc·sigprocmask, (uintptr)how, (uintptr)set, (uintptr)oset);
-}
-
-int64
-runtime·sysconf(int32 name)
-{
-	return runtime·sysvicall1(libc·sysconf, (uintptr)name);
-}
-
-extern void runtime·usleep1(uint32);
-
-#pragma textflag NOSPLIT
-void
-runtime·usleep(uint32 µs)
-{
-	runtime·usleep1(µs);
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·write(uintptr fd, void* buf, int32 nbyte)
-{
-	return runtime·sysvicall3(libc·write, (uintptr)fd, (uintptr)buf, (uintptr)nbyte);
-}
-
-extern void runtime·osyield1(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·osyield(void)
-{
-	// Check the validity of m because we might be called in cgo callback
-	// path early enough where there isn't a m available yet.
-	if(g && g->m != nil) {
-		runtime·sysvicall0(libc·sched_yield);
-		return;
-	}
-	runtime·osyield1();
-}
-
-#pragma textflag NOSPLIT
-int8*
-runtime·signame(int32 sig)
-{
-	return runtime·sigtab[sig].name;
-}
diff --git a/src/runtime/os_solaris.go b/src/runtime/os_solaris.go
index ca13151..6864ef9 100644
--- a/src/runtime/os_solaris.go
+++ b/src/runtime/os_solaris.go
@@ -6,53 +6,35 @@
 
 import "unsafe"
 
-func setitimer(mode int32, new, old unsafe.Pointer)
-func sigaction(sig int32, new, old unsafe.Pointer)
-func sigaltstack(new, old unsafe.Pointer)
-func sigprocmask(mode int32, new, old unsafe.Pointer)
-func sysctl(mib *uint32, miblen uint32, out *byte, size *uintptr, dst *byte, ndst uintptr) int32
-func getrlimit(kind int32, limit unsafe.Pointer)
-func miniterrno(fn unsafe.Pointer)
-func raise(sig int32)
-func getcontext(ctxt unsafe.Pointer)
-func tstart_sysvicall(mm unsafe.Pointer) uint32
-func nanotime1() int64
-func usleep1(usec uint32)
-func osyield1()
-func netpollinit()
-func netpollopen(fd uintptr, pd *pollDesc) int32
-func netpollclose(fd uintptr) int32
-func netpollarm(pd *pollDesc, mode int)
-
-type libcFunc byte
+type libcFunc uintptr
 
 var asmsysvicall6 libcFunc
 
 //go:nosplit
-func sysvicall0(fn *libcFunc) uintptr {
+func sysvicall0(fn libcFunc) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 0
-	// TODO(rsc): Why is noescape necessary here and below?
-	libcall.args = uintptr(noescape(unsafe.Pointer(&fn))) // it's unused but must be non-nil, otherwise crashes
+	libcall.args = uintptr(fn) // it's unused but must be non-nil, otherwise crashes
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
 	return libcall.r1
 }
 
 //go:nosplit
-func sysvicall1(fn *libcFunc, a1 uintptr) uintptr {
+func sysvicall1(fn libcFunc, a1 uintptr) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 1
+	// TODO(rsc): Why is noescape necessary here and below?
 	libcall.args = uintptr(noescape(unsafe.Pointer(&a1)))
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
 	return libcall.r1
 }
 
 //go:nosplit
-func sysvicall2(fn *libcFunc, a1, a2 uintptr) uintptr {
+func sysvicall2(fn libcFunc, a1, a2 uintptr) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 2
 	libcall.args = uintptr(noescape(unsafe.Pointer(&a1)))
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
@@ -60,9 +42,9 @@
 }
 
 //go:nosplit
-func sysvicall3(fn *libcFunc, a1, a2, a3 uintptr) uintptr {
+func sysvicall3(fn libcFunc, a1, a2, a3 uintptr) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 3
 	libcall.args = uintptr(noescape(unsafe.Pointer(&a1)))
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
@@ -70,9 +52,9 @@
 }
 
 //go:nosplit
-func sysvicall4(fn *libcFunc, a1, a2, a3, a4 uintptr) uintptr {
+func sysvicall4(fn libcFunc, a1, a2, a3, a4 uintptr) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 4
 	libcall.args = uintptr(noescape(unsafe.Pointer(&a1)))
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
@@ -80,9 +62,9 @@
 }
 
 //go:nosplit
-func sysvicall5(fn *libcFunc, a1, a2, a3, a4, a5 uintptr) uintptr {
+func sysvicall5(fn libcFunc, a1, a2, a3, a4, a5 uintptr) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 5
 	libcall.args = uintptr(noescape(unsafe.Pointer(&a1)))
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
@@ -90,9 +72,9 @@
 }
 
 //go:nosplit
-func sysvicall6(fn *libcFunc, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
+func sysvicall6(fn libcFunc, a1, a2, a3, a4, a5, a6 uintptr) uintptr {
 	libcall := &getg().m.libcall
-	libcall.fn = uintptr(unsafe.Pointer(fn))
+	libcall.fn = uintptr(fn)
 	libcall.n = 6
 	libcall.args = uintptr(noescape(unsafe.Pointer(&a1)))
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(libcall))
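
The sysvicallN wrappers above all follow the same libcall-descriptor pattern: record the function address, the argument count, and the address of the first of n consecutive uintptr arguments, then hand the descriptor to the asmsysvicall6 trampoline via asmcgocall. Below is a minimal stand-alone sketch of that shape (not part of this CL); libcall, fakeDispatch, and the summing "call" are hypothetical stand-ins for m.libcall and the assembly trampoline, and the arguments are gathered into a local array rather than read off the caller's frame.

package main

import (
	"fmt"
	"unsafe"
)

// libcall mirrors the shape of the runtime's descriptor: which function to
// call, how many arguments it takes, where they live, and where the result
// goes.
type libcall struct {
	fn   uintptr  // "address" of the C function (an opaque tag here)
	n    int      // number of arguments
	args *uintptr // pointer to the first of n consecutive uintptr arguments
	r1   uintptr  // first return value
}

// fakeDispatch stands in for asmsysvicall6: walk the n arguments starting at
// args and produce a result (here just a sum, for demonstration).
func fakeDispatch(call *libcall) {
	var sum uintptr
	for i := 0; i < call.n; i++ {
		p := (*uintptr)(unsafe.Add(unsafe.Pointer(call.args), i*int(unsafe.Sizeof(uintptr(0)))))
		sum += *p
	}
	call.r1 = call.fn + sum
}

// sysvicall3 fills a descriptor much like the runtime wrappers do.
func sysvicall3(fn uintptr, a1, a2, a3 uintptr) uintptr {
	args := [3]uintptr{a1, a2, a3}
	call := libcall{fn: fn, n: 3, args: &args[0]}
	fakeDispatch(&call)
	return call.r1
}

func main() {
	fmt.Println(sysvicall3(1000, 1, 2, 3)) // 1006
}
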
diff --git a/src/runtime/os_solaris.h b/src/runtime/os_solaris.h
deleted file mode 100644
index 3d9e1a2..0000000
--- a/src/runtime/os_solaris.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-
-typedef uintptr kevent_udata;
-
-struct sigaction;
-
-void	runtime·sigpanic(void);
-
-void	runtime·setitimer(int32, Itimerval*, Itimerval*);
-void	runtime·sigaction(int32, struct SigactionT*, struct SigactionT*);
-void	runtime·sigaltstack(SigaltstackT*, SigaltstackT*);
-void	runtime·sigprocmask(int32, Sigset*, Sigset*);
-void	runtime·unblocksignals(void);
-int32	runtime·sysctl(uint32*, uint32, byte*, uintptr*, byte*, uintptr);
-
-
-void	runtime·raisesigpipe(void);
-void	runtime·setsig(int32, void(*)(int32, Siginfo*, void*, G*), bool);
-void	runtime·sighandler(int32 sig, Siginfo *info, void *context, G *gp);
-void	runtime·sigpanic(void);
-
-enum {
-	SS_DISABLE = 2,
-	SIG_BLOCK = 1,
-	SIG_UNBLOCK = 2,
-	SIG_SETMASK = 3,
-	NSIG = 73, /* number of signals in runtime·SigTab array */
-	SI_USER = 0,
-	_UC_SIGMASK = 0x01,
-	_UC_CPU = 0x04,
-	RLIMIT_AS = 10,
-};
-
-typedef struct Rlimit Rlimit;
-struct Rlimit {
-	int64   rlim_cur;
-	int64   rlim_max;
-};
-int32   runtime·getrlimit(int32, Rlimit*);
-
-// Call an external library function described by {fn, a0, ..., an}, with
-// SysV conventions, switching to os stack during the call, if necessary.
-uintptr	runtime·sysvicall0(uintptr fn);
-uintptr	runtime·sysvicall1(uintptr fn, uintptr a1);
-uintptr	runtime·sysvicall2(uintptr fn, uintptr a1, uintptr a2);
-uintptr	runtime·sysvicall3(uintptr fn, uintptr a1, uintptr a2, uintptr a3);
-uintptr	runtime·sysvicall4(uintptr fn, uintptr a1, uintptr a2, uintptr a3, uintptr a4);
-uintptr	runtime·sysvicall5(uintptr fn, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5);
-uintptr	runtime·sysvicall6(uintptr fn, uintptr a1, uintptr a2, uintptr a3, uintptr a4, uintptr a5, uintptr a6);
-void	runtime·asmsysvicall6(void *c);
-
-void	runtime·miniterrno(void *fn);
diff --git a/src/runtime/os_windows.go b/src/runtime/os_windows.go
index 1528d2f..fcd8f44 100644
--- a/src/runtime/os_windows.go
+++ b/src/runtime/os_windows.go
@@ -21,10 +21,6 @@
 func getlasterror() uint32
 func setlasterror(err uint32)
 func usleep1(usec uint32)
-func netpollinit()
-func netpollopen(fd uintptr, pd *pollDesc) int32
-func netpollclose(fd uintptr) int32
-func netpollarm(pd *pollDesc, mode int)
 
 func os_sigpipe() {
 	gothrow("too many writes on closed pipe")
diff --git a/src/runtime/panic.c b/src/runtime/panic.c
deleted file mode 100644
index b19fdd0..0000000
--- a/src/runtime/panic.c
+++ /dev/null
@@ -1,200 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "stack.h"
-#include "malloc.h"
-#include "textflag.h"
-
-// Code related to defer, panic and recover.
-
-// TODO: remove once code is moved to Go
-extern Defer* runtime·newdefer(int32 siz);
-extern runtime·freedefer(Defer *d);
-
-uint32 runtime·panicking;
-static Mutex paniclk;
-
-void
-runtime·deferproc_m(void)
-{
-	int32 siz;
-	FuncVal *fn;
-	uintptr argp;
-	uintptr callerpc;
-	Defer *d;
-
-	siz = g->m->scalararg[0];
-	fn = g->m->ptrarg[0];
-	argp = g->m->scalararg[1];
-	callerpc = g->m->scalararg[2];
-	g->m->ptrarg[0] = nil;
-	g->m->scalararg[1] = 0;
-
-	d = runtime·newdefer(siz);
-	if(d->panic != nil)
-		runtime·throw("deferproc: d->panic != nil after newdefer");
-	d->fn = fn;
-	d->pc = callerpc;
-	d->argp = argp;
-	runtime·memmove(d+1, (void*)argp, siz);
-}
-
-// Unwind the stack after a deferred function calls recover
-// after a panic.  Then arrange to continue running as though
-// the caller of the deferred function returned normally.
-void
-runtime·recovery_m(G *gp)
-{
-	void *argp;
-	uintptr pc;
-	
-	// Info about defer passed in G struct.
-	argp = (void*)gp->sigcode0;
-	pc = (uintptr)gp->sigcode1;
-
-	// d's arguments need to be in the stack.
-	if(argp != nil && ((uintptr)argp < gp->stack.lo || gp->stack.hi < (uintptr)argp)) {
-		runtime·printf("recover: %p not in [%p, %p]\n", argp, gp->stack.lo, gp->stack.hi);
-		runtime·throw("bad recovery");
-	}
-
-	// Make the deferproc for this d return again,
-	// this time returning 1.  The calling function will
-	// jump to the standard return epilogue.
-	// The -2*sizeof(uintptr) makes up for the
-	// two extra words that are on the stack at
-	// each call to deferproc.
-	// (The pc we're returning to does pop pop
-	// before it tests the return value.)
-	// On the arm and power there are 2 saved LRs mixed in too.
-	if(thechar == '5' || thechar == '9')
-		gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr);
-	else
-		gp->sched.sp = (uintptr)argp - 2*sizeof(uintptr);
-	gp->sched.pc = pc;
-	gp->sched.lr = 0;
-	gp->sched.ret = 1;
-	runtime·gogo(&gp->sched);
-}
-
-void
-runtime·startpanic_m(void)
-{
-	if(runtime·mheap.cachealloc.size == 0) { // very early
-		runtime·printf("runtime: panic before malloc heap initialized\n");
-		g->m->mallocing = 1; // tell rest of panic not to try to malloc
-	} else if(g->m->mcache == nil) // can happen if called from signal handler or throw
-		g->m->mcache = runtime·allocmcache();
-	switch(g->m->dying) {
-	case 0:
-		g->m->dying = 1;
-		if(g != nil) {
-			g->writebuf.array = nil;
-			g->writebuf.len = 0;
-			g->writebuf.cap = 0;
-		}
-		runtime·xadd(&runtime·panicking, 1);
-		runtime·lock(&paniclk);
-		if(runtime·debug.schedtrace > 0 || runtime·debug.scheddetail > 0)
-			runtime·schedtrace(true);
-		runtime·freezetheworld();
-		return;
-	case 1:
-		// Something failed while panicking, probably the print of the
-		// argument to panic().  Just print a stack trace and exit.
-		g->m->dying = 2;
-		runtime·printf("panic during panic\n");
-		runtime·dopanic(0);
-		runtime·exit(3);
-	case 2:
-		// This is a genuine bug in the runtime, we couldn't even
-		// print the stack trace successfully.
-		g->m->dying = 3;
-		runtime·printf("stack trace unavailable\n");
-		runtime·exit(4);
-	default:
-		// Can't even print!  Just exit.
-		runtime·exit(5);
-	}
-}
-
-void
-runtime·dopanic_m(void)
-{
-	G *gp;
-	uintptr sp, pc;
-	static bool didothers;
-	bool crash;
-	int32 t;
-
-	gp = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	pc = g->m->scalararg[0];
-	sp = g->m->scalararg[1];
-	g->m->scalararg[1] = 0;
-	if(gp->sig != 0)
-		runtime·printf("[signal %x code=%p addr=%p pc=%p]\n",
-			gp->sig, gp->sigcode0, gp->sigcode1, gp->sigpc);
-
-	if((t = runtime·gotraceback(&crash)) > 0){
-		if(gp != gp->m->g0) {
-			runtime·printf("\n");
-			runtime·goroutineheader(gp);
-			runtime·traceback(pc, sp, 0, gp);
-		} else if(t >= 2 || g->m->throwing > 0) {
-			runtime·printf("\nruntime stack:\n");
-			runtime·traceback(pc, sp, 0, gp);
-		}
-		if(!didothers) {
-			didothers = true;
-			runtime·tracebackothers(gp);
-		}
-	}
-	runtime·unlock(&paniclk);
-	if(runtime·xadd(&runtime·panicking, -1) != 0) {
-		// Some other m is panicking too.
-		// Let it print what it needs to print.
-		// Wait forever without chewing up cpu.
-		// It will exit when it's done.
-		static Mutex deadlock;
-		runtime·lock(&deadlock);
-		runtime·lock(&deadlock);
-	}
-	
-	if(crash)
-		runtime·crash();
-
-	runtime·exit(2);
-}
-
-#pragma textflag NOSPLIT
-bool
-runtime·canpanic(G *gp)
-{
-	M *m;
-	uint32 status;
-
-	// Note that g is m->gsignal, different from gp.
-	// Note also that g->m can change at preemption, so m can go stale
-	// if this function ever makes a function call.
-	m = g->m;
-
-	// Is it okay for gp to panic instead of crashing the program?
-	// Yes, as long as it is running Go code, not runtime code,
-	// and not stuck in a system call.
-	if(gp == nil || gp != m->curg)
-		return false;
-	if(m->locks-m->softfloat != 0 || m->mallocing != 0 || m->throwing != 0 || m->gcing != 0 || m->dying != 0)
-		return false;
-	status = runtime·readgstatus(gp);
-	if((status&~Gscan) != Grunning || gp->syscallsp != 0)
-		return false;
-#ifdef GOOS_windows
-	if(m->libcallsp != 0)
-		return false;
-#endif
-	return true;
-}
diff --git a/src/runtime/panic.go b/src/runtime/panic.go
index 91b5da2..8929467 100644
--- a/src/runtime/panic.go
+++ b/src/runtime/panic.go
@@ -54,6 +54,11 @@
 // The compiler turns a defer statement into a call to this.
 //go:nosplit
 func deferproc(siz int32, fn *funcval) { // arguments of fn follow fn
+	if getg().m.curg != getg() {
+		// go code on the system stack can't defer
+		gothrow("defer on system stack")
+	}
+
 	// the arguments of fn are in a perilous state.  The stack map
 	// for deferproc does not describe them.  So we can't let garbage
 	// collection or stack copying trigger until we've copied them out
@@ -64,20 +69,18 @@
 	if GOARCH == "arm" || GOARCH == "power64" || GOARCH == "power64le" {
 		argp += ptrSize // skip caller's saved link register
 	}
-	mp := acquirem()
-	mp.scalararg[0] = uintptr(siz)
-	mp.ptrarg[0] = unsafe.Pointer(fn)
-	mp.scalararg[1] = argp
-	mp.scalararg[2] = getcallerpc(unsafe.Pointer(&siz))
+	callerpc := getcallerpc(unsafe.Pointer(&siz))
 
-	if mp.curg != getg() {
-		// go code on the m stack can't defer
-		gothrow("defer on m")
-	}
-
-	onM(deferproc_m)
-
-	releasem(mp)
+	systemstack(func() {
+		d := newdefer(siz)
+		if d._panic != nil {
+			gothrow("deferproc: d.panic != nil after newdefer")
+		}
+		d.fn = fn
+		d.pc = callerpc
+		d.argp = argp
+		memmove(add(unsafe.Pointer(d), unsafe.Sizeof(*d)), unsafe.Pointer(argp), uintptr(siz))
+	})
 
 	// deferproc returns 0 normally.
 	// a deferred func that stops a panic
@@ -298,8 +301,6 @@
 	goexit()
 }
 
-func canpanic(*g) bool
-
 // Print all currently active panics.  Used when crashing.
 func printpanics(p *_panic) {
 	if p.link != nil {
@@ -318,7 +319,10 @@
 func gopanic(e interface{}) {
 	gp := getg()
 	if gp.m.curg != gp {
-		gothrow("panic on m stack")
+		print("panic: ")
+		printany(e)
+		print("\n")
+		gothrow("panic on system stack")
 	}
 
 	// m.softfloat is set during software floating point.
@@ -414,7 +418,7 @@
 			// Pass information about recovering frame to recovery.
 			gp.sigcode0 = uintptr(argp)
 			gp.sigcode1 = pc
-			mcall(recovery_m)
+			mcall(recovery)
 			gothrow("recovery failed") // mcall should not return
 		}
 	}
@@ -466,17 +470,17 @@
 
 //go:nosplit
 func startpanic() {
-	onM_signalok(startpanic_m)
+	systemstack(startpanic_m)
 }
 
 //go:nosplit
 func dopanic(unused int) {
+	pc := getcallerpc(unsafe.Pointer(&unused))
+	sp := getcallersp(unsafe.Pointer(&unused))
 	gp := getg()
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(gp)
-	mp.scalararg[0] = getcallerpc((unsafe.Pointer)(&unused))
-	mp.scalararg[1] = getcallersp((unsafe.Pointer)(&unused))
-	onM_signalok(dopanic_m) // should never return
+	systemstack(func() {
+		dopanic_m(gp, pc, sp) // should never return
+	})
 	*(*int)(nil) = 0
 }
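
The rewrite above drops the old onM convention, where deferproc and dopanic smuggled their arguments through m.scalararg/m.ptrarg and a fixed *_m helper, in favor of a closure that captures what it needs and runs on the g0 stack via systemstack. A hedged, stand-alone illustration of that shape (systemStack and deferRecord below are hypothetical stand-ins, not runtime APIs):

package main

import "fmt"

// systemStack stands in for runtime.systemstack: run fn on "another stack"
// and return once it completes. Here it simply calls fn.
func systemStack(fn func()) { fn() }

// deferRecord is a toy version of the runtime's _defer header.
type deferRecord struct {
	siz      int32
	pc, argp uintptr
}

// deferprocLike shows the new shape of deferproc: everything the system-stack
// half needs travels inside the closure rather than in per-M scratch slots.
func deferprocLike(siz int32, argp, callerpc uintptr) *deferRecord {
	var d *deferRecord
	systemStack(func() {
		d = &deferRecord{siz: siz, pc: callerpc, argp: argp}
	})
	return d
}

func main() {
	fmt.Printf("%+v\n", *deferprocLike(16, 0x1000, 0x2000))
}
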
 
diff --git a/src/runtime/panic1.go b/src/runtime/panic1.go
new file mode 100644
index 0000000..17379f9
--- /dev/null
+++ b/src/runtime/panic1.go
@@ -0,0 +1,161 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Code related to defer, panic and recover.
+// TODO: Merge into panic.go.
+
+//uint32 runtime·panicking;
+var paniclk mutex
+
+const hasLinkRegister = GOARCH == "arm" || GOARCH == "power64" || GOARCH == "power64le"
+
+// Unwind the stack after a deferred function calls recover
+// after a panic.  Then arrange to continue running as though
+// the caller of the deferred function returned normally.
+func recovery(gp *g) {
+	// Info about defer passed in G struct.
+	argp := (unsafe.Pointer)(gp.sigcode0)
+	pc := uintptr(gp.sigcode1)
+
+	// d's arguments need to be in the stack.
+	if argp != nil && (uintptr(argp) < gp.stack.lo || gp.stack.hi < uintptr(argp)) {
+		print("recover: ", argp, " not in [", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n")
+		gothrow("bad recovery")
+	}
+
+	// Make the deferproc for this d return again,
+	// this time returning 1.  The calling function will
+	// jump to the standard return epilogue.
+	// The -2*sizeof(uintptr) makes up for the
+	// two extra words that are on the stack at
+	// each call to deferproc.
+	// (The pc we're returning to does pop pop
+	// before it tests the return value.)
+	// On the arm and power there are 2 saved LRs mixed in too.
+	if hasLinkRegister {
+		gp.sched.sp = uintptr(argp) - 4*ptrSize
+	} else {
+		gp.sched.sp = uintptr(argp) - 2*ptrSize
+	}
+	gp.sched.pc = pc
+	gp.sched.lr = 0
+	gp.sched.ret = 1
+	gogo(&gp.sched)
+}
+
+func startpanic_m() {
+	_g_ := getg()
+	if mheap_.cachealloc.size == 0 { // very early
+		print("runtime: panic before malloc heap initialized\n")
+		_g_.m.mallocing = 1 // tell rest of panic not to try to malloc
+	} else if _g_.m.mcache == nil { // can happen if called from signal handler or throw
+		_g_.m.mcache = allocmcache()
+	}
+
+	switch _g_.m.dying {
+	case 0:
+		_g_.m.dying = 1
+		if _g_ != nil {
+			_g_.writebuf = nil
+		}
+		xadd(&panicking, 1)
+		lock(&paniclk)
+		if debug.schedtrace > 0 || debug.scheddetail > 0 {
+			schedtrace(true)
+		}
+		freezetheworld()
+		return
+	case 1:
+		// Something failed while panicking, probably the print of the
+		// argument to panic().  Just print a stack trace and exit.
+		_g_.m.dying = 2
+		print("panic during panic\n")
+		dopanic(0)
+		exit(3)
+		fallthrough
+	case 2:
+		// This is a genuine bug in the runtime, we couldn't even
+		// print the stack trace successfully.
+		_g_.m.dying = 3
+		print("stack trace unavailable\n")
+		exit(4)
+		fallthrough
+	default:
+		// Can't even print!  Just exit.
+		exit(5)
+	}
+}
+
+var didothers bool
+var deadlock mutex
+
+func dopanic_m(gp *g, pc, sp uintptr) {
+	if gp.sig != 0 {
+		print("[signal ", hex(gp.sig), " code=", hex(gp.sigcode0), " addr=", hex(gp.sigcode1), " pc=", hex(gp.sigpc), "]\n")
+	}
+
+	var docrash bool
+	_g_ := getg()
+	if t := gotraceback(&docrash); t > 0 {
+		if gp != gp.m.g0 {
+			print("\n")
+			goroutineheader(gp)
+			traceback(pc, sp, 0, gp)
+		} else if t >= 2 || _g_.m.throwing > 0 {
+			print("\nruntime stack:\n")
+			traceback(pc, sp, 0, gp)
+		}
+		if !didothers {
+			didothers = true
+			tracebackothers(gp)
+		}
+	}
+	unlock(&paniclk)
+
+	if xadd(&panicking, -1) != 0 {
+		// Some other m is panicking too.
+		// Let it print what it needs to print.
+		// Wait forever without chewing up cpu.
+		// It will exit when it's done.
+		lock(&deadlock)
+		lock(&deadlock)
+	}
+
+	if docrash {
+		crash()
+	}
+
+	exit(2)
+}
+
+//go:nosplit
+func canpanic(gp *g) bool {
+	// Note that g is m->gsignal, different from gp.
+	// Note also that g->m can change at preemption, so m can go stale
+	// if this function ever makes a function call.
+	_g_ := getg()
+	_m_ := _g_.m
+
+	// Is it okay for gp to panic instead of crashing the program?
+	// Yes, as long as it is running Go code, not runtime code,
+	// and not stuck in a system call.
+	if gp == nil || gp != _m_.curg {
+		return false
+	}
+	if _m_.locks-_m_.softfloat != 0 || _m_.mallocing != 0 || _m_.throwing != 0 || _m_.gcing != 0 || _m_.dying != 0 {
+		return false
+	}
+	status := readgstatus(gp)
+	if status&^_Gscan != _Grunning || gp.syscallsp != 0 {
+		return false
+	}
+	if GOOS == "windows" && _m_.libcallsp != 0 {
+		return false
+	}
+	return true
+}
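
recovery above is the machinery behind the user-visible recover contract: it rewinds gp to the deferproc call of the frame that deferred, then forces the "return 1" path so the compiler-generated epilogue runs with whatever the deferred function left in the named results. A small stand-alone example of that observable behavior (ordinary Go, nothing runtime-internal):

package main

import "fmt"

func safeDiv(a, b int) (q int, err error) {
	defer func() {
		if r := recover(); r != nil {
			// recovery() has rewound the stack to this frame's deferproc
			// call and forced the "return 1" path, so we land in the normal
			// return epilogue with whatever we set here.
			err = fmt.Errorf("recovered: %v", r)
		}
	}()
	q = a / b // panics when b == 0
	return q, nil
}

func main() {
	fmt.Println(safeDiv(10, 2))
	fmt.Println(safeDiv(1, 0))
}
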
diff --git a/src/runtime/parfor.c b/src/runtime/parfor.c
deleted file mode 100644
index e449568..0000000
--- a/src/runtime/parfor.c
+++ /dev/null
@@ -1,226 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Parallel for algorithm.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-
-struct ParForThread
-{
-	// the thread's iteration space [32lsb, 32msb)
-	uint64 pos;
-	// stats
-	uint64 nsteal;
-	uint64 nstealcnt;
-	uint64 nprocyield;
-	uint64 nosyield;
-	uint64 nsleep;
-	byte pad[CacheLineSize];
-};
-
-void
-runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32))
-{
-	uint32 i, begin, end;
-	uint64 *pos;
-
-	if(desc == nil || nthr == 0 || nthr > desc->nthrmax || body == nil) {
-		runtime·printf("desc=%p nthr=%d count=%d body=%p\n", desc, nthr, n, body);
-		runtime·throw("parfor: invalid args");
-	}
-
-	desc->body = body;
-	desc->done = 0;
-	desc->nthr = nthr;
-	desc->thrseq = 0;
-	desc->cnt = n;
-	desc->ctx = ctx;
-	desc->wait = wait;
-	desc->nsteal = 0;
-	desc->nstealcnt = 0;
-	desc->nprocyield = 0;
-	desc->nosyield = 0;
-	desc->nsleep = 0;
-	for(i=0; i<nthr; i++) {
-		begin = (uint64)n*i / nthr;
-		end = (uint64)n*(i+1) / nthr;
-		pos = &desc->thr[i].pos;
-		if(((uintptr)pos & 7) != 0)
-			runtime·throw("parforsetup: pos is not aligned");
-		*pos = (uint64)begin | (((uint64)end)<<32);
-	}
-}
-
-void
-runtime·parfordo(ParFor *desc)
-{
-	ParForThread *me;
-	uint32 tid, begin, end, begin2, try, victim, i;
-	uint64 *mypos, *victimpos, pos, newpos;
-	void (*body)(ParFor*, uint32);
-	bool idle;
-
-	// Obtain 0-based thread index.
-	tid = runtime·xadd(&desc->thrseq, 1) - 1;
-	if(tid >= desc->nthr) {
-		runtime·printf("tid=%d nthr=%d\n", tid, desc->nthr);
-		runtime·throw("parfor: invalid tid");
-	}
-
-	// If single-threaded, just execute the for serially.
-	if(desc->nthr==1) {
-		for(i=0; i<desc->cnt; i++)
-			desc->body(desc, i);
-		return;
-	}
-
-	body = desc->body;
-	me = &desc->thr[tid];
-	mypos = &me->pos;
-	for(;;) {
-		for(;;) {
-			// While there is local work,
-			// bump low index and execute the iteration.
-			pos = runtime·xadd64(mypos, 1);
-			begin = (uint32)pos-1;
-			end = (uint32)(pos>>32);
-			if(begin < end) {
-				body(desc, begin);
-				continue;
-			}
-			break;
-		}
-
-		// Out of work, need to steal something.
-		idle = false;
-		for(try=0;; try++) {
-			// If we don't see any work for long enough,
-			// increment the done counter...
-			if(try > desc->nthr*4 && !idle) {
-				idle = true;
-				runtime·xadd(&desc->done, 1);
-			}
-			// ...if all threads have incremented the counter,
-			// we are done.
-			if(desc->done + !idle == desc->nthr) {
-				if(!idle)
-					runtime·xadd(&desc->done, 1);
-				goto exit;
-			}
-			// Choose a random victim for stealing.
-			victim = runtime·fastrand1() % (desc->nthr-1);
-			if(victim >= tid)
-				victim++;
-			victimpos = &desc->thr[victim].pos;
-			for(;;) {
-				// See if it has any work.
-				pos = runtime·atomicload64(victimpos);
-				begin = (uint32)pos;
-				end = (uint32)(pos>>32);
-				if(begin+1 >= end) {
-					begin = end = 0;
-					break;
-				}
-				if(idle) {
-					runtime·xadd(&desc->done, -1);
-					idle = false;
-				}
-				begin2 = begin + (end-begin)/2;
-				newpos = (uint64)begin | (uint64)begin2<<32;
-				if(runtime·cas64(victimpos, pos, newpos)) {
-					begin = begin2;
-					break;
-				}
-			}
-			if(begin < end) {
-				// Has successfully stolen some work.
-				if(idle)
-					runtime·throw("parfor: should not be idle");
-				runtime·atomicstore64(mypos, (uint64)begin | (uint64)end<<32);
-				me->nsteal++;
-				me->nstealcnt += end-begin;
-				break;
-			}
-			// Backoff.
-			if(try < desc->nthr) {
-				// nothing
-			} else if (try < 4*desc->nthr) {
-				me->nprocyield++;
-				runtime·procyield(20);
-			// If a caller asked not to wait for the others, exit now
-			// (assume that most work is already done at this point).
-			} else if (!desc->wait) {
-				if(!idle)
-					runtime·xadd(&desc->done, 1);
-				goto exit;
-			} else if (try < 6*desc->nthr) {
-				me->nosyield++;
-				runtime·osyield();
-			} else {
-				me->nsleep++;
-				runtime·usleep(1);
-			}
-		}
-	}
-exit:
-	runtime·xadd64(&desc->nsteal, me->nsteal);
-	runtime·xadd64(&desc->nstealcnt, me->nstealcnt);
-	runtime·xadd64(&desc->nprocyield, me->nprocyield);
-	runtime·xadd64(&desc->nosyield, me->nosyield);
-	runtime·xadd64(&desc->nsleep, me->nsleep);
-	me->nsteal = 0;
-	me->nstealcnt = 0;
-	me->nprocyield = 0;
-	me->nosyield = 0;
-	me->nsleep = 0;
-}
-
-// For testing from Go.
-void
-runtime·newparfor_m(void)
-{
-	g->m->ptrarg[0] = runtime·parforalloc(g->m->scalararg[0]);
-}
-
-void
-runtime·parforsetup_m(void)
-{
-	ParFor *desc;
-	void *ctx;
-	void (*body)(ParFor*, uint32);
-
-	desc = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	ctx = g->m->ptrarg[1];
-	g->m->ptrarg[1] = nil;
-	body = g->m->ptrarg[2];
-	g->m->ptrarg[2] = nil;
-
-	runtime·parforsetup(desc, g->m->scalararg[0], g->m->scalararg[1], ctx, g->m->scalararg[2], body);
-}
-
-void
-runtime·parfordo_m(void)
-{
-	ParFor *desc;
-
-	desc = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	runtime·parfordo(desc);
-}
-
-void
-runtime·parforiters_m(void)
-{
-	ParFor *desc;
-	uintptr tid;
-
-	desc = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	tid = g->m->scalararg[0];
-	g->m->scalararg[0] = desc->thr[tid].pos;
-	g->m->scalararg[1] = desc->thr[tid].pos>>32;
-}
diff --git a/src/runtime/parfor.go b/src/runtime/parfor.go
new file mode 100644
index 0000000..14870c9
--- /dev/null
+++ b/src/runtime/parfor.go
@@ -0,0 +1,186 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Parallel for algorithm.
+
+package runtime
+
+import "unsafe"
+
+type parforthread struct {
+	// the thread's iteration space [32lsb, 32msb)
+	pos uint64
+	// stats
+	nsteal     uint64
+	nstealcnt  uint64
+	nprocyield uint64
+	nosyield   uint64
+	nsleep     uint64
+	pad        [_CacheLineSize]byte
+}
+
+func desc_thr_index(desc *parfor, i uint32) *parforthread {
+	return (*parforthread)(add(unsafe.Pointer(desc.thr), uintptr(i)*unsafe.Sizeof(*desc.thr)))
+}
+
+func parforsetup(desc *parfor, nthr, n uint32, ctx unsafe.Pointer, wait bool, body func(*parfor, uint32)) {
+	if desc == nil || nthr == 0 || nthr > desc.nthrmax || body == nil {
+		print("desc=", desc, " nthr=", nthr, " count=", n, " body=", body, "\n")
+		gothrow("parfor: invalid args")
+	}
+
+	desc.body = *(*unsafe.Pointer)(unsafe.Pointer(&body))
+	desc.done = 0
+	desc.nthr = nthr
+	desc.thrseq = 0
+	desc.cnt = n
+	desc.ctx = ctx
+	desc.wait = wait
+	desc.nsteal = 0
+	desc.nstealcnt = 0
+	desc.nprocyield = 0
+	desc.nosyield = 0
+	desc.nsleep = 0
+
+	for i := uint32(0); i < nthr; i++ {
+		begin := uint32(uint64(n) * uint64(i) / uint64(nthr))
+		end := uint32(uint64(n) * uint64(i+1) / uint64(nthr))
+		pos := &desc_thr_index(desc, i).pos
+		if uintptr(unsafe.Pointer(pos))&7 != 0 {
+			gothrow("parforsetup: pos is not aligned")
+		}
+		*pos = uint64(begin) | uint64(end)<<32
+	}
+}
+
+func parfordo(desc *parfor) {
+	// Obtain 0-based thread index.
+	tid := xadd(&desc.thrseq, 1) - 1
+	if tid >= desc.nthr {
+		print("tid=", tid, " nthr=", desc.nthr, "\n")
+		gothrow("parfor: invalid tid")
+	}
+
+	// If single-threaded, just execute the for serially.
+	body := *(*func(*parfor, uint32))(unsafe.Pointer(&desc.body))
+	if desc.nthr == 1 {
+		for i := uint32(0); i < desc.cnt; i++ {
+			body(desc, i)
+		}
+		return
+	}
+
+	me := desc_thr_index(desc, tid)
+	mypos := &me.pos
+	for {
+		for {
+			// While there is local work,
+			// bump low index and execute the iteration.
+			pos := xadd64(mypos, 1)
+			begin := uint32(pos) - 1
+			end := uint32(pos >> 32)
+			if begin < end {
+				body(desc, begin)
+				continue
+			}
+			break
+		}
+
+		// Out of work, need to steal something.
+		idle := false
+		for try := uint32(0); ; try++ {
+			// If we don't see any work for long enough,
+			// increment the done counter...
+			if try > desc.nthr*4 && !idle {
+				idle = true
+				xadd(&desc.done, 1)
+			}
+
+			// ...if all threads have incremented the counter,
+			// we are done.
+			extra := uint32(0)
+			if !idle {
+				extra = 1
+			}
+			if desc.done+extra == desc.nthr {
+				if !idle {
+					xadd(&desc.done, 1)
+				}
+				goto exit
+			}
+
+			// Choose a random victim for stealing.
+			var begin, end uint32
+			victim := fastrand1() % (desc.nthr - 1)
+			if victim >= tid {
+				victim++
+			}
+			victimpos := &desc_thr_index(desc, victim).pos
+			for {
+				// See if it has any work.
+				pos := atomicload64(victimpos)
+				begin = uint32(pos)
+				end = uint32(pos >> 32)
+				if begin+1 >= end {
+					end = 0
+					begin = end
+					break
+				}
+				if idle {
+					xadd(&desc.done, -1)
+					idle = false
+				}
+				begin2 := begin + (end-begin)/2
+				newpos := uint64(begin) | uint64(begin2)<<32
+				if cas64(victimpos, pos, newpos) {
+					begin = begin2
+					break
+				}
+			}
+			if begin < end {
+				// Has successfully stolen some work.
+				if idle {
+					gothrow("parfor: should not be idle")
+				}
+				atomicstore64(mypos, uint64(begin)|uint64(end)<<32)
+				me.nsteal++
+				me.nstealcnt += uint64(end) - uint64(begin)
+				break
+			}
+
+			// Backoff.
+			if try < desc.nthr {
+				// nothing
+			} else if try < 4*desc.nthr {
+				me.nprocyield++
+				procyield(20)
+			} else if !desc.wait {
+				// If a caller asked not to wait for the others, exit now
+				// (assume that most work is already done at this point).
+				if !idle {
+					xadd(&desc.done, 1)
+				}
+				goto exit
+			} else if try < 6*desc.nthr {
+				me.nosyield++
+				osyield()
+			} else {
+				me.nsleep++
+				usleep(1)
+			}
+		}
+	}
+
+exit:
+	xadd64(&desc.nsteal, int64(me.nsteal))
+	xadd64(&desc.nstealcnt, int64(me.nstealcnt))
+	xadd64(&desc.nprocyield, int64(me.nprocyield))
+	xadd64(&desc.nosyield, int64(me.nosyield))
+	xadd64(&desc.nsleep, int64(me.nsleep))
+	me.nsteal = 0
+	me.nstealcnt = 0
+	me.nprocyield = 0
+	me.nosyield = 0
+	me.nsleep = 0
+}
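
The core trick in parfordo is packing each thread's remaining iteration space [begin, end) into a single uint64 — begin in the low 32 bits, end in the high 32 — so claiming one iteration is a single xadd64 and stealing is a single cas64 that hands the thief the upper half of the victim's range. A stand-alone sketch of that packing and the steal split, using sync/atomic (names are illustrative, not the runtime's):

package main

import (
	"fmt"
	"sync/atomic"
)

// pack stores [begin, end) in one word: begin in the low 32 bits, end in the
// high 32 bits, the layout parforsetup writes into each thread's pos.
func pack(begin, end uint32) uint64 { return uint64(begin) | uint64(end)<<32 }

// unpack splits a packed position back into its two halves.
func unpack(pos uint64) (begin, end uint32) { return uint32(pos), uint32(pos >> 32) }

// next claims one iteration the way parfordo does: a single atomic add bumps
// the low half, and the pre-add value tells us which index we got.
func next(pos *uint64) (i uint32, ok bool) {
	p := atomic.AddUint64(pos, 1)
	begin, end := uint32(p)-1, uint32(p>>32)
	return begin, begin < end
}

// stealHalf mirrors the victim loop in parfordo: load the victim's range,
// give up if at most one iteration is left, otherwise CAS the victim down to
// its lower half and take the upper half.
func stealHalf(victim *uint64) (begin, end uint32, ok bool) {
	for {
		old := atomic.LoadUint64(victim)
		b, e := unpack(old)
		if b+1 >= e {
			return 0, 0, false
		}
		mid := b + (e-b)/2
		if atomic.CompareAndSwapUint64(victim, old, pack(b, mid)) {
			return mid, e, true
		}
	}
}

func main() {
	pos := pack(0, 10)
	i, _ := next(&pos)
	fmt.Println("ran iteration", i) // 0
	b, e, ok := stealHalf(&pos)
	fmt.Println("stole", b, e, ok) // 5 10 true
}
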
diff --git a/src/runtime/pprof/pprof_test.go b/src/runtime/pprof/pprof_test.go
index 8677cb3..101c059 100644
--- a/src/runtime/pprof/pprof_test.go
+++ b/src/runtime/pprof/pprof_test.go
@@ -249,7 +249,7 @@
 			// exists to record a PC without a traceback. Those are okay.
 			if len(stk) == 2 {
 				f := runtime.FuncForPC(stk[1])
-				if f != nil && (f.Name() == "System" || f.Name() == "ExternalCode" || f.Name() == "GC") {
+				if f != nil && (f.Name() == "runtime._System" || f.Name() == "runtime._ExternalCode" || f.Name() == "runtime._GC") {
 					return
 				}
 			}
diff --git a/src/runtime/proc.c b/src/runtime/proc.c
deleted file mode 100644
index ce39db4..0000000
--- a/src/runtime/proc.c
+++ /dev/null
@@ -1,3497 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "zaexperiment.h"
-#include "malloc.h"
-#include "stack.h"
-#include "race.h"
-#include "type.h"
-#include "mgc0.h"
-#include "textflag.h"
-
-// Goroutine scheduler
-// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
-//
-// The main concepts are:
-// G - goroutine.
-// M - worker thread, or machine.
-// P - processor, a resource that is required to execute Go code.
-//     M must have an associated P to execute Go code, however it can be
-//     blocked or in a syscall w/o an associated P.
-//
-// Design doc at http://golang.org/s/go11sched.
-
-enum
-{
-	// Number of goroutine ids to grab from runtime·sched.goidgen to local per-P cache at once.
-	// 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
-	GoidCacheBatch = 16,
-};
-
-SchedT	runtime·sched;
-int32	runtime·gomaxprocs;
-uint32	runtime·needextram;
-bool	runtime·iscgo;
-M	runtime·m0;
-G	runtime·g0;	// idle goroutine for m0
-G*	runtime·lastg;
-M*	runtime·allm;
-M*	runtime·extram;
-P*	runtime·allp[MaxGomaxprocs+1];
-int8*	runtime·goos;
-int32	runtime·ncpu;
-int32	runtime·newprocs;
-
-Mutex runtime·allglock;	// the following vars are protected by this lock or by stoptheworld
-G**	runtime·allg;
-Slice	runtime·allgs;
-uintptr runtime·allglen;
-ForceGCState	runtime·forcegc;
-
-void runtime·mstart(void);
-static void runqput(P*, G*);
-static G* runqget(P*);
-static bool runqputslow(P*, G*, uint32, uint32);
-static G* runqsteal(P*, P*);
-static void mput(M*);
-static M* mget(void);
-static void mcommoninit(M*);
-static void schedule(void);
-static void procresize(int32);
-static void acquirep(P*);
-static P* releasep(void);
-static void newm(void(*)(void), P*);
-static void stopm(void);
-static void startm(P*, bool);
-static void handoffp(P*);
-static void wakep(void);
-static void stoplockedm(void);
-static void startlockedm(G*);
-static void sysmon(void);
-static uint32 retake(int64);
-static void incidlelocked(int32);
-static void checkdead(void);
-static void exitsyscall0(G*);
-void runtime·park_m(G*);
-static void goexit0(G*);
-static void gfput(P*, G*);
-static G* gfget(P*);
-static void gfpurge(P*);
-static void globrunqput(G*);
-static void globrunqputbatch(G*, G*, int32);
-static G* globrunqget(P*, int32);
-static P* pidleget(void);
-static void pidleput(P*);
-static void injectglist(G*);
-static bool preemptall(void);
-static bool preemptone(P*);
-static bool exitsyscallfast(void);
-static bool haveexperiment(int8*);
-void runtime·allgadd(G*);
-static void dropg(void);
-
-extern String runtime·buildVersion;
-
-// For cgo-using programs with external linking,
-// export "main" (defined in assembly) so that libc can handle basic
-// C runtime startup and call the Go program as if it were
-// the C main function.
-#pragma cgo_export_static main
-
-// Filled in by dynamic linker when Cgo is available.
-void (*_cgo_init)(void);
-void (*_cgo_malloc)(void);
-void (*_cgo_free)(void);
-
-// Copy for Go code.
-void* runtime·cgoMalloc;
-void* runtime·cgoFree;
-
-// The bootstrap sequence is:
-//
-//	call osinit
-//	call schedinit
-//	make & queue new G
-//	call runtime·mstart
-//
-// The new G calls runtime·main.
-void
-runtime·schedinit(void)
-{
-	int32 n, procs;
-	byte *p;
-
-	// raceinit must be the first call to race detector.
-	// In particular, it must be done before mallocinit below calls racemapshadow.
-	if(raceenabled)
-		g->racectx = runtime·raceinit();
-
-	runtime·sched.maxmcount = 10000;
-
-	runtime·tracebackinit();
-	runtime·symtabinit();
-	runtime·stackinit();
-	runtime·mallocinit();
-	mcommoninit(g->m);
-	
-	runtime·goargs();
-	runtime·goenvs();
-	runtime·parsedebugvars();
-	runtime·gcinit();
-
-	runtime·sched.lastpoll = runtime·nanotime();
-	procs = 1;
-	p = runtime·getenv("GOMAXPROCS");
-	if(p != nil && (n = runtime·atoi(p)) > 0) {
-		if(n > MaxGomaxprocs)
-			n = MaxGomaxprocs;
-		procs = n;
-	}
-	procresize(procs);
-
-	if(runtime·buildVersion.str == nil) {
-		// Condition should never trigger.  This code just serves
-		// to ensure runtime·buildVersion is kept in the resulting binary.
-		runtime·buildVersion.str = (uint8*)"unknown";
-		runtime·buildVersion.len = 7;
-	}
-
-	runtime·cgoMalloc = _cgo_malloc;
-	runtime·cgoFree = _cgo_free;
-}
-
-void
-runtime·newsysmon(void)
-{
-	newm(sysmon, nil);
-}
-
-static void
-dumpgstatus(G* gp)
-{
-	runtime·printf("runtime: gp: gp=%p, goid=%D, gp->atomicstatus=%x\n", gp, gp->goid, runtime·readgstatus(gp));
-	runtime·printf("runtime:  g:  g=%p, goid=%D,  g->atomicstatus=%x\n", g, g->goid, runtime·readgstatus(g));
-}
-
-static void
-checkmcount(void)
-{
-	// sched lock is held
-	if(runtime·sched.mcount > runtime·sched.maxmcount){
-		runtime·printf("runtime: program exceeds %d-thread limit\n", runtime·sched.maxmcount);
-		runtime·throw("thread exhaustion");
-	}
-}
-
-static void
-mcommoninit(M *mp)
-{
-	// g0 stack won't make sense for user (and is not necessarily unwindable).
-	if(g != g->m->g0)
-		runtime·callers(1, mp->createstack, nelem(mp->createstack));
-
-	mp->fastrand = 0x49f6428aUL + mp->id + runtime·cputicks();
-
-	runtime·lock(&runtime·sched.lock);
-	mp->id = runtime·sched.mcount++;
-	checkmcount();
-	runtime·mpreinit(mp);
-	if(mp->gsignal)
-		mp->gsignal->stackguard1 = mp->gsignal->stack.lo + StackGuard;
-
-	// Add to runtime·allm so garbage collector doesn't free g->m
-	// when it is just in a register or thread-local storage.
-	mp->alllink = runtime·allm;
-	// runtime·NumCgoCall() iterates over allm w/o schedlock,
-	// so we need to publish it safely.
-	runtime·atomicstorep(&runtime·allm, mp);
-	runtime·unlock(&runtime·sched.lock);
-}
-
-// Mark gp ready to run.
-void
-runtime·ready(G *gp)
-{
-	uint32 status;
-
-	status = runtime·readgstatus(gp);
-	// Mark runnable.
-	g->m->locks++;  // disable preemption because it can be holding p in a local var
-	if((status&~Gscan) != Gwaiting){
-		dumpgstatus(gp);
-		runtime·throw("bad g->status in ready");
-	}
-	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
-	runtime·casgstatus(gp, Gwaiting, Grunnable);
-	runqput(g->m->p, gp);
-	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0)  // TODO: fast atomic
-		wakep();
-	g->m->locks--;
-	if(g->m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
-		g->stackguard0 = StackPreempt;
-}
-
-void
-runtime·ready_m(void)
-{
-	G *gp;
-
-	gp = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	runtime·ready(gp);
-}
-
-int32
-runtime·gcprocs(void)
-{
-	int32 n;
-
-	// Figure out how many CPUs to use during GC.
-	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
-	runtime·lock(&runtime·sched.lock);
-	n = runtime·gomaxprocs;
-	if(n > runtime·ncpu)
-		n = runtime·ncpu;
-	if(n > MaxGcproc)
-		n = MaxGcproc;
-	if(n > runtime·sched.nmidle+1) // one M is currently running
-		n = runtime·sched.nmidle+1;
-	runtime·unlock(&runtime·sched.lock);
-	return n;
-}
-
-static bool
-needaddgcproc(void)
-{
-	int32 n;
-
-	runtime·lock(&runtime·sched.lock);
-	n = runtime·gomaxprocs;
-	if(n > runtime·ncpu)
-		n = runtime·ncpu;
-	if(n > MaxGcproc)
-		n = MaxGcproc;
-	n -= runtime·sched.nmidle+1; // one M is currently running
-	runtime·unlock(&runtime·sched.lock);
-	return n > 0;
-}
-
-void
-runtime·helpgc(int32 nproc)
-{
-	M *mp;
-	int32 n, pos;
-
-	runtime·lock(&runtime·sched.lock);
-	pos = 0;
-	for(n = 1; n < nproc; n++) {  // one M is currently running
-		if(runtime·allp[pos]->mcache == g->m->mcache)
-			pos++;
-		mp = mget();
-		if(mp == nil)
-			runtime·throw("runtime·gcprocs inconsistency");
-		mp->helpgc = n;
-		mp->mcache = runtime·allp[pos]->mcache;
-		pos++;
-		runtime·notewakeup(&mp->park);
-	}
-	runtime·unlock(&runtime·sched.lock);
-}
-
-// Similar to stoptheworld but best-effort and can be called several times.
-// There is no reverse operation, used during crashing.
-// This function must not lock any mutexes.
-void
-runtime·freezetheworld(void)
-{
-	int32 i;
-
-	if(runtime·gomaxprocs == 1)
-		return;
-	// stopwait and preemption requests can be lost
-	// due to races with concurrently executing threads,
-	// so try several times
-	for(i = 0; i < 5; i++) {
-		// this should tell the scheduler to not start any new goroutines
-		runtime·sched.stopwait = 0x7fffffff;
-		runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
-		// this should stop running goroutines
-		if(!preemptall())
-			break;  // no running goroutines
-		runtime·usleep(1000);
-	}
-	// to be sure
-	runtime·usleep(1000);
-	preemptall();
-	runtime·usleep(1000);
-}
-
-static bool
-isscanstatus(uint32 status)
-{
-	if(status == Gscan)
-		runtime·throw("isscanstatus: Bad status Gscan");
-	return (status&Gscan) == Gscan;
-}
-
-// All reads and writes of g's status go through readgstatus, casgstatus
-// castogscanstatus, casfromgscanstatus.
-#pragma textflag NOSPLIT
-uint32
-runtime·readgstatus(G *gp)
-{
-	return runtime·atomicload(&gp->atomicstatus);
-}
-
-// The Gscanstatuses are acting like locks and this releases them.
-// If it proves to be a performance hit we should be able to make these
-// simple atomic stores but for now we are going to throw if
-// we see an inconsistent state.
-void
-runtime·casfromgscanstatus(G *gp, uint32 oldval, uint32 newval)
-{
-	bool success = false;
-
-	// Check that transition is valid.
-	switch(oldval) {
-	case Gscanrunnable:
-	case Gscanwaiting:
-	case Gscanrunning:
-	case Gscansyscall:
-		if(newval == (oldval&~Gscan))
-			success = runtime·cas(&gp->atomicstatus, oldval, newval);
-		break;
-	case Gscanenqueue:
-		if(newval == Gwaiting)
-			success = runtime·cas(&gp->atomicstatus, oldval, newval);
-		break;
-	}	
-	if(!success){
-		runtime·printf("runtime: casfromgscanstatus failed gp=%p, oldval=%d, newval=%d\n",  
-			gp, oldval, newval);
-		dumpgstatus(gp);
-		runtime·throw("casfromgscanstatus: gp->status is not in scan state");
-	}
-}
-
-// This will return false if the gp is not in the expected status and the cas fails. 
-// This acts like a lock acquire while the casfromgstatus acts like a lock release.
-bool
-runtime·castogscanstatus(G *gp, uint32 oldval, uint32 newval)
-{
-	switch(oldval) {
-	case Grunnable:
-	case Gwaiting:
-	case Gsyscall:
-		if(newval == (oldval|Gscan))
-			return runtime·cas(&gp->atomicstatus, oldval, newval);
-		break;
-	case Grunning:
-		if(newval == Gscanrunning || newval == Gscanenqueue)
-			return runtime·cas(&gp->atomicstatus, oldval, newval);
-		break;   
-	}
-
-	runtime·printf("runtime: castogscanstatus oldval=%d newval=%d\n", oldval, newval);
-	runtime·throw("castogscanstatus");
-	return false; // not reached
-}
-
-static void badcasgstatus(void);
-static void helpcasgstatus(void);
-
-// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
-// and casfromgscanstatus instead.
-// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that 
-// put it in the Gscan state is finished.
-#pragma textflag NOSPLIT
-void
-runtime·casgstatus(G *gp, uint32 oldval, uint32 newval)
-{
-	void (*fn)(void);
-
-	if((oldval&Gscan) || (newval&Gscan) || oldval == newval) {
-		g->m->scalararg[0] = oldval;
-		g->m->scalararg[1] = newval;
-		fn = badcasgstatus;
-		runtime·onM(&fn);
-	}
-
-	// loop if gp->atomicstatus is in a scan state giving
-	// GC time to finish and change the state to oldval.
-	while(!runtime·cas(&gp->atomicstatus, oldval, newval)) {
-
-	}	
-}
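
The comment above casgstatus is the key point: a failing CAS here normally means the collector is briefly holding the goroutine in a Gscan state, so the writer just spins until the scan bit is dropped and the expected old value reappears. A stand-alone sketch of that spin-until-CAS idiom with sync/atomic (the status values are made up for illustration):

package main

import (
	"fmt"
	"sync/atomic"
)

const (
	running uint32 = 1
	waiting uint32 = 2
	scanBit uint32 = 0x1000 // stands in for the Gscan bit
)

// casStatus refuses scan states outright (like casgstatus) and otherwise
// spins until the word holds old and can be swapped to new.
func casStatus(status *uint32, old, new uint32) {
	if old&scanBit != 0 || new&scanBit != 0 || old == new {
		panic("casStatus: bad incoming values")
	}
	for !atomic.CompareAndSwapUint32(status, old, new) {
		// A scanner may briefly hold old|scanBit; keep retrying until it
		// releases the status back to old.
	}
}

func main() {
	s := running
	casStatus(&s, running, waiting)
	fmt.Println(s == waiting) // true
}
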
-
-static void
-badcasgstatus(void)
-{
-	uint32 oldval, newval;
-	
-	oldval = g->m->scalararg[0];
-	newval = g->m->scalararg[1];
-	g->m->scalararg[0] = 0;
-	g->m->scalararg[1] = 0;
-
-	runtime·printf("casgstatus: oldval=%d, newval=%d\n", oldval, newval);
-	runtime·throw("casgstatus: bad incoming values");
-}
-
-static void
-helpcasgstatus(void)
-{
-	G *gp;
-	
-	gp = g->m->ptrarg[0];
-	g->m->ptrarg[0] = 0;
-	runtime·gcphasework(gp);
-}
-
-// stopg ensures that gp is stopped at a GC safe point where its stack can be scanned
-// or in the context of a moving collector the pointers can be flipped from pointing 
-// to old object to pointing to new objects. 
-// If stopg returns true, the caller knows gp is at a GC safe point and will remain there until
-// the caller calls restartg.
-// If stopg returns false, the caller is not responsible for calling restartg. This can happen
-// if another thread, either the gp itself or another GC thread is taking the responsibility 
-// to do the GC work related to this thread.
-bool
-runtime·stopg(G *gp)
-{
-	uint32 s;
-
-	for(;;) {
-		if(gp->gcworkdone)
-			return false;
-
-		s = runtime·readgstatus(gp);
-		switch(s) {
-		default:
-			dumpgstatus(gp);
-			runtime·throw("stopg: gp->atomicstatus is not valid");
-
-		case Gdead:
-			return false;
-
-		case Gcopystack:
-			// Loop until a new stack is in place.
-			break;
-
-		case Grunnable:
-		case Gsyscall:
-		case Gwaiting:
-			// Claim goroutine by setting scan bit.
-			if(!runtime·castogscanstatus(gp, s, s|Gscan))
-				break;
-			// In scan state, do work.
-			runtime·gcphasework(gp);
-			return true;
-
-		case Gscanrunnable:
-		case Gscanwaiting:
-		case Gscansyscall:
-			// Goroutine already claimed by another GC helper.
-			return false;
-
-		case Grunning:
-			if(runtime·gcphase == GCscan) {
-				gp->gcworkdone = true;
-				return false;
-				// Running routines not scanned during
-				// GCscan phase, we only scan non-running routines.
-			}
-				
-			// Claim goroutine, so we aren't racing with a status
-			// transition away from Grunning.
-			if(!runtime·castogscanstatus(gp, Grunning, Gscanrunning))
-				break;
-
-			// Mark gp for preemption.
-			if(!gp->gcworkdone) {
-				gp->preemptscan = true;
-				gp->preempt = true;
-				gp->stackguard0 = StackPreempt;
-			}
-
-			// Unclaim.
-			runtime·casfromgscanstatus(gp, Gscanrunning, Grunning);
-			return false;
-		}
-	}
-	// Should not be here....
-}
-
-// The GC requests that this routine be moved from a scanmumble state to a mumble state.
-void 
-runtime·restartg (G *gp)
-{
-	uint32 s;
-
-	s = runtime·readgstatus(gp);
-	switch(s) {
-	default:
-		dumpgstatus(gp); 
-		runtime·throw("restartg: unexpected status");
-
-	case Gdead:
-		break;
-
-	case Gscanrunnable:
-	case Gscanwaiting:
-	case Gscansyscall:
-		runtime·casfromgscanstatus(gp, s, s&~Gscan);
-		break;
-
-	case Gscanenqueue:
-		// Scan is now completed.
-		// Goroutine now needs to be made runnable.
-		// We put it on the global run queue; ready blocks on the global scheduler lock.
-		runtime·casfromgscanstatus(gp, Gscanenqueue, Gwaiting);
-		if(gp != g->m->curg)
-			runtime·throw("processing Gscanenqueue on wrong m");
-		dropg();
-		runtime·ready(gp);
-		break;
-	}
-}
-
-static void
-stopscanstart(G* gp)
-{
-	if(g == gp)
-		runtime·throw("GC not moved to G0");
-	if(runtime·stopg(gp)) {
-		if(!isscanstatus(runtime·readgstatus(gp))) {
-			dumpgstatus(gp);
-			runtime·throw("GC not in scan state");
-		}
-		runtime·restartg(gp);
-	}
-}
-
-// Runs on g0 and does the actual work after putting the g back on the run queue.
-static void
-mquiesce(G *gpmaster)
-{
-	G* gp;
-	uint32 i;
-	uint32 status;
-	uint32 activeglen;
-
-	// enqueue the calling goroutine.
-	runtime·restartg(gpmaster);
-
-	activeglen = runtime·allglen;
-	for(i = 0; i < activeglen; i++) {
-		gp = runtime·allg[i];
-		if(runtime·readgstatus(gp) == Gdead) 
-			gp->gcworkdone = true; // noop scan.
-		else 
-			gp->gcworkdone = false; 
-		stopscanstart(gp); 
-	}
-
-	// Check that the G's gcwork (such as scanning) has been done. If not do it now. 
-	// You can end up doing work here if the page trap on a Grunning Goroutine has
-	// not been sprung or in some race situations. For example a runnable goes dead
-	// and is started up again with a gp->gcworkdone set to false.
-	for(i = 0; i < activeglen; i++) {
-		gp = runtime·allg[i];
-		while (!gp->gcworkdone) {
-			status = runtime·readgstatus(gp);
-			if(status == Gdead) {
-				gp->gcworkdone = true; // scan is a noop
-				break;
-				//do nothing, scan not needed. 
-			}
-			if(status == Grunning && gp->stackguard0 == (uintptr)StackPreempt && runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) // nanosecond arg 
-				runtime·noteclear(&runtime·sched.stopnote);
-			else 
-				stopscanstart(gp);
-		}
-	}
-
-	for(i = 0; i < activeglen; i++) {
-		gp = runtime·allg[i];
-		status = runtime·readgstatus(gp);
-		if(isscanstatus(status)) {
-			runtime·printf("mstopandscang:bottom: post scan bad status gp=%p has status %x\n", gp, status);
-			dumpgstatus(gp);
-		}
-		if(!gp->gcworkdone && status != Gdead) {
-			runtime·printf("mstopandscang:bottom: post scan gp=%p->gcworkdone still false\n", gp);
-			dumpgstatus(gp);
-		}
-	}
-
-	schedule(); // Never returns.
-}
-
-// quiesce moves all the goroutines to a GC safepoint which for now is at a preemption point.
-// If the global runtime·gcphase is GCmark quiesce will ensure that all of the goroutine's stacks
-// have been scanned before it returns.
-void
-runtime·quiesce(G* mastergp)
-{
-	void (*fn)(G*);
-
-	runtime·castogscanstatus(mastergp, Grunning, Gscanenqueue);
-	// Now move this to the g0 (aka m) stack.
-	// g0 will potentially scan this thread and put mastergp on the runqueue 
-	fn = mquiesce;
-	runtime·mcall(&fn);
-}
-
-// This is used by the GC as well as the routines that do stack dumps. In the case
-// of GC all the routines can be reliably stopped. This is not always the case
-// when the system is in panic or being exited.
-void
-runtime·stoptheworld(void)
-{
-	int32 i;
-	uint32 s;
-	P *p;
-	bool wait;
-
-	// If we hold a lock, then we won't be able to stop another M
-	// that is blocked trying to acquire the lock.
-	if(g->m->locks > 0)
-		runtime·throw("stoptheworld: holding locks");
-
-	runtime·lock(&runtime·sched.lock);
-	runtime·sched.stopwait = runtime·gomaxprocs;
-	runtime·atomicstore((uint32*)&runtime·sched.gcwaiting, 1);
-	preemptall();
-	// stop current P
-	g->m->p->status = Pgcstop; // Pgcstop is only diagnostic.
-	runtime·sched.stopwait--;
-	// try to retake all P's in Psyscall status
-	for(i = 0; i < runtime·gomaxprocs; i++) {
-		p = runtime·allp[i];
-		s = p->status;
-		if(s == Psyscall && runtime·cas(&p->status, s, Pgcstop))
-			runtime·sched.stopwait--;
-	}
-	// stop idle P's
-	while(p = pidleget()) {
-		p->status = Pgcstop;
-		runtime·sched.stopwait--;
-	}
-	wait = runtime·sched.stopwait > 0;
-	runtime·unlock(&runtime·sched.lock);
-
-	// wait for remaining P's to stop voluntarily
-	if(wait) {
-		for(;;) {
-			// wait for 100us, then try to re-preempt in case of any races
-			if(runtime·notetsleep(&runtime·sched.stopnote, 100*1000)) {
-				runtime·noteclear(&runtime·sched.stopnote);
-				break;
-			}
-			preemptall();
-		}
-	}
-	if(runtime·sched.stopwait)
-		runtime·throw("stoptheworld: not stopped");
-	for(i = 0; i < runtime·gomaxprocs; i++) {
-		p = runtime·allp[i];
-		if(p->status != Pgcstop)
-			runtime·throw("stoptheworld: not stopped");
-	}
-}
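
stoptheworld above is a counted handshake: stopwait starts at gomaxprocs, the caller accounts for its own P and any P it can flip out of Psyscall with a CAS, and the rest are waited for (re-preempting every 100µs) until the counter reaches zero. A toy version of that countdown, with goroutines playing the parked Ps and a channel standing in for the runtime's note sleep:

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	const procs = 4
	var stopwait int32 = procs
	stopped := make(chan struct{}, procs)

	for i := 0; i < procs; i++ {
		go func(id int) {
			time.Sleep(time.Duration(id) * time.Millisecond) // pretend to finish running
			atomic.AddInt32(&stopwait, -1)                    // "this P has stopped"
			stopped <- struct{}{}
		}(i)
	}

	// The real code re-preempts and retries on a 100µs note sleep; here we
	// just wait for the next P to report in.
	for atomic.LoadInt32(&stopwait) > 0 {
		<-stopped
	}
	fmt.Println("world stopped")
}
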
-
-static void
-mhelpgc(void)
-{
-	g->m->helpgc = -1;
-}
-
-void
-runtime·starttheworld(void)
-{
-	P *p, *p1;
-	M *mp;
-	G *gp;
-	bool add;
-
-	g->m->locks++;  // disable preemption because it can be holding p in a local var
-	gp = runtime·netpoll(false);  // non-blocking
-	injectglist(gp);
-	add = needaddgcproc();
-	runtime·lock(&runtime·sched.lock);
-	if(runtime·newprocs) {
-		procresize(runtime·newprocs);
-		runtime·newprocs = 0;
-	} else
-		procresize(runtime·gomaxprocs);
-	runtime·sched.gcwaiting = 0;
-
-	p1 = nil;
-	while(p = pidleget()) {
-		// procresize() puts p's with work at the beginning of the list.
-		// Once we reach a p without a run queue, the rest don't have one either.
-		if(p->runqhead == p->runqtail) {
-			pidleput(p);
-			break;
-		}
-		p->m = mget();
-		p->link = p1;
-		p1 = p;
-	}
-	if(runtime·sched.sysmonwait) {
-		runtime·sched.sysmonwait = false;
-		runtime·notewakeup(&runtime·sched.sysmonnote);
-	}
-	runtime·unlock(&runtime·sched.lock);
-
-	while(p1) {
-		p = p1;
-		p1 = p1->link;
-		if(p->m) {
-			mp = p->m;
-			p->m = nil;
-			if(mp->nextp)
-				runtime·throw("starttheworld: inconsistent mp->nextp");
-			mp->nextp = p;
-			runtime·notewakeup(&mp->park);
-		} else {
-			// Start M to run P.  Do not start another M below.
-			newm(nil, p);
-			add = false;
-		}
-	}
-
-	if(add) {
-		// If GC could have used another helper proc, start one now,
-		// in the hope that it will be available next time.
-		// It would have been even better to start it before the collection,
-		// but doing so requires allocating memory, so it's tricky to
-		// coordinate.  This lazy approach works out in practice:
-		// we don't mind if the first couple gc rounds don't have quite
-		// the maximum number of procs.
-		newm(mhelpgc, nil);
-	}
-	g->m->locks--;
-	if(g->m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
-		g->stackguard0 = StackPreempt;
-}
-
-static void mstart(void);
-
-// Called to start an M.
-#pragma textflag NOSPLIT
-void
-runtime·mstart(void)
-{
-	uintptr x, size;
-	
-	if(g->stack.lo == 0) {
-		// Initialize stack bounds from system stack.
-		// Cgo may have left stack size in stack.hi.
-		size = g->stack.hi;
-		if(size == 0)
-			size = 8192;
-		g->stack.hi = (uintptr)&x;
-		g->stack.lo = g->stack.hi - size + 1024;
-	}
-	
-	// Initialize stack guards so that we can start calling
-	// both Go and C functions with stack growth prologues.
-	g->stackguard0 = g->stack.lo + StackGuard;
-	g->stackguard1 = g->stackguard0;
-	mstart();
-}
-
-static void
-mstart(void)
-{
-	if(g != g->m->g0)
-		runtime·throw("bad runtime·mstart");
-
-	// Record top of stack for use by mcall.
-	// Once we call schedule we're never coming back,
-	// so other calls can reuse this stack space.
-	runtime·gosave(&g->m->g0->sched);
-	g->m->g0->sched.pc = (uintptr)-1;  // make sure it is never used
-	runtime·asminit();
-	runtime·minit();
-
-	// Install signal handlers; after minit so that minit can
-	// prepare the thread to be able to handle the signals.
-	if(g->m == &runtime·m0)
-		runtime·initsig();
-	
-	if(g->m->mstartfn)
-		g->m->mstartfn();
-
-	if(g->m->helpgc) {
-		g->m->helpgc = 0;
-		stopm();
-	} else if(g->m != &runtime·m0) {
-		acquirep(g->m->nextp);
-		g->m->nextp = nil;
-	}
-	schedule();
-
-	// TODO(brainman): This point is never reached, because scheduler
-	// does not release os threads at the moment. But once this path
-	// is enabled, we must remove our seh here.
-}
-
-// When running with cgo, we call _cgo_thread_start
-// to start threads for us so that we can play nicely with
-// foreign code.
-void (*_cgo_thread_start)(void*);
-
-typedef struct CgoThreadStart CgoThreadStart;
-struct CgoThreadStart
-{
-	G *g;
-	uintptr *tls;
-	void (*fn)(void);
-};
-
-M *runtime·newM(void); // in proc.go
-
-// Allocate a new m unassociated with any thread.
-// Can use p for allocation context if needed.
-M*
-runtime·allocm(P *p)
-{
-	M *mp;
-
-	g->m->locks++;  // disable GC because it can be called from sysmon
-	if(g->m->p == nil)
-		acquirep(p);  // temporarily borrow p for mallocs in this function
-	mp = runtime·newM();
-	mcommoninit(mp);
-
-	// In case of cgo or Solaris, pthread_create will make us a stack.
-	// Windows and Plan 9 will layout sched stack on OS stack.
-	if(runtime·iscgo || Solaris || Windows || Plan9)
-		mp->g0 = runtime·malg(-1);
-	else
-		mp->g0 = runtime·malg(8192);
-	runtime·writebarrierptr_nostore(&mp->g0, mp->g0);
-	mp->g0->m = mp;
-	runtime·writebarrierptr_nostore(&mp->g0->m, mp->g0->m);
-
-	if(p == g->m->p)
-		releasep();
-	g->m->locks--;
-	if(g->m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
-		g->stackguard0 = StackPreempt;
-
-	return mp;
-}
-
-G *runtime·newG(void); // in proc.go
-
-static G*
-allocg(void)
-{
-	return runtime·newG();
-}
-
-static M* lockextra(bool nilokay);
-static void unlockextra(M*);
-
-// needm is called when a cgo callback happens on a
-// thread without an m (a thread not created by Go).
-// In this case, needm is expected to find an m to use
-// and return with m, g initialized correctly.
-// Since m and g are not set now (likely nil, but see below)
-// needm is limited in what routines it can call. In particular
-// it can only call nosplit functions (textflag 7) and cannot
-// do any scheduling that requires an m.
-//
-// In order to avoid needing heavy lifting here, we adopt
-// the following strategy: there is a stack of available m's
-// that can be stolen. Using compare-and-swap
-// to pop from the stack has ABA races, so we simulate
-// a lock by doing an exchange (via casp) to steal the stack
-// head and replace the top pointer with MLOCKED (1).
-// This serves as a simple spin lock that we can use even
-// without an m. The thread that locks the stack in this way
-// unlocks the stack by storing a valid stack head pointer.
-//
-// In order to make sure that there is always an m structure
-// available to be stolen, we maintain the invariant that there
-// is always one more than needed. At the beginning of the
-// program (if cgo is in use) the list is seeded with a single m.
-// If needm finds that it has taken the last m off the list, its job
-// is - once it has installed its own m so that it can do things like
-// allocate memory - to create a spare m and put it on the list.
-//
-// Each of these extra m's also has a g0 and a curg that are
-// pressed into service as the scheduling stack and current
-// goroutine for the duration of the cgo callback.
-//
-// When the callback is done with the m, it calls dropm to
-// put the m back on the list.
-#pragma textflag NOSPLIT
-void
-runtime·needm(byte x)
-{
-	M *mp;
-
-	if(runtime·needextram) {
-		// Can happen if C/C++ code calls Go from a global ctor.
-		// Can not throw, because scheduler is not initialized yet.
-		runtime·write(2, "fatal error: cgo callback before cgo call\n",
-			sizeof("fatal error: cgo callback before cgo call\n")-1);
-		runtime·exit(1);
-	}
-
-	// Lock extra list, take head, unlock popped list.
-	// nilokay=false is safe here because of the invariant above,
-	// that the extra list always contains or will soon contain
-	// at least one m.
-	mp = lockextra(false);
-
-	// Set needextram when we've just emptied the list,
-	// so that the eventual call into cgocallbackg will
-	// allocate a new m for the extra list. We delay the
-	// allocation until then so that it can be done
-	// after exitsyscall makes sure it is okay to be
-	// running at all (that is, there's no garbage collection
-	// running right now).
-	mp->needextram = mp->schedlink == nil;
-	unlockextra(mp->schedlink);
-
-	// Install g (= m->g0) and set the stack bounds
-	// to match the current stack. We don't actually know
-	// how big the stack is, like we don't know how big any
-	// scheduling stack is, but we assume there's at least 32 kB,
-	// which is more than enough for us.
-	runtime·setg(mp->g0);
-	g->stack.hi = (uintptr)(&x + 1024);
-	g->stack.lo = (uintptr)(&x - 32*1024);
-	g->stackguard0 = g->stack.lo + StackGuard;
-
-	// Initialize this thread to use the m.
-	runtime·asminit();
-	runtime·minit();
-}
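
The comment before needm describes the extra-M list's locking scheme: the list head doubles as a spin lock, claimed by atomically exchanging it for the MLOCKED (1) sentinel and released by storing a real head back. A stand-alone sketch of that "lock by swapping in a sentinel" pattern (integer indices stand in for M pointers; the real lockextra also handles a nil head via nilokay):

package main

import (
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
)

const locked uintptr = 1 // plays the role of MLOCKED

// lockHead spins until it can replace the published head with the sentinel;
// whoever succeeds owns the detached list until it publishes a new head.
func lockHead(head *uintptr) uintptr {
	for {
		old := atomic.LoadUintptr(head)
		if old == locked {
			runtime.Gosched() // someone else holds the list; yield and retry
			continue
		}
		if atomic.CompareAndSwapUintptr(head, old, locked) {
			return old
		}
	}
}

// unlockHead publishes a new head, which is also what releases the lock.
func unlockHead(head *uintptr, newHead uintptr) {
	atomic.StoreUintptr(head, newHead)
}

func main() {
	var head uintptr = 100 // pretend 100 identifies the first spare M
	var wg sync.WaitGroup
	for i := 0; i < 4; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			h := lockHead(&head)
			unlockHead(&head, h+1) // stand-in for pop-then-push of a spare M
		}()
	}
	wg.Wait()
	fmt.Println(head) // 104
}
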
-
-// newextram allocates an m and puts it on the extra list.
-// It is called with a working local m, so that it can do things
-// like call schedlock and allocate.
-void
-runtime·newextram(void)
-{
-	M *mp, *mnext;
-	G *gp;
-
-	// Create extra goroutine locked to extra m.
-	// The goroutine is the context in which the cgo callback will run.
-	// The sched.pc will never be returned to, but setting it to
-	// runtime.goexit makes clear to the traceback routines where
-	// the goroutine stack ends.
-	mp = runtime·allocm(nil);
-	gp = runtime·malg(4096);
-	gp->sched.pc = (uintptr)runtime·goexit + PCQuantum;
-	gp->sched.sp = gp->stack.hi;
-	gp->sched.sp -= 4*sizeof(uintreg); // extra space in case of reads slightly beyond frame
-	gp->sched.lr = 0;
-	gp->sched.g = gp;
-	gp->syscallpc = gp->sched.pc;
-	gp->syscallsp = gp->sched.sp;
-	// malg returns status as Gidle, change to Gsyscall before adding to allg
-	// where GC will see it.
-	runtime·casgstatus(gp, Gidle, Gsyscall);
-	gp->m = mp;
-	mp->curg = gp;
-	mp->locked = LockInternal;
-	mp->lockedg = gp;
-	gp->lockedm = mp;
-	gp->goid = runtime·xadd64(&runtime·sched.goidgen, 1);
-	if(raceenabled)
-		gp->racectx = runtime·racegostart(runtime·newextram);
-	// put on allg for garbage collector
-	runtime·allgadd(gp);
-
-	// Add m to the extra list.
-	mnext = lockextra(true);
-	mp->schedlink = mnext;
-	unlockextra(mp);
-}
-
-// dropm is called when a cgo callback has called needm but is now
-// done with the callback and returning back into the non-Go thread.
-// It puts the current m back onto the extra list.
-//
-// The main expense here is the call to signalstack to release the
-// m's signal stack, and then the call to needm on the next callback
-// from this thread. It is tempting to try to save the m for next time,
-// which would eliminate both these costs, but there might not be
-// a next time: the current thread (which Go does not control) might exit.
-// If we saved the m for that thread, there would be an m leak each time
-// such a thread exited. Instead, we acquire and release an m on each
-// call. These should typically not be scheduling operations, just a few
-// atomics, so the cost should be small.
-//
-// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
-// variable using pthread_key_create. Unlike the pthread keys we already use
-// on OS X, this dummy key would never be read by Go code. It would exist
-// only so that we could register a thread-exit-time destructor.
-// That destructor would put the m back onto the extra list.
-// This is purely a performance optimization. The current version,
-// in which dropm happens on each cgo call, is still correct too.
-// We may have to keep the current version on systems with cgo
-// but without pthreads, like Windows.
-void
-runtime·dropm(void)
-{
-	M *mp, *mnext;
-
-	// Undo whatever initialization minit did during needm.
-	runtime·unminit();
-
-	// Clear m and g, and return m to the extra list.
-	// After the call to setmg we can only call nosplit functions.
-	mp = g->m;
-	runtime·setg(nil);
-
-	mnext = lockextra(true);
-	mp->schedlink = mnext;
-	unlockextra(mp);
-}
-
-#define MLOCKED 1
-
-// lockextra locks the extra list and returns the list head.
-// The caller must unlock the list by storing a new list head
-// to runtime.extram. If nilokay is true, then lockextra will
-// return a nil list head if that's what it finds. If nilokay is false,
-// lockextra will keep waiting until the list head is no longer nil.
-#pragma textflag NOSPLIT
-static M*
-lockextra(bool nilokay)
-{
-	uintptr mpx;
-	void (*yield)(void);
-
-	for(;;) {
-		mpx = runtime·atomicloaduintptr((uintptr*)&runtime·extram);
-		if(mpx == MLOCKED) {
-			yield = runtime·osyield;
-			yield();
-			continue;
-		}
-		if(mpx == 0 && !nilokay) {
-			runtime·usleep(1);
-			continue;
-		}
-		if(!runtime·casuintptr((uintptr*)&runtime·extram, mpx, MLOCKED)) {
-			yield = runtime·osyield;
-			yield();
-			continue;
-		}
-		break;
-	}
-	return (M*)mpx;
-}
-
-#pragma textflag NOSPLIT
-static void
-unlockextra(M *mp)
-{
-	runtime·atomicstorep(&runtime·extram, mp);
-}
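
The comments above describe the "extra M" list: popping with a plain CAS would have ABA races, so the list head is stolen by exchanging it with the MLOCKED sentinel, which doubles as a tiny spin lock usable even without an m. Below is a minimal, self-contained Go sketch of that exchange-lock pattern; the handle table, names, and values are illustrative stand-ins, not the runtime's data structures.

	package main

	import (
		"fmt"
		"runtime"
		"sync/atomic"
	)

	// Handles stand in for M pointers so the sketch needs no unsafe code.
	const (
		nilHandle    uint64 = 0 // empty list
		lockedHandle uint64 = 1 // sentinel: some thread holds the list head
	)

	type extraM struct {
		next uint64 // handle of the next M on the list
		id   int
	}

	var (
		table = map[uint64]*extraM{} // handle -> node; real handles start at 2
		head  uint64                 // holds nilHandle, lockedHandle, or a node handle
	)

	// lockExtra spins until it can swap the sentinel in, then returns the
	// previous head (possibly nilHandle).
	func lockExtra() uint64 {
		for {
			old := atomic.LoadUint64(&head)
			if old == lockedHandle {
				runtime.Gosched() // stand-in for osyield
				continue
			}
			if atomic.CompareAndSwapUint64(&head, old, lockedHandle) {
				return old
			}
		}
	}

	// unlockExtra publishes a new head, releasing the "lock".
	func unlockExtra(h uint64) { atomic.StoreUint64(&head, h) }

	func main() {
		// Push one node (what newextram does), then pop it (what needm does).
		table[2] = &extraM{id: 100}
		old := lockExtra()
		table[2].next = old
		unlockExtra(2)

		h := lockExtra()
		m := table[h]
		unlockExtra(m.next)
		fmt.Println("popped extra m:", m.id)
	}
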
-
-
-// Create a new m.  It will start off with a call to fn, or else the scheduler.
-static void
-newm(void(*fn)(void), P *p)
-{
-	M *mp;
-
-	mp = runtime·allocm(p);
-	mp->nextp = p;
-	mp->mstartfn = fn;
-
-	if(runtime·iscgo) {
-		CgoThreadStart ts;
-
-		if(_cgo_thread_start == nil)
-			runtime·throw("_cgo_thread_start missing");
-		ts.g = mp->g0;
-		ts.tls = mp->tls;
-		ts.fn = runtime·mstart;
-		runtime·asmcgocall(_cgo_thread_start, &ts);
-		return;
-	}
-	runtime·newosproc(mp, (byte*)mp->g0->stack.hi);
-}
-
-// Stops execution of the current m until new work is available.
-// Returns with acquired P.
-static void
-stopm(void)
-{
-	if(g->m->locks)
-		runtime·throw("stopm holding locks");
-	if(g->m->p)
-		runtime·throw("stopm holding p");
-	if(g->m->spinning) {
-		g->m->spinning = false;
-		runtime·xadd(&runtime·sched.nmspinning, -1);
-	}
-
-retry:
-	runtime·lock(&runtime·sched.lock);
-	mput(g->m);
-	runtime·unlock(&runtime·sched.lock);
-	runtime·notesleep(&g->m->park);
-	runtime·noteclear(&g->m->park);
-	if(g->m->helpgc) {
-		runtime·gchelper();
-		g->m->helpgc = 0;
-		g->m->mcache = nil;
-		goto retry;
-	}
-	acquirep(g->m->nextp);
-	g->m->nextp = nil;
-}
-
-static void
-mspinning(void)
-{
-	g->m->spinning = true;
-}
-
-// Schedules some M to run the p (creates an M if necessary).
-// If p==nil, tries to get an idle P, if no idle P's does nothing.
-static void
-startm(P *p, bool spinning)
-{
-	M *mp;
-	void (*fn)(void);
-
-	runtime·lock(&runtime·sched.lock);
-	if(p == nil) {
-		p = pidleget();
-		if(p == nil) {
-			runtime·unlock(&runtime·sched.lock);
-			if(spinning)
-				runtime·xadd(&runtime·sched.nmspinning, -1);
-			return;
-		}
-	}
-	mp = mget();
-	runtime·unlock(&runtime·sched.lock);
-	if(mp == nil) {
-		fn = nil;
-		if(spinning)
-			fn = mspinning;
-		newm(fn, p);
-		return;
-	}
-	if(mp->spinning)
-		runtime·throw("startm: m is spinning");
-	if(mp->nextp)
-		runtime·throw("startm: m has p");
-	mp->spinning = spinning;
-	mp->nextp = p;
-	runtime·notewakeup(&mp->park);
-}
-
-// Hands off P from syscall or locked M.
-static void
-handoffp(P *p)
-{
-	// if it has local work, start it straight away
-	if(p->runqhead != p->runqtail || runtime·sched.runqsize) {
-		startm(p, false);
-		return;
-	}
-	// no local work, check that there are no spinning/idle M's,
-	// otherwise our help is not required
-	if(runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) == 0 &&  // TODO: fast atomic
-		runtime·cas(&runtime·sched.nmspinning, 0, 1)){
-		startm(p, true);
-		return;
-	}
-	runtime·lock(&runtime·sched.lock);
-	if(runtime·sched.gcwaiting) {
-		p->status = Pgcstop;
-		if(--runtime·sched.stopwait == 0)
-			runtime·notewakeup(&runtime·sched.stopnote);
-		runtime·unlock(&runtime·sched.lock);
-		return;
-	}
-	if(runtime·sched.runqsize) {
-		runtime·unlock(&runtime·sched.lock);
-		startm(p, false);
-		return;
-	}
-	// If this is the last running P and nobody is polling network,
-	// need to wakeup another M to poll network.
-	if(runtime·sched.npidle == runtime·gomaxprocs-1 && runtime·atomicload64(&runtime·sched.lastpoll) != 0) {
-		runtime·unlock(&runtime·sched.lock);
-		startm(p, false);
-		return;
-	}
-	pidleput(p);
-	runtime·unlock(&runtime·sched.lock);
-}
-
-// Tries to add one more P to execute G's.
-// Called when a G is made runnable (newproc, ready).
-static void
-wakep(void)
-{
-	// be conservative about spinning threads
-	if(!runtime·cas(&runtime·sched.nmspinning, 0, 1))
-		return;
-	startm(nil, true);
-}
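
The guard in wakep above is deliberately conservative: only the thread that wins the 0->1 transition of nmspinning gets to start a new spinning M, so a burst of ready goroutines wakes at most one extra thread. A small Go sketch of that gate, with an illustrative counter and callback rather than the runtime's types:

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	var nmspinning int32

	// wakep starts a new "spinning M" only if no M is already spinning.
	func wakep(startM func()) {
		if !atomic.CompareAndSwapInt32(&nmspinning, 0, 1) {
			return // someone else is already spinning; be conservative
		}
		startM()
	}

	func main() {
		var started int32
		var wg sync.WaitGroup
		for i := 0; i < 100; i++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				wakep(func() { atomic.AddInt32(&started, 1) })
			}()
		}
		wg.Wait()
		fmt.Println("spinning Ms started:", started) // prints 1
	}
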
-
-// Stops execution of the current m that is locked to a g until the g is runnable again.
-// Returns with acquired P.
-static void
-stoplockedm(void)
-{
-	P *p;
-	uint32 status;
-
-	if(g->m->lockedg == nil || g->m->lockedg->lockedm != g->m)
-		runtime·throw("stoplockedm: inconsistent locking");
-	if(g->m->p) {
-		// Schedule another M to run this p.
-		p = releasep();
-		handoffp(p);
-	}
-	incidlelocked(1);
-	// Wait until another thread schedules lockedg again.
-	runtime·notesleep(&g->m->park);
-	runtime·noteclear(&g->m->park);
-	status = runtime·readgstatus(g->m->lockedg);
-	if((status&~Gscan) != Grunnable){
-		runtime·printf("runtime: stoplockedm: g is not Grunnable or Gscanrunnable\n");
-		dumpgstatus(g);
-		runtime·throw("stoplockedm: not runnable");
-	}
-	acquirep(g->m->nextp);
-	g->m->nextp = nil;
-}
-
-// Schedules the locked m to run the locked gp.
-static void
-startlockedm(G *gp)
-{
-	M *mp;
-	P *p;
-
-	mp = gp->lockedm;
-	if(mp == g->m)
-		runtime·throw("startlockedm: locked to me");
-	if(mp->nextp)
-		runtime·throw("startlockedm: m has p");
-	// directly handoff current P to the locked m
-	incidlelocked(-1);
-	p = releasep();
-	mp->nextp = p;
-	runtime·notewakeup(&mp->park);
-	stopm();
-}
-
-// Stops the current m for stoptheworld.
-// Returns when the world is restarted.
-static void
-gcstopm(void)
-{
-	P *p;
-
-	if(!runtime·sched.gcwaiting)
-		runtime·throw("gcstopm: not waiting for gc");
-	if(g->m->spinning) {
-		g->m->spinning = false;
-		runtime·xadd(&runtime·sched.nmspinning, -1);
-	}
-	p = releasep();
-	runtime·lock(&runtime·sched.lock);
-	p->status = Pgcstop;
-	if(--runtime·sched.stopwait == 0)
-		runtime·notewakeup(&runtime·sched.stopnote);
-	runtime·unlock(&runtime·sched.lock);
-	stopm();
-}
-
-// Schedules gp to run on the current M.
-// Never returns.
-static void
-execute(G *gp)
-{
-	int32 hz;
-	
-	runtime·casgstatus(gp, Grunnable, Grunning);
-	gp->waitsince = 0;
-	gp->preempt = false;
-	gp->stackguard0 = gp->stack.lo + StackGuard;
-	g->m->p->schedtick++;
-	g->m->curg = gp;
-	gp->m = g->m;
-
-	// Check whether the profiler needs to be turned on or off.
-	hz = runtime·sched.profilehz;
-	if(g->m->profilehz != hz)
-		runtime·resetcpuprofiler(hz);
-
-	runtime·gogo(&gp->sched);
-}
-
-// Finds a runnable goroutine to execute.
-// Tries to steal from other P's, get g from global queue, poll network.
-static G*
-findrunnable(void)
-{
-	G *gp;
-	P *p;
-	int32 i;
-
-top:
-	if(runtime·sched.gcwaiting) {
-		gcstopm();
-		goto top;
-	}
-	if(runtime·fingwait && runtime·fingwake && (gp = runtime·wakefing()) != nil)
-		runtime·ready(gp);
-	// local runq
-	gp = runqget(g->m->p);
-	if(gp)
-		return gp;
-	// global runq
-	if(runtime·sched.runqsize) {
-		runtime·lock(&runtime·sched.lock);
-		gp = globrunqget(g->m->p, 0);
-		runtime·unlock(&runtime·sched.lock);
-		if(gp)
-			return gp;
-	}
-	// poll network
-	gp = runtime·netpoll(false);  // non-blocking
-	if(gp) {
-		injectglist(gp->schedlink);
-		runtime·casgstatus(gp, Gwaiting, Grunnable);
-		return gp;
-	}
-	// If number of spinning M's >= number of busy P's, block.
-	// This is necessary to prevent excessive CPU consumption
-	// when GOMAXPROCS>>1 but the program parallelism is low.
-	if(!g->m->spinning && 2 * runtime·atomicload(&runtime·sched.nmspinning) >= runtime·gomaxprocs - runtime·atomicload(&runtime·sched.npidle))  // TODO: fast atomic
-		goto stop;
-	if(!g->m->spinning) {
-		g->m->spinning = true;
-		runtime·xadd(&runtime·sched.nmspinning, 1);
-	}
-	// random steal from other P's
-	for(i = 0; i < 2*runtime·gomaxprocs; i++) {
-		if(runtime·sched.gcwaiting)
-			goto top;
-		p = runtime·allp[runtime·fastrand1()%runtime·gomaxprocs];
-		if(p == g->m->p)
-			gp = runqget(p);
-		else
-			gp = runqsteal(g->m->p, p);
-		if(gp)
-			return gp;
-	}
-stop:
-	// return P and block
-	runtime·lock(&runtime·sched.lock);
-	if(runtime·sched.gcwaiting) {
-		runtime·unlock(&runtime·sched.lock);
-		goto top;
-	}
-	if(runtime·sched.runqsize) {
-		gp = globrunqget(g->m->p, 0);
-		runtime·unlock(&runtime·sched.lock);
-		return gp;
-	}
-	p = releasep();
-	pidleput(p);
-	runtime·unlock(&runtime·sched.lock);
-	if(g->m->spinning) {
-		g->m->spinning = false;
-		runtime·xadd(&runtime·sched.nmspinning, -1);
-	}
-	// check all runqueues once again
-	for(i = 0; i < runtime·gomaxprocs; i++) {
-		p = runtime·allp[i];
-		if(p && p->runqhead != p->runqtail) {
-			runtime·lock(&runtime·sched.lock);
-			p = pidleget();
-			runtime·unlock(&runtime·sched.lock);
-			if(p) {
-				acquirep(p);
-				goto top;
-			}
-			break;
-		}
-	}
-	// poll network
-	if(runtime·xchg64(&runtime·sched.lastpoll, 0) != 0) {
-		if(g->m->p)
-			runtime·throw("findrunnable: netpoll with p");
-		if(g->m->spinning)
-			runtime·throw("findrunnable: netpoll with spinning");
-		gp = runtime·netpoll(true);  // block until new work is available
-		runtime·atomicstore64(&runtime·sched.lastpoll, runtime·nanotime());
-		if(gp) {
-			runtime·lock(&runtime·sched.lock);
-			p = pidleget();
-			runtime·unlock(&runtime·sched.lock);
-			if(p) {
-				acquirep(p);
-				injectglist(gp->schedlink);
-				runtime·casgstatus(gp, Gwaiting, Grunnable);
-				return gp;
-			}
-			injectglist(gp);
-		}
-	}
-	stopm();
-	goto top;
-}
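
The "random steal" loop in findrunnable above makes up to 2*GOMAXPROCS attempts, each picking a victim P at random and trying to take work from it before the M gives up and parks. A tiny Go sketch of that loop; the queue type and the "take half" policy here are illustrative stand-ins for runqsteal, not its actual implementation.

	package main

	import (
		"fmt"
		"math/rand"
	)

	type pQueue struct{ runq []int }

	// steal takes about half of the victim's queue, mirroring runqsteal's intent.
	func steal(victim *pQueue) []int {
		n := len(victim.runq) / 2
		if n == 0 && len(victim.runq) > 0 {
			n = 1
		}
		got := victim.runq[:n]
		victim.runq = victim.runq[n:]
		return got
	}

	// findWork tries 2*len(allp) random victims; like the real loop, it may
	// come back empty-handed if it keeps picking idle Ps.
	func findWork(allp []*pQueue) []int {
		for i := 0; i < 2*len(allp); i++ {
			victim := allp[rand.Intn(len(allp))]
			if got := steal(victim); len(got) > 0 {
				return got
			}
		}
		return nil
	}

	func main() {
		allp := []*pQueue{{}, {runq: []int{1, 2, 3, 4}}, {}}
		fmt.Println("stole:", findWork(allp))
	}
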
-
-static void
-resetspinning(void)
-{
-	int32 nmspinning;
-
-	if(g->m->spinning) {
-		g->m->spinning = false;
-		nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
-		if(nmspinning < 0)
-			runtime·throw("findrunnable: negative nmspinning");
-	} else
-		nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
-
-	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
-	// so see if we need to wakeup another P here.
-	if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
-		wakep();
-}
-
-// Injects the list of runnable G's into the scheduler.
-// Can run concurrently with GC.
-static void
-injectglist(G *glist)
-{
-	int32 n;
-	G *gp;
-
-	if(glist == nil)
-		return;
-	runtime·lock(&runtime·sched.lock);
-	for(n = 0; glist; n++) {
-		gp = glist;
-		glist = gp->schedlink;
-		runtime·casgstatus(gp, Gwaiting, Grunnable); 
-		globrunqput(gp);
-	}
-	runtime·unlock(&runtime·sched.lock);
-
-	for(; n && runtime·sched.npidle; n--)
-		startm(nil, false);
-}
-
-// One round of scheduler: find a runnable goroutine and execute it.
-// Never returns.
-static void
-schedule(void)
-{
-	G *gp;
-	uint32 tick;
-
-	if(g->m->locks)
-		runtime·throw("schedule: holding locks");
-
-	if(g->m->lockedg) {
-		stoplockedm();
-		execute(g->m->lockedg);  // Never returns.
-	}
-
-top:
-	if(runtime·sched.gcwaiting) {
-		gcstopm();
-		goto top;
-	}
-
-	gp = nil;
-	// Check the global runnable queue once in a while to ensure fairness.
-	// Otherwise two goroutines can completely occupy the local runqueue
-	// by constantly respawning each other.
-	tick = g->m->p->schedtick;
-	// This is a fancy way to say tick%61==0;
-	// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
-	if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime·sched.runqsize > 0) {
-		runtime·lock(&runtime·sched.lock);
-		gp = globrunqget(g->m->p, 1);
-		runtime·unlock(&runtime·sched.lock);
-		if(gp)
-			resetspinning();
-	}
-	if(gp == nil) {
-		gp = runqget(g->m->p);
-		if(gp && g->m->spinning)
-			runtime·throw("schedule: spinning with local work");
-	}
-	if(gp == nil) {
-		gp = findrunnable();  // blocks until work is available
-		resetspinning();
-	}
-
-	if(gp->lockedm) {
-		// Hands off own p to the locked m,
-		// then blocks waiting for a new p.
-		startlockedm(gp);
-		goto top;
-	}
-
-	execute(gp);
-}
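
The fairness check in schedule above computes tick%61 without a DIV, using the multiply-and-shift identity tick - ((tick*0x4325c53f)>>36)*61. A quick Go verification of that identity on sample values (the constant and modulus come from the code above; everything else is just test scaffolding):

	package main

	import "fmt"

	func fastMod61(tick uint32) uint32 {
		q := uint32((uint64(tick) * 0x4325c53f) >> 36) // ~ tick/61
		return tick - q*61
	}

	func main() {
		for _, tick := range []uint32{0, 1, 60, 61, 62, 12345, 1<<31 - 1, 1<<32 - 1} {
			if fastMod61(tick) != tick%61 {
				fmt.Println("mismatch at", tick)
				return
			}
		}
		fmt.Println("fast mod 61 matches % 61 on all samples")
	}
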
-
-// dropg removes the association between m and the current goroutine m->curg (gp for short).
-// Typically a caller sets gp's status away from Grunning and then
-// immediately calls dropg to finish the job. The caller is also responsible
-// for arranging that gp will be restarted using runtime·ready at an
-// appropriate time. After calling dropg and arranging for gp to be
-// readied later, the caller can do other work but eventually should
-// call schedule to restart the scheduling of goroutines on this m.
-static void
-dropg(void)
-{
-	if(g->m->lockedg == nil) {
-		g->m->curg->m = nil;
-		g->m->curg = nil;
-	}
-}
-
-// Puts the current goroutine into a waiting state and calls unlockf.
-// If unlockf returns false, the goroutine is resumed.
-void
-runtime·park(bool(*unlockf)(G*, void*), void *lock, String reason)
-{
-	void (*fn)(G*);
-
-	g->m->waitlock = lock;
-	g->m->waitunlockf = unlockf;
-	g->waitreason = reason;
-	fn = runtime·park_m;
-	runtime·mcall(&fn);
-}
-
-bool
-runtime·parkunlock_c(G *gp, void *lock)
-{
-	USED(gp);
-	runtime·unlock(lock);
-	return true;
-}
-
-// Puts the current goroutine into a waiting state and unlocks the lock.
-// The goroutine can be made runnable again by calling runtime·ready(gp).
-void
-runtime·parkunlock(Mutex *lock, String reason)
-{
-	runtime·park(runtime·parkunlock_c, lock, reason);
-}
-
-// runtime·park continuation on g0.
-void
-runtime·park_m(G *gp)
-{
-	bool ok;
-
-	runtime·casgstatus(gp, Grunning, Gwaiting);
-	dropg();
-
-	if(g->m->waitunlockf) {
-		ok = g->m->waitunlockf(gp, g->m->waitlock);
-		g->m->waitunlockf = nil;
-		g->m->waitlock = nil;
-		if(!ok) {
-			runtime·casgstatus(gp, Gwaiting, Grunnable); 
-			execute(gp);  // Schedule it back, never returns.
-		}
-	}
-
-	schedule();
-}
-
-// Gosched continuation on g0.
-void
-runtime·gosched_m(G *gp)
-{
-	uint32 status;
-
-	status = runtime·readgstatus(gp);
-	if((status&~Gscan) != Grunning){
-		dumpgstatus(gp);
-		runtime·throw("bad g status");
-	}
-	runtime·casgstatus(gp, Grunning, Grunnable);
-	dropg();
-	runtime·lock(&runtime·sched.lock);
-	globrunqput(gp);
-	runtime·unlock(&runtime·sched.lock);
-
-	schedule();
-}
-
-// Finishes execution of the current goroutine.
-// Must be NOSPLIT because it is called from Go.
-#pragma textflag NOSPLIT
-void
-runtime·goexit1(void)
-{
-	void (*fn)(G*);
-
-	if(raceenabled)
-		runtime·racegoend();
-	fn = goexit0;
-	runtime·mcall(&fn);
-}
-
-// runtime·goexit continuation on g0.
-static void
-goexit0(G *gp)
-{
-	runtime·casgstatus(gp, Grunning, Gdead);
-	gp->m = nil;
-	gp->lockedm = nil;
-	g->m->lockedg = nil;
-	gp->paniconfault = 0;
-	gp->defer = nil; // should be nil already but just in case.
-	gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
-	gp->writebuf.array = nil;
-	gp->writebuf.len = 0;
-	gp->writebuf.cap = 0;
-	gp->waitreason.str = nil;
-	gp->waitreason.len = 0;
-	gp->param = nil;
-
-	dropg();
-
-	if(g->m->locked & ~LockExternal) {
-		runtime·printf("invalid m->locked = %d\n", g->m->locked);
-		runtime·throw("internal lockOSThread error");
-	}	
-	g->m->locked = 0;
-	gfput(g->m->p, gp);
-	schedule();
-}
-
-#pragma textflag NOSPLIT
-static void
-save(uintptr pc, uintptr sp)
-{
-	g->sched.pc = pc;
-	g->sched.sp = sp;
-	g->sched.lr = 0;
-	g->sched.ret = 0;
-	g->sched.ctxt = 0;
-	g->sched.g = g;
-}
-
-static void entersyscall_bad(void);
-static void entersyscall_sysmon(void);
-static void entersyscall_gcwait(void);
-
-// The goroutine g is about to enter a system call.
-// Record that it's not using the cpu anymore.
-// This is called only from the go syscall library and cgocall,
-// not from the low-level system calls used by the runtime.
-//
-// Entersyscall cannot split the stack: the runtime·gosave must
-// make g->sched refer to the caller's stack segment, because
-// entersyscall is going to return immediately after.
-//
-// Nothing entersyscall calls can split the stack either.
-// We cannot safely move the stack during an active call to syscall,
-// because we do not know which of the uintptr arguments are
-// really pointers (back into the stack).
-// In practice, this means that we make the fast path run through
-// entersyscall doing no-split things, and the slow path has to use onM
-// to run bigger things on the m stack.
-//
-// reentersyscall is the entry point used by cgo callbacks, where explicitly
-// saved SP and PC are restored. This is needed when exitsyscall will be called
-// from a function further up in the call stack than the parent, as g->syscallsp
-// must always point to a valid stack frame. entersyscall below is the normal
-// entry point for syscalls, which obtains the SP and PC from the caller.
-#pragma textflag NOSPLIT
-void
-runtime·reentersyscall(uintptr pc, uintptr sp)
-{
-	void (*fn)(void);
-
-	// Disable preemption because during this function g is in Gsyscall status,
-	// but can have inconsistent g->sched, do not let GC observe it.
-	g->m->locks++;
-	
-	// Entersyscall must not call any function that might split/grow the stack.
-	// (See details in comment above.)
-	// Catch calls that might, by replacing the stack guard with something that
-	// will trip any stack check and leaving a flag to tell newstack to die.
-	g->stackguard0 = StackPreempt;
-	g->throwsplit = 1;
-
-	// Leave SP around for GC and traceback.
-	save(pc, sp);
-	g->syscallsp = sp;
-	g->syscallpc = pc;
-	runtime·casgstatus(g, Grunning, Gsyscall);
-	if(g->syscallsp < g->stack.lo || g->stack.hi < g->syscallsp) {
-		fn = entersyscall_bad;
-		runtime·onM(&fn);
-	}
-
-	if(runtime·atomicload(&runtime·sched.sysmonwait)) {  // TODO: fast atomic
-		fn = entersyscall_sysmon;
-		runtime·onM(&fn);
-		save(pc, sp);
-	}
-
-	g->m->mcache = nil;
-	g->m->p->m = nil;
-	runtime·atomicstore(&g->m->p->status, Psyscall);
-	if(runtime·sched.gcwaiting) {
-		fn = entersyscall_gcwait;
-		runtime·onM(&fn);
-		save(pc, sp);
-	}
-
-	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
-	// We set stackguard to StackPreempt so that first split stack check calls morestack.
-	// Morestack detects this case and throws.
-	g->stackguard0 = StackPreempt;
-	g->m->locks--;
-}
-
-// Standard syscall entry used by the go syscall library and normal cgo calls.
-#pragma textflag NOSPLIT
-void
-·entersyscall(int32 dummy)
-{
-	runtime·reentersyscall((uintptr)runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-}
-
-static void
-entersyscall_bad(void)
-{
-	G *gp;
-	
-	gp = g->m->curg;
-	runtime·printf("entersyscall inconsistent %p [%p,%p]\n",
-		gp->syscallsp, gp->stack.lo, gp->stack.hi);
-	runtime·throw("entersyscall");
-}
-
-static void
-entersyscall_sysmon(void)
-{
-	runtime·lock(&runtime·sched.lock);
-	if(runtime·atomicload(&runtime·sched.sysmonwait)) {
-		runtime·atomicstore(&runtime·sched.sysmonwait, 0);
-		runtime·notewakeup(&runtime·sched.sysmonnote);
-	}
-	runtime·unlock(&runtime·sched.lock);
-}
-
-static void
-entersyscall_gcwait(void)
-{
-	runtime·lock(&runtime·sched.lock);
-	if (runtime·sched.stopwait > 0 && runtime·cas(&g->m->p->status, Psyscall, Pgcstop)) {
-		if(--runtime·sched.stopwait == 0)
-			runtime·notewakeup(&runtime·sched.stopnote);
-	}
-	runtime·unlock(&runtime·sched.lock);
-}
-
-static void entersyscallblock_handoff(void);
-
-// The same as runtime·entersyscall(), but with a hint that the syscall is blocking.
-#pragma textflag NOSPLIT
-void
-·entersyscallblock(int32 dummy)
-{
-	void (*fn)(void);
-
-	g->m->locks++;  // see comment in entersyscall
-	g->throwsplit = 1;
-	g->stackguard0 = StackPreempt;  // see comment in entersyscall
-
-	// Leave SP around for GC and traceback.
-	save((uintptr)runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-	g->syscallsp = g->sched.sp;
-	g->syscallpc = g->sched.pc;
-	runtime·casgstatus(g, Grunning, Gsyscall);
-	if(g->syscallsp < g->stack.lo || g->stack.hi < g->syscallsp) {
-		fn = entersyscall_bad;
-		runtime·onM(&fn);
-	}
-	
-	fn = entersyscallblock_handoff;
-	runtime·onM(&fn);
-
-	// Resave for traceback during blocked call.
-	save((uintptr)runtime·getcallerpc(&dummy), runtime·getcallersp(&dummy));
-
-	g->m->locks--;
-}
-
-static void
-entersyscallblock_handoff(void)
-{
-	handoffp(releasep());
-}
-
-// The goroutine g exited its system call.
-// Arrange for it to run on a cpu again.
-// This is called only from the go syscall library, not
-// from the low-level system calls used by the runtime.
-#pragma textflag NOSPLIT
-void
-·exitsyscall(int32 dummy)
-{
-	void (*fn)(G*);
-
-	g->m->locks++;  // see comment in entersyscall
-
-	if(runtime·getcallersp(&dummy) > g->syscallsp)
-		runtime·throw("exitsyscall: syscall frame is no longer valid");
-
-	g->waitsince = 0;
-	if(exitsyscallfast()) {
-		// There's a cpu for us, so we can run.
-		g->m->p->syscalltick++;
-		// We need to cas the status and scan before resuming...
-		runtime·casgstatus(g, Gsyscall, Grunning);
-
-		// Garbage collector isn't running (since we are),
-		// so okay to clear syscallsp.
-		g->syscallsp = (uintptr)nil;
-		g->m->locks--;
-		if(g->preempt) {
-			// restore the preemption request in case we've cleared it in newstack
-			g->stackguard0 = StackPreempt;
-		} else {
-			// otherwise restore the real stackguard; we've spoiled it in entersyscall/entersyscallblock
-			g->stackguard0 = g->stack.lo + StackGuard;
-		}
-		g->throwsplit = 0;
-		return;
-	}
-
-	g->m->locks--;
-
-	// Call the scheduler.
-	fn = exitsyscall0;
-	runtime·mcall(&fn);
-
-	// Scheduler returned, so we're allowed to run now.
-	// Delete the syscallsp information that we left for
-	// the garbage collector during the system call.
-	// Must wait until now because until gosched returns
-	// we don't know for sure that the garbage collector
-	// is not running.
-	g->syscallsp = (uintptr)nil;
-	g->m->p->syscalltick++;
-	g->throwsplit = 0;
-}
-
-static void exitsyscallfast_pidle(void);
-
-#pragma textflag NOSPLIT
-static bool
-exitsyscallfast(void)
-{
-	void (*fn)(void);
-
-	// Freezetheworld sets stopwait but does not retake P's.
-	if(runtime·sched.stopwait) {
-		g->m->mcache = nil; 
-		g->m->p = nil;
-		return false;
-	}
-
-	// Try to re-acquire the last P.
-	if(g->m->p && g->m->p->status == Psyscall && runtime·cas(&g->m->p->status, Psyscall, Prunning)) {
-		// There's a cpu for us, so we can run.
-		g->m->mcache = g->m->p->mcache;
-		g->m->p->m = g->m;
-		return true;
-	}
-	// Try to get any other idle P.
-	g->m->mcache = nil;
-	g->m->p = nil;
-	if(runtime·sched.pidle) {
-		fn = exitsyscallfast_pidle;
-		runtime·onM(&fn);
-		if(g->m->scalararg[0]) {
-			g->m->scalararg[0] = 0;
-			return true;
-		}
-	}
-	return false;
-}
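
The fast path above hinges on a single CAS: the goroutine returning from a syscall tries to flip its old P's status from Psyscall back to Prunning, racing against sysmon's retake, which does the competing CAS to Pidle. A minimal Go sketch of that race; the status constants and struct are illustrative.

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	const (
		pIdle uint32 = iota
		pRunning
		pSyscall
	)

	type p struct{ status uint32 }

	// reacquire reports whether the caller won the P back from the syscall state.
	func reacquire(pp *p) bool {
		return atomic.CompareAndSwapUint32(&pp.status, pSyscall, pRunning)
	}

	func main() {
		pp := &p{status: pSyscall}
		fmt.Println("first caller wins:", reacquire(pp))  // true
		fmt.Println("second caller wins:", reacquire(pp)) // false: already Prunning
	}
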
-
-static void
-exitsyscallfast_pidle(void)
-{
-	P *p;
-
-	runtime·lock(&runtime·sched.lock);
-	p = pidleget();
-	if(p && runtime·atomicload(&runtime·sched.sysmonwait)) {
-		runtime·atomicstore(&runtime·sched.sysmonwait, 0);
-		runtime·notewakeup(&runtime·sched.sysmonnote);
-	}
-	runtime·unlock(&runtime·sched.lock);
-	if(p) {
-		acquirep(p);
-		g->m->scalararg[0] = 1;
-	} else
-		g->m->scalararg[0] = 0;
-}
-
-// runtime·exitsyscall slow path on g0.
-// Failed to acquire P, enqueue gp as runnable.
-static void
-exitsyscall0(G *gp)
-{
-	P *p;
-
-	runtime·casgstatus(gp, Gsyscall, Grunnable);
-	dropg();
-	runtime·lock(&runtime·sched.lock);
-	p = pidleget();
-	if(p == nil)
-		globrunqput(gp);
-	else if(runtime·atomicload(&runtime·sched.sysmonwait)) {
-		runtime·atomicstore(&runtime·sched.sysmonwait, 0);
-		runtime·notewakeup(&runtime·sched.sysmonnote);
-	}
-	runtime·unlock(&runtime·sched.lock);
-	if(p) {
-		acquirep(p);
-		execute(gp);  // Never returns.
-	}
-	if(g->m->lockedg) {
-		// Wait until another thread schedules gp and so m again.
-		stoplockedm();
-		execute(gp);  // Never returns.
-	}
-	stopm();
-	schedule();  // Never returns.
-}
-
-static void
-beforefork(void)
-{
-	G *gp;
-	
-	gp = g->m->curg;
-	// Fork can hang if preempted with signals frequently enough (see issue 5517).
-	// Ensure that we stay on the same M where we disable profiling.
-	gp->m->locks++;
-	if(gp->m->profilehz != 0)
-		runtime·resetcpuprofiler(0);
-
-	// This function is called before fork in syscall package.
-	// Code between fork and exec must not allocate memory nor even try to grow stack.
-	// Here we spoil g->stackguard to reliably detect any attempts to grow stack.
-	// runtime_AfterFork will undo this in parent process, but not in child.
-	gp->stackguard0 = StackFork;
-}
-
-// Called from syscall package before fork.
-#pragma textflag NOSPLIT
-void
-syscall·runtime_BeforeFork(void)
-{
-	void (*fn)(void);
-	
-	fn = beforefork;
-	runtime·onM(&fn);
-}
-
-static void
-afterfork(void)
-{
-	int32 hz;
-	G *gp;
-	
-	gp = g->m->curg;
-	// See the comment in runtime_BeforeFork.
-	gp->stackguard0 = gp->stack.lo + StackGuard;
-
-	hz = runtime·sched.profilehz;
-	if(hz != 0)
-		runtime·resetcpuprofiler(hz);
-	gp->m->locks--;
-}
-
-// Called from syscall package after fork in parent.
-#pragma textflag NOSPLIT
-void
-syscall·runtime_AfterFork(void)
-{
-	void (*fn)(void);
-	
-	fn = afterfork;
-	runtime·onM(&fn);
-}
-
-// Hook used by runtime·malg to call runtime·stackalloc on the
-// scheduler stack.  This exists because runtime·stackalloc insists
-// on being called on the scheduler stack, to avoid trying to grow
-// the stack while allocating a new stack segment.
-static void
-mstackalloc(G *gp)
-{
-	G *newg;
-	uintptr size;
-
-	newg = g->m->ptrarg[0];
-	size = g->m->scalararg[0];
-
-	newg->stack = runtime·stackalloc(size);
-
-	runtime·gogo(&gp->sched);
-}
-
-// Allocate a new g, with a stack big enough for stacksize bytes.
-G*
-runtime·malg(int32 stacksize)
-{
-	G *newg;
-	void (*fn)(G*);
-
-	newg = allocg();
-	if(stacksize >= 0) {
-		stacksize = runtime·round2(StackSystem + stacksize);
-		if(g == g->m->g0) {
-			// running on scheduler stack already.
-			newg->stack = runtime·stackalloc(stacksize);
-		} else {
-			// have to call stackalloc on scheduler stack.
-			g->m->scalararg[0] = stacksize;
-			g->m->ptrarg[0] = newg;
-			fn = mstackalloc;
-			runtime·mcall(&fn);
-			g->m->ptrarg[0] = nil;
-		}
-		newg->stackguard0 = newg->stack.lo + StackGuard;
-		newg->stackguard1 = ~(uintptr)0;
-	}
-	return newg;
-}
-
-static void
-newproc_m(void)
-{
-	byte *argp;
-	void *callerpc;
-	FuncVal *fn;
-	int32 siz;
-
-	siz = g->m->scalararg[0];
-	callerpc = (void*)g->m->scalararg[1];	
-	argp = g->m->ptrarg[0];
-	fn = (FuncVal*)g->m->ptrarg[1];
-
-	runtime·newproc1(fn, argp, siz, 0, callerpc);
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-}
-
-// Create a new g running fn with siz bytes of arguments.
-// Put it on the queue of g's waiting to run.
-// The compiler turns a go statement into a call to this.
-// Cannot split the stack because it assumes that the arguments
-// are available sequentially after &fn; they would not be
-// copied if a stack split occurred.
-#pragma textflag NOSPLIT
-void
-runtime·newproc(int32 siz, FuncVal* fn, ...)
-{
-	byte *argp;
-	void (*mfn)(void);
-
-	if(thechar == '5' || thechar == '9')
-		argp = (byte*)(&fn+2);  // skip caller's saved LR
-	else
-		argp = (byte*)(&fn+1);
-
-	g->m->locks++;
-	g->m->scalararg[0] = siz;
-	g->m->scalararg[1] = (uintptr)runtime·getcallerpc(&siz);
-	g->m->ptrarg[0] = argp;
-	g->m->ptrarg[1] = fn;
-	mfn = newproc_m;
-	runtime·onM(&mfn);
-	g->m->locks--;
-}
-
-void runtime·main(void);
-
-// Create a new g running fn with narg bytes of arguments starting
-// at argp and returning nret bytes of results.  callerpc is the
-// address of the go statement that created this.  The new g is put
-// on the queue of g's waiting to run.
-G*
-runtime·newproc1(FuncVal *fn, byte *argp, int32 narg, int32 nret, void *callerpc)
-{
-	byte *sp;
-	G *newg;
-	P *p;
-	int32 siz;
-
-	if(fn == nil) {
-		g->m->throwing = -1;  // do not dump full stacks
-		runtime·throw("go of nil func value");
-	}
-	g->m->locks++;  // disable preemption because it can be holding p in a local var
-	siz = narg + nret;
-	siz = (siz+7) & ~7;
-
-	// We could allocate a larger initial stack if necessary.
-	// Not worth it: this is almost always an error.
-	// 4*sizeof(uintreg): extra space added below
-	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
-	if(siz >= StackMin - 4*sizeof(uintreg) - sizeof(uintreg))
-		runtime·throw("runtime.newproc: function arguments too large for new goroutine");
-
-	p = g->m->p;
-	if((newg = gfget(p)) == nil) {
-		newg = runtime·malg(StackMin);
-		runtime·casgstatus(newg, Gidle, Gdead);
-		runtime·allgadd(newg); // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
-	}
-	if(newg->stack.hi == 0)
-		runtime·throw("newproc1: newg missing stack");
-
-	if(runtime·readgstatus(newg) != Gdead) 
-		runtime·throw("newproc1: new g is not Gdead");
-
-	sp = (byte*)newg->stack.hi;
-	sp -= 4*sizeof(uintreg); // extra space in case of reads slightly beyond frame
-	sp -= siz;
-	runtime·memmove(sp, argp, narg);
-	if(thechar == '5' || thechar == '9') {
-		// caller's LR
-		sp -= sizeof(void*);
-		*(void**)sp = nil;
-	}
-
-	runtime·memclr((byte*)&newg->sched, sizeof newg->sched);
-	newg->sched.sp = (uintptr)sp;
-	newg->sched.pc = (uintptr)runtime·goexit + PCQuantum; // +PCQuantum so that previous instruction is in same function
-	newg->sched.g = newg;
-	runtime·gostartcallfn(&newg->sched, fn);
-	newg->gopc = (uintptr)callerpc;
-	runtime·casgstatus(newg, Gdead, Grunnable);
-
-	if(p->goidcache == p->goidcacheend) {
-		// Sched.goidgen is the last allocated id,
-		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
-		// At startup sched.goidgen=0, so main goroutine receives goid=1.
-		p->goidcache = runtime·xadd64(&runtime·sched.goidgen, GoidCacheBatch);
-		p->goidcache -= GoidCacheBatch - 1;
-		p->goidcacheend = p->goidcache + GoidCacheBatch;
-	}
-	newg->goid = p->goidcache++;
-	if(raceenabled)
-		newg->racectx = runtime·racegostart((void*)callerpc);
-	runqput(p, newg);
-
-	if(runtime·atomicload(&runtime·sched.npidle) != 0 && runtime·atomicload(&runtime·sched.nmspinning) == 0 && fn->fn != runtime·main)  // TODO: fast atomic
-		wakep();
-	g->m->locks--;
-	if(g->m->locks == 0 && g->preempt)  // restore the preemption request in case we've cleared it in newstack
-		g->stackguard0 = StackPreempt;
-	return newg;
-}
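
The goidcache logic in newproc1 above reserves goroutine ids in batches: one atomic add on the shared counter hands a P a whole range, which it then doles out without further synchronization. A short Go sketch of that batching; the batch size of 16 and the field names are assumed for illustration only.

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	const goidCacheBatch = 16

	var goidgen uint64 // last allocated id, shared across Ps

	type p struct {
		goidcache    uint64
		goidcacheend uint64
	}

	func (pp *p) nextGoid() uint64 {
		if pp.goidcache == pp.goidcacheend {
			// One atomic add reserves a whole batch:
			// (previous goidgen, previous goidgen + goidCacheBatch].
			end := atomic.AddUint64(&goidgen, goidCacheBatch)
			pp.goidcache = end - goidCacheBatch + 1
			pp.goidcacheend = end + 1
		}
		id := pp.goidcache
		pp.goidcache++
		return id
	}

	func main() {
		var p0, p1 p
		fmt.Println(p0.nextGoid(), p0.nextGoid(), p1.nextGoid()) // 1 2 17
	}
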
-
-// Put on gfree list.
-// If local list is too long, transfer a batch to the global list.
-static void
-gfput(P *p, G *gp)
-{
-	uintptr stksize;
-
-	if(runtime·readgstatus(gp) != Gdead) 
-		runtime·throw("gfput: bad status (not Gdead)");
-
-	stksize = gp->stack.hi - gp->stack.lo;
-	
-	if(stksize != FixedStack) {
-		// non-standard stack size - free it.
-		runtime·stackfree(gp->stack);
-		gp->stack.lo = 0;
-		gp->stack.hi = 0;
-		gp->stackguard0 = 0;
-	}
-	gp->schedlink = p->gfree;
-	p->gfree = gp;
-	p->gfreecnt++;
-	if(p->gfreecnt >= 64) {
-		runtime·lock(&runtime·sched.gflock);
-		while(p->gfreecnt >= 32) {
-			p->gfreecnt--;
-			gp = p->gfree;
-			p->gfree = gp->schedlink;
-			gp->schedlink = runtime·sched.gfree;
-			runtime·sched.gfree = gp;
-			runtime·sched.ngfree++;
-		}
-		runtime·unlock(&runtime·sched.gflock);
-	}
-}
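
gfput and gfget above keep dead G's on a cheap per-P list and only touch the global, lock-protected list in batches once the local list grows past 64 entries (draining it back down toward 32), so the gflock is taken rarely. A compact Go sketch of that hysteresis; the exact thresholds and types here are illustrative.

	package main

	import "fmt"

	type freeLists struct {
		local  []int // per-P cache, lock-free for its owner
		global []int // shared list, lock-protected in the real code
	}

	func (f *freeLists) put(g int) {
		f.local = append(f.local, g)
		if len(f.local) >= 64 {
			// Transfer a batch so the local list drops back to ~32 entries.
			n := len(f.local) - 32
			f.global = append(f.global, f.local[:n]...)
			f.local = f.local[n:]
		}
	}

	func main() {
		var f freeLists
		for g := 0; g < 70; g++ {
			f.put(g)
		}
		fmt.Println("local:", len(f.local), "global:", len(f.global))
	}
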
-
-// Get from gfree list.
-// If local list is empty, grab a batch from global list.
-static G*
-gfget(P *p)
-{
-	G *gp;
-	void (*fn)(G*);
-
-retry:
-	gp = p->gfree;
-	if(gp == nil && runtime·sched.gfree) {
-		runtime·lock(&runtime·sched.gflock);
-		while(p->gfreecnt < 32 && runtime·sched.gfree != nil) {
-			p->gfreecnt++;
-			gp = runtime·sched.gfree;
-			runtime·sched.gfree = gp->schedlink;
-			runtime·sched.ngfree--;
-			gp->schedlink = p->gfree;
-			p->gfree = gp;
-		}
-		runtime·unlock(&runtime·sched.gflock);
-		goto retry;
-	}
-	if(gp) {
-		p->gfree = gp->schedlink;
-		p->gfreecnt--;
-
-		if(gp->stack.lo == 0) {
-			// Stack was deallocated in gfput.  Allocate a new one.
-			if(g == g->m->g0) {
-				gp->stack = runtime·stackalloc(FixedStack);
-			} else {
-				g->m->scalararg[0] = FixedStack;
-				g->m->ptrarg[0] = gp;
-				fn = mstackalloc;
-				runtime·mcall(&fn);
-				g->m->ptrarg[0] = nil;
-			}
-			gp->stackguard0 = gp->stack.lo + StackGuard;
-		} else {
-			if(raceenabled)
-				runtime·racemalloc((void*)gp->stack.lo, gp->stack.hi - gp->stack.lo);
-		}
-	}
-	return gp;
-}
-
-// Purge all cached G's from gfree list to the global list.
-static void
-gfpurge(P *p)
-{
-	G *gp;
-
-	runtime·lock(&runtime·sched.gflock);
-	while(p->gfreecnt != 0) {
-		p->gfreecnt--;
-		gp = p->gfree;
-		p->gfree = gp->schedlink;
-		gp->schedlink = runtime·sched.gfree;
-		runtime·sched.gfree = gp;
-		runtime·sched.ngfree++;
-	}
-	runtime·unlock(&runtime·sched.gflock);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·Breakpoint(void)
-{
-	runtime·breakpoint();
-}
-
-// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
-// after they modify m->locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-#pragma textflag NOSPLIT
-static void
-lockOSThread(void)
-{
-	g->m->lockedg = g;
-	g->lockedm = g->m;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·LockOSThread(void)
-{
-	g->m->locked |= LockExternal;
-	lockOSThread();
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·lockOSThread(void)
-{
-	g->m->locked += LockInternal;
-	lockOSThread();
-}
-
-
-// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
-// after they update m->locked. Do not allow preemption during this call,
-// or else the m might be different in this function than in the caller.
-#pragma textflag NOSPLIT
-static void
-unlockOSThread(void)
-{
-	if(g->m->locked != 0)
-		return;
-	g->m->lockedg = nil;
-	g->lockedm = nil;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·UnlockOSThread(void)
-{
-	g->m->locked &= ~LockExternal;
-	unlockOSThread();
-}
-
-static void badunlockOSThread(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·unlockOSThread(void)
-{
-	void (*fn)(void);
-
-	if(g->m->locked < LockInternal) {
-		fn = badunlockOSThread;
-		runtime·onM(&fn);
-	}
-	g->m->locked -= LockInternal;
-	unlockOSThread();
-}
-
-static void
-badunlockOSThread(void)
-{
-	runtime·throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
-}
-
-#pragma textflag NOSPLIT
-int32
-runtime·gcount(void)
-{
-	P *p, **pp;
-	int32 n;
-
-	n = runtime·allglen - runtime·sched.ngfree;
-	for(pp=runtime·allp; p=*pp; pp++)
-		n -= p->gfreecnt;
-	// All these variables can be changed concurrently, so the result can be inconsistent.
-	// But at least the current goroutine is running.
-	if(n < 1)
-		n = 1;
-	return n;
-}
-
-int32
-runtime·mcount(void)
-{
-	return runtime·sched.mcount;
-}
-
-static struct ProfState {
-	uint32 lock;
-	int32 hz;
-} prof;
-
-static void System(void) { System(); }
-static void ExternalCode(void) { ExternalCode(); }
-static void GC(void) { GC(); }
-
-extern void runtime·cpuproftick(uintptr*, int32);
-extern byte runtime·etext[];
-
-// Called if we receive a SIGPROF signal.
-void
-runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp)
-{
-	int32 n;
-	bool traceback;
-	// Do not use global m in this function, use mp instead.
-	// On windows one m is sending reports about all the g's, so m means a wrong thing.
-	byte m;
-	uintptr stk[100];
-
-	m = 0;
-	USED(m);
-
-	if(prof.hz == 0)
-		return;
-
-	// Profiling runs concurrently with GC, so it must not allocate.
-	mp->mallocing++;
-
-	// Define that a "user g" is a user-created goroutine, and a "system g"
-	// is one that is m->g0 or m->gsignal. We've only made sure that we
-	// can unwind user g's, so exclude the system g's.
-	//
-	// It is not quite as easy as testing gp == m->curg (the current user g)
-	// because we might be interrupted for profiling halfway through a
-	// goroutine switch. The switch involves updating three (or four) values:
-	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
-	// because once it gets updated the new g is running.
-	//
-	// When switching from a user g to a system g, LR is not considered live,
-	// so the update only affects g, SP, and PC. Since PC must be last,
-	// the possible partial transitions in ordinary execution are (1) g alone is updated,
-	// (2) both g and SP are updated, and (3) SP alone is updated.
-	// If g is updated, we'll see a system g and not look closer.
-	// If SP alone is updated, we can detect the partial transition by checking
-	// whether the SP is within g's stack bounds. (We could also require that SP
-	// be changed only after g, but the stack bounds check is needed by other
-	// cases, so there is no need to impose an additional requirement.)
-	//
-	// There is one exceptional transition to a system g, not in ordinary execution.
-	// When a signal arrives, the operating system starts the signal handler running
-	// with an updated PC and SP. The g is updated last, at the beginning of the
-	// handler. There are two reasons this is okay. First, until g is updated the
-	// g and SP do not match, so the stack bounds check detects the partial transition.
-	// Second, signal handlers currently run with signals disabled, so a profiling
-	// signal cannot arrive during the handler.
-	//
-	// When switching from a system g to a user g, there are three possibilities.
-	//
-	// First, it may be that the g switch has no PC update, because the SP
-	// either corresponds to a user g throughout (as in runtime.asmcgocall)
-	// or because it has been arranged to look like a user g frame
-	// (as in runtime.cgocallback_gofunc). In this case, since the entire
-	// transition is a g+SP update, a partial transition updating just one of 
-	// those will be detected by the stack bounds check.
-	//
-	// Second, when returning from a signal handler, the PC and SP updates
-	// are performed by the operating system in an atomic update, so the g
-	// update must be done before them. The stack bounds check detects
-	// the partial transition here, and (again) signal handlers run with signals
-	// disabled, so a profiling signal cannot arrive then anyway.
-	//
-	// Third, the common case: it may be that the switch updates g, SP, and PC
-	// separately, as in runtime.gogo.
-	//
-	// Because runtime.gogo is the only instance, we check whether the PC lies
-	// within that function, and if so, do not ask for a traceback. This approach
-	// requires knowing the size of the runtime.gogo function, which we
-	// record in arch_*.h and check in runtime_test.go.
-	//
-	// There is another apparently viable approach, recorded here in case
-	// the "PC within runtime.gogo" check turns out not to be usable.
-	// It would be possible to delay the update of either g or SP until immediately
-	// before the PC update instruction. Then, because of the stack bounds check,
-	// the only problematic interrupt point is just before that PC update instruction,
-	// and the sigprof handler can detect that instruction and simulate stepping past
-	// it in order to reach a consistent state. On ARM, the update of g must be made
-	// in two places (in R10 and also in a TLS slot), so the delayed update would
-	// need to be the SP update. The sigprof handler must read the instruction at
-	// the current PC and if it was the known instruction (for example, JMP BX or 
-	// MOV R2, PC), use that other register in place of the PC value.
-	// The biggest drawback to this solution is that it requires that we can tell
-	// whether it's safe to read from the memory pointed at by PC.
-	// In a correct program, we can test PC == nil and otherwise read,
-	// but if a profiling signal happens at the instant that a program executes
-	// a bad jump (before the program manages to handle the resulting fault)
-	// the profiling handler could fault trying to read nonexistent memory.
-	//
-	// To recap, there are no constraints on the assembly being used for the
-	// transition. We simply require that g and SP match and that the PC is not
-	// in runtime.gogo.
-	traceback = true;
-	if(gp == nil || gp != mp->curg ||
-	   (uintptr)sp < gp->stack.lo || gp->stack.hi < (uintptr)sp ||
-	   ((uint8*)runtime·gogo <= pc && pc < (uint8*)runtime·gogo + RuntimeGogoBytes))
-		traceback = false;
-
-	n = 0;
-	if(traceback)
-		n = runtime·gentraceback((uintptr)pc, (uintptr)sp, (uintptr)lr, gp, 0, stk, nelem(stk), nil, nil, TraceTrap);
-	if(!traceback || n <= 0) {
-		// Normal traceback is impossible or has failed.
-		// See if it falls into several common cases.
-		n = 0;
-		if(mp->ncgo > 0 && mp->curg != nil &&
-			mp->curg->syscallpc != 0 && mp->curg->syscallsp != 0) {
-			// Cgo, we can't unwind and symbolize arbitrary C code,
-			// so instead collect Go stack that leads to the cgo call.
-			// This is especially important on windows, since all syscalls are cgo calls.
-			n = runtime·gentraceback(mp->curg->syscallpc, mp->curg->syscallsp, 0, mp->curg, 0, stk, nelem(stk), nil, nil, 0);
-		}
-#ifdef GOOS_windows
-		if(n == 0 && mp->libcallg != nil && mp->libcallpc != 0 && mp->libcallsp != 0) {
-			// Libcall, i.e. runtime syscall on windows.
-			// Collect Go stack that leads to the call.
-			n = runtime·gentraceback(mp->libcallpc, mp->libcallsp, 0, mp->libcallg, 0, stk, nelem(stk), nil, nil, 0);
-		}
-#endif
-		if(n == 0) {
-			// If all of the above has failed, account it against abstract "System" or "GC".
-			n = 2;
-			// "ExternalCode" is better than "etext".
-			if((uintptr)pc > (uintptr)runtime·etext)
-				pc = (byte*)ExternalCode + PCQuantum;
-			stk[0] = (uintptr)pc;
-			if(mp->gcing || mp->helpgc)
-				stk[1] = (uintptr)GC + PCQuantum;
-			else
-				stk[1] = (uintptr)System + PCQuantum;
-		}
-	}
-
-	if(prof.hz != 0) {
-		// Simple cas-lock to coordinate with setcpuprofilerate.
-		while(!runtime·cas(&prof.lock, 0, 1))
-			runtime·osyield();
-		if(prof.hz != 0)
-			runtime·cpuproftick(stk, n);
-		runtime·atomicstore(&prof.lock, 0);
-	}
-	mp->mallocing--;
-}
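
The long comment above boils down to one test before unwinding a profile sample: the signal must have landed on the current user g, with SP inside that g's stack bounds and PC outside runtime.gogo; otherwise the sample may have caught a half-finished g/SP/PC switch. A condensed Go sketch of that test; every field here is an illustrative stand-in for the registers and bounds the signal handler sees.

	package main

	import "fmt"

	type gStack struct{ lo, hi uintptr }

	type sample struct {
		gIsCurG        bool    // did the signal land on m->curg?
		sp, pc         uintptr // registers captured by the signal
		stack          gStack  // bounds of the interrupted goroutine's stack
		gogoLo, gogoHi uintptr // address range of runtime.gogo
	}

	func canTraceback(s sample) bool {
		if !s.gIsCurG {
			return false // system goroutine, or g not yet switched
		}
		if s.sp < s.stack.lo || s.stack.hi < s.sp {
			return false // partial g/SP switch caught mid-flight
		}
		if s.gogoLo <= s.pc && s.pc < s.gogoHi {
			return false // inside the gogo switch itself
		}
		return true
	}

	func main() {
		ok := sample{gIsCurG: true, sp: 0x1100, pc: 0x9000,
			stack: gStack{lo: 0x1000, hi: 0x2000}, gogoLo: 0x5000, gogoHi: 0x5040}
		bad := ok
		bad.sp = 0x3000 // SP outside the stack: looks like a half-finished switch
		fmt.Println(canTraceback(ok), canTraceback(bad)) // true false
	}
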
-
-// Arrange to call fn with a traceback hz times a second.
-void
-runtime·setcpuprofilerate_m(void)
-{
-	int32 hz;
-	
-	hz = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-
-	// Force sane arguments.
-	if(hz < 0)
-		hz = 0;
-
-	// Disable preemption, otherwise we can be rescheduled to another thread
-	// that has profiling enabled.
-	g->m->locks++;
-
-	// Stop profiler on this thread so that it is safe to lock prof.
-	// if a profiling signal came in while we had prof locked,
-	// it would deadlock.
-	runtime·resetcpuprofiler(0);
-
-	while(!runtime·cas(&prof.lock, 0, 1))
-		runtime·osyield();
-	prof.hz = hz;
-	runtime·atomicstore(&prof.lock, 0);
-
-	runtime·lock(&runtime·sched.lock);
-	runtime·sched.profilehz = hz;
-	runtime·unlock(&runtime·sched.lock);
-
-	if(hz != 0)
-		runtime·resetcpuprofiler(hz);
-
-	g->m->locks--;
-}
-
-P *runtime·newP(void);
-
-// Change number of processors.  The world is stopped, sched is locked.
-// gcworkbufs are not being modified by either the GC or 
-// the write barrier code.
-static void
-procresize(int32 new)
-{
-	int32 i, old;
-	bool empty;
-	G *gp;
-	P *p;
-
-	old = runtime·gomaxprocs;
-	if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
-		runtime·throw("procresize: invalid arg");
-	// initialize new P's
-	for(i = 0; i < new; i++) {
-		p = runtime·allp[i];
-		if(p == nil) {
-			p = runtime·newP();
-			p->id = i;
-			p->status = Pgcstop;
-			runtime·atomicstorep(&runtime·allp[i], p);
-		}
-		if(p->mcache == nil) {
-			if(old==0 && i==0)
-				p->mcache = g->m->mcache;  // bootstrap
-			else
-				p->mcache = runtime·allocmcache();
-		}
-	}
-
-	// redistribute runnable G's evenly
-	// collect all runnable goroutines in global queue preserving FIFO order
-	// FIFO order is required to ensure fairness even during frequent GCs
-	// see http://golang.org/issue/7126
-	empty = false;
-	while(!empty) {
-		empty = true;
-		for(i = 0; i < old; i++) {
-			p = runtime·allp[i];
-			if(p->runqhead == p->runqtail)
-				continue;
-			empty = false;
-			// pop from tail of local queue
-			p->runqtail--;
-			gp = p->runq[p->runqtail%nelem(p->runq)];
-			// push onto head of global queue
-			gp->schedlink = runtime·sched.runqhead;
-			runtime·sched.runqhead = gp;
-			if(runtime·sched.runqtail == nil)
-				runtime·sched.runqtail = gp;
-			runtime·sched.runqsize++;
-		}
-	}
-	// fill local queues with at most nelem(p->runq)/2 goroutines
-	// start at 1 because current M already executes some G and will acquire allp[0] below,
-	// so if we have a spare G we want to put it into allp[1].
-	for(i = 1; i < new * nelem(p->runq)/2 && runtime·sched.runqsize > 0; i++) {
-		gp = runtime·sched.runqhead;
-		runtime·sched.runqhead = gp->schedlink;
-		if(runtime·sched.runqhead == nil)
-			runtime·sched.runqtail = nil;
-		runtime·sched.runqsize--;
-		runqput(runtime·allp[i%new], gp);
-	}
-
-	// free unused P's
-	for(i = new; i < old; i++) {
-		p = runtime·allp[i];
-		runtime·freemcache(p->mcache);
-		p->mcache = nil;
-		gfpurge(p);
-		p->status = Pdead;
-		// can't free P itself because it can be referenced by an M in syscall
-	}
-
-	if(g->m->p)
-		g->m->p->m = nil;
-	g->m->p = nil;
-	g->m->mcache = nil;
-	p = runtime·allp[0];
-	p->m = nil;
-	p->status = Pidle;
-	acquirep(p);
-	for(i = new-1; i > 0; i--) {
-		p = runtime·allp[i];
-		p->status = Pidle;
-		pidleput(p);
-	}
-	runtime·atomicstore((uint32*)&runtime·gomaxprocs, new);
-}
-
-// Associate p and the current m.
-static void
-acquirep(P *p)
-{
-	if(g->m->p || g->m->mcache)
-		runtime·throw("acquirep: already in go");
-	if(p->m || p->status != Pidle) {
-		runtime·printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
-		runtime·throw("acquirep: invalid p state");
-	}
-	g->m->mcache = p->mcache;
-	g->m->p = p;
-	p->m = g->m;
-	p->status = Prunning;
-}
-
-// Disassociate p and the current m.
-static P*
-releasep(void)
-{
-	P *p;
-
-	if(g->m->p == nil || g->m->mcache == nil)
-		runtime·throw("releasep: invalid arg");
-	p = g->m->p;
-	if(p->m != g->m || p->mcache != g->m->mcache || p->status != Prunning) {
-		runtime·printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
-			g->m, g->m->p, p->m, g->m->mcache, p->mcache, p->status);
-		runtime·throw("releasep: invalid p state");
-	}
-	g->m->p = nil;
-	g->m->mcache = nil;
-	p->m = nil;
-	p->status = Pidle;
-	return p;
-}
-
-static void
-incidlelocked(int32 v)
-{
-	runtime·lock(&runtime·sched.lock);
-	runtime·sched.nmidlelocked += v;
-	if(v > 0)
-		checkdead();
-	runtime·unlock(&runtime·sched.lock);
-}
-
-// Check for deadlock situation.
-// The check is based on number of running M's, if 0 -> deadlock.
-static void
-checkdead(void)
-{
-	G *gp;
-	P *p;
-	M *mp;
-	int32 run, grunning, s;
-	uintptr i;
-
-	// -1 for sysmon
-	run = runtime·sched.mcount - runtime·sched.nmidle - runtime·sched.nmidlelocked - 1;
-	if(run > 0)
-		return;
-	// If we are dying because of a signal caught on an already idle thread,
-	// freezetheworld will cause all running threads to block.
-	// And runtime will essentially enter into deadlock state,
-	// except that there is a thread that will call runtime·exit soon.
-	if(runtime·panicking > 0)
-		return;
-	if(run < 0) {
-		runtime·printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
-			runtime·sched.nmidle, runtime·sched.nmidlelocked, runtime·sched.mcount);
-		runtime·throw("checkdead: inconsistent counts");
-	}
-	grunning = 0;
-	runtime·lock(&runtime·allglock);
-	for(i = 0; i < runtime·allglen; i++) {
-		gp = runtime·allg[i];
-		if(gp->issystem)
-			continue;
-		s = runtime·readgstatus(gp);
-		switch(s&~Gscan) {
-		case Gwaiting:
-			grunning++;
-			break;
-		case Grunnable:
-		case Grunning:
-		case Gsyscall:
-			runtime·unlock(&runtime·allglock);
-			runtime·printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
-			runtime·throw("checkdead: runnable g");
-			break;
-		}
-	}
-	runtime·unlock(&runtime·allglock);
-	if(grunning == 0)  // possible if main goroutine calls runtime·Goexit()
-		runtime·throw("no goroutines (main called runtime.Goexit) - deadlock!");
-
-	// Maybe jump time forward for playground.
-	if((gp = runtime·timejump()) != nil) {
-		runtime·casgstatus(gp, Gwaiting, Grunnable);
-		globrunqput(gp);
-		p = pidleget();
-		if(p == nil)
-			runtime·throw("checkdead: no p for timer");
-		mp = mget();
-		if(mp == nil)
-			newm(nil, p);
-		else {
-			mp->nextp = p;
-			runtime·notewakeup(&mp->park);
-		}
-		return;
-	}
-
-	g->m->throwing = -1;  // do not dump full stacks
-	runtime·throw("all goroutines are asleep - deadlock!");
-}
-
-static void
-sysmon(void)
-{
-	uint32 idle, delay, nscavenge;
-	int64 now, unixnow, lastpoll, lasttrace, lastgc;
-	int64 forcegcperiod, scavengelimit, lastscavenge, maxsleep;
-	G *gp;
-
-	// If we go two minutes without a garbage collection, force one to run.
-	forcegcperiod = 2*60*1e9;
-	// If a heap span goes unused for 5 minutes after a garbage collection,
-	// we hand it back to the operating system.
-	scavengelimit = 5*60*1e9;
-	if(runtime·debug.scavenge > 0) {
-		// Scavenge-a-lot for testing.
-		forcegcperiod = 10*1e6;
-		scavengelimit = 20*1e6;
-	}
-	lastscavenge = runtime·nanotime();
-	nscavenge = 0;
-	// Make wake-up period small enough for the sampling to be correct.
-	maxsleep = forcegcperiod/2;
-	if(scavengelimit < forcegcperiod)
-		maxsleep = scavengelimit/2;
-
-	lasttrace = 0;
-	idle = 0;  // how many cycles in succession we have not woken up somebody
-	delay = 0;
-	for(;;) {
-		if(idle == 0)  // start with 20us sleep...
-			delay = 20;
-		else if(idle > 50)  // start doubling the sleep after 1ms...
-			delay *= 2;
-		if(delay > 10*1000)  // up to 10ms
-			delay = 10*1000;
-		runtime·usleep(delay);
-		if(runtime·debug.schedtrace <= 0 &&
-			(runtime·sched.gcwaiting || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs)) {  // TODO: fast atomic
-			runtime·lock(&runtime·sched.lock);
-			if(runtime·atomicload(&runtime·sched.gcwaiting) || runtime·atomicload(&runtime·sched.npidle) == runtime·gomaxprocs) {
-				runtime·atomicstore(&runtime·sched.sysmonwait, 1);
-				runtime·unlock(&runtime·sched.lock);
-				runtime·notetsleep(&runtime·sched.sysmonnote, maxsleep);
-				runtime·lock(&runtime·sched.lock);
-				runtime·atomicstore(&runtime·sched.sysmonwait, 0);
-				runtime·noteclear(&runtime·sched.sysmonnote);
-				idle = 0;
-				delay = 20;
-			}
-			runtime·unlock(&runtime·sched.lock);
-		}
-		// poll network if not polled for more than 10ms
-		lastpoll = runtime·atomicload64(&runtime·sched.lastpoll);
-		now = runtime·nanotime();
-		unixnow = runtime·unixnanotime();
-		if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
-			runtime·cas64(&runtime·sched.lastpoll, lastpoll, now);
-			gp = runtime·netpoll(false);  // non-blocking
-			if(gp) {
-				// Need to decrement number of idle locked M's
-				// (pretending that one more is running) before injectglist.
-				// Otherwise it can lead to the following situation:
-				// injectglist grabs all P's but before it starts M's to run the P's,
-				// another M returns from syscall, finishes running its G,
-				// observes that there is no work to do and no other running M's
-				// and reports deadlock.
-				incidlelocked(-1);
-				injectglist(gp);
-				incidlelocked(1);
-			}
-		}
-		// retake P's blocked in syscalls
-		// and preempt long running G's
-		if(retake(now))
-			idle = 0;
-		else
-			idle++;
-
-		// check if we need to force a GC
-		lastgc = runtime·atomicload64(&mstats.last_gc);
-		if(lastgc != 0 && unixnow - lastgc > forcegcperiod && runtime·atomicload(&runtime·forcegc.idle)) {
-			runtime·lock(&runtime·forcegc.lock);
-			runtime·forcegc.idle = 0;
-			runtime·forcegc.g->schedlink = nil;
-			injectglist(runtime·forcegc.g);
-			runtime·unlock(&runtime·forcegc.lock);
-		}
-
-		// scavenge heap once in a while
-		if(lastscavenge + scavengelimit/2 < now) {
-			runtime·MHeap_Scavenge(nscavenge, now, scavengelimit);
-			lastscavenge = now;
-			nscavenge++;
-		}
-
-		if(runtime·debug.schedtrace > 0 && lasttrace + runtime·debug.schedtrace*1000000ll <= now) {
-			lasttrace = now;
-			runtime·schedtrace(runtime·debug.scheddetail);
-		}
-	}
-}
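
sysmon's sleep above backs off exponentially: 20us while it keeps finding work, doubling once it has been idle for more than 50 cycles, capped at 10ms. A short Go sketch that just reproduces the schedule; the function and its arguments are illustrative, not sysmon's real state.

	package main

	import (
		"fmt"
		"time"
	)

	// sysmonDelay mirrors the delay update at the top of sysmon's loop.
	func sysmonDelay(idle uint32, delay time.Duration) time.Duration {
		if idle == 0 {
			return 20 * time.Microsecond // start with 20us
		}
		if idle > 50 {
			delay *= 2 // start doubling after ~1ms of idleness
		}
		if delay > 10*time.Millisecond {
			delay = 10 * time.Millisecond // cap at 10ms
		}
		return delay
	}

	func main() {
		delay := time.Duration(0)
		for idle := uint32(0); idle <= 60; idle += 10 {
			delay = sysmonDelay(idle, delay)
			fmt.Printf("idle=%d -> sleep %v\n", idle, delay)
		}
	}
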
-
-typedef struct Pdesc Pdesc;
-struct Pdesc
-{
-	uint32	schedtick;
-	int64	schedwhen;
-	uint32	syscalltick;
-	int64	syscallwhen;
-};
-#pragma dataflag NOPTR
-static Pdesc pdesc[MaxGomaxprocs];
-
-static uint32
-retake(int64 now)
-{
-	uint32 i, s, n;
-	int64 t;
-	P *p;
-	Pdesc *pd;
-
-	n = 0;
-	for(i = 0; i < runtime·gomaxprocs; i++) {
-		p = runtime·allp[i];
-		if(p==nil)
-			continue;
-		pd = &pdesc[i];
-		s = p->status;
-		if(s == Psyscall) {
-			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
-			t = p->syscalltick;
-			if(pd->syscalltick != t) {
-				pd->syscalltick = t;
-				pd->syscallwhen = now;
-				continue;
-			}
-			// On the one hand we don't want to retake Ps if there is no other work to do,
-			// but on the other hand we want to retake them eventually
-			// because they can prevent the sysmon thread from deep sleep.
-			if(p->runqhead == p->runqtail &&
-				runtime·atomicload(&runtime·sched.nmspinning) + runtime·atomicload(&runtime·sched.npidle) > 0 &&
-				pd->syscallwhen + 10*1000*1000 > now)
-				continue;
-			// Need to decrement number of idle locked M's
-			// (pretending that one more is running) before the CAS.
-			// Otherwise the M from which we retake can exit the syscall,
-			// increment nmidle and report deadlock.
-			incidlelocked(-1);
-			if(runtime·cas(&p->status, s, Pidle)) {
-				n++;
-				handoffp(p);
-			}
-			incidlelocked(1);
-		} else if(s == Prunning) {
-			// Preempt G if it's running for more than 10ms.
-			t = p->schedtick;
-			if(pd->schedtick != t) {
-				pd->schedtick = t;
-				pd->schedwhen = now;
-				continue;
-			}
-			if(pd->schedwhen + 10*1000*1000 > now)
-				continue;
-			preemptone(p);
-		}
-	}
-	return n;
-}
-
-// Tell all goroutines that they have been preempted and they should stop.
-// This function is purely best-effort.  It can fail to inform a goroutine if a
-// processor just started running it.
-// No locks need to be held.
-// Returns true if preemption request was issued to at least one goroutine.
-static bool
-preemptall(void)
-{
-	P *p;
-	int32 i;
-	bool res;
-
-	res = false;
-	for(i = 0; i < runtime·gomaxprocs; i++) {
-		p = runtime·allp[i];
-		if(p == nil || p->status != Prunning)
-			continue;
-		res |= preemptone(p);
-	}
-	return res;
-}
-
-// Tell the goroutine running on processor P to stop.
-// This function is purely best-effort.  It can incorrectly fail to inform the
-// goroutine.  It can inform the wrong goroutine.  Even if it informs the
-// correct goroutine, that goroutine might ignore the request if it is
-// simultaneously executing runtime·newstack.
-// No lock needs to be held.
-// Returns true if preemption request was issued.
-// The actual preemption will happen at some point in the future
-// and will be indicated by the gp->status no longer being
-// Grunning.
-static bool
-preemptone(P *p)
-{
-	M *mp;
-	G *gp;
-
-	mp = p->m;
-	if(mp == nil || mp == g->m)
-		return false;
-	gp = mp->curg;
-	if(gp == nil || gp == mp->g0)
-		return false;
-	gp->preempt = true;
-	// Every call in a goroutine checks for stack overflow by
-	// comparing the current stack pointer to gp->stackguard0.
-	// Setting gp->stackguard0 to StackPreempt folds
-	// preemption into the normal stack overflow check.
-	gp->stackguard0 = StackPreempt;
-	return true;
-}
-
-void
-runtime·schedtrace(bool detailed)
-{
-	static int64 starttime;
-	int64 now;
-	int64 id1, id2, id3;
-	int32 i, t, h;
-	uintptr gi;
-	int8 *fmt;
-	M *mp, *lockedm;
-	G *gp, *lockedg;
-	P *p;
-
-	now = runtime·nanotime();
-	if(starttime == 0)
-		starttime = now;
-
-	runtime·lock(&runtime·sched.lock);
-	runtime·printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d spinningthreads=%d idlethreads=%d runqueue=%d",
-		(now-starttime)/1000000, runtime·gomaxprocs, runtime·sched.npidle, runtime·sched.mcount,
-		runtime·sched.nmspinning, runtime·sched.nmidle, runtime·sched.runqsize);
-	if(detailed) {
-		runtime·printf(" gcwaiting=%d nmidlelocked=%d stopwait=%d sysmonwait=%d\n",
-			runtime·sched.gcwaiting, runtime·sched.nmidlelocked,
-			runtime·sched.stopwait, runtime·sched.sysmonwait);
-	}
-	// We must be careful while reading data from P's, M's and G's.
-	// Even if we hold schedlock, most data can be changed concurrently.
-	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
-	for(i = 0; i < runtime·gomaxprocs; i++) {
-		p = runtime·allp[i];
-		if(p == nil)
-			continue;
-		mp = p->m;
-		h = runtime·atomicload(&p->runqhead);
-		t = runtime·atomicload(&p->runqtail);
-		if(detailed)
-			runtime·printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
-				i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
-		else {
-			// In non-detailed mode format lengths of per-P run queues as:
-			// [len1 len2 len3 len4]
-			fmt = " %d";
-			if(runtime·gomaxprocs == 1)
-				fmt = " [%d]\n";
-			else if(i == 0)
-				fmt = " [%d";
-			else if(i == runtime·gomaxprocs-1)
-				fmt = " %d]\n";
-			runtime·printf(fmt, t-h);
-		}
-	}
-	if(!detailed) {
-		runtime·unlock(&runtime·sched.lock);
-		return;
-	}
-	for(mp = runtime·allm; mp; mp = mp->alllink) {
-		p = mp->p;
-		gp = mp->curg;
-		lockedg = mp->lockedg;
-		id1 = -1;
-		if(p)
-			id1 = p->id;
-		id2 = -1;
-		if(gp)
-			id2 = gp->goid;
-		id3 = -1;
-		if(lockedg)
-			id3 = lockedg->goid;
-		runtime·printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
-			" locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
-			mp->id, id1, id2,
-			mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
-			mp->spinning, g->m->blocked, id3);
-	}
-	runtime·lock(&runtime·allglock);
-	for(gi = 0; gi < runtime·allglen; gi++) {
-		gp = runtime·allg[gi];
-		mp = gp->m;
-		lockedm = gp->lockedm;
-		runtime·printf("  G%D: status=%d(%S) m=%d lockedm=%d\n",
-			gp->goid, runtime·readgstatus(gp), gp->waitreason, mp ? mp->id : -1,
-			lockedm ? lockedm->id : -1);
-	}
-	runtime·unlock(&runtime·allglock);
-	runtime·unlock(&runtime·sched.lock);
-}
-
-// Put mp on midle list.
-// Sched must be locked.
-static void
-mput(M *mp)
-{
-	mp->schedlink = runtime·sched.midle;
-	runtime·sched.midle = mp;
-	runtime·sched.nmidle++;
-	checkdead();
-}
-
-// Try to get an m from midle list.
-// Sched must be locked.
-static M*
-mget(void)
-{
-	M *mp;
-
-	if((mp = runtime·sched.midle) != nil){
-		runtime·sched.midle = mp->schedlink;
-		runtime·sched.nmidle--;
-	}
-	return mp;
-}
-
-// Put gp on the global runnable queue.
-// Sched must be locked.
-static void
-globrunqput(G *gp)
-{
-	gp->schedlink = nil;
-	if(runtime·sched.runqtail)
-		runtime·sched.runqtail->schedlink = gp;
-	else
-		runtime·sched.runqhead = gp;
-	runtime·sched.runqtail = gp;
-	runtime·sched.runqsize++;
-}
-
-// Put a batch of runnable goroutines on the global runnable queue.
-// Sched must be locked.
-static void
-globrunqputbatch(G *ghead, G *gtail, int32 n)
-{
-	gtail->schedlink = nil;
-	if(runtime·sched.runqtail)
-		runtime·sched.runqtail->schedlink = ghead;
-	else
-		runtime·sched.runqhead = ghead;
-	runtime·sched.runqtail = gtail;
-	runtime·sched.runqsize += n;
-}
-
-// Try to get a batch of G's from the global runnable queue.
-// Sched must be locked.
-static G*
-globrunqget(P *p, int32 max)
-{
-	G *gp, *gp1;
-	int32 n;
-
-	if(runtime·sched.runqsize == 0)
-		return nil;
-	n = runtime·sched.runqsize/runtime·gomaxprocs+1;
-	if(n > runtime·sched.runqsize)
-		n = runtime·sched.runqsize;
-	if(max > 0 && n > max)
-		n = max;
-	if(n > nelem(p->runq)/2)
-		n = nelem(p->runq)/2;
-	runtime·sched.runqsize -= n;
-	if(runtime·sched.runqsize == 0)
-		runtime·sched.runqtail = nil;
-	gp = runtime·sched.runqhead;
-	runtime·sched.runqhead = gp->schedlink;
-	n--;
-	while(n--) {
-		gp1 = runtime·sched.runqhead;
-		runtime·sched.runqhead = gp1->schedlink;
-		runqput(p, gp1);
-	}
-	return gp;
-}
-
-// Put p on pidle list.
-// Sched must be locked.
-static void
-pidleput(P *p)
-{
-	p->link = runtime·sched.pidle;
-	runtime·sched.pidle = p;
-	runtime·xadd(&runtime·sched.npidle, 1);  // TODO: fast atomic
-}
-
-// Try to get a p from pidle list.
-// Sched must be locked.
-static P*
-pidleget(void)
-{
-	P *p;
-
-	p = runtime·sched.pidle;
-	if(p) {
-		runtime·sched.pidle = p->link;
-		runtime·xadd(&runtime·sched.npidle, -1);  // TODO: fast atomic
-	}
-	return p;
-}
-
-// Try to put g on local runnable queue.
-// If it's full, put onto global queue.
-// Executed only by the owner P.
-static void
-runqput(P *p, G *gp)
-{
-	uint32 h, t;
-
-retry:
-	h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
-	t = p->runqtail;
-	if(t - h < nelem(p->runq)) {
-		p->runq[t%nelem(p->runq)] = gp;
-		runtime·atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption
-		return;
-	}
-	if(runqputslow(p, gp, h, t))
-		return;
-	// the queue is not full, now the put above must succeed
-	goto retry;
-}
-
-// Put g and a batch of work from local runnable queue on global queue.
-// Executed only by the owner P.
-static bool
-runqputslow(P *p, G *gp, uint32 h, uint32 t)
-{
-	G *batch[nelem(p->runq)/2+1];
-	uint32 n, i;
-
-	// First, grab a batch from local queue.
-	n = t-h;
-	n = n/2;
-	if(n != nelem(p->runq)/2)
-		runtime·throw("runqputslow: queue is not full");
-	for(i=0; i<n; i++)
-		batch[i] = p->runq[(h+i)%nelem(p->runq)];
-	if(!runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
-		return false;
-	batch[n] = gp;
-	// Link the goroutines.
-	for(i=0; i<n; i++)
-		batch[i]->schedlink = batch[i+1];
-	// Now put the batch on global queue.
-	runtime·lock(&runtime·sched.lock);
-	globrunqputbatch(batch[0], batch[n], n+1);
-	runtime·unlock(&runtime·sched.lock);
-	return true;
-}
-
-// Get g from local runnable queue.
-// Executed only by the owner P.
-static G*
-runqget(P *p)
-{
-	G *gp;
-	uint32 t, h;
-
-	for(;;) {
-		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
-		t = p->runqtail;
-		if(t == h)
-			return nil;
-		gp = p->runq[h%nelem(p->runq)];
-		if(runtime·cas(&p->runqhead, h, h+1))  // cas-release, commits consume
-			return gp;
-	}
-}
-
-// Grabs a batch of goroutines from local runnable queue.
-// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
-// Can be executed by any P.
-static uint32
-runqgrab(P *p, G **batch)
-{
-	uint32 t, h, n, i;
-
-	for(;;) {
-		h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
-		t = runtime·atomicload(&p->runqtail);  // load-acquire, synchronize with the producer
-		n = t-h;
-		n = n - n/2;
-		if(n == 0)
-			break;
-		if(n > nelem(p->runq)/2)  // read inconsistent h and t
-			continue;
-		for(i=0; i<n; i++)
-			batch[i] = p->runq[(h+i)%nelem(p->runq)];
-		if(runtime·cas(&p->runqhead, h, h+n))  // cas-release, commits consume
-			break;
-	}
-	return n;
-}
-
-// Steal half of elements from local runnable queue of p2
-// and put onto local runnable queue of p.
-// Returns one of the stolen elements (or nil if failed).
-static G*
-runqsteal(P *p, P *p2)
-{
-	G *gp;
-	G *batch[nelem(p->runq)/2];
-	uint32 t, h, n, i;
-
-	n = runqgrab(p2, batch);
-	if(n == 0)
-		return nil;
-	n--;
-	gp = batch[n];
-	if(n == 0)
-		return gp;
-	h = runtime·atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
-	t = p->runqtail;
-	if(t - h + n >= nelem(p->runq))
-		runtime·throw("runqsteal: runq overflow");
-	for(i=0; i<n; i++, t++)
-		p->runq[t%nelem(p->runq)] = batch[i];
-	runtime·atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption
-	return gp;
-}
-
-void
-runtime·testSchedLocalQueue(void)
-{
-	P *p;
-	G *gs;
-	int32 i, j;
-
-	p = (P*)runtime·mallocgc(sizeof(*p), nil, FlagNoScan);
-	gs = (G*)runtime·mallocgc(nelem(p->runq)*sizeof(*gs), nil, FlagNoScan);
-
-	for(i = 0; i < nelem(p->runq); i++) {
-		if(runqget(p) != nil)
-			runtime·throw("runq is not empty initially");
-		for(j = 0; j < i; j++)
-			runqput(p, &gs[i]);
-		for(j = 0; j < i; j++) {
-			if(runqget(p) != &gs[i]) {
-				runtime·printf("bad element at iter %d/%d\n", i, j);
-				runtime·throw("bad element");
-			}
-		}
-		if(runqget(p) != nil)
-			runtime·throw("runq is not empty afterwards");
-	}
-}
-
-void
-runtime·testSchedLocalQueueSteal(void)
-{
-	P *p1, *p2;
-	G *gs, *gp;
-	int32 i, j, s;
-
-	p1 = (P*)runtime·mallocgc(sizeof(*p1), nil, FlagNoScan);
-	p2 = (P*)runtime·mallocgc(sizeof(*p2), nil, FlagNoScan);
-	gs = (G*)runtime·mallocgc(nelem(p1->runq)*sizeof(*gs), nil, FlagNoScan);
-
-	for(i = 0; i < nelem(p1->runq); i++) {
-		for(j = 0; j < i; j++) {
-			gs[j].sig = 0;
-			runqput(p1, &gs[j]);
-		}
-		gp = runqsteal(p2, p1);
-		s = 0;
-		if(gp) {
-			s++;
-			gp->sig++;
-		}
-		while(gp = runqget(p2)) {
-			s++;
-			gp->sig++;
-		}
-		while(gp = runqget(p1))
-			gp->sig++;
-		for(j = 0; j < i; j++) {
-			if(gs[j].sig != 1) {
-				runtime·printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
-				runtime·throw("bad element");
-			}
-		}
-		if(s != i/2 && s != i/2+1) {
-			runtime·printf("bad steal %d, want %d or %d, iter %d\n",
-				s, i/2, i/2+1, i);
-			runtime·throw("bad steal");
-		}
-	}
-}
-
-void
-runtime·setmaxthreads_m(void)
-{
-	int32 in;
-	int32 out;
-
-	in = g->m->scalararg[0];
-
-	runtime·lock(&runtime·sched.lock);
-	out = runtime·sched.maxmcount;
-	runtime·sched.maxmcount = in;
-	checkmcount();
-	runtime·unlock(&runtime·sched.lock);
-
-	g->m->scalararg[0] = out;
-}
-
-static int8 experiment[] = GOEXPERIMENT; // defined in zaexperiment.h
-
-static bool
-haveexperiment(int8 *name)
-{
-	int32 i, j;
-	
-	for(i=0; i<sizeof(experiment); i++) {
-		if((i == 0 || experiment[i-1] == ',') && experiment[i] == name[0]) {
-			for(j=0; name[j]; j++)
-				if(experiment[i+j] != name[j])
-					goto nomatch;
-			if(experiment[i+j] != '\0' && experiment[i+j] != ',')
-				goto nomatch;
-			return 1;
-		}
-	nomatch:;
-	}
-	return 0;
-}
-
-#pragma textflag NOSPLIT
-void
-sync·runtime_procPin(intptr p)
-{
-	M *mp;
-
-	mp = g->m;
-	// Disable preemption.
-	mp->locks++;
-	p = mp->p->id;
-	FLUSH(&p);
-}
-
-#pragma textflag NOSPLIT
-void
-sync·runtime_procUnpin()
-{
-	g->m->locks--;
-}
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index f41ffbf..12e2e71 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -6,8 +6,6 @@
 
 import "unsafe"
 
-func newsysmon()
-
 func runtime_init()
 func main_init()
 func main_main()
@@ -29,7 +27,7 @@
 		maxstacksize = 250000000
 	}
 
-	onM(newsysmon)
+	systemstack(newsysmon)
 
 	// Lock the main goroutine onto this, the main OS thread,
 	// during initialization.  Most programs won't care, but a few
@@ -55,6 +53,24 @@
 
 	memstats.enablegc = true // now that runtime is initialized, GC is okay
 
+	if iscgo {
+		if _cgo_thread_start == nil {
+			gothrow("_cgo_thread_start missing")
+		}
+		if _cgo_malloc == nil {
+			gothrow("_cgo_malloc missing")
+		}
+		if _cgo_free == nil {
+			gothrow("_cgo_free missing")
+		}
+		if _cgo_setenv == nil {
+			gothrow("_cgo_setenv missing")
+		}
+		if _cgo_unsetenv == nil {
+			gothrow("_cgo_unsetenv missing")
+		}
+	}
+
 	main_init()
 
 	needUnlock = false
@@ -80,8 +96,6 @@
 	}
 }
 
-var parkunlock_c byte
-
 // start forcegc helper goroutine
 func init() {
 	go forcegchelper()
@@ -115,7 +129,7 @@
 
 // Puts the current goroutine into a waiting state and calls unlockf.
 // If unlockf returns false, the goroutine is resumed.
-func gopark(unlockf unsafe.Pointer, lock unsafe.Pointer, reason string) {
+func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string) {
 	mp := acquirem()
 	gp := mp.curg
 	status := readgstatus(gp)
@@ -123,7 +137,7 @@
 		gothrow("gopark: bad g status")
 	}
 	mp.waitlock = lock
-	mp.waitunlockf = unlockf
+	mp.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
 	gp.waitreason = reason
 	releasem(mp)
 	// can't do anything that might move the G between Ms here.
@@ -133,14 +147,13 @@
 // Puts the current goroutine into a waiting state and unlocks the lock.
 // The goroutine can be made runnable again by calling goready(gp).
 func goparkunlock(lock *mutex, reason string) {
-	gopark(unsafe.Pointer(&parkunlock_c), unsafe.Pointer(lock), reason)
+	gopark(parkunlock_c, unsafe.Pointer(lock), reason)
 }
 
 func goready(gp *g) {
-	mp := acquirem()
-	mp.ptrarg[0] = unsafe.Pointer(gp)
-	onM(ready_m)
-	releasem(mp)
+	systemstack(func() {
+		ready(gp)
+	})
 }
 
 //go:nosplit
@@ -226,6 +239,11 @@
 	return new(g)
 }
 
+var (
+	allgs    []*g
+	allglock mutex
+)
+
 func allgadd(gp *g) {
 	if readgstatus(gp) == _Gidle {
 		gothrow("allgadd: bad status Gidle")
diff --git a/src/runtime/proc1.go b/src/runtime/proc1.go
new file mode 100644
index 0000000..8c941dd
--- /dev/null
+++ b/src/runtime/proc1.go
@@ -0,0 +1,3186 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+var (
+	m0 m
+	g0 g
+)
+
+// Goroutine scheduler
+// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
+//
+// The main concepts are:
+// G - goroutine.
+// M - worker thread, or machine.
+// P - processor, a resource that is required to execute Go code.
+//     M must have an associated P to execute Go code, however it can be
+//     blocked or in a syscall w/o an associated P.
+//
+// Design doc at http://golang.org/s/go11sched.
+
+const (
+	// Number of goroutine ids to grab from sched.goidgen to local per-P cache at once.
+	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
+	_GoidCacheBatch = 16
+)
+
+/*
+SchedT	sched;
+int32	gomaxprocs;
+uint32	needextram;
+bool	iscgo;
+M	m0;
+G	g0;	// idle goroutine for m0
+G*	lastg;
+M*	allm;
+M*	extram;
+P*	allp[MaxGomaxprocs+1];
+int8*	goos;
+int32	ncpu;
+int32	newprocs;
+
+Mutex allglock;	// the following vars are protected by this lock or by stoptheworld
+G**	allg;
+Slice	allgs;
+uintptr allglen;
+ForceGCState	forcegc;
+
+void mstart(void);
+static void runqput(P*, G*);
+static G* runqget(P*);
+static bool runqputslow(P*, G*, uint32, uint32);
+static G* runqsteal(P*, P*);
+static void mput(M*);
+static M* mget(void);
+static void mcommoninit(M*);
+static void schedule(void);
+static void procresize(int32);
+static void acquirep(P*);
+static P* releasep(void);
+static void newm(void(*)(void), P*);
+static void stopm(void);
+static void startm(P*, bool);
+static void handoffp(P*);
+static void wakep(void);
+static void stoplockedm(void);
+static void startlockedm(G*);
+static void sysmon(void);
+static uint32 retake(int64);
+static void incidlelocked(int32);
+static void checkdead(void);
+static void exitsyscall0(G*);
+void park_m(G*);
+static void goexit0(G*);
+static void gfput(P*, G*);
+static G* gfget(P*);
+static void gfpurge(P*);
+static void globrunqput(G*);
+static void globrunqputbatch(G*, G*, int32);
+static G* globrunqget(P*, int32);
+static P* pidleget(void);
+static void pidleput(P*);
+static void injectglist(G*);
+static bool preemptall(void);
+static bool preemptone(P*);
+static bool exitsyscallfast(void);
+static bool haveexperiment(int8*);
+void allgadd(G*);
+static void dropg(void);
+
+extern String buildVersion;
+*/
+
+// The bootstrap sequence is:
+//
+//	call osinit
+//	call schedinit
+//	make & queue new G
+//	call runtime·mstart
+//
+// The new G calls runtime·main.
+func schedinit() {
+	// raceinit must be the first call to race detector.
+	// In particular, it must be done before mallocinit below calls racemapshadow.
+	_g_ := getg()
+	if raceenabled {
+		_g_.racectx = raceinit()
+	}
+
+	sched.maxmcount = 10000
+
+	tracebackinit()
+	symtabinit()
+	stackinit()
+	mallocinit()
+	mcommoninit(_g_.m)
+
+	goargs()
+	goenvs()
+	parsedebugvars()
+	gcinit()
+
+	sched.lastpoll = uint64(nanotime())
+	procs := 1
+	if n := goatoi(gogetenv("GOMAXPROCS")); n > 0 {
+		if n > _MaxGomaxprocs {
+			n = _MaxGomaxprocs
+		}
+		procs = n
+	}
+	procresize(int32(procs))
+
+	if buildVersion == "" {
+		// Condition should never trigger.  This code just serves
+		// to ensure runtime·buildVersion is kept in the resulting binary.
+		buildVersion = "unknown"
+	}
+}
+
+func newsysmon() {
+	_newm(sysmon, nil)
+}
+
+func dumpgstatus(gp *g) {
+	_g_ := getg()
+	print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
+	print("runtime:  g:  g=", _g_, ", goid=", _g_.goid, ",  g->atomicstatus=", readgstatus(_g_), "\n")
+}
+
+func checkmcount() {
+	// sched lock is held
+	if sched.mcount > sched.maxmcount {
+		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
+		gothrow("thread exhaustion")
+	}
+}
+
+func mcommoninit(mp *m) {
+	_g_ := getg()
+
+	// g0 stack won't make sense for user (and is not necessarily unwindable).
+	if _g_ != _g_.m.g0 {
+		callers(1, &mp.createstack[0], len(mp.createstack))
+	}
+
+	mp.fastrand = 0x49f6428a + uint32(mp.id) + uint32(cputicks())
+	if mp.fastrand == 0 {
+		mp.fastrand = 0x49f6428a
+	}
+
+	lock(&sched.lock)
+	mp.id = sched.mcount
+	sched.mcount++
+	checkmcount()
+	mpreinit(mp)
+	if mp.gsignal != nil {
+		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + _StackGuard
+	}
+
+	// Add to allm so garbage collector doesn't free g->m
+	// when it is just in a register or thread-local storage.
+	mp.alllink = allm
+
+	// NumCgoCall() iterates over allm w/o schedlock,
+	// so we need to publish it safely.
+	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
+	unlock(&sched.lock)
+}
+
+// Mark gp ready to run.
+func ready(gp *g) {
+	status := readgstatus(gp)
+
+	// Mark runnable.
+	_g_ := getg()
+	_g_.m.locks++ // disable preemption because it can be holding p in a local var
+	if status&^_Gscan != _Gwaiting {
+		dumpgstatus(gp)
+		gothrow("bad g->status in ready")
+	}
+
+	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
+	casgstatus(gp, _Gwaiting, _Grunnable)
+	runqput(_g_.m.p, gp)
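+	// Wake an idle P only if no M is already spinning in search of work.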
+	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 { // TODO: fast atomic
+		wakep()
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+}
+
+func gcprocs() int32 {
+	// Figure out how many CPUs to use during GC.
+	// Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
+	lock(&sched.lock)
+	n := gomaxprocs
+	if n > ncpu {
+		n = ncpu
+	}
+	if n > _MaxGcproc {
+		n = _MaxGcproc
+	}
+	if n > sched.nmidle+1 { // one M is currently running
+		n = sched.nmidle + 1
+	}
+	unlock(&sched.lock)
+	return n
+}
+
+func needaddgcproc() bool {
+	lock(&sched.lock)
+	n := gomaxprocs
+	if n > ncpu {
+		n = ncpu
+	}
+	if n > _MaxGcproc {
+		n = _MaxGcproc
+	}
+	n -= sched.nmidle + 1 // one M is currently running
+	unlock(&sched.lock)
+	return n > 0
+}
+
+func helpgc(nproc int32) {
+	_g_ := getg()
+	lock(&sched.lock)
+	pos := 0
+	for n := int32(1); n < nproc; n++ { // one M is currently running
+		if allp[pos].mcache == _g_.m.mcache {
+			pos++
+		}
+		mp := mget()
+		if mp == nil {
+			gothrow("gcprocs inconsistency")
+		}
+		mp.helpgc = n
+		mp.mcache = allp[pos].mcache
+		pos++
+		notewakeup(&mp.park)
+	}
+	unlock(&sched.lock)
+}
+
+// Similar to stoptheworld but best-effort and can be called several times.
+// There is no reverse operation; it is used during crashing.
+// This function must not lock any mutexes.
+func freezetheworld() {
+	if gomaxprocs == 1 {
+		return
+	}
+	// stopwait and preemption requests can be lost
+	// due to races with concurrently executing threads,
+	// so try several times
+	for i := 0; i < 5; i++ {
+		// this should tell the scheduler to not start any new goroutines
+		sched.stopwait = 0x7fffffff
+		atomicstore(&sched.gcwaiting, 1)
+		// this should stop running goroutines
+		if !preemptall() {
+			break // no running goroutines
+		}
+		usleep(1000)
+	}
+	// to be sure
+	usleep(1000)
+	preemptall()
+	usleep(1000)
+}
+
+func isscanstatus(status uint32) bool {
+	if status == _Gscan {
+		gothrow("isscanstatus: Bad status Gscan")
+	}
+	return status&_Gscan == _Gscan
+}
+
+// All reads and writes of g's status go through readgstatus, casgstatus,
+// castogscanstatus, and casfrom_Gscanstatus.
+//go:nosplit
+func readgstatus(gp *g) uint32 {
+	return atomicload(&gp.atomicstatus)
+}
+
+// The Gscanstatuses are acting like locks and this releases them.
+// If it proves to be a performance hit we should be able to make these
+// simple atomic stores but for now we are going to throw if
+// we see an inconsistent state.
+func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
+	success := false
+
+	// Check that transition is valid.
+	switch oldval {
+	case _Gscanrunnable,
+		_Gscanwaiting,
+		_Gscanrunning,
+		_Gscansyscall:
+		if newval == oldval&^_Gscan {
+			success = cas(&gp.atomicstatus, oldval, newval)
+		}
+	case _Gscanenqueue:
+		if newval == _Gwaiting {
+			success = cas(&gp.atomicstatus, oldval, newval)
+		}
+	}
+	if !success {
+		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
+		dumpgstatus(gp)
+		gothrow("casfrom_Gscanstatus: gp->status is not in scan state")
+	}
+}
+
+// This will return false if the gp is not in the expected status and the cas fails.
+// This acts like a lock acquire while the casfromgstatus acts like a lock release.
+func castogscanstatus(gp *g, oldval, newval uint32) bool {
+	switch oldval {
+	case _Grunnable,
+		_Gwaiting,
+		_Gsyscall:
+		if newval == oldval|_Gscan {
+			return cas(&gp.atomicstatus, oldval, newval)
+		}
+	case _Grunning:
+		if newval == _Gscanrunning || newval == _Gscanenqueue {
+			return cas(&gp.atomicstatus, oldval, newval)
+		}
+	}
+	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
+	gothrow("castogscanstatus")
+	panic("not reached")
+}
+
+// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
+// and casfrom_Gscanstatus instead.
+// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
+// put it in the Gscan state is finished.
+//go:nosplit
+func casgstatus(gp *g, oldval, newval uint32) {
+	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
+		systemstack(func() {
+			print("casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
+			gothrow("casgstatus: bad incoming values")
+		})
+	}
+
+	// loop if gp->atomicstatus is in a scan state giving
+	// GC time to finish and change the state to oldval.
+	for !cas(&gp.atomicstatus, oldval, newval) {
+	}
+}
+
+// stopg ensures that gp is stopped at a GC safe point where its stack can be scanned
+// or in the context of a moving collector the pointers can be flipped from pointing
+// to old objects to pointing to new objects.
+// If stopg returns true, the caller knows gp is at a GC safe point and will remain there until
+// the caller calls restartg.
+// If stopg returns false, the caller is not responsible for calling restartg. This can happen
+// if another thread, either the gp itself or another GC thread, is taking the responsibility
+// to do the GC work related to this thread.
+func stopg(gp *g) bool {
+	for {
+		if gp.gcworkdone {
+			return false
+		}
+
+		switch s := readgstatus(gp); s {
+		default:
+			dumpgstatus(gp)
+			gothrow("stopg: gp->atomicstatus is not valid")
+
+		case _Gdead:
+			return false
+
+		case _Gcopystack:
+			// Loop until a new stack is in place.
+
+		case _Grunnable,
+			_Gsyscall,
+			_Gwaiting:
+			// Claim goroutine by setting scan bit.
+			if !castogscanstatus(gp, s, s|_Gscan) {
+				break
+			}
+			// In scan state, do work.
+			gcphasework(gp)
+			return true
+
+		case _Gscanrunnable,
+			_Gscanwaiting,
+			_Gscansyscall:
+			// Goroutine already claimed by another GC helper.
+			return false
+
+		case _Grunning:
+			if gcphase == _GCscan {
+				// Running routines are not scanned during the
+				// GCscan phase; we only scan non-running routines.
+				gp.gcworkdone = true
+				return false
+			}
+
+			// Claim goroutine, so we aren't racing with a status
+			// transition away from Grunning.
+			if !castogscanstatus(gp, _Grunning, _Gscanrunning) {
+				break
+			}
+
+			// Mark gp for preemption.
+			if !gp.gcworkdone {
+				gp.preemptscan = true
+				gp.preempt = true
+				gp.stackguard0 = stackPreempt
+			}
+
+			// Unclaim.
+			casfrom_Gscanstatus(gp, _Gscanrunning, _Grunning)
+			return false
+		}
+	}
+}
+
+// The GC requests that this routine be moved from a scanmumble state to a mumble state.
+func restartg(gp *g) {
+	s := readgstatus(gp)
+	switch s {
+	default:
+		dumpgstatus(gp)
+		gothrow("restartg: unexpected status")
+
+	case _Gdead:
+		// ok
+
+	case _Gscanrunnable,
+		_Gscanwaiting,
+		_Gscansyscall:
+		casfrom_Gscanstatus(gp, s, s&^_Gscan)
+
+	// Scan is now completed.
+	// Goroutine now needs to be made runnable.
+	// We put it on the global run queue; ready blocks on the global scheduler lock.
+	case _Gscanenqueue:
+		casfrom_Gscanstatus(gp, _Gscanenqueue, _Gwaiting)
+		if gp != getg().m.curg {
+			gothrow("processing Gscanenqueue on wrong m")
+		}
+		dropg()
+		ready(gp)
+	}
+}
+
+func stopscanstart(gp *g) {
+	_g_ := getg()
+	if _g_ == gp {
+		gothrow("GC not moved to G0")
+	}
+	if stopg(gp) {
+		if !isscanstatus(readgstatus(gp)) {
+			dumpgstatus(gp)
+			gothrow("GC not in scan state")
+		}
+		restartg(gp)
+	}
+}
+
+// Runs on g0 and does the actual work after putting the g back on the run queue.
+func mquiesce(gpmaster *g) {
+	// enqueue the calling goroutine.
+	restartg(gpmaster)
+
+	activeglen := len(allgs)
+	for i := 0; i < activeglen; i++ {
+		gp := allgs[i]
+		if readgstatus(gp) == _Gdead {
+			gp.gcworkdone = true // noop scan.
+		} else {
+			gp.gcworkdone = false
+		}
+		stopscanstart(gp)
+	}
+
+	// Check that the G's gcwork (such as scanning) has been done. If not, do it now.
+	// You can end up doing work here if the page trap on a Grunning goroutine has
+	// not been sprung or in some race situations. For example, a runnable goroutine goes dead
+	// and is started up again with gp->gcworkdone set to false.
+	for i := 0; i < activeglen; i++ {
+		gp := allgs[i]
+		for !gp.gcworkdone {
+			status := readgstatus(gp)
+			if status == _Gdead {
+				// do nothing, scan not needed.
+				gp.gcworkdone = true // scan is a noop
+				break
+			}
+			if status == _Grunning && gp.stackguard0 == uintptr(stackPreempt) && notetsleep(&sched.stopnote, 100*1000) { // nanosecond arg
+				noteclear(&sched.stopnote)
+			} else {
+				stopscanstart(gp)
+			}
+		}
+	}
+
+	for i := 0; i < activeglen; i++ {
+		gp := allgs[i]
+		status := readgstatus(gp)
+		if isscanstatus(status) {
+			print("mstopandscang:bottom: post scan bad status gp=", gp, " has status ", hex(status), "\n")
+			dumpgstatus(gp)
+		}
+		if !gp.gcworkdone && status != _Gdead {
+			print("mstopandscang:bottom: post scan gp=", gp, "->gcworkdone still false\n")
+			dumpgstatus(gp)
+		}
+	}
+
+	schedule() // Never returns.
+}
+
+// quiesce moves all the goroutines to a GC safepoint, which for now is at a preemption point.
+// If the global gcphase is GCmark, quiesce will ensure that all of the goroutines' stacks
+// have been scanned before it returns.
+func quiesce(mastergp *g) {
+	castogscanstatus(mastergp, _Grunning, _Gscanenqueue)
+	// Now move this to the g0 (aka m) stack.
+	// g0 will potentially scan this thread and put mastergp on the runqueue
+	mcall(mquiesce)
+}
+
+// This is used by the GC as well as the routines that do stack dumps. In the case
+// of GC all the routines can be reliably stopped. This is not always the case
+// when the system is in panic or being exited.
+func stoptheworld() {
+	_g_ := getg()
+
+	// If we hold a lock, then we won't be able to stop another M
+	// that is blocked trying to acquire the lock.
+	if _g_.m.locks > 0 {
+		gothrow("stoptheworld: holding locks")
+	}
+
+	lock(&sched.lock)
+	sched.stopwait = gomaxprocs
+	atomicstore(&sched.gcwaiting, 1)
+	preemptall()
+	// stop current P
+	_g_.m.p.status = _Pgcstop // Pgcstop is only diagnostic.
+	sched.stopwait--
+	// try to retake all P's in Psyscall status
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		s := p.status
+		if s == _Psyscall && cas(&p.status, s, _Pgcstop) {
+			sched.stopwait--
+		}
+	}
+	// stop idle P's
+	for {
+		p := pidleget()
+		if p == nil {
+			break
+		}
+		p.status = _Pgcstop
+		sched.stopwait--
+	}
+	wait := sched.stopwait > 0
+	unlock(&sched.lock)
+
+	// wait for remaining P's to stop voluntarily
+	if wait {
+		for {
+			// wait for 100us, then try to re-preempt in case of any races
+			if notetsleep(&sched.stopnote, 100*1000) {
+				noteclear(&sched.stopnote)
+				break
+			}
+			preemptall()
+		}
+	}
+	if sched.stopwait != 0 {
+		gothrow("stoptheworld: not stopped")
+	}
+	for i := 0; i < int(gomaxprocs); i++ {
+		p := allp[i]
+		if p.status != _Pgcstop {
+			gothrow("stoptheworld: not stopped")
+		}
+	}
+}
+
+func mhelpgc() {
+	_g_ := getg()
+	_g_.m.helpgc = -1
+}
+
+func starttheworld() {
+	_g_ := getg()
+
+	_g_.m.locks++        // disable preemption because it can be holding p in a local var
+	gp := netpoll(false) // non-blocking
+	injectglist(gp)
+	add := needaddgcproc()
+	lock(&sched.lock)
+	if newprocs != 0 {
+		procresize(newprocs)
+		newprocs = 0
+	} else {
+		procresize(gomaxprocs)
+	}
+	sched.gcwaiting = 0
+
+	var p1 *p
+	for {
+		p := pidleget()
+		if p == nil {
+			break
+		}
+		// procresize() puts p's with work at the beginning of the list.
+		// Once we reach a p without a run queue, the rest don't have one either.
+		if p.runqhead == p.runqtail {
+			pidleput(p)
+			break
+		}
+		p.m = mget()
+		p.link = p1
+		p1 = p
+	}
+	if sched.sysmonwait != 0 {
+		sched.sysmonwait = 0
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+
+	for p1 != nil {
+		p := p1
+		p1 = p1.link
+		if p.m != nil {
+			mp := p.m
+			p.m = nil
+			if mp.nextp != nil {
+				gothrow("starttheworld: inconsistent mp->nextp")
+			}
+			mp.nextp = p
+			notewakeup(&mp.park)
+		} else {
+			// Start M to run P.  Do not start another M below.
+			_newm(nil, p)
+			add = false
+		}
+	}
+
+	if add {
+		// If GC could have used another helper proc, start one now,
+		// in the hope that it will be available next time.
+		// It would have been even better to start it before the collection,
+		// but doing so requires allocating memory, so it's tricky to
+		// coordinate.  This lazy approach works out in practice:
+		// we don't mind if the first couple gc rounds don't have quite
+		// the maximum number of procs.
+		_newm(mhelpgc, nil)
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+}
+
+// Called to start an M.
+//go:nosplit
+func mstart() {
+	_g_ := getg()
+
+	if _g_.stack.lo == 0 {
+		// Initialize stack bounds from system stack.
+		// Cgo may have left stack size in stack.hi.
+		size := _g_.stack.hi
+		if size == 0 {
+			size = 8192
+		}
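+		// The address of a local variable approximates the current stack pointer;
+		// the 1024 below leaves some slack under the claimed lower bound.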
+		_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
+		_g_.stack.lo = _g_.stack.hi - size + 1024
+	}
+	// Initialize stack guards so that we can start calling
+	// both Go and C functions with stack growth prologues.
+	_g_.stackguard0 = _g_.stack.lo + _StackGuard
+	_g_.stackguard1 = _g_.stackguard0
+	mstart1()
+}
+
+func mstart1() {
+	_g_ := getg()
+
+	if _g_ != _g_.m.g0 {
+		gothrow("bad runtime·mstart")
+	}
+
+	// Record top of stack for use by mcall.
+	// Once we call schedule we're never coming back,
+	// so other calls can reuse this stack space.
+	gosave(&_g_.m.g0.sched)
+	_g_.m.g0.sched.pc = ^uintptr(0) // make sure it is never used
+	asminit()
+	minit()
+
+	// Install signal handlers; after minit so that minit can
+	// prepare the thread to be able to handle the signals.
+	if _g_.m == &m0 {
+		initsig()
+	}
+
+	if _g_.m.mstartfn != nil {
+		fn := *(*func())(unsafe.Pointer(&_g_.m.mstartfn))
+		fn()
+	}
+
+	if _g_.m.helpgc != 0 {
+		_g_.m.helpgc = 0
+		stopm()
+	} else if _g_.m != &m0 {
+		acquirep(_g_.m.nextp)
+		_g_.m.nextp = nil
+	}
+	schedule()
+
+	// TODO(brainman): This point is never reached, because scheduler
+	// does not release os threads at the moment. But once this path
+	// is enabled, we must remove our seh here.
+}
+
+// When running with cgo, we call _cgo_thread_start
+// to start threads for us so that we can play nicely with
+// foreign code.
+var cgoThreadStart unsafe.Pointer
+
+type cgothreadstart struct {
+	g   *g
+	tls *uint64
+	fn  unsafe.Pointer
+}
+
+// Allocate a new m unassociated with any thread.
+// Can use p for allocation context if needed.
+func allocm(_p_ *p) *m {
+	_g_ := getg()
+	_g_.m.locks++ // disable GC because it can be called from sysmon
+	if _g_.m.p == nil {
+		acquirep(_p_) // temporarily borrow p for mallocs in this function
+	}
+	mp := newM()
+	mcommoninit(mp)
+
+	// In case of cgo or Solaris, pthread_create will make us a stack.
+	// Windows and Plan 9 will lay out the sched stack on the OS stack.
+	if iscgo || GOOS == "solaris" || GOOS == "windows" || GOOS == "plan9" {
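+		// malg(-1) creates the g0 without allocating a Go stack; the OS-provided stack is used.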
+		mp.g0 = malg(-1)
+	} else {
+		mp.g0 = malg(8192)
+	}
+	mp.g0.m = mp
+
+	if _p_ == _g_.m.p {
+		releasep()
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+
+	return mp
+}
+
+func allocg() *g {
+	return newG()
+}
+
+// needm is called when a cgo callback happens on a
+// thread without an m (a thread not created by Go).
+// In this case, needm is expected to find an m to use
+// and return with m, g initialized correctly.
+// Since m and g are not set now (likely nil, but see below)
+// needm is limited in what routines it can call. In particular
+// it can only call nosplit functions (textflag 7) and cannot
+// do any scheduling that requires an m.
+//
+// In order to avoid needing heavy lifting here, we adopt
+// the following strategy: there is a stack of available m's
+// that can be stolen. Using compare-and-swap
+// to pop from the stack has ABA races, so we simulate
+// a lock by doing an exchange (via casp) to steal the stack
+// head and replace the top pointer with MLOCKED (1).
+// This serves as a simple spin lock that we can use even
+// without an m. The thread that locks the stack in this way
+// unlocks the stack by storing a valid stack head pointer.
+//
+// In order to make sure that there is always an m structure
+// available to be stolen, we maintain the invariant that there
+// is always one more than needed. At the beginning of the
+// program (if cgo is in use) the list is seeded with a single m.
+// If needm finds that it has taken the last m off the list, its job
+// is - once it has installed its own m so that it can do things like
+// allocate memory - to create a spare m and put it on the list.
+//
+// Each of these extra m's also has a g0 and a curg that are
+// pressed into service as the scheduling stack and current
+// goroutine for the duration of the cgo callback.
+//
+// When the callback is done with the m, it calls dropm to
+// put the m back on the list.
+//go:nosplit
+func needm(x byte) {
+	if needextram != 0 {
+		// Can happen if C/C++ code calls Go from a global ctor.
+		// Can not throw, because scheduler is not initialized yet.
+		// XXX
+		// write(2, unsafe.Pointer("fatal error: cgo callback before cgo call\n"), sizeof("fatal error: cgo callback before cgo call\n") - 1)
+		exit(1)
+	}
+
+	// Lock extra list, take head, unlock popped list.
+	// nilokay=false is safe here because of the invariant above,
+	// that the extra list always contains or will soon contain
+	// at least one m.
+	mp := lockextra(false)
+
+	// Set needextram when we've just emptied the list,
+	// so that the eventual call into cgocallbackg will
+	// allocate a new m for the extra list. We delay the
+	// allocation until then so that it can be done
+	// after exitsyscall makes sure it is okay to be
+	// running at all (that is, there's no garbage collection
+	// running right now).
+	mp.needextram = mp.schedlink == nil
+	unlockextra(mp.schedlink)
+
+	// Install g (= m->g0) and set the stack bounds
+	// to match the current stack. We don't actually know
+	// how big the stack is, like we don't know how big any
+	// scheduling stack is, but we assume there's at least 32 kB,
+	// which is more than enough for us.
+	setg(mp.g0)
+	_g_ := getg()
+	_g_.stack.hi = uintptr(noescape(unsafe.Pointer(&x))) + 1024
+	_g_.stack.lo = uintptr(noescape(unsafe.Pointer(&x))) - 32*1024
+	_g_.stackguard0 = _g_.stack.lo + _StackGuard
+
+	// Initialize this thread to use the m.
+	asminit()
+	minit()
+}
+
+// newextram allocates an m and puts it on the extra list.
+// It is called with a working local m, so that it can do things
+// like call schedlock and allocate.
+func newextram() {
+	// Create extra goroutine locked to extra m.
+	// The goroutine is the context in which the cgo callback will run.
+	// The sched.pc will never be returned to, but setting it to
+	// goexit makes clear to the traceback routines where
+	// the goroutine stack ends.
+	mp := allocm(nil)
+	gp := malg(4096)
+	gp.sched.pc = funcPC(goexit) + _PCQuantum
+	gp.sched.sp = gp.stack.hi
+	gp.sched.sp -= 4 * regSize // extra space in case of reads slightly beyond frame
+	gp.sched.lr = 0
+	gp.sched.g = gp
+	gp.syscallpc = gp.sched.pc
+	gp.syscallsp = gp.sched.sp
+	// malg returns status as Gidle, change to Gsyscall before adding to allg
+	// where GC will see it.
+	casgstatus(gp, _Gidle, _Gsyscall)
+	gp.m = mp
+	mp.curg = gp
+	mp.locked = _LockInternal
+	mp.lockedg = gp
+	gp.lockedm = mp
+	gp.goid = int64(xadd64(&sched.goidgen, 1))
+	if raceenabled {
+		gp.racectx = racegostart(funcPC(newextram))
+	}
+	// put on allg for garbage collector
+	allgadd(gp)
+
+	// Add m to the extra list.
+	mnext := lockextra(true)
+	mp.schedlink = mnext
+	unlockextra(mp)
+}
+
+// dropm is called when a cgo callback has called needm but is now
+// done with the callback and returning back into the non-Go thread.
+// It puts the current m back onto the extra list.
+//
+// The main expense here is the call to signalstack to release the
+// m's signal stack, and then the call to needm on the next callback
+// from this thread. It is tempting to try to save the m for next time,
+// which would eliminate both these costs, but there might not be
+// a next time: the current thread (which Go does not control) might exit.
+// If we saved the m for that thread, there would be an m leak each time
+// such a thread exited. Instead, we acquire and release an m on each
+// call. These should typically not be scheduling operations, just a few
+// atomics, so the cost should be small.
+//
+// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
+// variable using pthread_key_create. Unlike the pthread keys we already use
+// on OS X, this dummy key would never be read by Go code. It would exist
+// only so that we could register a thread-exit-time destructor.
+// That destructor would put the m back onto the extra list.
+// This is purely a performance optimization. The current version,
+// in which dropm happens on each cgo call, is still correct too.
+// We may have to keep the current version on systems with cgo
+// but without pthreads, like Windows.
+func dropm() {
+	// Undo whatever initialization minit did during needm.
+	unminit()
+
+	// Clear m and g, and return m to the extra list.
+// After the call to setg we can only call nosplit functions.
+	mp := getg().m
+	setg(nil)
+
+	mnext := lockextra(true)
+	mp.schedlink = mnext
+	unlockextra(mp)
+}
+
+var extram uintptr
+
+// lockextra locks the extra list and returns the list head.
+// The caller must unlock the list by storing a new list head
+// to extram. If nilokay is true, then lockextra will
+// return a nil list head if that's what it finds. If nilokay is false,
+// lockextra will keep waiting until the list head is no longer nil.
+//go:nosplit
+func lockextra(nilokay bool) *m {
+	const locked = 1
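+	// extram holds either a real *m, nil, or the sentinel value locked (1) while some thread holds the list.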
+
+	for {
+		old := atomicloaduintptr(&extram)
+		if old == locked {
+			yield := osyield
+			yield()
+			continue
+		}
+		if old == 0 && !nilokay {
+			usleep(1)
+			continue
+		}
+		if casuintptr(&extram, old, locked) {
+			return (*m)(unsafe.Pointer(old))
+		}
+		yield := osyield
+		yield()
+		continue
+	}
+}
+
+//go:nosplit
+func unlockextra(mp *m) {
+	atomicstoreuintptr(&extram, uintptr(unsafe.Pointer(mp)))
+}
+
+// Create a new m.  It will start off with a call to fn, or else the scheduler.
+func _newm(fn func(), _p_ *p) {
+	mp := allocm(_p_)
+	mp.nextp = _p_
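+	// fn is stored as a raw pointer; mstart1 converts it back with the inverse cast before calling it.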
+	mp.mstartfn = *(*unsafe.Pointer)(unsafe.Pointer(&fn))
+
+	if iscgo {
+		var ts cgothreadstart
+		if _cgo_thread_start == nil {
+			gothrow("_cgo_thread_start missing")
+		}
+		ts.g = mp.g0
+		ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
+		ts.fn = unsafe.Pointer(funcPC(mstart))
+		asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
+		return
+	}
+	newosproc(mp, unsafe.Pointer(mp.g0.stack.hi))
+}
+
+// Stops execution of the current m until new work is available.
+// Returns with acquired P.
+func stopm() {
+	_g_ := getg()
+
+	if _g_.m.locks != 0 {
+		gothrow("stopm holding locks")
+	}
+	if _g_.m.p != nil {
+		gothrow("stopm holding p")
+	}
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		xadd(&sched.nmspinning, -1)
+	}
+
+retry:
+	lock(&sched.lock)
+	mput(_g_.m)
+	unlock(&sched.lock)
+	notesleep(&_g_.m.park)
+	noteclear(&_g_.m.park)
+	if _g_.m.helpgc != 0 {
+		gchelper()
+		_g_.m.helpgc = 0
+		_g_.m.mcache = nil
+		goto retry
+	}
+	acquirep(_g_.m.nextp)
+	_g_.m.nextp = nil
+}
+
+func mspinning() {
+	getg().m.spinning = true
+}
+
+// Schedules some M to run the p (creates an M if necessary).
+// If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
+func startm(_p_ *p, spinning bool) {
+	lock(&sched.lock)
+	if _p_ == nil {
+		_p_ = pidleget()
+		if _p_ == nil {
+			unlock(&sched.lock)
+			if spinning {
+				xadd(&sched.nmspinning, -1)
+			}
+			return
+		}
+	}
+	mp := mget()
+	unlock(&sched.lock)
+	if mp == nil {
+		var fn func()
+		if spinning {
+			fn = mspinning
+		}
+		_newm(fn, _p_)
+		return
+	}
+	if mp.spinning {
+		gothrow("startm: m is spinning")
+	}
+	if mp.nextp != nil {
+		gothrow("startm: m has p")
+	}
+	mp.spinning = spinning
+	mp.nextp = _p_
+	notewakeup(&mp.park)
+}
+
+// Hands off P from syscall or locked M.
+func handoffp(_p_ *p) {
+	// if it has local work, start it straight away
+	if _p_.runqhead != _p_.runqtail || sched.runqsize != 0 {
+		startm(_p_, false)
+		return
+	}
+	// no local work, check that there are no spinning/idle M's,
+	// otherwise our help is not required
+	if atomicload(&sched.nmspinning)+atomicload(&sched.npidle) == 0 && cas(&sched.nmspinning, 0, 1) { // TODO: fast atomic
+		startm(_p_, true)
+		return
+	}
+	lock(&sched.lock)
+	if sched.gcwaiting != 0 {
+		_p_.status = _Pgcstop
+		sched.stopwait--
+		if sched.stopwait == 0 {
+			notewakeup(&sched.stopnote)
+		}
+		unlock(&sched.lock)
+		return
+	}
+	if sched.runqsize != 0 {
+		unlock(&sched.lock)
+		startm(_p_, false)
+		return
+	}
+	// If this is the last running P and nobody is polling network,
+	// need to wakeup another M to poll network.
+	if sched.npidle == uint32(gomaxprocs-1) && atomicload64(&sched.lastpoll) != 0 {
+		unlock(&sched.lock)
+		startm(_p_, false)
+		return
+	}
+	pidleput(_p_)
+	unlock(&sched.lock)
+}
+
+// Tries to add one more P to execute G's.
+// Called when a G is made runnable (newproc, ready).
+func wakep() {
+	// be conservative about spinning threads
+	if !cas(&sched.nmspinning, 0, 1) {
+		return
+	}
+	startm(nil, true)
+}
+
+// Stops execution of the current m that is locked to a g until the g is runnable again.
+// Returns with acquired P.
+func stoplockedm() {
+	_g_ := getg()
+
+	if _g_.m.lockedg == nil || _g_.m.lockedg.lockedm != _g_.m {
+		gothrow("stoplockedm: inconsistent locking")
+	}
+	if _g_.m.p != nil {
+		// Schedule another M to run this p.
+		_p_ := releasep()
+		handoffp(_p_)
+	}
+	incidlelocked(1)
+	// Wait until another thread schedules lockedg again.
+	notesleep(&_g_.m.park)
+	noteclear(&_g_.m.park)
+	status := readgstatus(_g_.m.lockedg)
+	if status&^_Gscan != _Grunnable {
+		print("runtime:stoplockedm: g is not Grunnable or Gscanrunnable\n")
+		dumpgstatus(_g_)
+		gothrow("stoplockedm: not runnable")
+	}
+	acquirep(_g_.m.nextp)
+	_g_.m.nextp = nil
+}
+
+// Schedules the locked m to run the locked gp.
+func startlockedm(gp *g) {
+	_g_ := getg()
+
+	mp := gp.lockedm
+	if mp == _g_.m {
+		gothrow("startlockedm: locked to me")
+	}
+	if mp.nextp != nil {
+		gothrow("startlockedm: m has p")
+	}
+	// directly handoff current P to the locked m
+	incidlelocked(-1)
+	_p_ := releasep()
+	mp.nextp = _p_
+	notewakeup(&mp.park)
+	stopm()
+}
+
+// Stops the current m for stoptheworld.
+// Returns when the world is restarted.
+func gcstopm() {
+	_g_ := getg()
+
+	if sched.gcwaiting == 0 {
+		gothrow("gcstopm: not waiting for gc")
+	}
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		xadd(&sched.nmspinning, -1)
+	}
+	_p_ := releasep()
+	lock(&sched.lock)
+	_p_.status = _Pgcstop
+	sched.stopwait--
+	if sched.stopwait == 0 {
+		notewakeup(&sched.stopnote)
+	}
+	unlock(&sched.lock)
+	stopm()
+}
+
+// Schedules gp to run on the current M.
+// Never returns.
+func execute(gp *g) {
+	_g_ := getg()
+
+	casgstatus(gp, _Grunnable, _Grunning)
+	gp.waitsince = 0
+	gp.preempt = false
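+	// Reset the stack guard in case a pending preemption left it at stackPreempt.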
+	gp.stackguard0 = gp.stack.lo + _StackGuard
+	_g_.m.p.schedtick++
+	_g_.m.curg = gp
+	gp.m = _g_.m
+
+	// Check whether the profiler needs to be turned on or off.
+	hz := sched.profilehz
+	if _g_.m.profilehz != hz {
+		resetcpuprofiler(hz)
+	}
+
+	gogo(&gp.sched)
+}
+
+// Finds a runnable goroutine to execute.
+// Tries to steal from other P's, get g from global queue, poll network.
+func findrunnable() *g {
+	_g_ := getg()
+
+top:
+	if sched.gcwaiting != 0 {
+		gcstopm()
+		goto top
+	}
+	if fingwait && fingwake {
+		if gp := wakefing(); gp != nil {
+			ready(gp)
+		}
+	}
+
+	// local runq
+	if gp := runqget(_g_.m.p); gp != nil {
+		return gp
+	}
+
+	// global runq
+	if sched.runqsize != 0 {
+		lock(&sched.lock)
+		gp := globrunqget(_g_.m.p, 0)
+		unlock(&sched.lock)
+		if gp != nil {
+			return gp
+		}
+	}
+
+	// poll network - returns list of goroutines
+	if gp := netpoll(false); gp != nil { // non-blocking
+		injectglist(gp.schedlink)
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		return gp
+	}
+
+	// If number of spinning M's >= number of busy P's, block.
+	// This is necessary to prevent excessive CPU consumption
+	// when GOMAXPROCS>>1 but the program parallelism is low.
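+	// gomaxprocs-npidle is the number of busy P's, so this check blocks once the number
+	// of spinning M's reaches half the number of busy P's.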
+	if !_g_.m.spinning && 2*atomicload(&sched.nmspinning) >= uint32(gomaxprocs)-atomicload(&sched.npidle) { // TODO: fast atomic
+		goto stop
+	}
+	if !_g_.m.spinning {
+		_g_.m.spinning = true
+		xadd(&sched.nmspinning, 1)
+	}
+	// random steal from other P's
+	for i := 0; i < int(2*gomaxprocs); i++ {
+		if sched.gcwaiting != 0 {
+			goto top
+		}
+		_p_ := allp[fastrand1()%uint32(gomaxprocs)]
+		var gp *g
+		if _p_ == _g_.m.p {
+			gp = runqget(_p_)
+		} else {
+			gp = runqsteal(_g_.m.p, _p_)
+		}
+		if gp != nil {
+			return gp
+		}
+	}
+stop:
+
+	// return P and block
+	lock(&sched.lock)
+	if sched.gcwaiting != 0 {
+		unlock(&sched.lock)
+		goto top
+	}
+	if sched.runqsize != 0 {
+		gp := globrunqget(_g_.m.p, 0)
+		unlock(&sched.lock)
+		return gp
+	}
+	_p_ := releasep()
+	pidleput(_p_)
+	unlock(&sched.lock)
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		xadd(&sched.nmspinning, -1)
+	}
+
+	// check all runqueues once again
+	for i := 0; i < int(gomaxprocs); i++ {
+		_p_ := allp[i]
+		if _p_ != nil && _p_.runqhead != _p_.runqtail {
+			lock(&sched.lock)
+			_p_ = pidleget()
+			unlock(&sched.lock)
+			if _p_ != nil {
+				acquirep(_p_)
+				goto top
+			}
+			break
+		}
+	}
+
+	// poll network
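+	// Exchanging 0 into sched.lastpoll announces that an M is about to block in netpoll;
+	// sysmon only does its own non-blocking poll while lastpoll is non-zero.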
+	if xchg64(&sched.lastpoll, 0) != 0 {
+		if _g_.m.p != nil {
+			gothrow("findrunnable: netpoll with p")
+		}
+		if _g_.m.spinning {
+			gothrow("findrunnable: netpoll with spinning")
+		}
+		gp := netpoll(true) // block until new work is available
+		atomicstore64(&sched.lastpoll, uint64(nanotime()))
+		if gp != nil {
+			lock(&sched.lock)
+			_p_ = pidleget()
+			unlock(&sched.lock)
+			if _p_ != nil {
+				acquirep(_p_)
+				injectglist(gp.schedlink)
+				casgstatus(gp, _Gwaiting, _Grunnable)
+				return gp
+			}
+			injectglist(gp)
+		}
+	}
+	stopm()
+	goto top
+}
+
+func resetspinning() {
+	_g_ := getg()
+
+	var nmspinning uint32
+	if _g_.m.spinning {
+		_g_.m.spinning = false
+		nmspinning = xadd(&sched.nmspinning, -1)
+		if int32(nmspinning) < 0 {
+			gothrow("findrunnable: negative nmspinning")
+		}
+	} else {
+		nmspinning = atomicload(&sched.nmspinning)
+	}
+
+	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+	// so see if we need to wakeup another P here.
+	if nmspinning == 0 && atomicload(&sched.npidle) > 0 {
+		wakep()
+	}
+}
+
+// Injects the list of runnable G's into the scheduler.
+// Can run concurrently with GC.
+func injectglist(glist *g) {
+	if glist == nil {
+		return
+	}
+	lock(&sched.lock)
+	var n int
+	for n = 0; glist != nil; n++ {
+		gp := glist
+		glist = gp.schedlink
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		globrunqput(gp)
+	}
+	unlock(&sched.lock)
+	for ; n != 0 && sched.npidle != 0; n-- {
+		startm(nil, false)
+	}
+}
+
+// One round of scheduler: find a runnable goroutine and execute it.
+// Never returns.
+func schedule() {
+	_g_ := getg()
+
+	if _g_.m.locks != 0 {
+		gothrow("schedule: holding locks")
+	}
+
+	if _g_.m.lockedg != nil {
+		stoplockedm()
+		execute(_g_.m.lockedg) // Never returns.
+	}
+
+top:
+	if sched.gcwaiting != 0 {
+		gcstopm()
+		goto top
+	}
+
+	var gp *g
+	// Check the global runnable queue once in a while to ensure fairness.
+	// Otherwise two goroutines can completely occupy the local runqueue
+	// by constantly respawning each other.
+	tick := _g_.m.p.schedtick
+	// This is a fancy way to say tick%61==0,
+	// it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
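+	// (0x4325c53f is the ceiling of 2^36/61, so (tick*0x4325c53f)>>36 yields tick/61 for
+	// 32-bit ticks, making the expression tick - (tick/61)*61, i.e. tick%61.)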
+	if uint64(tick)-((uint64(tick)*0x4325c53f)>>36)*61 == 0 && sched.runqsize > 0 {
+		lock(&sched.lock)
+		gp = globrunqget(_g_.m.p, 1)
+		unlock(&sched.lock)
+		if gp != nil {
+			resetspinning()
+		}
+	}
+	if gp == nil {
+		gp = runqget(_g_.m.p)
+		if gp != nil && _g_.m.spinning {
+			gothrow("schedule: spinning with local work")
+		}
+	}
+	if gp == nil {
+		gp = findrunnable() // blocks until work is available
+		resetspinning()
+	}
+
+	if gp.lockedm != nil {
+		// Hands off own p to the locked m,
+		// then blocks waiting for a new p.
+		startlockedm(gp)
+		goto top
+	}
+
+	execute(gp)
+}
+
+// dropg removes the association between m and the current goroutine m->curg (gp for short).
+// Typically a caller sets gp's status away from Grunning and then
+// immediately calls dropg to finish the job. The caller is also responsible
+// for arranging that gp will be restarted using ready at an
+// appropriate time. After calling dropg and arranging for gp to be
+// readied later, the caller can do other work but eventually should
+// call schedule to restart the scheduling of goroutines on this m.
+func dropg() {
+	_g_ := getg()
+
+	if _g_.m.lockedg == nil {
+		_g_.m.curg.m = nil
+		_g_.m.curg = nil
+	}
+}
+
+// Puts the current goroutine into a waiting state and calls unlockf.
+// If unlockf returns false, the goroutine is resumed.
+func park(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason string) {
+	_g_ := getg()
+
+	_g_.m.waitlock = lock
+	_g_.m.waitunlockf = *(*unsafe.Pointer)(unsafe.Pointer(&unlockf))
+	_g_.waitreason = reason
+	mcall(park_m)
+}
+
+func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
+	unlock((*mutex)(lock))
+	return true
+}
+
+// Puts the current goroutine into a waiting state and unlocks the lock.
+// The goroutine can be made runnable again by calling ready(gp).
+func parkunlock(lock *mutex, reason string) {
+	park(parkunlock_c, unsafe.Pointer(lock), reason)
+}
+
+// park continuation on g0.
+func park_m(gp *g) {
+	_g_ := getg()
+
+	casgstatus(gp, _Grunning, _Gwaiting)
+	dropg()
+
+	if _g_.m.waitunlockf != nil {
+		fn := *(*func(*g, unsafe.Pointer) bool)(unsafe.Pointer(&_g_.m.waitunlockf))
+		ok := fn(gp, _g_.m.waitlock)
+		_g_.m.waitunlockf = nil
+		_g_.m.waitlock = nil
+		if !ok {
+			casgstatus(gp, _Gwaiting, _Grunnable)
+			execute(gp) // Schedule it back, never returns.
+		}
+	}
+	schedule()
+}
+
+// Gosched continuation on g0.
+func gosched_m(gp *g) {
+	status := readgstatus(gp)
+	if status&^_Gscan != _Grunning {
+		dumpgstatus(gp)
+		gothrow("bad g status")
+	}
+	casgstatus(gp, _Grunning, _Grunnable)
+	dropg()
+	lock(&sched.lock)
+	globrunqput(gp)
+	unlock(&sched.lock)
+
+	schedule()
+}
+
+// Finishes execution of the current goroutine.
+// Must be NOSPLIT because it is called from Go. (TODO - probably not anymore)
+//go:nosplit
+func goexit1() {
+	if raceenabled {
+		racegoend()
+	}
+	mcall(goexit0)
+}
+
+// goexit continuation on g0.
+func goexit0(gp *g) {
+	_g_ := getg()
+
+	casgstatus(gp, _Grunning, _Gdead)
+	gp.m = nil
+	gp.lockedm = nil
+	_g_.m.lockedg = nil
+	gp.paniconfault = false
+	gp._defer = nil // should be true already but just in case.
+	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
+	gp.writebuf = nil
+	gp.waitreason = ""
+	gp.param = nil
+
+	dropg()
+
+	if _g_.m.locked&^_LockExternal != 0 {
+		print("invalid m->locked = ", _g_.m.locked, "\n")
+		gothrow("internal lockOSThread error")
+	}
+	_g_.m.locked = 0
+	gfput(_g_.m.p, gp)
+	schedule()
+}
+
+//go:nosplit
+func save(pc, sp uintptr) {
+	_g_ := getg()
+
+	_g_.sched.pc = pc
+	_g_.sched.sp = sp
+	_g_.sched.lr = 0
+	_g_.sched.ret = 0
+	_g_.sched.ctxt = nil
+	// write as uintptr to avoid write barrier, which will smash _g_.sched.
+	*(*uintptr)(unsafe.Pointer(&_g_.sched.g)) = uintptr(unsafe.Pointer(_g_))
+}
+
+// The goroutine g is about to enter a system call.
+// Record that it's not using the cpu anymore.
+// This is called only from the go syscall library and cgocall,
+// not from the low-level system calls used by the
+//
+// Entersyscall cannot split the stack: the gosave must
+// make g->sched refer to the caller's stack segment, because
+// entersyscall is going to return immediately after.
+//
+// Nothing entersyscall calls can split the stack either.
+// We cannot safely move the stack during an active call to syscall,
+// because we do not know which of the uintptr arguments are
+// really pointers (back into the stack).
+// In practice, this means that we make the fast path run through
+// entersyscall doing no-split things, and the slow path has to use systemstack
+// to run bigger things on the system stack.
+//
+// reentersyscall is the entry point used by cgo callbacks, where explicitly
+// saved SP and PC are restored. This is needed when exitsyscall will be called
+// from a function further up in the call stack than the parent, as g->syscallsp
+// must always point to a valid stack frame. entersyscall below is the normal
+// entry point for syscalls, which obtains the SP and PC from the caller.
+//go:nosplit
+func reentersyscall(pc, sp uintptr) {
+	_g_ := getg()
+
+	// Disable preemption because during this function g is in Gsyscall status,
+	// but can have an inconsistent g->sched; do not let the GC observe it.
+	_g_.m.locks++
+
+	// Entersyscall must not call any function that might split/grow the stack.
+	// (See details in comment above.)
+	// Catch calls that might, by replacing the stack guard with something that
+	// will trip any stack check and leaving a flag to tell newstack to die.
+	_g_.stackguard0 = stackPreempt
+	_g_.throwsplit = true
+
+	// Leave SP around for GC and traceback.
+	save(pc, sp)
+	_g_.syscallsp = sp
+	_g_.syscallpc = pc
+	casgstatus(_g_, _Grunning, _Gsyscall)
+	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+		systemstack(func() {
+			print("entersyscall inconsistent ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+			gothrow("entersyscall")
+		})
+	}
+
+	if atomicload(&sched.sysmonwait) != 0 { // TODO: fast atomic
+		systemstack(entersyscall_sysmon)
+		save(pc, sp)
+	}
+
+	_g_.m.mcache = nil
+	_g_.m.p.m = nil
+	atomicstore(&_g_.m.p.status, _Psyscall)
+	if sched.gcwaiting != 0 {
+		systemstack(entersyscall_gcwait)
+		save(pc, sp)
+	}
+
+	// Goroutines must not split stacks in Gsyscall status (it would corrupt g->sched).
+	// We set _StackGuard to StackPreempt so that first split stack check calls morestack.
+	// Morestack detects this case and throws.
+	_g_.stackguard0 = stackPreempt
+	_g_.m.locks--
+}
+
+// Standard syscall entry used by the go syscall library and normal cgo calls.
+//go:nosplit
+func entersyscall(dummy int32) {
+	reentersyscall(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+}
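+
+// Illustrative call pattern (pseudocode for the assembly in the syscall
+// package, not part of this change):
+//
+//	CALL	runtime·entersyscall(SB)
+//	... raw system call instruction ...
+//	CALL	runtime·exitsyscall(SB)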
+
+func entersyscall_sysmon() {
+	lock(&sched.lock)
+	if atomicload(&sched.sysmonwait) != 0 {
+		atomicstore(&sched.sysmonwait, 0)
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+}
+
+func entersyscall_gcwait() {
+	_g_ := getg()
+
+	lock(&sched.lock)
+	if sched.stopwait > 0 && cas(&_g_.m.p.status, _Psyscall, _Pgcstop) {
+		if sched.stopwait--; sched.stopwait == 0 {
+			notewakeup(&sched.stopnote)
+		}
+	}
+	unlock(&sched.lock)
+}
+
+// The same as entersyscall(), but with a hint that the syscall is blocking.
+//go:nosplit
+func entersyscallblock(dummy int32) {
+	_g_ := getg()
+
+	_g_.m.locks++ // see comment in entersyscall
+	_g_.throwsplit = true
+	_g_.stackguard0 = stackPreempt // see comment in entersyscall
+
+	// Leave SP around for GC and traceback.
+	pc := getcallerpc(unsafe.Pointer(&dummy))
+	sp := getcallersp(unsafe.Pointer(&dummy))
+	save(pc, sp)
+	_g_.syscallsp = _g_.sched.sp
+	_g_.syscallpc = _g_.sched.pc
+	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+		sp1 := sp
+		sp2 := _g_.sched.sp
+		sp3 := _g_.syscallsp
+		systemstack(func() {
+			print("entersyscallblock inconsistent ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+			gothrow("entersyscallblock")
+		})
+	}
+	casgstatus(_g_, _Grunning, _Gsyscall)
+	if _g_.syscallsp < _g_.stack.lo || _g_.stack.hi < _g_.syscallsp {
+		systemstack(func() {
+			print("entersyscallblock inconsistent ", hex(sp), " ", hex(_g_.sched.sp), " ", hex(_g_.syscallsp), " [", hex(_g_.stack.lo), ",", hex(_g_.stack.hi), "]\n")
+			gothrow("entersyscallblock")
+		})
+	}
+
+	systemstack(entersyscallblock_handoff)
+
+	// Resave for traceback during blocked call.
+	save(getcallerpc(unsafe.Pointer(&dummy)), getcallersp(unsafe.Pointer(&dummy)))
+
+	_g_.m.locks--
+}
+
+func entersyscallblock_handoff() {
+	handoffp(releasep())
+}
+
+// The goroutine g exited its system call.
+// Arrange for it to run on a cpu again.
+// This is called only from the go syscall library, not
+// from the low-level system calls used by the runtime.
+//go:nosplit
+func exitsyscall(dummy int32) {
+	_g_ := getg()
+
+	_g_.m.locks++ // see comment in entersyscall
+	if getcallersp(unsafe.Pointer(&dummy)) > _g_.syscallsp {
+		gothrow("exitsyscall: syscall frame is no longer valid")
+	}
+
+	_g_.waitsince = 0
+	if exitsyscallfast() {
+		if _g_.m.mcache == nil {
+			gothrow("lost mcache")
+		}
+		// There's a cpu for us, so we can run.
+		_g_.m.p.syscalltick++
+		// We need to cas the status and scan before resuming...
+		casgstatus(_g_, _Gsyscall, _Grunning)
+
+		// Garbage collector isn't running (since we are),
+		// so okay to clear syscallsp.
+		_g_.syscallsp = 0
+		_g_.m.locks--
+		if _g_.preempt {
+			// restore the preemption request in case we've cleared it in newstack
+			_g_.stackguard0 = stackPreempt
+		} else {
+			// otherwise restore the real _StackGuard, we've spoiled it in entersyscall/entersyscallblock
+			_g_.stackguard0 = _g_.stack.lo + _StackGuard
+		}
+		_g_.throwsplit = false
+		return
+	}
+
+	_g_.m.locks--
+
+	// Call the scheduler.
+	mcall(exitsyscall0)
+
+	if _g_.m.mcache == nil {
+		gothrow("lost mcache")
+	}
+
+	// Scheduler returned, so we're allowed to run now.
+	// Delete the syscallsp information that we left for
+	// the garbage collector during the system call.
+	// Must wait until now because until gosched returns
+	// we don't know for sure that the garbage collector
+	// is not running.
+	_g_.syscallsp = 0
+	_g_.m.p.syscalltick++
+	_g_.throwsplit = false
+}
+
+//go:nosplit
+func exitsyscallfast() bool {
+	_g_ := getg()
+
+	// Freezetheworld sets stopwait but does not retake P's.
+	if sched.stopwait != 0 {
+		_g_.m.mcache = nil
+		_g_.m.p = nil
+		return false
+	}
+
+	// Try to re-acquire the last P.
+	if _g_.m.p != nil && _g_.m.p.status == _Psyscall && cas(&_g_.m.p.status, _Psyscall, _Prunning) {
+		// There's a cpu for us, so we can run.
+		_g_.m.mcache = _g_.m.p.mcache
+		_g_.m.p.m = _g_.m
+		return true
+	}
+
+	// Try to get any other idle P.
+	_g_.m.mcache = nil
+	_g_.m.p = nil
+	if sched.pidle != nil {
+		var ok bool
+		systemstack(func() {
+			ok = exitsyscallfast_pidle()
+		})
+		if ok {
+			return true
+		}
+	}
+	return false
+}
+
+func exitsyscallfast_pidle() bool {
+	lock(&sched.lock)
+	_p_ := pidleget()
+	if _p_ != nil && atomicload(&sched.sysmonwait) != 0 {
+		atomicstore(&sched.sysmonwait, 0)
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+	if _p_ != nil {
+		acquirep(_p_)
+		return true
+	}
+	return false
+}
+
+// exitsyscall slow path on g0.
+// Failed to acquire P, enqueue gp as runnable.
+func exitsyscall0(gp *g) {
+	_g_ := getg()
+
+	casgstatus(gp, _Gsyscall, _Grunnable)
+	dropg()
+	lock(&sched.lock)
+	_p_ := pidleget()
+	if _p_ == nil {
+		globrunqput(gp)
+	} else if atomicload(&sched.sysmonwait) != 0 {
+		atomicstore(&sched.sysmonwait, 0)
+		notewakeup(&sched.sysmonnote)
+	}
+	unlock(&sched.lock)
+	if _p_ != nil {
+		acquirep(_p_)
+		execute(gp) // Never returns.
+	}
+	if _g_.m.lockedg != nil {
+		// Wait until another thread schedules gp and so m again.
+		stoplockedm()
+		execute(gp) // Never returns.
+	}
+	stopm()
+	schedule() // Never returns.
+}
+
+func beforefork() {
+	gp := getg().m.curg
+
+	// Fork can hang if preempted with signals frequently enough (see issue 5517).
+	// Ensure that we stay on the same M where we disable profiling.
+	gp.m.locks++
+	if gp.m.profilehz != 0 {
+		resetcpuprofiler(0)
+	}
+
+	// This function is called before fork in syscall package.
+	// Code between fork and exec must not allocate memory nor even try to grow stack.
+	// Here we spoil g->_StackGuard to reliably detect any attempts to grow stack.
+	// runtime_AfterFork will undo this in parent process, but not in child.
+	gp.stackguard0 = stackFork
+}
+
+// Called from syscall package before fork.
+//go:nosplit
+func syscall_BeforeFork() {
+	systemstack(beforefork)
+}
+
+func afterfork() {
+	gp := getg().m.curg
+
+	// See the comment in beforefork.
+	gp.stackguard0 = gp.stack.lo + _StackGuard
+
+	hz := sched.profilehz
+	if hz != 0 {
+		resetcpuprofiler(hz)
+	}
+	gp.m.locks--
+}
+
+// Called from syscall package after fork in parent.
+//go:nosplit
+func syscall_AfterFork() {
+	systemstack(afterfork)
+}
+
+// Allocate a new g, with a stack big enough for stacksize bytes.
+func malg(stacksize int32) *g {
+	newg := allocg()
+	if stacksize >= 0 {
+		stacksize = round2(_StackSystem + stacksize)
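+		// round2 rounds up to a power of two; e.g. an illustrative request of
+		// 3000 bytes (including _StackSystem) becomes a 4096-byte stack.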
+		systemstack(func() {
+			newg.stack = stackalloc(uint32(stacksize))
+		})
+		newg.stackguard0 = newg.stack.lo + _StackGuard
+		newg.stackguard1 = ^uintptr(0)
+	}
+	return newg
+}
+
+// Create a new g running fn with siz bytes of arguments.
+// Put it on the queue of g's waiting to run.
+// The compiler turns a go statement into a call to this.
+// Cannot split the stack because it assumes that the arguments
+// are available sequentially after &fn; they would not be
+// copied if a stack split occurred.
+//go:nosplit
+func newproc(siz int32, fn *funcval) {
+	argp := add(unsafe.Pointer(&fn), ptrSize)
+	if hasLinkRegister {
+		argp = add(argp, ptrSize) // skip caller's saved LR
+	}
+
+	pc := getcallerpc(unsafe.Pointer(&siz))
+	systemstack(func() {
+		newproc1(fn, (*uint8)(argp), siz, 0, pc)
+	})
+}
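+
+// Illustrative note (compiler output shown loosely, not part of this change):
+// for a statement such as
+//
+//	go f(1, 2)
+//
+// the compiler emits roughly a call newproc(siz, f, 1, 2), where siz is the
+// byte size of f's arguments, which sit on the stack just after &fn.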
+
+// Create a new g running fn with narg bytes of arguments starting
+// at argp and returning nret bytes of results.  callerpc is the
+// address of the go statement that created this.  The new g is put
+// on the queue of g's waiting to run.
+func newproc1(fn *funcval, argp *uint8, narg int32, nret int32, callerpc uintptr) *g {
+	_g_ := getg()
+
+	if fn == nil {
+		_g_.m.throwing = -1 // do not dump full stacks
+		gothrow("go of nil func value")
+	}
+	_g_.m.locks++ // disable preemption because it can be holding p in a local var
+	siz := narg + nret
+	siz = (siz + 7) &^ 7
+
+	// We could allocate a larger initial stack if necessary.
+	// Not worth it: this is almost always an error.
+	// 4*sizeof(uintreg): extra space added below
+	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
+	if siz >= _StackMin-4*regSize-regSize {
+		gothrow("newproc: function arguments too large for new goroutine")
+	}
+
+	_p_ := _g_.m.p
+	newg := gfget(_p_)
+	if newg == nil {
+		newg = malg(_StackMin)
+		casgstatus(newg, _Gidle, _Gdead)
+		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
+	}
+	if newg.stack.hi == 0 {
+		gothrow("newproc1: newg missing stack")
+	}
+
+	if readgstatus(newg) != _Gdead {
+		gothrow("newproc1: new g is not Gdead")
+	}
+
+	sp := newg.stack.hi
+	sp -= 4 * regSize // extra space in case of reads slightly beyond frame
+	sp -= uintptr(siz)
+	memmove(unsafe.Pointer(sp), unsafe.Pointer(argp), uintptr(narg))
+	if hasLinkRegister {
+		// caller's LR
+		sp -= ptrSize
+		*(*unsafe.Pointer)(unsafe.Pointer(sp)) = nil
+	}
+
+	memclr(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
+	newg.sched.sp = sp
+	newg.sched.pc = funcPC(goexit) + _PCQuantum // +PCQuantum so that previous instruction is in same function
+	newg.sched.g = newg
+	gostartcallfn(&newg.sched, fn)
+	newg.gopc = callerpc
+	casgstatus(newg, _Gdead, _Grunnable)
+
+	if _p_.goidcache == _p_.goidcacheend {
+		// Sched.goidgen is the last allocated id,
+		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
+		// At startup sched.goidgen=0, so main goroutine receives goid=1.
+		_p_.goidcache = xadd64(&sched.goidgen, _GoidCacheBatch)
+		_p_.goidcache -= _GoidCacheBatch - 1
+		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
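+		// Worked example (batch size illustrative): if sched.goidgen was 0 and
+		// _GoidCacheBatch is 16, xadd64 returns 16, so goidcache becomes 1 and
+		// goidcacheend 17, and this P hands out goids 1..16 before refilling.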
+	}
+	newg.goid = int64(_p_.goidcache)
+	_p_.goidcache++
+	if raceenabled {
+		newg.racectx = racegostart(callerpc)
+	}
+	runqput(_p_, newg)
+
+	if atomicload(&sched.npidle) != 0 && atomicload(&sched.nmspinning) == 0 && unsafe.Pointer(fn.fn) != unsafe.Pointer(funcPC(main)) { // TODO: fast atomic
+		wakep()
+	}
+	_g_.m.locks--
+	if _g_.m.locks == 0 && _g_.preempt { // restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+	return newg
+}
+
+// Put on gfree list.
+// If local list is too long, transfer a batch to the global list.
+func gfput(_p_ *p, gp *g) {
+	if readgstatus(gp) != _Gdead {
+		gothrow("gfput: bad status (not Gdead)")
+	}
+
+	stksize := gp.stack.hi - gp.stack.lo
+
+	if stksize != _FixedStack {
+		// non-standard stack size - free it.
+		stackfree(gp.stack)
+		gp.stack.lo = 0
+		gp.stack.hi = 0
+		gp.stackguard0 = 0
+	}
+
+	gp.schedlink = _p_.gfree
+	_p_.gfree = gp
+	_p_.gfreecnt++
+	if _p_.gfreecnt >= 64 {
+		lock(&sched.gflock)
+		for _p_.gfreecnt >= 32 {
+			_p_.gfreecnt--
+			gp = _p_.gfree
+			_p_.gfree = gp.schedlink
+			gp.schedlink = sched.gfree
+			sched.gfree = gp
+			sched.ngfree++
+		}
+		unlock(&sched.gflock)
+	}
+}
+
+// Get from gfree list.
+// If local list is empty, grab a batch from global list.
+func gfget(_p_ *p) *g {
+retry:
+	gp := _p_.gfree
+	if gp == nil && sched.gfree != nil {
+		lock(&sched.gflock)
+		for _p_.gfreecnt < 32 && sched.gfree != nil {
+			_p_.gfreecnt++
+			gp = sched.gfree
+			sched.gfree = gp.schedlink
+			sched.ngfree--
+			gp.schedlink = _p_.gfree
+			_p_.gfree = gp
+		}
+		unlock(&sched.gflock)
+		goto retry
+	}
+	if gp != nil {
+		_p_.gfree = gp.schedlink
+		_p_.gfreecnt--
+		if gp.stack.lo == 0 {
+			// Stack was deallocated in gfput.  Allocate a new one.
+			systemstack(func() {
+				gp.stack = stackalloc(_FixedStack)
+			})
+			gp.stackguard0 = gp.stack.lo + _StackGuard
+		} else {
+			if raceenabled {
+				racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
+			}
+		}
+	}
+	return gp
+}
+
+// Purge all cached G's from gfree list to the global list.
+func gfpurge(_p_ *p) {
+	lock(&sched.gflock)
+	for _p_.gfreecnt != 0 {
+		_p_.gfreecnt--
+		gp := _p_.gfree
+		_p_.gfree = gp.schedlink
+		gp.schedlink = sched.gfree
+		sched.gfree = gp
+		sched.ngfree++
+	}
+	unlock(&sched.gflock)
+}
+
+// Breakpoint executes a breakpoint trap.
+func Breakpoint() {
+	breakpoint()
+}
+
+// dolockOSThread is called by LockOSThread and lockOSThread below
+// after they modify m.locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+//go:nosplit
+func dolockOSThread() {
+	_g_ := getg()
+	_g_.m.lockedg = _g_
+	_g_.lockedm = _g_.m
+}
+
+//go:nosplit
+
+// LockOSThread wires the calling goroutine to its current operating system thread.
+// Until the calling goroutine exits or calls UnlockOSThread, it will always
+// execute in that thread, and no other goroutine can.
+func LockOSThread() {
+	getg().m.locked |= _LockExternal
+	dolockOSThread()
+}
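+
+// Illustrative usage (not part of this change): code that must stay on one OS
+// thread, for example because it relies on thread-local C state, typically does
+//
+//	runtime.LockOSThread()
+//	defer runtime.UnlockOSThread()
+//
+// around the thread-bound work.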
+
+//go:nosplit
+func lockOSThread() {
+	getg().m.locked += _LockInternal
+	dolockOSThread()
+}
+
+// dounlockOSThread is called by UnlockOSThread and unlockOSThread below
+// after they update m->locked. Do not allow preemption during this call,
+// or else the m might be different in this function than in the caller.
+//go:nosplit
+func dounlockOSThread() {
+	_g_ := getg()
+	if _g_.m.locked != 0 {
+		return
+	}
+	_g_.m.lockedg = nil
+	_g_.lockedm = nil
+}
+
+//go:nosplit
+
+// UnlockOSThread unwires the calling goroutine from its fixed operating system thread.
+// If the calling goroutine has not called LockOSThread, UnlockOSThread is a no-op.
+func UnlockOSThread() {
+	getg().m.locked &^= _LockExternal
+	dounlockOSThread()
+}
+
+//go:nosplit
+func unlockOSThread() {
+	_g_ := getg()
+	if _g_.m.locked < _LockInternal {
+		systemstack(badunlockosthread)
+	}
+	_g_.m.locked -= _LockInternal
+	dounlockOSThread()
+}
+
+func badunlockosthread() {
+	gothrow("runtime: internal error: misuse of lockOSThread/unlockOSThread")
+}
+
+func gcount() int32 {
+	n := int32(allglen) - sched.ngfree
+	for i := 0; ; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			break
+		}
+		n -= _p_.gfreecnt
+	}
+
+	// All these variables can be changed concurrently, so the result can be inconsistent.
+	// But at least the current goroutine is running.
+	if n < 1 {
+		n = 1
+	}
+	return n
+}
+
+func mcount() int32 {
+	return sched.mcount
+}
+
+var prof struct {
+	lock uint32
+	hz   int32
+}
+
+func _System()       { _System() }
+func _ExternalCode() { _ExternalCode() }
+func _GC()           { _GC() }
+
+var etext struct{}
+
+// Called if we receive a SIGPROF signal.
+func sigprof(pc *uint8, sp *uint8, lr *uint8, gp *g, mp *m) {
+	var n int32
+	var traceback bool
+	var stk [100]uintptr
+
+	if prof.hz == 0 {
+		return
+	}
+
+	// Profiling runs concurrently with GC, so it must not allocate.
+	mp.mallocing++
+
+	// Define that a "user g" is a user-created goroutine, and a "system g"
+	// is one that is m->g0 or m->gsignal. We've only made sure that we
+	// can unwind user g's, so exclude the system g's.
+	//
+	// It is not quite as easy as testing gp == m->curg (the current user g)
+	// because we might be interrupted for profiling halfway through a
+	// goroutine switch. The switch involves updating three (or four) values:
+	// g, PC, SP, and (on arm) LR. The PC must be the last to be updated,
+	// because once it gets updated the new g is running.
+	//
+	// When switching from a user g to a system g, LR is not considered live,
+	// so the update only affects g, SP, and PC. Since PC must be updated last,
+	// the possible partial transitions in ordinary execution are (1) g alone is updated,
+	// (2) both g and SP are updated, and (3) SP alone is updated.
+	// If g is updated, we'll see a system g and not look closer.
+	// If SP alone is updated, we can detect the partial transition by checking
+	// whether the SP is within g's stack bounds. (We could also require that SP
+	// be changed only after g, but the stack bounds check is needed by other
+	// cases, so there is no need to impose an additional requirement.)
+	//
+	// There is one exceptional transition to a system g, not in ordinary execution.
+	// When a signal arrives, the operating system starts the signal handler running
+	// with an updated PC and SP. The g is updated last, at the beginning of the
+	// handler. There are two reasons this is okay. First, until g is updated the
+	// g and SP do not match, so the stack bounds check detects the partial transition.
+	// Second, signal handlers currently run with signals disabled, so a profiling
+	// signal cannot arrive during the handler.
+	//
+	// When switching from a system g to a user g, there are three possibilities.
+	//
+	// First, it may be that the g switch has no PC update, because the SP
+	// either corresponds to a user g throughout (as in asmcgocall)
+	// or because it has been arranged to look like a user g frame
+	// (as in cgocallback_gofunc). In this case, since the entire
+	// transition is a g+SP update, a partial transition updating just one of
+	// those will be detected by the stack bounds check.
+	//
+	// Second, when returning from a signal handler, the PC and SP updates
+	// are performed by the operating system in an atomic update, so the g
+	// update must be done before them. The stack bounds check detects
+	// the partial transition here, and (again) signal handlers run with signals
+	// disabled, so a profiling signal cannot arrive then anyway.
+	//
+	// Third, the common case: it may be that the switch updates g, SP, and PC
+	// separately, as in gogo.
+	//
+	// Because gogo is the only instance, we check whether the PC lies
+	// within that function, and if so, do not ask for a traceback. This approach
+	// requires knowing the size of the gogo function, which we
+	// record in arch_*.h and check in runtime_test.go.
+	//
+	// There is another apparently viable approach, recorded here in case
+	// the "PC within gogo" check turns out not to be usable.
+	// It would be possible to delay the update of either g or SP until immediately
+	// before the PC update instruction. Then, because of the stack bounds check,
+	// the only problematic interrupt point is just before that PC update instruction,
+	// and the sigprof handler can detect that instruction and simulate stepping past
+	// it in order to reach a consistent state. On ARM, the update of g must be made
+	// in two places (in R10 and also in a TLS slot), so the delayed update would
+	// need to be the SP update. The sigprof handler must read the instruction at
+	// the current PC and if it was the known instruction (for example, JMP BX or
+	// MOV R2, PC), use that other register in place of the PC value.
+	// The biggest drawback to this solution is that it requires that we can tell
+	// whether it's safe to read from the memory pointed at by PC.
+	// In a correct program, we can test PC == nil and otherwise read,
+	// but if a profiling signal happens at the instant that a program executes
+	// a bad jump (before the program manages to handle the resulting fault)
+	// the profiling handler could fault trying to read nonexistent memory.
+	//
+	// To recap, there are no constraints on the assembly being used for the
+	// transition. We simply require that g and SP match and that the PC is not
+	// in gogo.
+	traceback = true
+	usp := uintptr(unsafe.Pointer(sp))
+	gogo := funcPC(gogo)
+	if gp == nil || gp != mp.curg ||
+		usp < gp.stack.lo || gp.stack.hi < usp ||
+		(gogo <= uintptr(unsafe.Pointer(pc)) && uintptr(unsafe.Pointer(pc)) < gogo+_RuntimeGogoBytes) {
+		traceback = false
+	}
+
+	n = 0
+	if traceback {
+		n = int32(gentraceback(uintptr(unsafe.Pointer(pc)), uintptr(unsafe.Pointer(sp)), uintptr(unsafe.Pointer(lr)), gp, 0, &stk[0], len(stk), nil, nil, _TraceTrap))
+	}
+	if !traceback || n <= 0 {
+		// Normal traceback is impossible or has failed.
+		// See if it falls into several common cases.
+		n = 0
+		if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
+			// Cgo, we can't unwind and symbolize arbitrary C code,
+			// so instead collect Go stack that leads to the cgo call.
+			// This is especially important on windows, since all syscalls are cgo calls.
+			n = int32(gentraceback(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, 0, &stk[0], len(stk), nil, nil, 0))
+		}
+		if GOOS == "windows" && n == 0 && mp.libcallg != nil && mp.libcallpc != 0 && mp.libcallsp != 0 {
+			// Libcall, i.e. runtime syscall on windows.
+			// Collect Go stack that leads to the call.
+			n = int32(gentraceback(mp.libcallpc, mp.libcallsp, 0, mp.libcallg, 0, &stk[0], len(stk), nil, nil, 0))
+		}
+		if n == 0 {
+			// If all of the above has failed, account it against abstract "System" or "GC".
+			n = 2
+			// "ExternalCode" is better than "etext".
+			if uintptr(unsafe.Pointer(pc)) > uintptr(unsafe.Pointer(&etext)) {
+				pc = (*uint8)(unsafe.Pointer(uintptr(funcPC(_ExternalCode) + _PCQuantum)))
+			}
+			stk[0] = uintptr(unsafe.Pointer(pc))
+			if mp.gcing != 0 || mp.helpgc != 0 {
+				stk[1] = funcPC(_GC) + _PCQuantum
+			} else {
+				stk[1] = funcPC(_System) + _PCQuantum
+			}
+		}
+	}
+
+	if prof.hz != 0 {
+		// Simple cas-lock to coordinate with setcpuprofilerate.
+		for !cas(&prof.lock, 0, 1) {
+			osyield()
+		}
+		if prof.hz != 0 {
+			cpuproftick(&stk[0], n)
+		}
+		atomicstore(&prof.lock, 0)
+	}
+	mp.mallocing--
+}
+
+// Arrange to call fn with a traceback hz times a second.
+func setcpuprofilerate_m(hz int32) {
+	// Force sane arguments.
+	if hz < 0 {
+		hz = 0
+	}
+
+	// Disable preemption, otherwise we can be rescheduled to another thread
+	// that has profiling enabled.
+	_g_ := getg()
+	_g_.m.locks++
+
+	// Stop profiler on this thread so that it is safe to lock prof.
+	// If a profiling signal came in while we had prof locked,
+	// it would deadlock.
+	resetcpuprofiler(0)
+
+	for !cas(&prof.lock, 0, 1) {
+		osyield()
+	}
+	prof.hz = hz
+	atomicstore(&prof.lock, 0)
+
+	lock(&sched.lock)
+	sched.profilehz = hz
+	unlock(&sched.lock)
+
+	if hz != 0 {
+		resetcpuprofiler(hz)
+	}
+
+	_g_.m.locks--
+}
+
+// Change number of processors.  The world is stopped, sched is locked.
+// gcworkbufs are not being modified by either the GC or
+// the write barrier code.
+func procresize(new int32) {
+	old := gomaxprocs
+	if old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs {
+		gothrow("procresize: invalid arg")
+	}
+
+	// initialize new P's
+	for i := int32(0); i < new; i++ {
+		p := allp[i]
+		if p == nil {
+			p = newP()
+			p.id = i
+			p.status = _Pgcstop
+			atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(p))
+		}
+		if p.mcache == nil {
+			if old == 0 && i == 0 {
+				if getg().m.mcache == nil {
+					gothrow("missing mcache?")
+				}
+				p.mcache = getg().m.mcache // bootstrap
+			} else {
+				p.mcache = allocmcache()
+			}
+		}
+	}
+
+	// redistribute runnable G's evenly
+	// collect all runnable goroutines in global queue preserving FIFO order
+	// FIFO order is required to ensure fairness even during frequent GCs
+	// see http://golang.org/issue/7126
+	empty := false
+	for !empty {
+		empty = true
+		for i := int32(0); i < old; i++ {
+			p := allp[i]
+			if p.runqhead == p.runqtail {
+				continue
+			}
+			empty = false
+			// pop from tail of local queue
+			p.runqtail--
+			gp := p.runq[p.runqtail%uint32(len(p.runq))]
+			// push onto head of global queue
+			gp.schedlink = sched.runqhead
+			sched.runqhead = gp
+			if sched.runqtail == nil {
+				sched.runqtail = gp
+			}
+			sched.runqsize++
+		}
+	}
+
+	// fill local queues with at most len(p.runq)/2 goroutines
+	// start at 1 because current M already executes some G and will acquire allp[0] below,
+	// so if we have a spare G we want to put it into allp[1].
+	var _p_ p
+	for i := int32(1); i < new*int32(len(_p_.runq))/2 && sched.runqsize > 0; i++ {
+		gp := sched.runqhead
+		sched.runqhead = gp.schedlink
+		if sched.runqhead == nil {
+			sched.runqtail = nil
+		}
+		sched.runqsize--
+		runqput(allp[i%new], gp)
+	}
+
+	// free unused P's
+	for i := new; i < old; i++ {
+		p := allp[i]
+		freemcache(p.mcache)
+		p.mcache = nil
+		gfpurge(p)
+		p.status = _Pdead
+		// can't free P itself because it can be referenced by an M in syscall
+	}
+
+	_g_ := getg()
+	if _g_.m.p != nil {
+		_g_.m.p.m = nil
+	}
+	_g_.m.p = nil
+	_g_.m.mcache = nil
+	p := allp[0]
+	p.m = nil
+	p.status = _Pidle
+	acquirep(p)
+	for i := new - 1; i > 0; i-- {
+		p := allp[i]
+		p.status = _Pidle
+		pidleput(p)
+	}
+	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
+	atomicstore((*uint32)(unsafe.Pointer(int32p)), uint32(new))
+}
+
+// Associate p and the current m.
+func acquirep(_p_ *p) {
+	_g_ := getg()
+
+	if _g_.m.p != nil || _g_.m.mcache != nil {
+		gothrow("acquirep: already in go")
+	}
+	if _p_.m != nil || _p_.status != _Pidle {
+		id := int32(0)
+		if _p_.m != nil {
+			id = _p_.m.id
+		}
+		print("acquirep: p->m=", _p_.m, "(", id, ") p->status=", _p_.status, "\n")
+		gothrow("acquirep: invalid p state")
+	}
+	_g_.m.mcache = _p_.mcache
+	_g_.m.p = _p_
+	_p_.m = _g_.m
+	_p_.status = _Prunning
+}
+
+// Disassociate p and the current m.
+func releasep() *p {
+	_g_ := getg()
+
+	if _g_.m.p == nil || _g_.m.mcache == nil {
+		gothrow("releasep: invalid arg")
+	}
+	_p_ := _g_.m.p
+	if _p_.m != _g_.m || _p_.mcache != _g_.m.mcache || _p_.status != _Prunning {
+		print("releasep: m=", _g_.m, " m->p=", _g_.m.p, " p->m=", _p_.m, " m->mcache=", _g_.m.mcache, " p->mcache=", _p_.mcache, " p->status=", _p_.status, "\n")
+		gothrow("releasep: invalid p state")
+	}
+	_g_.m.p = nil
+	_g_.m.mcache = nil
+	_p_.m = nil
+	_p_.status = _Pidle
+	return _p_
+}
+
+func incidlelocked(v int32) {
+	lock(&sched.lock)
+	sched.nmidlelocked += v
+	if v > 0 {
+		checkdead()
+	}
+	unlock(&sched.lock)
+}
+
+// Check for deadlock situation.
+// The check is based on the number of running M's; if that number is 0, we have a deadlock.
+func checkdead() {
+	// If we are dying because of a signal caught on an already idle thread,
+	// freezetheworld will cause all running threads to block.
+	// The runtime will then essentially be in a deadlock state,
+	// except that there is a thread that will call exit soon.
+	if panicking > 0 {
+		return
+	}
+
+	// -1 for sysmon
+	run := sched.mcount - sched.nmidle - sched.nmidlelocked - 1
+	if run > 0 {
+		return
+	}
+	if run < 0 {
+		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", sched.mcount, "\n")
+		gothrow("checkdead: inconsistent counts")
+	}
+
+	grunning := 0
+	lock(&allglock)
+	for i := 0; i < len(allgs); i++ {
+		gp := allgs[i]
+		if gp.issystem {
+			continue
+		}
+		s := readgstatus(gp)
+		switch s &^ _Gscan {
+		case _Gwaiting:
+			grunning++
+		case _Grunnable,
+			_Grunning,
+			_Gsyscall:
+			unlock(&allglock)
+			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
+			gothrow("checkdead: runnable g")
+		}
+	}
+	unlock(&allglock)
+	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
+		gothrow("no goroutines (main called runtime.Goexit) - deadlock!")
+	}
+
+	// Maybe jump time forward for playground.
+	gp := timejump()
+	if gp != nil {
+		casgstatus(gp, _Gwaiting, _Grunnable)
+		globrunqput(gp)
+		_p_ := pidleget()
+		if _p_ == nil {
+			gothrow("checkdead: no p for timer")
+		}
+		mp := mget()
+		if mp == nil {
+			_newm(nil, _p_)
+		} else {
+			mp.nextp = _p_
+			notewakeup(&mp.park)
+		}
+		return
+	}
+
+	getg().m.throwing = -1 // do not dump full stacks
+	gothrow("all goroutines are asleep - deadlock!")
+}
+
+func sysmon() {
+	// If we go two minutes without a garbage collection, force one to run.
+	forcegcperiod := int64(2 * 60 * 1e9)
+
+	// If a heap span goes unused for 5 minutes after a garbage collection,
+	// we hand it back to the operating system.
+	scavengelimit := int64(5 * 60 * 1e9)
+
+	if debug.scavenge > 0 {
+		// Scavenge-a-lot for testing.
+		forcegcperiod = 10 * 1e6
+		scavengelimit = 20 * 1e6
+	}
+
+	lastscavenge := nanotime()
+	nscavenge := 0
+
+	// Make wake-up period small enough for the sampling to be correct.
+	maxsleep := forcegcperiod / 2
+	if scavengelimit < forcegcperiod {
+		maxsleep = scavengelimit / 2
+	}
+
+	lasttrace := int64(0)
+	idle := 0 // how many cycles in succession we have not woken anybody up
+	delay := uint32(0)
+	for {
+		if idle == 0 { // start with 20us sleep...
+			delay = 20
+		} else if idle > 50 { // start doubling the sleep after 1ms...
+			delay *= 2
+		}
+		if delay > 10*1000 { // up to 10ms
+			delay = 10 * 1000
+		}
+		usleep(delay)
+		if debug.schedtrace <= 0 && (sched.gcwaiting != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs)) { // TODO: fast atomic
+			lock(&sched.lock)
+			if atomicload(&sched.gcwaiting) != 0 || atomicload(&sched.npidle) == uint32(gomaxprocs) {
+				atomicstore(&sched.sysmonwait, 1)
+				unlock(&sched.lock)
+				notetsleep(&sched.sysmonnote, maxsleep)
+				lock(&sched.lock)
+				atomicstore(&sched.sysmonwait, 0)
+				noteclear(&sched.sysmonnote)
+				idle = 0
+				delay = 20
+			}
+			unlock(&sched.lock)
+		}
+		// poll network if not polled for more than 10ms
+		lastpoll := int64(atomicload64(&sched.lastpoll))
+		now := nanotime()
+		unixnow := unixnanotime()
+		if lastpoll != 0 && lastpoll+10*1000*1000 < now {
+			cas64(&sched.lastpoll, uint64(lastpoll), uint64(now))
+			gp := netpoll(false) // non-blocking - returns list of goroutines
+			if gp != nil {
+				// Need to decrement number of idle locked M's
+				// (pretending that one more is running) before injectglist.
+				// Otherwise it can lead to the following situation:
+				// injectglist grabs all P's but before it starts M's to run the P's,
+				// another M returns from syscall, finishes running its G,
+				// observes that there is no work to do and no other running M's
+				// and reports deadlock.
+				incidlelocked(-1)
+				injectglist(gp)
+				incidlelocked(1)
+			}
+		}
+		// retake P's blocked in syscalls
+		// and preempt long running G's
+		if retake(now) != 0 {
+			idle = 0
+		} else {
+			idle++
+		}
+		// check if we need to force a GC
+		lastgc := int64(atomicload64(&memstats.last_gc))
+		if lastgc != 0 && unixnow-lastgc > forcegcperiod && atomicload(&forcegc.idle) != 0 {
+			lock(&forcegc.lock)
+			forcegc.idle = 0
+			forcegc.g.schedlink = nil
+			injectglist(forcegc.g)
+			unlock(&forcegc.lock)
+		}
+		// scavenge heap once in a while
+		if lastscavenge+scavengelimit/2 < now {
+			mHeap_Scavenge(int32(nscavenge), uint64(now), uint64(scavengelimit))
+			lastscavenge = now
+			nscavenge++
+		}
+		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace*1000000) <= now {
+			lasttrace = now
+			schedtrace(debug.scheddetail > 0)
+		}
+	}
+}
+
+var pdesc [_MaxGomaxprocs]struct {
+	schedtick   uint32
+	schedwhen   int64
+	syscalltick uint32
+	syscallwhen int64
+}
+
+func retake(now int64) uint32 {
+	n := 0
+	for i := int32(0); i < gomaxprocs; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			continue
+		}
+		pd := &pdesc[i]
+		s := _p_.status
+		if s == _Psyscall {
+			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
+			t := int64(_p_.syscalltick)
+			if int64(pd.syscalltick) != t {
+				pd.syscalltick = uint32(t)
+				pd.syscallwhen = now
+				continue
+			}
+			// On the one hand we don't want to retake Ps if there is no other work to do,
+			// but on the other hand we want to retake them eventually
+			// because they can prevent the sysmon thread from deep sleep.
+			if _p_.runqhead == _p_.runqtail && atomicload(&sched.nmspinning)+atomicload(&sched.npidle) > 0 && pd.syscallwhen+10*1000*1000 > now {
+				continue
+			}
+			// Need to decrement number of idle locked M's
+			// (pretending that one more is running) before the CAS.
+			// Otherwise the M from which we retake can exit the syscall,
+			// increment nmidle and report deadlock.
+			incidlelocked(-1)
+			if cas(&_p_.status, s, _Pidle) {
+				n++
+				handoffp(_p_)
+			}
+			incidlelocked(1)
+		} else if s == _Prunning {
+			// Preempt G if it's running for more than 10ms.
+			t := int64(_p_.schedtick)
+			if int64(pd.schedtick) != t {
+				pd.schedtick = uint32(t)
+				pd.schedwhen = now
+				continue
+			}
+			if pd.schedwhen+10*1000*1000 > now {
+				continue
+			}
+			preemptone(_p_)
+		}
+	}
+	return uint32(n)
+}
+
+// Tell all goroutines that they have been preempted and they should stop.
+// This function is purely best-effort.  It can fail to inform a goroutine if a
+// processor just started running it.
+// No locks need to be held.
+// Returns true if preemption request was issued to at least one goroutine.
+func preemptall() bool {
+	res := false
+	for i := int32(0); i < gomaxprocs; i++ {
+		_p_ := allp[i]
+		if _p_ == nil || _p_.status != _Prunning {
+			continue
+		}
+		if preemptone(_p_) {
+			res = true
+		}
+	}
+	return res
+}
+
+// Tell the goroutine running on processor P to stop.
+// This function is purely best-effort.  It can incorrectly fail to inform the
+// goroutine.  It can inform the wrong goroutine.  Even if it informs the
+// correct goroutine, that goroutine might ignore the request if it is
+// simultaneously executing newstack.
+// No lock needs to be held.
+// Returns true if preemption request was issued.
+// The actual preemption will happen at some point in the future
+// and will be indicated by the gp->status no longer being
+// Grunning.
+func preemptone(_p_ *p) bool {
+	mp := _p_.m
+	if mp == nil || mp == getg().m {
+		return false
+	}
+	gp := mp.curg
+	if gp == nil || gp == mp.g0 {
+		return false
+	}
+
+	gp.preempt = true
+
+	// Every call in a goroutine checks for stack overflow by
+	// comparing the current stack pointer to gp->stackguard0.
+	// Setting gp->stackguard0 to StackPreempt folds
+	// preemption into the normal stack overflow check.
+	gp.stackguard0 = stackPreempt
+	return true
+}
+
+var starttime int64
+
+func schedtrace(detailed bool) {
+	now := nanotime()
+	if starttime == 0 {
+		starttime = now
+	}
+
+	lock(&sched.lock)
+	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle, " threads=", sched.mcount, " spinningthreads=", sched.nmspinning, " idlethreads=", sched.nmidle, " runqueue=", sched.runqsize)
+	if detailed {
+		print(" gcwaiting=", sched.gcwaiting, " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait, "\n")
+	}
+	// We must be careful while reading data from P's, M's and G's.
+	// Even if we hold schedlock, most data can be changed concurrently.
+	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
+	for i := int32(0); i < gomaxprocs; i++ {
+		_p_ := allp[i]
+		if _p_ == nil {
+			continue
+		}
+		mp := _p_.m
+		h := atomicload(&_p_.runqhead)
+		t := atomicload(&_p_.runqtail)
+		if detailed {
+			id := int32(-1)
+			if mp != nil {
+				id = mp.id
+			}
+			print("  P", i, ": status=", _p_.status, " schedtick=", _p_.schedtick, " syscalltick=", _p_.syscalltick, " m=", id, " runqsize=", t-h, " gfreecnt=", _p_.gfreecnt, "\n")
+		} else {
+			// In non-detailed mode format lengths of per-P run queues as:
+			// [len1 len2 len3 len4]
+			print(" ")
+			if i == 0 {
+				print("[")
+			}
+			print(t - h)
+			if i == gomaxprocs-1 {
+				print("]\n")
+			}
+		}
+	}
+
+	if !detailed {
+		unlock(&sched.lock)
+		return
+	}
+
+	for mp := allm; mp != nil; mp = mp.alllink {
+		_p_ := mp.p
+		gp := mp.curg
+		lockedg := mp.lockedg
+		id1 := int32(-1)
+		if _p_ != nil {
+			id1 = _p_.id
+		}
+		id2 := int64(-1)
+		if gp != nil {
+			id2 = gp.goid
+		}
+		id3 := int64(-1)
+		if lockedg != nil {
+			id3 = lockedg.goid
+		}
+		print("  M", mp.id, ": p=", id1, " curg=", id2, " mallocing=", mp.mallocing, " throwing=", mp.throwing, " gcing=", mp.gcing, ""+" locks=", mp.locks, " dying=", mp.dying, " helpgc=", mp.helpgc, " spinning=", mp.spinning, " blocked=", getg().m.blocked, " lockedg=", id3, "\n")
+	}
+
+	lock(&allglock)
+	for gi := 0; gi < len(allgs); gi++ {
+		gp := allgs[gi]
+		mp := gp.m
+		lockedm := gp.lockedm
+		id1 := int32(-1)
+		if mp != nil {
+			id1 = mp.id
+		}
+		id2 := int32(-1)
+		if lockedm != nil {
+			id2 = lockedm.id
+		}
+		print("  G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason, ") m=", id1, " lockedm=", id2, "\n")
+	}
+	unlock(&allglock)
+	unlock(&sched.lock)
+}
+
+// Put mp on midle list.
+// Sched must be locked.
+func mput(mp *m) {
+	mp.schedlink = sched.midle
+	sched.midle = mp
+	sched.nmidle++
+	checkdead()
+}
+
+// Try to get an m from midle list.
+// Sched must be locked.
+func mget() *m {
+	mp := sched.midle
+	if mp != nil {
+		sched.midle = mp.schedlink
+		sched.nmidle--
+	}
+	return mp
+}
+
+// Put gp on the global runnable queue.
+// Sched must be locked.
+func globrunqput(gp *g) {
+	gp.schedlink = nil
+	if sched.runqtail != nil {
+		sched.runqtail.schedlink = gp
+	} else {
+		sched.runqhead = gp
+	}
+	sched.runqtail = gp
+	sched.runqsize++
+}
+
+// Put a batch of runnable goroutines on the global runnable queue.
+// Sched must be locked.
+func globrunqputbatch(ghead *g, gtail *g, n int32) {
+	gtail.schedlink = nil
+	if sched.runqtail != nil {
+		sched.runqtail.schedlink = ghead
+	} else {
+		sched.runqhead = ghead
+	}
+	sched.runqtail = gtail
+	sched.runqsize += n
+}
+
+// Try to get a batch of G's from the global runnable queue.
+// Sched must be locked.
+func globrunqget(_p_ *p, max int32) *g {
+	if sched.runqsize == 0 {
+		return nil
+	}
+
+	n := sched.runqsize/gomaxprocs + 1
+	if n > sched.runqsize {
+		n = sched.runqsize
+	}
+	if max > 0 && n > max {
+		n = max
+	}
+	if n > int32(len(_p_.runq))/2 {
+		n = int32(len(_p_.runq)) / 2
+	}
+
+	sched.runqsize -= n
+	if sched.runqsize == 0 {
+		sched.runqtail = nil
+	}
+
+	gp := sched.runqhead
+	sched.runqhead = gp.schedlink
+	n--
+	for ; n > 0; n-- {
+		gp1 := sched.runqhead
+		sched.runqhead = gp1.schedlink
+		runqput(_p_, gp1)
+	}
+	return gp
+}
+
+// Put p on the _Pidle list.
+// Sched must be locked.
+func pidleput(_p_ *p) {
+	_p_.link = sched.pidle
+	sched.pidle = _p_
+	xadd(&sched.npidle, 1) // TODO: fast atomic
+}
+
+// Try to get a p from the _Pidle list.
+// Sched must be locked.
+func pidleget() *p {
+	_p_ := sched.pidle
+	if _p_ != nil {
+		sched.pidle = _p_.link
+		xadd(&sched.npidle, -1) // TODO: fast atomic
+	}
+	return _p_
+}
+
+// Try to put g on local runnable queue.
+// If it's full, put onto global queue.
+// Executed only by the owner P.
+func runqput(_p_ *p, gp *g) {
+retry:
+	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
+	t := _p_.runqtail
+	if t-h < uint32(len(_p_.runq)) {
+		_p_.runq[t%uint32(len(_p_.runq))] = gp
+		atomicstore(&_p_.runqtail, t+1) // store-release, makes the item available for consumption
+		return
+	}
+	if runqputslow(_p_, gp, h, t) {
+		return
+	}
+	// the queue is not full now, so the put above must succeed
+	goto retry
+}
+
+// Put g and a batch of work from local runnable queue on global queue.
+// Executed only by the owner P.
+func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
+	var batch [len(_p_.runq)/2 + 1]*g
+
+	// First, grab a batch from local queue.
+	n := t - h
+	n = n / 2
+	if n != uint32(len(_p_.runq)/2) {
+		gothrow("runqputslow: queue is not full")
+	}
+	for i := uint32(0); i < n; i++ {
+		batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
+	}
+	if !cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+		return false
+	}
+	batch[n] = gp
+
+	// Link the goroutines.
+	for i := uint32(0); i < n; i++ {
+		batch[i].schedlink = batch[i+1]
+	}
+
+	// Now put the batch on global queue.
+	lock(&sched.lock)
+	globrunqputbatch(batch[0], batch[n], int32(n+1))
+	unlock(&sched.lock)
+	return true
+}
+
+// Get g from local runnable queue.
+// Executed only by the owner P.
+func runqget(_p_ *p) *g {
+	for {
+		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
+		t := _p_.runqtail
+		if t == h {
+			return nil
+		}
+		gp := _p_.runq[h%uint32(len(_p_.runq))]
+		if cas(&_p_.runqhead, h, h+1) { // cas-release, commits consume
+			return gp
+		}
+	}
+}
+
+// Grabs a batch of goroutines from local runnable queue.
+// batch array must be of size len(p->runq)/2. Returns number of grabbed goroutines.
+// Can be executed by any P.
+func runqgrab(_p_ *p, batch []*g) uint32 {
+	for {
+		h := atomicload(&_p_.runqhead) // load-acquire, synchronize with other consumers
+		t := atomicload(&_p_.runqtail) // load-acquire, synchronize with the producer
+		n := t - h
+		n = n - n/2
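+		// n is now the larger half, rounded up: e.g. with t-h == 5, n == 5-2 == 3.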
+		if n == 0 {
+			return 0
+		}
+		if n > uint32(len(_p_.runq)/2) { // read inconsistent h and t
+			continue
+		}
+		for i := uint32(0); i < n; i++ {
+			batch[i] = _p_.runq[(h+i)%uint32(len(_p_.runq))]
+		}
+		if cas(&_p_.runqhead, h, h+n) { // cas-release, commits consume
+			return n
+		}
+	}
+}
+
+// Steal half of elements from local runnable queue of p2
+// and put onto local runnable queue of p.
+// Returns one of the stolen elements (or nil if failed).
+func runqsteal(_p_, p2 *p) *g {
+	var batch [len(_p_.runq) / 2]*g
+
+	n := runqgrab(p2, batch[:])
+	if n == 0 {
+		return nil
+	}
+	n--
+	gp := batch[n]
+	if n == 0 {
+		return gp
+	}
+	h := atomicload(&_p_.runqhead) // load-acquire, synchronize with consumers
+	t := _p_.runqtail
+	if t-h+n >= uint32(len(_p_.runq)) {
+		gothrow("runqsteal: runq overflow")
+	}
+	for i := uint32(0); i < n; i++ {
+		_p_.runq[(t+i)%uint32(len(_p_.runq))] = batch[i]
+	}
+	atomicstore(&_p_.runqtail, t+n) // store-release, makes the item available for consumption
+	return gp
+}
+
+func testSchedLocalQueue() {
+	_p_ := new(p)
+	gs := make([]g, len(_p_.runq))
+	for i := 0; i < len(_p_.runq); i++ {
+		if runqget(_p_) != nil {
+			gothrow("runq is not empty initially")
+		}
+		for j := 0; j < i; j++ {
+			runqput(_p_, &gs[i])
+		}
+		for j := 0; j < i; j++ {
+			if runqget(_p_) != &gs[i] {
+				print("bad element at iter ", i, "/", j, "\n")
+				gothrow("bad element")
+			}
+		}
+		if runqget(_p_) != nil {
+			gothrow("runq is not empty afterwards")
+		}
+	}
+}
+
+func testSchedLocalQueueSteal() {
+	p1 := new(p)
+	p2 := new(p)
+	gs := make([]g, len(p1.runq))
+	for i := 0; i < len(p1.runq); i++ {
+		for j := 0; j < i; j++ {
+			gs[j].sig = 0
+			runqput(p1, &gs[j])
+		}
+		gp := runqsteal(p2, p1)
+		s := 0
+		if gp != nil {
+			s++
+			gp.sig++
+		}
+		for {
+			gp = runqget(p2)
+			if gp == nil {
+				break
+			}
+			s++
+			gp.sig++
+		}
+		for {
+			gp = runqget(p1)
+			if gp == nil {
+				break
+			}
+			gp.sig++
+		}
+		for j := 0; j < i; j++ {
+			if gs[j].sig != 1 {
+				print("bad element ", j, "(", gs[j].sig, ") at iter ", i, "\n")
+				gothrow("bad element")
+			}
+		}
+		if s != i/2 && s != i/2+1 {
+			print("bad steal ", s, ", want ", i/2, " or ", i/2+1, ", iter ", i, "\n")
+			gothrow("bad steal")
+		}
+	}
+}
+
+func setMaxThreads(in int) (out int) {
+	lock(&sched.lock)
+	out = int(sched.maxmcount)
+	sched.maxmcount = int32(in)
+	checkmcount()
+	unlock(&sched.lock)
+	return
+}
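+
+// setMaxThreads backs runtime/debug.SetMaxThreads (the linkage is established
+// elsewhere); it returns the previous limit so callers can restore it later.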
+
+var goexperiment string = "GOEXPERIMENT" // TODO: defined in zaexperiment.h
+
+func haveexperiment(name string) bool {
+	x := goexperiment
+	for x != "" {
+		xname := ""
+		i := index(x, ",")
+		if i < 0 {
+			xname, x = x, ""
+		} else {
+			xname, x = x[:i], x[i+1:]
+		}
+		if xname == name {
+			return true
+		}
+	}
+	return false
+}
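+
+// Illustrative usage (experiment name hypothetical): with GOEXPERIMENT set to
+// "framepointer,foo" when the toolchain was built, both
+// haveexperiment("framepointer") and haveexperiment("foo") report true.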
+
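+// sync_procPin and sync_procUnpin back sync.runtime_procPin and
+// sync.runtime_procUnpin (the linkage is established elsewhere); sync.Pool,
+// for example, pins to obtain a stable P id while touching its per-P storage.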
+//go:nosplit
+func sync_procPin() int {
+	_g_ := getg()
+	mp := _g_.m
+
+	mp.locks++
+	return int(mp.p.id)
+}
+
+//go:nosplit
+func sync_procUnpin() {
+	_g_ := getg()
+	_g_.m.locks--
+}
diff --git a/src/runtime/race.c b/src/runtime/race.c
deleted file mode 100644
index 9ac73fb..0000000
--- a/src/runtime/race.c
+++ /dev/null
@@ -1,314 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Implementation of the race detector API.
-// +build race
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "race.h"
-#include "type.h"
-#include "typekind.h"
-#include "textflag.h"
-
-// Race runtime functions called via runtime·racecall.
-void __tsan_init(void);
-void __tsan_fini(void);
-void __tsan_map_shadow(void);
-void __tsan_finalizer_goroutine(void);
-void __tsan_go_start(void);
-void __tsan_go_end(void);
-void __tsan_malloc(void);
-void __tsan_acquire(void);
-void __tsan_release(void);
-void __tsan_release_merge(void);
-void __tsan_go_ignore_sync_begin(void);
-void __tsan_go_ignore_sync_end(void);
-
-// Mimic what cmd/cgo would do.
-#pragma cgo_import_static __tsan_init
-#pragma cgo_import_static __tsan_fini
-#pragma cgo_import_static __tsan_map_shadow
-#pragma cgo_import_static __tsan_finalizer_goroutine
-#pragma cgo_import_static __tsan_go_start
-#pragma cgo_import_static __tsan_go_end
-#pragma cgo_import_static __tsan_malloc
-#pragma cgo_import_static __tsan_acquire
-#pragma cgo_import_static __tsan_release
-#pragma cgo_import_static __tsan_release_merge
-#pragma cgo_import_static __tsan_go_ignore_sync_begin
-#pragma cgo_import_static __tsan_go_ignore_sync_end
-
-// These are called from race_amd64.s.
-#pragma cgo_import_static __tsan_read
-#pragma cgo_import_static __tsan_read_pc
-#pragma cgo_import_static __tsan_read_range
-#pragma cgo_import_static __tsan_write
-#pragma cgo_import_static __tsan_write_pc
-#pragma cgo_import_static __tsan_write_range
-#pragma cgo_import_static __tsan_func_enter
-#pragma cgo_import_static __tsan_func_exit
-
-#pragma cgo_import_static __tsan_go_atomic32_load
-#pragma cgo_import_static __tsan_go_atomic64_load
-#pragma cgo_import_static __tsan_go_atomic32_store
-#pragma cgo_import_static __tsan_go_atomic64_store
-#pragma cgo_import_static __tsan_go_atomic32_exchange
-#pragma cgo_import_static __tsan_go_atomic64_exchange
-#pragma cgo_import_static __tsan_go_atomic32_fetch_add
-#pragma cgo_import_static __tsan_go_atomic64_fetch_add
-#pragma cgo_import_static __tsan_go_atomic32_compare_exchange
-#pragma cgo_import_static __tsan_go_atomic64_compare_exchange
-
-extern byte runtime·noptrdata[];
-extern byte runtime·enoptrbss[];
-  
-// start/end of heap for race_amd64.s
-uintptr runtime·racearenastart;
-uintptr runtime·racearenaend;
-
-void runtime·racefuncenter(void *callpc);
-void runtime·racefuncexit(void);
-void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc);
-void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc);
-void runtime·racesymbolizethunk(void*);
-
-// racecall allows calling an arbitrary function f from C race runtime
-// with up to 4 uintptr arguments.
-void runtime·racecall(void(*f)(void), ...);
-
-// checks if the address has shadow (i.e. heap or data/bss)
-#pragma textflag NOSPLIT
-static bool
-isvalidaddr(uintptr addr)
-{
-	if(addr >= runtime·racearenastart && addr < runtime·racearenaend)
-		return true;
-	if(addr >= (uintptr)runtime·noptrdata && addr < (uintptr)runtime·enoptrbss)
-		return true;
-	return false;
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·raceinit(void)
-{
-	uintptr racectx, start, size;
-
-	// cgo is required to initialize libc, which is used by race runtime
-	if(!runtime·iscgo)
-		runtime·throw("raceinit: race build must use cgo");
-	runtime·racecall(__tsan_init, &racectx, runtime·racesymbolizethunk);
-	// Round data segment to page boundaries, because it's used in mmap().
-	start = (uintptr)runtime·noptrdata & ~(PageSize-1);
-	size = ROUND((uintptr)runtime·enoptrbss - start, PageSize);
-	runtime·racecall(__tsan_map_shadow, start, size);
-	return racectx;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racefini(void)
-{
-	runtime·racecall(__tsan_fini);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racemapshadow(void *addr, uintptr size)
-{
-	if(runtime·racearenastart == 0)
-		runtime·racearenastart = (uintptr)addr;
-	if(runtime·racearenaend < (uintptr)addr+size)
-		runtime·racearenaend = (uintptr)addr+size;
-	runtime·racecall(__tsan_map_shadow, addr, size);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racemalloc(void *p, uintptr sz)
-{
-	runtime·racecall(__tsan_malloc, p, sz);
-}
-
-#pragma textflag NOSPLIT
-uintptr
-runtime·racegostart(void *pc)
-{
-	uintptr racectx;
-	G *spawng;
-
-	if(g->m->curg != nil)
-		spawng = g->m->curg;
-	else
-		spawng = g;
-
-	runtime·racecall(__tsan_go_start, spawng->racectx, &racectx, pc);
-	return racectx;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racegoend(void)
-{
-	runtime·racecall(__tsan_go_end, g->racectx);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc)
-{
-	if(g != g->m->curg) {
-		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
-		// Not interesting.
-		return;
-	}
-	if(callpc != nil)
-		runtime·racefuncenter(callpc);
-	runtime·racewriterangepc1(addr, sz, pc);
-	if(callpc != nil)
-		runtime·racefuncexit();
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc)
-{
-	if(g != g->m->curg) {
-		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
-		// Not interesting.
-		return;
-	}
-	if(callpc != nil)
-		runtime·racefuncenter(callpc);
-	runtime·racereadrangepc1(addr, sz, pc);
-	if(callpc != nil)
-		runtime·racefuncexit();
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racewriteobjectpc(void *addr, Type *t, void *callpc, void *pc)
-{
-	uint8 kind;
-
-	kind = t->kind & KindMask;
-	if(kind == KindArray || kind == KindStruct)
-		runtime·racewriterangepc(addr, t->size, callpc, pc);
-	else
-		runtime·racewritepc(addr, callpc, pc);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racereadobjectpc(void *addr, Type *t, void *callpc, void *pc)
-{
-	uint8 kind;
-
-	kind = t->kind & KindMask;
-	if(kind == KindArray || kind == KindStruct)
-		runtime·racereadrangepc(addr, t->size, callpc, pc);
-	else
-		runtime·racereadpc(addr, callpc, pc);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·raceacquire(void *addr)
-{
-	runtime·raceacquireg(g, addr);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·raceacquireg(G *gp, void *addr)
-{
-	if(g->raceignore || !isvalidaddr((uintptr)addr))
-		return;
-	runtime·racecall(__tsan_acquire, gp->racectx, addr);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racerelease(void *addr)
-{
-	if(g->raceignore || !isvalidaddr((uintptr)addr))
-		return;
-	runtime·racereleaseg(g, addr);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racereleaseg(G *gp, void *addr)
-{
-	if(g->raceignore || !isvalidaddr((uintptr)addr))
-		return;
-	runtime·racecall(__tsan_release, gp->racectx, addr);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racereleasemerge(void *addr)
-{
-	runtime·racereleasemergeg(g, addr);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racereleasemergeg(G *gp, void *addr)
-{
-	if(g->raceignore || !isvalidaddr((uintptr)addr))
-		return;
-	runtime·racecall(__tsan_release_merge, gp->racectx, addr);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·racefingo(void)
-{
-	runtime·racecall(__tsan_finalizer_goroutine, g->racectx);
-}
-
-// func RaceAcquire(addr unsafe.Pointer)
-#pragma textflag NOSPLIT
-void
-runtime·RaceAcquire(void *addr)
-{
-	runtime·raceacquire(addr);
-}
-
-// func RaceRelease(addr unsafe.Pointer)
-#pragma textflag NOSPLIT
-void
-runtime·RaceRelease(void *addr)
-{
-	runtime·racerelease(addr);
-}
-
-// func RaceReleaseMerge(addr unsafe.Pointer)
-#pragma textflag NOSPLIT
-void
-runtime·RaceReleaseMerge(void *addr)
-{
-	runtime·racereleasemerge(addr);
-}
-
-// func RaceDisable()
-#pragma textflag NOSPLIT
-void
-runtime·RaceDisable(void)
-{
-	if(g->raceignore++ == 0)
-		runtime·racecall(__tsan_go_ignore_sync_begin, g->racectx);
-}
-
-// func RaceEnable()
-#pragma textflag NOSPLIT
-void
-runtime·RaceEnable(void)
-{
-	if(--g->raceignore == 0)
-		runtime·racecall(__tsan_go_ignore_sync_end, g->racectx);
-}
diff --git a/src/runtime/race.go b/src/runtime/race.go
index bb0ee6d..649cd72 100644
--- a/src/runtime/race.go
+++ b/src/runtime/race.go
@@ -12,18 +12,6 @@
 	"unsafe"
 )
 
-func racefini()
-
-// RaceDisable disables handling of race events in the current goroutine.
-func RaceDisable()
-
-// RaceEnable re-enables handling of race events in the current goroutine.
-func RaceEnable()
-
-func RaceAcquire(addr unsafe.Pointer)
-func RaceRelease(addr unsafe.Pointer)
-func RaceReleaseMerge(addr unsafe.Pointer)
-
 func RaceRead(addr unsafe.Pointer)
 func RaceWrite(addr unsafe.Pointer)
 func RaceReadRange(addr unsafe.Pointer, len int)
@@ -67,32 +55,6 @@
 //go:noescape
 func racewritepc(addr unsafe.Pointer, callpc, pc uintptr)
 
-//go:noescape
-func racereadrangepc(addr unsafe.Pointer, len uintptr, callpc, pc uintptr)
-
-//go:noescape
-func racewriterangepc(addr unsafe.Pointer, len uintptr, callpc, pc uintptr)
-
-//go:noescape
-func raceacquire(addr unsafe.Pointer)
-
-//go:noescape
-func racerelease(addr unsafe.Pointer)
-
-//go:noescape
-func raceacquireg(gp *g, addr unsafe.Pointer)
-
-//go:noescape
-func racereleaseg(gp *g, addr unsafe.Pointer)
-
-func racefingo()
-
-//go:noescape
-func racemalloc(p unsafe.Pointer, size uintptr)
-
-//go:noescape
-func racereleasemerge(addr unsafe.Pointer)
-
 type symbolizeContext struct {
 	pc   uintptr
 	fn   *byte
@@ -118,8 +80,8 @@
 	}
 
 	ctx.fn = funcname(f)
-	var file string
-	ctx.line = uintptr(funcline(f, ctx.pc, &file))
+	file, line := funcline(f, ctx.pc)
+	ctx.line = uintptr(line)
 	ctx.file = &bytes(file)[0] // assume NUL-terminated
 	ctx.off = ctx.pc - f.entry
 	ctx.res = 1
diff --git a/src/runtime/race.h b/src/runtime/race.h
deleted file mode 100644
index fee31e0..0000000
--- a/src/runtime/race.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Definitions related to data race detection.
-
-#ifdef RACE
-enum { raceenabled = 1 };
-#else
-enum { raceenabled = 0 };
-#endif
-
-// Initialize race detection subsystem.
-uintptr	runtime·raceinit(void);
-// Finalize race detection subsystem, does not return.
-void	runtime·racefini(void);
-
-void	runtime·racemapshadow(void *addr, uintptr size);
-void	runtime·racemalloc(void *p, uintptr sz);
-uintptr	runtime·racegostart(void *pc);
-void	runtime·racegoend(void);
-void	runtime·racewritepc(void *addr, void *callpc, void *pc);
-void	runtime·racereadpc(void *addr, void *callpc, void *pc);
-void	runtime·racewriterangepc(void *addr, uintptr sz, void *callpc, void *pc);
-void	runtime·racereadrangepc(void *addr, uintptr sz, void *callpc, void *pc);
-void	runtime·racereadobjectpc(void *addr, Type *t, void *callpc, void *pc);
-void	runtime·racewriteobjectpc(void *addr, Type *t, void *callpc, void *pc);
-void	runtime·racefingo(void);
-void	runtime·raceacquire(void *addr);
-void	runtime·raceacquireg(G *gp, void *addr);
-void	runtime·racerelease(void *addr);
-void	runtime·racereleaseg(G *gp, void *addr);
-void	runtime·racereleasemerge(void *addr);
-void	runtime·racereleasemergeg(G *gp, void *addr);
diff --git a/src/runtime/race0.go b/src/runtime/race0.go
index 5d90cc8..dadb608 100644
--- a/src/runtime/race0.go
+++ b/src/runtime/race0.go
@@ -18,7 +18,7 @@
 
 func raceReadObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr)  { gothrow("race") }
 func raceWriteObjectPC(t *_type, addr unsafe.Pointer, callerpc, pc uintptr) { gothrow("race") }
-func raceinit()                                                             { gothrow("race") }
+func raceinit() uintptr                                                     { gothrow("race"); return 0 }
 func racefini()                                                             { gothrow("race") }
 func racemapshadow(addr unsafe.Pointer, size uintptr)                       { gothrow("race") }
 func racewritepc(addr unsafe.Pointer, callerpc, pc uintptr)                 { gothrow("race") }
diff --git a/src/runtime/race1.go b/src/runtime/race1.go
new file mode 100644
index 0000000..4c58042
--- /dev/null
+++ b/src/runtime/race1.go
@@ -0,0 +1,304 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Implementation of the race detector API.
+// +build race
+
+package runtime
+
+import "unsafe"
+
+// Race runtime functions called via runtime·racecall.
+//go:linkname __tsan_init __tsan_init
+var __tsan_init byte
+
+//go:linkname __tsan_fini __tsan_fini
+var __tsan_fini byte
+
+//go:linkname __tsan_map_shadow __tsan_map_shadow
+var __tsan_map_shadow byte
+
+//go:linkname __tsan_finalizer_goroutine __tsan_finalizer_goroutine
+var __tsan_finalizer_goroutine byte
+
+//go:linkname __tsan_go_start __tsan_go_start
+var __tsan_go_start byte
+
+//go:linkname __tsan_go_end __tsan_go_end
+var __tsan_go_end byte
+
+//go:linkname __tsan_malloc __tsan_malloc
+var __tsan_malloc byte
+
+//go:linkname __tsan_acquire __tsan_acquire
+var __tsan_acquire byte
+
+//go:linkname __tsan_release __tsan_release
+var __tsan_release byte
+
+//go:linkname __tsan_release_merge __tsan_release_merge
+var __tsan_release_merge byte
+
+//go:linkname __tsan_go_ignore_sync_begin __tsan_go_ignore_sync_begin
+var __tsan_go_ignore_sync_begin byte
+
+//go:linkname __tsan_go_ignore_sync_end __tsan_go_ignore_sync_end
+var __tsan_go_ignore_sync_end byte
+
+// Mimic what cmd/cgo would do.
+//go:cgo_import_static __tsan_init
+//go:cgo_import_static __tsan_fini
+//go:cgo_import_static __tsan_map_shadow
+//go:cgo_import_static __tsan_finalizer_goroutine
+//go:cgo_import_static __tsan_go_start
+//go:cgo_import_static __tsan_go_end
+//go:cgo_import_static __tsan_malloc
+//go:cgo_import_static __tsan_acquire
+//go:cgo_import_static __tsan_release
+//go:cgo_import_static __tsan_release_merge
+//go:cgo_import_static __tsan_go_ignore_sync_begin
+//go:cgo_import_static __tsan_go_ignore_sync_end
+
+// These are called from race_amd64.s.
+//go:cgo_import_static __tsan_read
+//go:cgo_import_static __tsan_read_pc
+//go:cgo_import_static __tsan_read_range
+//go:cgo_import_static __tsan_write
+//go:cgo_import_static __tsan_write_pc
+//go:cgo_import_static __tsan_write_range
+//go:cgo_import_static __tsan_func_enter
+//go:cgo_import_static __tsan_func_exit
+
+//go:cgo_import_static __tsan_go_atomic32_load
+//go:cgo_import_static __tsan_go_atomic64_load
+//go:cgo_import_static __tsan_go_atomic32_store
+//go:cgo_import_static __tsan_go_atomic64_store
+//go:cgo_import_static __tsan_go_atomic32_exchange
+//go:cgo_import_static __tsan_go_atomic64_exchange
+//go:cgo_import_static __tsan_go_atomic32_fetch_add
+//go:cgo_import_static __tsan_go_atomic64_fetch_add
+//go:cgo_import_static __tsan_go_atomic32_compare_exchange
+//go:cgo_import_static __tsan_go_atomic64_compare_exchange
+
+// start/end of heap for race_amd64.s
+var racearenastart uintptr
+var racearenaend uintptr
+
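+// The following are assembly routines defined in race_amd64.s; they forward
+// the corresponding calls into the C/tsan race runtime.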
+func racefuncenter(uintptr)
+func racefuncexit()
+func racereadrangepc1(uintptr, uintptr, uintptr)
+func racewriterangepc1(uintptr, uintptr, uintptr)
+func racesymbolizethunk(uintptr)
+
+// racecall allows calling an arbitrary function f from C race runtime
+// with up to 4 uintptr arguments.
+func racecall(*byte, uintptr, uintptr, uintptr, uintptr)
+
+// checks if the address has shadow (i.e. heap or data/bss)
+//go:nosplit
+func isvalidaddr(addr unsafe.Pointer) bool {
+	return racearenastart <= uintptr(addr) && uintptr(addr) < racearenaend ||
+		uintptr(unsafe.Pointer(&noptrdata)) <= uintptr(addr) && uintptr(addr) < uintptr(unsafe.Pointer(&enoptrbss))
+}
+
+//go:nosplit
+func raceinit() uintptr {
+	// cgo is required to initialize libc, which is used by race runtime
+	if !iscgo {
+		gothrow("raceinit: race build must use cgo")
+	}
+
+	var racectx uintptr
+	racecall(&__tsan_init, uintptr(unsafe.Pointer(&racectx)), funcPC(racesymbolizethunk), 0, 0)
+
+	// Round data segment to page boundaries, because it's used in mmap().
+	start := uintptr(unsafe.Pointer(&noptrdata)) &^ (_PageSize - 1)
+	size := round(uintptr(unsafe.Pointer(&enoptrbss))-start, _PageSize)
+	racecall(&__tsan_map_shadow, start, size, 0, 0)
+
+	return racectx
+}
+
+//go:nosplit
+func racefini() {
+	racecall(&__tsan_fini, 0, 0, 0, 0)
+}
+
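+// racemapshadow tells the race detector to map shadow memory for a newly
+// allocated heap region and records the arena bounds used by isvalidaddr.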
+//go:nosplit
+func racemapshadow(addr unsafe.Pointer, size uintptr) {
+	if racearenastart == 0 {
+		racearenastart = uintptr(addr)
+	}
+	if racearenaend < uintptr(addr)+size {
+		racearenaend = uintptr(addr) + size
+	}
+	racecall(&__tsan_map_shadow, uintptr(addr), size, 0, 0)
+}
+
+//go:nosplit
+func racemalloc(p unsafe.Pointer, sz uintptr) {
+	racecall(&__tsan_malloc, uintptr(p), sz, 0, 0)
+}
+
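+// racegostart registers a newly created goroutine with the race detector
+// and returns the race context for the new goroutine.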
+//go:nosplit
+func racegostart(pc uintptr) uintptr {
+	_g_ := getg()
+	var spawng *g
+	if _g_.m.curg != nil {
+		spawng = _g_.m.curg
+	} else {
+		spawng = _g_
+	}
+
+	var racectx uintptr
+	racecall(&__tsan_go_start, spawng.racectx, uintptr(unsafe.Pointer(&racectx)), pc, 0)
+	return racectx
+}
+
+//go:nosplit
+func racegoend() {
+	racecall(&__tsan_go_end, getg().racectx, 0, 0, 0)
+}
+
+//go:nosplit
+func racewriterangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
+	_g_ := getg()
+	if _g_ != _g_.m.curg {
+		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
+		// Not interesting.
+		return
+	}
+	if callpc != 0 {
+		racefuncenter(callpc)
+	}
+	racewriterangepc1(uintptr(addr), sz, pc)
+	if callpc != 0 {
+		racefuncexit()
+	}
+}
+
+//go:nosplit
+func racereadrangepc(addr unsafe.Pointer, sz, callpc, pc uintptr) {
+	_g_ := getg()
+	if _g_ != _g_.m.curg {
+		// The call is coming from manual instrumentation of Go code running on g0/gsignal.
+		// Not interesting.
+		return
+	}
+	if callpc != 0 {
+		racefuncenter(callpc)
+	}
+	racereadrangepc1(uintptr(addr), sz, pc)
+	if callpc != 0 {
+		racefuncexit()
+	}
+}
+
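+// racewriteobjectpc and racereadobjectpc report an access to a whole object:
+// arrays and structs are reported as range accesses, everything else as a
+// single access at addr.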
+//go:nosplit
+func racewriteobjectpc(addr unsafe.Pointer, t *_type, callpc, pc uintptr) {
+	kind := t.kind & _KindMask
+	if kind == _KindArray || kind == _KindStruct {
+		racewriterangepc(addr, t.size, callpc, pc)
+	} else {
+		racewritepc(addr, callpc, pc)
+	}
+}
+
+//go:nosplit
+func racereadobjectpc(addr unsafe.Pointer, t *_type, callpc, pc uintptr) {
+	kind := t.kind & _KindMask
+	if kind == _KindArray || kind == _KindStruct {
+		racereadrangepc(addr, t.size, callpc, pc)
+	} else {
+		racereadpc(addr, callpc, pc)
+	}
+}
+
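+// raceacquire, racerelease and racereleasemerge report synchronization on
+// addr to the race detector, establishing happens-before edges between
+// goroutines.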
+//go:nosplit
+func raceacquire(addr unsafe.Pointer) {
+	raceacquireg(getg(), addr)
+}
+
+//go:nosplit
+func raceacquireg(gp *g, addr unsafe.Pointer) {
+	if getg().raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racecall(&__tsan_acquire, gp.racectx, uintptr(addr), 0, 0)
+}
+
+//go:nosplit
+func racerelease(addr unsafe.Pointer) {
+	_g_ := getg()
+	if _g_.raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racereleaseg(_g_, addr)
+}
+
+//go:nosplit
+func racereleaseg(gp *g, addr unsafe.Pointer) {
+	if getg().raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racecall(&__tsan_release, gp.racectx, uintptr(addr), 0, 0)
+}
+
+//go:nosplit
+func racereleasemerge(addr unsafe.Pointer) {
+	racereleasemergeg(getg(), addr)
+}
+
+//go:nosplit
+func racereleasemergeg(gp *g, addr unsafe.Pointer) {
+	if getg().raceignore != 0 || !isvalidaddr(addr) {
+		return
+	}
+	racecall(&__tsan_release_merge, gp.racectx, uintptr(addr), 0, 0)
+}
+
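+// racefingo marks the current goroutine as the finalizer goroutine.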
+//go:nosplit
+func racefingo() {
+	racecall(&__tsan_finalizer_goroutine, getg().racectx, 0, 0, 0)
+}
+
+//go:nosplit
+
+func RaceAcquire(addr unsafe.Pointer) {
+	raceacquire(addr)
+}
+
+//go:nosplit
+
+func RaceRelease(addr unsafe.Pointer) {
+	racerelease(addr)
+}
+
+//go:nosplit
+
+func RaceReleaseMerge(addr unsafe.Pointer) {
+	racereleasemerge(addr)
+}
+
+//go:nosplit
+
+// RaceDisable disables handling of race events in the current goroutine.
+func RaceDisable() {
+	_g_ := getg()
+	if _g_.raceignore == 0 {
+		racecall(&__tsan_go_ignore_sync_begin, _g_.racectx, 0, 0, 0)
+	}
+	_g_.raceignore++
+}
+
+//go:nosplit
+
+// RaceEnable re-enables handling of race events in the current goroutine.
+func RaceEnable() {
+	_g_ := getg()
+	_g_.raceignore--
+	if _g_.raceignore == 0 {
+		racecall(&__tsan_go_ignore_sync_end, _g_.racectx, 0, 0, 0)
+	}
+}
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
index 15b18ff..a7f4487 100644
--- a/src/runtime/race_amd64.s
+++ b/src/runtime/race_amd64.s
@@ -4,7 +4,8 @@
 
 // +build race
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
 
diff --git a/src/runtime/rdebug.go b/src/runtime/rdebug.go
index e5e6911..f2766d7 100644
--- a/src/runtime/rdebug.go
+++ b/src/runtime/rdebug.go
@@ -10,15 +10,6 @@
 	return out
 }
 
-func setGCPercent(in int32) (out int32) {
-	mp := acquirem()
-	mp.scalararg[0] = uintptr(int(in))
-	onM(setgcpercent_m)
-	out = int32(int(mp.scalararg[0]))
-	releasem(mp)
-	return out
-}
-
 func setPanicOnFault(new bool) (old bool) {
 	mp := acquirem()
 	old = mp.curg.paniconfault
@@ -26,12 +17,3 @@
 	releasem(mp)
 	return old
 }
-
-func setMaxThreads(in int) (out int) {
-	mp := acquirem()
-	mp.scalararg[0] = uintptr(in)
-	onM(setmaxthreads_m)
-	out = int(mp.scalararg[0])
-	releasem(mp)
-	return out
-}
diff --git a/src/runtime/rt0_linux_386.s b/src/runtime/rt0_linux_386.s
index 352e594..47fd908 100644
--- a/src/runtime/rt0_linux_386.s
+++ b/src/runtime/rt0_linux_386.s
@@ -9,7 +9,6 @@
 	LEAL	12(SP), BX
 	MOVL	AX, 0(SP)
 	MOVL	BX, 4(SP)
-	CALL	runtime·linux_setup_vdso(SB)
 	CALL	main(SB)
 	INT	$3
 
diff --git a/src/runtime/rt0_windows_amd64.s b/src/runtime/rt0_windows_amd64.s
index 197f52e..df956ba 100644
--- a/src/runtime/rt0_windows_amd64.s
+++ b/src/runtime/rt0_windows_amd64.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 TEXT _rt0_amd64_windows(SB),NOSPLIT,$-8
diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c
deleted file mode 100644
index a684142..0000000
--- a/src/runtime/runtime.c
+++ /dev/null
@@ -1,411 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "stack.h"
-#include "arch_GOARCH.h"
-#include "textflag.h"
-#include "malloc.h"
-
-// Keep a cached value to make gotraceback fast,
-// since we call it on every call to gentraceback.
-// The cached value is a uint32 in which the low bit
-// is the "crash" setting and the top 31 bits are the
-// gotraceback value.
-static uint32 traceback_cache = 2<<1;
-
-// The GOTRACEBACK environment variable controls the
-// behavior of a Go program that is crashing and exiting.
-//	GOTRACEBACK=0   suppress all tracebacks
-//	GOTRACEBACK=1   default behavior - show tracebacks but exclude runtime frames
-//	GOTRACEBACK=2   show tracebacks including runtime frames
-//	GOTRACEBACK=crash   show tracebacks including runtime frames, then crash (core dump etc)
-#pragma textflag NOSPLIT
-int32
-runtime·gotraceback(bool *crash)
-{
-	if(crash != nil)
-		*crash = false;
-	if(g->m->traceback != 0)
-		return g->m->traceback;
-	if(crash != nil)
-		*crash = traceback_cache&1;
-	return traceback_cache>>1;
-}
-
-int32
-runtime·mcmp(byte *s1, byte *s2, uintptr n)
-{
-	uintptr i;
-	byte c1, c2;
-
-	for(i=0; i<n; i++) {
-		c1 = s1[i];
-		c2 = s2[i];
-		if(c1 < c2)
-			return -1;
-		if(c1 > c2)
-			return +1;
-	}
-	return 0;
-}
-
-
-byte*
-runtime·mchr(byte *p, byte c, byte *ep)
-{
-	for(; p < ep; p++)
-		if(*p == c)
-			return p;
-	return nil;
-}
-
-static int32	argc;
-
-#pragma dataflag NOPTR /* argv not a heap pointer */
-static uint8**	argv;
-
-extern Slice runtime·argslice;
-extern Slice runtime·envs;
-
-void (*runtime·sysargs)(int32, uint8**);
-
-void
-runtime·args(int32 c, uint8 **v)
-{
-	argc = c;
-	argv = v;
-	if(runtime·sysargs != nil)
-		runtime·sysargs(c, v);
-}
-
-int32 runtime·isplan9;
-int32 runtime·issolaris;
-int32 runtime·iswindows;
-
-// Information about what cpu features are available.
-// Set on startup in asm_{x86/amd64}.s.
-uint32 runtime·cpuid_ecx;
-uint32 runtime·cpuid_edx;
-
-void
-runtime·goargs(void)
-{
-	String *s;
-	int32 i;
-
-	// for windows implementation see "os" package
-	if(Windows)
-		return;
-
-	runtime·argslice = runtime·makeStringSlice(argc);
-	s = (String*)runtime·argslice.array;
-	for(i=0; i<argc; i++)
-		s[i] = runtime·gostringnocopy(argv[i]);
-}
-
-void
-runtime·goenvs_unix(void)
-{
-	String *s;
-	int32 i, n;
-
-	for(n=0; argv[argc+1+n] != 0; n++)
-		;
-
-	runtime·envs = runtime·makeStringSlice(n);
-	s = (String*)runtime·envs.array;
-	for(i=0; i<n; i++)
-		s[i] = runtime·gostringnocopy(argv[argc+1+i]);
-}
-
-#pragma textflag NOSPLIT
-Slice
-runtime·environ()
-{
-	return runtime·envs;
-}
-
-int32
-runtime·atoi(byte *p)
-{
-	int32 n;
-
-	n = 0;
-	while('0' <= *p && *p <= '9')
-		n = n*10 + *p++ - '0';
-	return n;
-}
-
-static void
-TestAtomic64(void)
-{
-	uint64 z64, x64;
-
-	z64 = 42;
-	x64 = 0;
-	PREFETCH(&z64);
-	if(runtime·cas64(&z64, x64, 1))
-		runtime·throw("cas64 failed");
-	if(x64 != 0)
-		runtime·throw("cas64 failed");
-	x64 = 42;
-	if(!runtime·cas64(&z64, x64, 1))
-		runtime·throw("cas64 failed");
-	if(x64 != 42 || z64 != 1)
-		runtime·throw("cas64 failed");
-	if(runtime·atomicload64(&z64) != 1)
-		runtime·throw("load64 failed");
-	runtime·atomicstore64(&z64, (1ull<<40)+1);
-	if(runtime·atomicload64(&z64) != (1ull<<40)+1)
-		runtime·throw("store64 failed");
-	if(runtime·xadd64(&z64, (1ull<<40)+1) != (2ull<<40)+2)
-		runtime·throw("xadd64 failed");
-	if(runtime·atomicload64(&z64) != (2ull<<40)+2)
-		runtime·throw("xadd64 failed");
-	if(runtime·xchg64(&z64, (3ull<<40)+3) != (2ull<<40)+2)
-		runtime·throw("xchg64 failed");
-	if(runtime·atomicload64(&z64) != (3ull<<40)+3)
-		runtime·throw("xchg64 failed");
-}
-
-void
-runtime·check(void)
-{
-	int8 a;
-	uint8 b;
-	int16 c;
-	uint16 d;
-	int32 e;
-	uint32 f;
-	int64 g;
-	uint64 h;
-	float32 i, i1;
-	float64 j, j1;
-	byte *k, *k1;
-	uint16* l;
-	byte m[4];
-	struct x1 {
-		byte x;
-	};
-	struct y1 {
-		struct x1 x1;
-		byte y;
-	};
-
-	if(sizeof(a) != 1) runtime·throw("bad a");
-	if(sizeof(b) != 1) runtime·throw("bad b");
-	if(sizeof(c) != 2) runtime·throw("bad c");
-	if(sizeof(d) != 2) runtime·throw("bad d");
-	if(sizeof(e) != 4) runtime·throw("bad e");
-	if(sizeof(f) != 4) runtime·throw("bad f");
-	if(sizeof(g) != 8) runtime·throw("bad g");
-	if(sizeof(h) != 8) runtime·throw("bad h");
-	if(sizeof(i) != 4) runtime·throw("bad i");
-	if(sizeof(j) != 8) runtime·throw("bad j");
-	if(sizeof(k) != sizeof(uintptr)) runtime·throw("bad k");
-	if(sizeof(l) != sizeof(uintptr)) runtime·throw("bad l");
-	if(sizeof(struct x1) != 1) runtime·throw("bad sizeof x1");
-	if(offsetof(struct y1, y) != 1) runtime·throw("bad offsetof y1.y");
-	if(sizeof(struct y1) != 2) runtime·throw("bad sizeof y1");
-
-	if(runtime·timediv(12345LL*1000000000+54321, 1000000000, &e) != 12345 || e != 54321)
-		runtime·throw("bad timediv");
-
-	uint32 z;
-	z = 1;
-	if(!runtime·cas(&z, 1, 2))
-		runtime·throw("cas1");
-	if(z != 2)
-		runtime·throw("cas2");
-
-	z = 4;
-	if(runtime·cas(&z, 5, 6))
-		runtime·throw("cas3");
-	if(z != 4)
-		runtime·throw("cas4");
-
-	z = 0xffffffff;
-	if(!runtime·cas(&z, 0xffffffff, 0xfffffffe))
-		runtime·throw("cas5");
-	if(z != 0xfffffffe)
-		runtime·throw("cas6");
-
-	k = (byte*)0xfedcb123;
-	if(sizeof(void*) == 8)
-		k = (byte*)((uintptr)k<<10);
-	if(runtime·casp((void**)&k, nil, nil))
-		runtime·throw("casp1");
-	k1 = k+1;
-	if(!runtime·casp((void**)&k, k, k1))
-		runtime·throw("casp2");
-	if(k != k1)
-		runtime·throw("casp3");
-
-	m[0] = m[1] = m[2] = m[3] = 0x1;
-	runtime·atomicor8(&m[1], 0xf0);
-	if (m[0] != 0x1 || m[1] != 0xf1 || m[2] != 0x1 || m[3] != 0x1)
-		runtime·throw("atomicor8");
-
-	*(uint64*)&j = ~0ULL;
-	if(j == j)
-		runtime·throw("float64nan");
-	if(!(j != j))
-		runtime·throw("float64nan1");
-
-	*(uint64*)&j1 = ~1ULL;
-	if(j == j1)
-		runtime·throw("float64nan2");
-	if(!(j != j1))
-		runtime·throw("float64nan3");
-
-	*(uint32*)&i = ~0UL;
-	if(i == i)
-		runtime·throw("float32nan");
-	if(!(i != i))
-		runtime·throw("float32nan1");
-
-	*(uint32*)&i1 = ~1UL;
-	if(i == i1)
-		runtime·throw("float32nan2");
-	if(!(i != i1))
-		runtime·throw("float32nan3");
-
-	TestAtomic64();
-
-	if(FixedStack != runtime·round2(FixedStack))
-		runtime·throw("FixedStack is not power-of-2");
-}
-
-#pragma dataflag NOPTR
-DebugVars	runtime·debug;
-
-typedef struct DbgVar DbgVar;
-struct DbgVar
-{
-	int8*	name;
-	int32*	value;
-};
-
-// Do we report invalid pointers found during stack or heap scans?
-int32 runtime·invalidptr = 1;
-
-#pragma dataflag NOPTR /* dbgvar has no heap pointers */
-static DbgVar dbgvar[] = {
-	{"allocfreetrace", &runtime·debug.allocfreetrace},
-	{"invalidptr", &runtime·invalidptr},
-	{"efence", &runtime·debug.efence},
-	{"gctrace", &runtime·debug.gctrace},
-	{"gcdead", &runtime·debug.gcdead},
-	{"scheddetail", &runtime·debug.scheddetail},
-	{"schedtrace", &runtime·debug.schedtrace},
-	{"scavenge", &runtime·debug.scavenge},
-};
-
-void
-runtime·parsedebugvars(void)
-{
-	byte *p;
-	intgo i, n;
-
-	p = runtime·getenv("GODEBUG");
-	if(p != nil){
-		for(;;) {
-			for(i=0; i<nelem(dbgvar); i++) {
-				n = runtime·findnull((byte*)dbgvar[i].name);
-				if(runtime·mcmp(p, (byte*)dbgvar[i].name, n) == 0 && p[n] == '=')
-					*dbgvar[i].value = runtime·atoi(p+n+1);
-			}
-			p = runtime·strstr(p, (byte*)",");
-			if(p == nil)
-				break;
-			p++;
-		}
-	}
-
-	p = runtime·getenv("GOTRACEBACK");
-	if(p == nil)
-		p = (byte*)"";
-	if(p[0] == '\0')
-		traceback_cache = 1<<1;
-	else if(runtime·strcmp(p, (byte*)"crash") == 0)
-		traceback_cache = (2<<1) | 1;
-	else
-		traceback_cache = runtime·atoi(p)<<1;	
-}
-
-// Poor mans 64-bit division.
-// This is a very special function, do not use it if you are not sure what you are doing.
-// int64 division is lowered into _divv() call on 386, which does not fit into nosplit functions.
-// Handles overflow in a time-specific manner.
-#pragma textflag NOSPLIT
-int32
-runtime·timediv(int64 v, int32 div, int32 *rem)
-{
-	int32 res, bit;
-
-	res = 0;
-	for(bit = 30; bit >= 0; bit--) {
-		if(v >= ((int64)div<<bit)) {
-			v = v - ((int64)div<<bit);
-			res += 1<<bit;
-		}
-	}
-	if(v >= (int64)div) {
-		if(rem != nil)
-			*rem = 0;
-		return 0x7fffffff;
-	}
-	if(rem != nil)
-		*rem = v;
-	return res;
-}
-
-// Helpers for Go. Must be NOSPLIT, must only call NOSPLIT functions, and must not block.
-
-#pragma textflag NOSPLIT
-G*
-runtime·getg(void)
-{
-	return g;
-}
-
-#pragma textflag NOSPLIT
-M*
-runtime·acquirem(void)
-{
-	g->m->locks++;
-	return g->m;
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·releasem(M *mp)
-{
-	mp->locks--;
-	if(mp->locks == 0 && g->preempt) {
-		// restore the preemption request in case we've cleared it in newstack
-		g->stackguard0 = StackPreempt;
-	}
-}
-
-#pragma textflag NOSPLIT
-MCache*
-runtime·gomcache(void)
-{
-	return g->m->mcache;
-}
-
-#pragma textflag NOSPLIT
-Slice
-reflect·typelinks(void)
-{
-	extern Type *runtime·typelink[], *runtime·etypelink[];
-	Slice ret;
-
-	ret.array = (byte*)runtime·typelink;
-	ret.len = runtime·etypelink - runtime·typelink;
-	ret.cap = ret.len;
-	return ret;
-}
diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h
deleted file mode 100644
index 330ed42..0000000
--- a/src/runtime/runtime.h
+++ /dev/null
@@ -1,1151 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-/*
- * basic types
- */
-typedef	signed char		int8;
-typedef	unsigned char		uint8;
-typedef	signed short		int16;
-typedef	unsigned short		uint16;
-typedef	signed int		int32;
-typedef	unsigned int		uint32;
-typedef	signed long long int	int64;
-typedef	unsigned long long int	uint64;
-typedef	float			float32;
-typedef	double			float64;
-
-#ifdef _64BIT
-typedef	uint64		uintptr;
-typedef	int64		intptr;
-typedef	int64		intgo; // Go's int
-typedef	uint64		uintgo; // Go's uint
-#else
-typedef	uint32		uintptr;
-typedef	int32		intptr;
-typedef	int32		intgo; // Go's int
-typedef	uint32		uintgo; // Go's uint
-#endif
-
-#ifdef _64BITREG
-typedef	uint64		uintreg;
-#else
-typedef	uint32		uintreg;
-#endif
-
-/*
- * get rid of C types
- * the / / / forces a syntax error immediately,
- * which will show "last name: XXunsigned".
- */
-#define	unsigned		XXunsigned / / /
-#define	signed			XXsigned / / /
-#define	char			XXchar / / /
-#define	short			XXshort / / /
-#define	int			XXint / / /
-#define	long			XXlong / / /
-#define	float			XXfloat / / /
-#define	double			XXdouble / / /
-
-/*
- * defined types
- */
-typedef	uint8			bool;
-typedef	uint8			byte;
-typedef	struct	Func		Func;
-typedef	struct	G		G;
-typedef	struct	Gobuf		Gobuf;
-typedef	struct	SudoG		SudoG;
-typedef	struct	Mutex		Mutex;
-typedef	struct	M		M;
-typedef	struct	P		P;
-typedef	struct	SchedT	SchedT;
-typedef	struct	Note		Note;
-typedef	struct	Slice		Slice;
-typedef	struct	String		String;
-typedef	struct	FuncVal		FuncVal;
-typedef	struct	SigTab		SigTab;
-typedef	struct	MCache		MCache;
-typedef	struct	FixAlloc	FixAlloc;
-typedef	struct	Iface		Iface;
-typedef	struct	Itab		Itab;
-typedef	struct	InterfaceType	InterfaceType;
-typedef	struct	Eface		Eface;
-typedef	struct	Type		Type;
-typedef	struct	PtrType		PtrType;
-typedef	struct	ChanType	ChanType;
-typedef	struct	MapType		MapType;
-typedef	struct	Defer		Defer;
-typedef	struct	Panic		Panic;
-typedef	struct	Hmap		Hmap;
-typedef	struct	Hiter		Hiter;
-typedef	struct	Hchan		Hchan;
-typedef	struct	Complex64	Complex64;
-typedef	struct	Complex128	Complex128;
-typedef	struct	LibCall		LibCall;
-typedef	struct	WinCallbackContext	WinCallbackContext;
-typedef	struct	GCStats		GCStats;
-typedef	struct	LFNode		LFNode;
-typedef	struct	ParFor		ParFor;
-typedef	struct	ParForThread	ParForThread;
-typedef	struct	CgoMal		CgoMal;
-typedef	struct	PollDesc	PollDesc;
-typedef	struct	DebugVars	DebugVars;
-typedef	struct	ForceGCState	ForceGCState;
-typedef	struct	Stack		Stack;
-typedef struct  Workbuf         Workbuf;
-
-/*
- * Per-CPU declaration.
- *
- * "extern register" is a special storage class implemented by 6c, 8c, etc.
- * On the ARM, it is an actual register; elsewhere it is a slot in thread-
- * local storage indexed by a pseudo-register TLS. See zasmhdr in
- * src/cmd/dist/buildruntime.c for details, and be aware that the linker may
- * make further OS-specific changes to the compiler's output. For example,
- * 6l/linux rewrites 0(TLS) as -8(FS).
- *
- * Every C file linked into a Go program must include runtime.h so that the
- * C compiler (6c, 8c, etc.) knows to avoid other uses of these dedicated
- * registers. The Go compiler (6g, 8g, etc.) knows to avoid them.
- */
-extern	register	G*	g;
-
-/*
- * defined constants
- */
-enum
-{
-	// G status
-	//
-	// If you add to this list, add to the list
-	// of "okay during garbage collection" status
-	// in mgc0.c too.
-	Gidle,                                 // 0
-	Grunnable,                             // 1 runnable and on a run queue
-	Grunning,                              // 2
-	Gsyscall,                              // 3
-	Gwaiting,                              // 4
-	Gmoribund_unused,                      // 5 currently unused, but hardcoded in gdb scripts
-	Gdead,                                 // 6
-	Genqueue,                              // 7 Only the Gscanenqueue is used.
-	Gcopystack,                            // 8 in this state when newstack is moving the stack
-	// the following encode that the GC is scanning the stack and what to do when it is done 
-	Gscan = 0x1000,                        // atomicstatus&~Gscan = the non-scan state,
-	// Gscanidle =     Gscan + Gidle,      // Not used. Gidle only used with newly malloced gs
-	Gscanrunnable = Gscan + Grunnable,     //  0x1001 When scanning complets make Grunnable (it is already on run queue)
-	Gscanrunning =  Gscan + Grunning,      //  0x1002 Used to tell preemption newstack routine to scan preempted stack.
-	Gscansyscall =  Gscan + Gsyscall,      //  0x1003 When scanning completes make is Gsyscall
-	Gscanwaiting =  Gscan + Gwaiting,      //  0x1004 When scanning completes make it Gwaiting
-	// Gscanmoribund_unused,               //  not possible
-	// Gscandead,                          //  not possible
-	Gscanenqueue = Gscan + Genqueue,       //  When scanning completes make it Grunnable and put on runqueue
-};
-enum
-{
-	// P status
-	Pidle,
-	Prunning,
-	Psyscall,
-	Pgcstop,
-	Pdead,
-};
-enum
-{
-	true	= 1,
-	false	= 0,
-};
-enum
-{
-	PtrSize = sizeof(void*),
-};
-/*
- * structures
- */
-struct	Mutex
-{
-	// Futex-based impl treats it as uint32 key,
-	// while sema-based impl as M* waitm.
-	// Used to be a union, but unions break precise GC.
-	uintptr	key;
-};
-struct	Note
-{
-	// Futex-based impl treats it as uint32 key,
-	// while sema-based impl as M* waitm.
-	// Used to be a union, but unions break precise GC.
-	uintptr	key;
-};
-struct String
-{
-	byte*	str;
-	intgo	len;
-};
-struct FuncVal
-{
-	void	(*fn)(void);
-	// variable-size, fn-specific data here
-};
-struct Iface
-{
-	Itab*	tab;
-	void*	data;
-};
-struct Eface
-{
-	Type*	type;
-	void*	data;
-};
-struct Complex64
-{
-	float32	real;
-	float32	imag;
-};
-struct Complex128
-{
-	float64	real;
-	float64	imag;
-};
-
-struct	Slice
-{				// must not move anything
-	byte*	array;		// actual data
-	uintgo	len;		// number of elements
-	uintgo	cap;		// allocated number of elements
-};
-struct	Gobuf
-{
-	// The offsets of sp, pc, and g are known to (hard-coded in) libmach.
-	uintptr	sp;
-	uintptr	pc;
-	G*	g;
-	void*	ctxt; // this has to be a pointer so that GC scans it
-	uintreg	ret;
-	uintptr	lr;
-};
-// Known to compiler.
-// Changes here must also be made in src/cmd/gc/select.c's selecttype.
-struct	SudoG
-{
-	G*	g;
-	uint32*	selectdone;
-	SudoG*	next;
-	SudoG*	prev;
-	void*	elem;		// data element
-	int64	releasetime;
-	int32	nrelease;	// -1 for acquire
-	SudoG*	waitlink;	// G.waiting list
-};
-struct	GCStats
-{
-	// the struct must consist of only uint64's,
-	// because it is casted to uint64[].
-	uint64	nhandoff;
-	uint64	nhandoffcnt;
-	uint64	nprocyield;
-	uint64	nosyield;
-	uint64	nsleep;
-};
-
-struct	LibCall
-{
-	uintptr	fn;
-	uintptr	n;	// number of parameters
-	uintptr	args;	// parameters
-	uintptr	r1;	// return values
-	uintptr	r2;
-	uintptr	err;	// error number
-};
-
-// describes how to handle callback
-struct	WinCallbackContext
-{
-	void*	gobody;		// Go function to call
-	uintptr	argsize;	// callback arguments size (in bytes)
-	uintptr	restorestack;	// adjust stack on return by (in bytes) (386 only)
-	bool	cleanstack;
-};
-
-// Stack describes a Go execution stack.
-// The bounds of the stack are exactly [lo, hi),
-// with no implicit data structures on either side.
-struct	Stack
-{
-	uintptr	lo;
-	uintptr	hi;
-};
-
-struct	G
-{
-	// Stack parameters.
-	// stack describes the actual stack memory: [stack.lo, stack.hi).
-	// stackguard0 is the stack pointer compared in the Go stack growth prologue.
-	// It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption.
-	// stackguard1 is the stack pointer compared in the C stack growth prologue.
-	// It is stack.lo+StackGuard on g0 and gsignal stacks.
-	// It is ~0 on other goroutine stacks, to trigger a call to morestackc (and crash).
-	Stack	stack;	// offset known to runtime/cgo
-	uintptr	stackguard0;	// offset known to liblink
-	uintptr	stackguard1;	// offset known to liblink
-
-	Panic*	panic;	// innermost panic - offset known to liblink
-	Defer*	defer;	// innermost defer
-	Gobuf	sched;
-	uintptr	syscallsp;	// if status==Gsyscall, syscallsp = sched.sp to use during gc
-	uintptr	syscallpc;	// if status==Gsyscall, syscallpc = sched.pc to use during gc
-	void*	param;		// passed parameter on wakeup
-	uint32	atomicstatus;
-	int64	goid;
-	int64	waitsince;	// approx time when the G become blocked
-	String	waitreason;	// if status==Gwaiting
-	G*	schedlink;
-	bool	issystem;	// do not output in stack dump, ignore in deadlock detector
-	bool	preempt;	// preemption signal, duplicates stackguard0 = StackPreempt
-	bool	paniconfault;	// panic (instead of crash) on unexpected fault address
-	bool	preemptscan;    // preempted g does scan for GC
-	bool	gcworkdone;     // debug: cleared at begining of gc work phase cycle, set by gcphasework, tested at end of cycle
-	bool	throwsplit;     // must not split stack
-	int8	raceignore;	// ignore race detection events
-	M*	m;		// for debuggers, but offset not hard-coded
-	M*	lockedm;
-	int32	sig;
-	Slice	writebuf;
-	uintptr	sigcode0;
-	uintptr	sigcode1;
-	uintptr	sigpc;
-	uintptr	gopc;		// pc of go statement that created this goroutine
-	uintptr	racectx;
-	SudoG*	waiting;	// sudog structures this G is waiting on (that have a valid elem ptr)
-	uintptr	end[];
-};
-
-struct	M
-{
-	G*	g0;		// goroutine with scheduling stack
-	Gobuf	morebuf;	// gobuf arg to morestack
-
-	// Fields not known to debuggers.
-	uint64	procid;		// for debuggers, but offset not hard-coded
-	G*	gsignal;	// signal-handling G
-	uintptr	tls[4];		// thread-local storage (for x86 extern register)
-	void	(*mstartfn)(void);
-	G*	curg;		// current running goroutine
-	G*	caughtsig;	// goroutine running during fatal signal
-	P*	p;		// attached P for executing Go code (nil if not executing Go code)
-	P*	nextp;
-	int32	id;
-	int32	mallocing;
-	int32	throwing;
-	int32	gcing;
-	int32	locks;
-	int32	softfloat;
-	int32	dying;
-	int32	profilehz;
-	int32	helpgc;
-	bool	spinning;	// M is out of work and is actively looking for work
-	bool	blocked;	// M is blocked on a Note
-	bool    inwb;           // M is executing a write barrier
-	int8	printlock;
-	uint32	fastrand;
-	uint64	ncgocall;	// number of cgo calls in total
-	int32	ncgo;		// number of cgo calls currently in progress
-	CgoMal*	cgomal;
-	Note	park;
-	M*	alllink;	// on allm
-	M*	schedlink;
-	uint32	machport;	// Return address for Mach IPC (OS X)
-	MCache*	mcache;
-	G*	lockedg;
-	uintptr	createstack[32];// Stack that created this thread.
-	uint32	freglo[16];	// D[i] lsb and F[i]
-	uint32	freghi[16];	// D[i] msb and F[i+16]
-	uint32	fflag;		// floating point compare flags
-	uint32	locked;		// tracking for LockOSThread
-	M*	nextwaitm;	// next M waiting for lock
-	uintptr	waitsema;	// semaphore for parking on locks
-	uint32	waitsemacount;
-	uint32	waitsemalock;
-	GCStats	gcstats;
-	bool	needextram;
-	uint8	traceback;
-	bool	(*waitunlockf)(G*, void*);
-	void*	waitlock;
-	uintptr scalararg[4];	// scalar argument/return for mcall
-	void*   ptrarg[4];	// pointer argument/return for mcall
-#ifdef GOOS_windows
-	uintptr	thread;		// thread handle
-	// these are here because they are too large to be on the stack
-	// of low-level NOSPLIT functions.
-	LibCall	libcall;
-	uintptr	libcallpc;	// for cpu profiler
-	uintptr	libcallsp;
-	G*	libcallg;
-#endif
-#ifdef GOOS_solaris
-	int32*	perrno; 	// pointer to TLS errno
-	// these are here because they are too large to be on the stack
-	// of low-level NOSPLIT functions.
-	LibCall	libcall;
-	struct MTs {
-		int64	tv_sec;
-		int64	tv_nsec;
-	} ts;
-	struct MScratch {
-		uintptr v[6];
-	} scratch;
-#endif
-#ifdef GOOS_plan9
-	int8*	notesig;
-	byte*	errstr;
-#endif
-	uintptr	end[];
-};
-
-struct P
-{
-	Mutex	lock;
-
-	int32	id;
-	uint32	status;		// one of Pidle/Prunning/...
-	P*	link;
-	uint32	schedtick;	// incremented on every scheduler call
-	uint32	syscalltick;	// incremented on every system call
-	M*	m;		// back-link to associated M (nil if idle)
-	MCache*	mcache;
-	Defer*	deferpool[5];	// pool of available Defer structs of different sizes (see panic.c)
-
-	// Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
-	uint64	goidcache;
-	uint64	goidcacheend;
-
-	// Queue of runnable goroutines.
-	uint32	runqhead;
-	uint32	runqtail;
-	G*	runq[256];
-
-	// Available G's (status == Gdead)
-	G*	gfree;
-	int32	gfreecnt;
-
-	byte	pad[64];
-};
-
-enum {
-	// The max value of GOMAXPROCS.
-	// There are no fundamental restrictions on the value.
-	MaxGomaxprocs = 1<<8,
-};
-
-struct	SchedT
-{
-	Mutex	lock;
-
-	uint64	goidgen;
-
-	M*	midle;	 // idle m's waiting for work
-	int32	nmidle;	 // number of idle m's waiting for work
-	int32	nmidlelocked; // number of locked m's waiting for work
-	int32	mcount;	 // number of m's that have been created
-	int32	maxmcount;	// maximum number of m's allowed (or die)
-
-	P*	pidle;  // idle P's
-	uint32	npidle;
-	uint32	nmspinning;
-
-	// Global runnable queue.
-	G*	runqhead;
-	G*	runqtail;
-	int32	runqsize;
-
-	// Global cache of dead G's.
-	Mutex	gflock;
-	G*	gfree;
-	int32	ngfree;
-
-	uint32	gcwaiting;	// gc is waiting to run
-	int32	stopwait;
-	Note	stopnote;
-	uint32	sysmonwait;
-	Note	sysmonnote;
-	uint64	lastpoll;
-
-	int32	profilehz;	// cpu profiling rate
-};
-
-// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
-// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
-// External locks are not recursive; a second lock is silently ignored.
-// The upper bits of m->lockedcount record the nesting depth of calls to lockOSThread
-// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
-// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
-// goroutine is holding the lock during the initialization phase.
-enum
-{
-	LockExternal = 1,
-	LockInternal = 2,
-};
-
-struct	SigTab
-{
-	int32	flags;
-	int8	*name;
-};
-enum
-{
-	SigNotify = 1<<0,	// let signal.Notify have signal, even if from kernel
-	SigKill = 1<<1,		// if signal.Notify doesn't take it, exit quietly
-	SigThrow = 1<<2,	// if signal.Notify doesn't take it, exit loudly
-	SigPanic = 1<<3,	// if the signal is from the kernel, panic
-	SigDefault = 1<<4,	// if the signal isn't explicitly requested, don't monitor it
-	SigHandling = 1<<5,	// our signal handler is registered
-	SigIgnored = 1<<6,	// the signal was ignored before we registered for it
-	SigGoExit = 1<<7,	// cause all runtime procs to exit (only used on Plan 9).
-};
-
-// Layout of in-memory per-function information prepared by linker
-// See http://golang.org/s/go12symtab.
-// Keep in sync with linker and with ../../libmach/sym.c
-// and with package debug/gosym and with symtab.go in package runtime.
-struct	Func
-{
-	uintptr	entry;	// start pc
-	int32	nameoff;// function name
-	
-	int32	args;	// in/out args size
-	int32	frame;	// legacy frame size; use pcsp if possible
-
-	int32	pcsp;
-	int32	pcfile;
-	int32	pcln;
-	int32	npcdata;
-	int32	nfuncdata;
-};
-
-// layout of Itab known to compilers
-// allocated in non-garbage-collected memory
-struct	Itab
-{
-	InterfaceType*	inter;
-	Type*	type;
-	Itab*	link;
-	int32	bad;
-	int32	unused;
-	void	(*fun[])(void);
-};
-
-#ifdef GOOS_nacl
-enum {
-   NaCl = 1,
-};
-#else
-enum {
-   NaCl = 0,
-};
-#endif
-
-#ifdef GOOS_windows
-enum {
-   Windows = 1
-};
-#else
-enum {
-   Windows = 0
-};
-#endif
-#ifdef GOOS_solaris
-enum {
-   Solaris = 1
-};
-#else
-enum {
-   Solaris = 0
-};
-#endif
-#ifdef GOOS_plan9
-enum {
-   Plan9 = 1
-};
-#else
-enum {
-   Plan9 = 0
-};
-#endif
-
-// Lock-free stack node.
-// Also known to export_test.go.
-struct LFNode
-{
-	uint64	next;
-	uintptr	pushcnt;
-};
-
-// Parallel for descriptor.
-struct ParFor
-{
-	void (*body)(ParFor*, uint32);	// executed for each element
-	uint32 done;			// number of idle threads
-	uint32 nthr;			// total number of threads
-	uint32 nthrmax;			// maximum number of threads
-	uint32 thrseq;			// thread id sequencer
-	uint32 cnt;			// iteration space [0, cnt)
-	void *ctx;			// arbitrary user context
-	bool wait;			// if true, wait while all threads finish processing,
-					// otherwise parfor may return while other threads are still working
-	ParForThread *thr;		// array of thread descriptors
-	uint32 pad;			// to align ParForThread.pos for 64-bit atomic operations
-	// stats
-	uint64 nsteal;
-	uint64 nstealcnt;
-	uint64 nprocyield;
-	uint64 nosyield;
-	uint64 nsleep;
-};
-
-enum {
-	WorkbufSize	= 4*1024,
-};
-struct Workbuf
-{
-	LFNode	node; // must be first
-	uintptr	nobj;
-	byte*	obj[(WorkbufSize-sizeof(LFNode)-sizeof(uintptr))/PtrSize];
-};
-
-// Track memory allocated by code not written in Go during a cgo call,
-// so that the garbage collector can see them.
-struct CgoMal
-{
-	CgoMal	*next;
-	void	*alloc;
-};
-
-// Holds variables parsed from GODEBUG env var.
-struct DebugVars
-{
-	int32	allocfreetrace;
-	int32	efence;
-	int32	gctrace;
-	int32	gcdead;
-	int32	scheddetail;
-	int32	schedtrace;
-	int32	scavenge;
-};
-
-// Indicates to write barrier and sychronization task to preform.
-enum
-{                               // Action               WB installation
-	GCoff = 0,		// stop and start	no wb
-	GCquiesce, 		// stop and start	no wb
-	GCstw, 			// stop the ps		nop
-	GCscan,			// scan the stacks prior to marking
-	GCmark,			// mark use wbufs from GCscan and globals, scan the stacks, then go to GCtermination
-	GCmarktermination,	// mark termination detection. Allocate black, Ps help out GC
-	GCsweep,		// stop and start	nop
-};
-
-struct ForceGCState
-{
-	Mutex	lock;
-	G*	g;
-	uint32	idle;
-};
-
-extern uint32 runtime·gcphase;
-extern Mutex runtime·allglock;
-
-/*
- * defined macros
- *    you need super-gopher-guru privilege
- *    to add this list.
- */
-#define	nelem(x)	(sizeof(x)/sizeof((x)[0]))
-#define	nil		((void*)0)
-#define	offsetof(s,m)	(uint32)(&(((s*)0)->m))
-#define	ROUND(x, n)	(((x)+(n)-1)&~(uintptr)((n)-1)) /* all-caps to mark as macro: it evaluates n twice */
-
-/*
- * known to compiler
- */
-enum {
-	Structrnd = sizeof(uintreg),
-};
-
-byte*	runtime·startup_random_data;
-uint32	runtime·startup_random_data_len;
-
-int32	runtime·invalidptr;
-
-enum {
-	// hashinit wants this many random bytes
-	HashRandomBytes = 32
-};
-
-uint32  runtime·readgstatus(G*);
-void    runtime·casgstatus(G*, uint32, uint32);
-bool    runtime·castogscanstatus(G*, uint32, uint32);
-void    runtime·quiesce(G*);
-bool    runtime·stopg(G*);
-void    runtime·restartg(G*);
-void    runtime·gcphasework(G*);
-
-/*
- * deferred subroutine calls
- */
-struct Defer
-{
-	int32	siz;
-	bool	started;
-	uintptr	argp;		// where args were copied from
-	uintptr	pc;
-	FuncVal*	fn;
-	Panic*	panic;	// panic that is running defer
-	Defer*	link;
-};
-
-// argp used in Defer structs when there is no argp.
-#define NoArgs ((uintptr)-1)
-
-/*
- * panics
- */
-struct Panic
-{
-	void*	argp;	// pointer to arguments of deferred call run during panic; cannot move - known to liblink
-	Eface	arg;		// argument to panic
-	Panic*	link;		// link to earlier panic
-	bool	recovered;	// whether this panic is over
-	bool	aborted;	// the panic was aborted
-};
-
-/*
- * stack traces
- */
-typedef struct Stkframe Stkframe;
-typedef struct BitVector BitVector;
-struct Stkframe
-{
-	Func*	fn;	// function being run
-	uintptr	pc;	// program counter within fn
-	uintptr	continpc;	// program counter where execution can continue, or 0 if not
-	uintptr	lr;	// program counter at caller aka link register
-	uintptr	sp;	// stack pointer at pc
-	uintptr	fp;	// stack pointer at caller aka frame pointer
-	uintptr	varp;	// top of local variables
-	uintptr	argp;	// pointer to function arguments
-	uintptr	arglen;	// number of bytes at argp
-	BitVector*	argmap;	// force use of this argmap
-};
-
-enum
-{
-	TraceRuntimeFrames = 1<<0, // include frames for internal runtime functions.
-	TraceTrap = 1<<1, // the initial PC, SP are from a trap, not a return PC from a call
-};
-intgo	runtime·gentraceback(uintptr, uintptr, uintptr, G*, intgo, uintptr*, intgo, bool(**)(Stkframe*, void*), void*, uintgo);
-void	runtime·tracebackdefers(G*, bool(**)(Stkframe*, void*), void*);
-void	runtime·traceback(uintptr pc, uintptr sp, uintptr lr, G* gp);
-void	runtime·tracebacktrap(uintptr pc, uintptr sp, uintptr lr, G* gp);
-void	runtime·tracebackothers(G*);
-bool	runtime·haszeroargs(uintptr pc);
-bool	runtime·topofstack(Func*);
-enum
-{
-	// The maximum number of frames we print for a traceback
-	TracebackMaxFrames = 100,
-};
-
-/*
- * external data
- */
-extern	String	runtime·emptystring;
-extern	G**	runtime·allg;
-extern	Slice	runtime·allgs; // []*G
-extern	uintptr runtime·allglen;
-extern	G*	runtime·lastg;
-extern	M*	runtime·allm;
-extern	P*	runtime·allp[MaxGomaxprocs+1];
-extern	int32	runtime·gomaxprocs;
-extern	uint32	runtime·needextram;
-extern	uint32	runtime·panicking;
-extern	int8*	runtime·goos;
-extern	int32	runtime·ncpu;
-extern	bool	runtime·iscgo;
-extern 	void	(*runtime·sysargs)(int32, uint8**);
-extern	uintptr	runtime·maxstring;
-extern	uint32	runtime·cpuid_ecx;
-extern	uint32	runtime·cpuid_edx;
-extern	DebugVars	runtime·debug;
-extern	uintptr	runtime·maxstacksize;
-extern	Note	runtime·signote;
-extern	ForceGCState	runtime·forcegc;
-extern	SchedT	runtime·sched;
-extern	int32		runtime·newprocs;
-
-/*
- * common functions and data
- */
-int32	runtime·strcmp(byte*, byte*);
-int32	runtime·strncmp(byte*, byte*, uintptr);
-byte*	runtime·strstr(byte*, byte*);
-intgo	runtime·findnull(byte*);
-intgo	runtime·findnullw(uint16*);
-void	runtime·dump(byte*, int32);
-int32	runtime·runetochar(byte*, int32);
-int32	runtime·charntorune(int32*, uint8*, int32);
-
-
-/*
- * This macro is used when writing C functions
- * called as if they were Go functions.
- * Passed the address of a result before a return statement,
- * it makes sure the result has been flushed to memory
- * before the return.
- *
- * It is difficult to write such functions portably, because
- * of the varying requirements on the alignment of the
- * first output value. Almost all code should write such
- * functions in .goc files, where goc2c (part of cmd/dist)
- * can arrange the correct alignment for the target system.
- * Goc2c also takes care of conveying to the garbage collector
- * which parts of the argument list are inputs vs outputs.
- *
- * Therefore, do NOT use this macro if at all possible.
- */ 
-#define FLUSH(x)	USED(x)
-
-/*
- * GoOutput is a type with the same alignment requirements as the
- * initial output argument from a Go function. Only for use in cases
- * where using goc2c is not possible. See comment on FLUSH above.
- */
-typedef uint64 GoOutput;
-
-void	runtime·gogo(Gobuf*);
-void	runtime·gostartcall(Gobuf*, void(*)(void), void*);
-void	runtime·gostartcallfn(Gobuf*, FuncVal*);
-void	runtime·gosave(Gobuf*);
-void	runtime·goargs(void);
-void	runtime·goenvs(void);
-void	runtime·goenvs_unix(void);
-void*	runtime·getu(void);
-void	runtime·throw(int8*);
-bool	runtime·canpanic(G*);
-void	runtime·prints(int8*);
-void	runtime·printf(int8*, ...);
-void	runtime·snprintf(byte*, int32, int8*, ...);
-byte*	runtime·mchr(byte*, byte, byte*);
-int32	runtime·mcmp(byte*, byte*, uintptr);
-void	runtime·memmove(void*, void*, uintptr);
-String	runtime·catstring(String, String);
-String	runtime·gostring(byte*);
-Slice	runtime·makeStringSlice(intgo);
-String  runtime·gostringn(byte*, intgo);
-Slice	runtime·gobytes(byte*, intgo);
-String	runtime·gostringnocopy(byte*);
-String	runtime·gostringw(uint16*);
-void	runtime·initsig(void);
-void	runtime·sigenable(uint32 sig);
-void	runtime·sigdisable(uint32 sig);
-int32	runtime·gotraceback(bool *crash);
-void	runtime·goroutineheader(G*);
-int32	runtime·open(int8*, int32, int32);
-int32	runtime·read(int32, void*, int32);
-int32	runtime·write(uintptr, void*, int32); // use uintptr to accommodate windows.
-int32	runtime·close(int32);
-int32	runtime·mincore(void*, uintptr, byte*);
-void	runtime·jmpdefer(FuncVal*, uintptr);
-void	runtime·exit1(int32);
-void	runtime·ready(G*);
-byte*	runtime·getenv(int8*);
-int32	runtime·atoi(byte*);
-void	runtime·newosproc(M *mp, void *stk);
-void	runtime·mstart(void);
-G*	runtime·malg(int32);
-void	runtime·asminit(void);
-void	runtime·mpreinit(M*);
-void	runtime·minit(void);
-void	runtime·unminit(void);
-void	runtime·signalstack(byte*, int32);
-void	runtime·tracebackinit(void);
-void	runtime·symtabinit(void);
-Func*	runtime·findfunc(uintptr);
-int32	runtime·funcline(Func*, uintptr, String*);
-int32	runtime·funcspdelta(Func*, uintptr);
-int8*	runtime·funcname(Func*);
-int32	runtime·pcdatavalue(Func*, int32, uintptr);
-void	runtime·stackinit(void);
-Stack	runtime·stackalloc(uint32);
-void	runtime·stackfree(Stack);
-void	runtime·shrinkstack(G*);
-void	runtime·shrinkfinish(void);
-MCache*	runtime·allocmcache(void);
-void	runtime·freemcache(MCache*);
-void	runtime·mallocinit(void);
-void	runtime·gcinit(void);
-void*	runtime·mallocgc(uintptr size, Type* typ, uint32 flag);
-void	runtime·runpanic(Panic*);
-uintptr	runtime·getcallersp(void*);
-int32	runtime·mcount(void);
-int32	runtime·gcount(void);
-void	runtime·mcall(void(**)(G*));
-void	runtime·onM(void(**)(void));
-void	runtime·onMsignal(void(**)(void));
-uint32	runtime·fastrand1(void);
-void	runtime·rewindmorestack(Gobuf*);
-int32	runtime·timediv(int64, int32, int32*);
-int32	runtime·round2(int32 x); // round x up to a power of 2.
-
-// atomic operations
-bool	runtime·cas(uint32*, uint32, uint32);
-bool	runtime·cas64(uint64*, uint64, uint64);
-bool	runtime·casp(void**, void*, void*);
-bool	runtime·casuintptr(uintptr*, uintptr, uintptr);
-// Don't confuse with XADD x86 instruction,
-// this one is actually 'addx', that is, add-and-fetch.
-uint32	runtime·xadd(uint32 volatile*, int32);
-uint64	runtime·xadd64(uint64 volatile*, int64);
-uint32	runtime·xchg(uint32 volatile*, uint32);
-uint64	runtime·xchg64(uint64 volatile*, uint64);
-void*	runtime·xchgp(void* volatile*, void*);
-uint32	runtime·atomicload(uint32 volatile*);
-void	runtime·atomicstore(uint32 volatile*, uint32);
-void	runtime·atomicstore64(uint64 volatile*, uint64);
-uint64	runtime·atomicload64(uint64 volatile*);
-void*	runtime·atomicloadp(void* volatile*);
-uintptr	runtime·atomicloaduintptr(uintptr volatile*);
-void	runtime·atomicstorep(void* volatile*, void*);
-void	runtime·atomicstoreuintptr(uintptr volatile*, uintptr);
-void	runtime·atomicor8(byte volatile*, byte);
-
-void	runtime·setg(G*);
-void	runtime·newextram(void);
-void	runtime·exit(int32);
-void	runtime·breakpoint(void);
-void	runtime·gosched_m(G*);
-void	runtime·schedtrace(bool);
-void	runtime·park(bool(*)(G*, void*), void*, String);
-void	runtime·parkunlock(Mutex*, String);
-void	runtime·tsleep(int64, String);
-M*	runtime·newm(void);
-void	runtime·goexit(void);
-void	runtime·asmcgocall(void (*fn)(void*), void*);
-int32	runtime·asmcgocall_errno(void (*fn)(void*), void*);
-void	runtime·entersyscall(void);
-void	runtime·reentersyscall(uintptr, uintptr);
-void	runtime·entersyscallblock(void);
-void	runtime·exitsyscall(void);
-G*	runtime·newproc1(FuncVal*, byte*, int32, int32, void*);
-bool	runtime·sigsend(int32 sig);
-intgo	runtime·callers(intgo, uintptr*, intgo);
-intgo	runtime·gcallers(G*, intgo, uintptr*, intgo);
-int64	runtime·nanotime(void);	// monotonic time
-int64	runtime·unixnanotime(void); // real time, can skip
-void	runtime·dopanic(int32);
-void	runtime·startpanic(void);
-void	runtime·freezetheworld(void);
-void	runtime·sigprof(uint8 *pc, uint8 *sp, uint8 *lr, G *gp, M *mp);
-void	runtime·resetcpuprofiler(int32);
-void	runtime·setcpuprofilerate(int32);
-void	runtime·usleep(uint32);
-int64	runtime·cputicks(void);
-int64	runtime·tickspersecond(void);
-void	runtime·blockevent(int64, intgo);
-G*	runtime·netpoll(bool);
-void	runtime·netpollready(G**, PollDesc*, int32);
-uintptr	runtime·netpollfd(PollDesc*);
-void**	runtime·netpolluser(PollDesc*);
-bool	runtime·netpollclosing(PollDesc*);
-void	runtime·netpolllock(PollDesc*);
-void	runtime·netpollunlock(PollDesc*);
-void	runtime·crash(void);
-void	runtime·parsedebugvars(void);
-void*	runtime·funcdata(Func*, int32);
-void	runtime·setmaxthreads_m(void);
-G*	runtime·timejump(void);
-void	runtime·iterate_itabs(void (**callback)(Itab*));
-void	runtime·iterate_finq(void (*callback)(FuncVal*, byte*, uintptr, Type*, PtrType*));
-
-#pragma	varargck	argpos	runtime·printf	1
-#pragma	varargck	type	"c"	int32
-#pragma	varargck	type	"d"	int32
-#pragma	varargck	type	"d"	uint32
-#pragma	varargck	type	"D"	int64
-#pragma	varargck	type	"D"	uint64
-#pragma	varargck	type	"x"	int32
-#pragma	varargck	type	"x"	uint32
-#pragma	varargck	type	"X"	int64
-#pragma	varargck	type	"X"	uint64
-#pragma	varargck	type	"p"	void*
-#pragma	varargck	type	"p"	uintptr
-#pragma	varargck	type	"s"	int8*
-#pragma	varargck	type	"s"	uint8*
-#pragma	varargck	type	"S"	String
-
-void	runtime·stoptheworld(void);
-void	runtime·starttheworld(void);
-extern uint32 runtime·worldsema;
-
-/*
- * mutual exclusion locks.  in the uncontended case,
- * as fast as spin locks (just a few user-level instructions),
- * but on the contention path they sleep in the kernel.
- * a zeroed Mutex is unlocked (no need to initialize each lock).
- */
-void	runtime·lock(Mutex*);
-void	runtime·unlock(Mutex*);
-
-/*
- * sleep and wakeup on one-time events.
- * before any calls to notesleep or notewakeup,
- * must call noteclear to initialize the Note.
- * then, exactly one thread can call notesleep
- * and exactly one thread can call notewakeup (once).
- * once notewakeup has been called, the notesleep
- * will return.  future notesleep will return immediately.
- * subsequent noteclear must be called only after
- * previous notesleep has returned, e.g. it's disallowed
- * to call noteclear straight after notewakeup.
- *
- * notetsleep is like notesleep but wakes up after
- * a given number of nanoseconds even if the event
- * has not yet happened.  if a goroutine uses notetsleep to
- * wake up early, it must wait to call noteclear until it
- * can be sure that no other goroutine is calling
- * notewakeup.
- *
- * notesleep/notetsleep are generally called on g0,
- * notetsleepg is similar to notetsleep but is called on user g.
- */
-void	runtime·noteclear(Note*);
-void	runtime·notesleep(Note*);
-void	runtime·notewakeup(Note*);
-bool	runtime·notetsleep(Note*, int64);  // false - timeout
-bool	runtime·notetsleepg(Note*, int64);  // false - timeout
-
-/*
- * low-level synchronization for implementing the above
- */
-uintptr	runtime·semacreate(void);
-int32	runtime·semasleep(int64);
-void	runtime·semawakeup(M*);
-// or
-void	runtime·futexsleep(uint32*, uint32, int64);
-void	runtime·futexwakeup(uint32*, uint32);
-
-/*
- * Mutex-free stack.
- * Initialize uint64 head to 0, compare with 0 to test for emptiness.
- * The stack does not keep pointers to nodes,
- * so they can be garbage collected if there are no other pointers to nodes.
- */
-void	runtime·lfstackpush(uint64 *head, LFNode *node);
-LFNode*	runtime·lfstackpop(uint64 *head);
-
-/*
- * Parallel for over [0, n).
- * body() is executed for each iteration.
- * nthr - total number of worker threads.
- * ctx - arbitrary user context.
- * if wait=true, threads return from parfor() when all work is done;
- * otherwise, threads can return while other threads are still finishing processing.
- */
-ParFor*	runtime·parforalloc(uint32 nthrmax);
-void	runtime·parforsetup(ParFor *desc, uint32 nthr, uint32 n, void *ctx, bool wait, void (*body)(ParFor*, uint32));
-void	runtime·parfordo(ParFor *desc);
-void	runtime·parforiters(ParFor*, uintptr, uintptr*, uintptr*);
-
-/*
- * low level C-called
- */
-// for mmap, we only pass the lower 32 bits of file offset to the 
-// assembly routine; the higher bits (if required), should be provided
-// by the assembly routine as 0.
-uint8*	runtime·mmap(byte*, uintptr, int32, int32, int32, uint32);
-void	runtime·munmap(byte*, uintptr);
-void	runtime·madvise(byte*, uintptr, int32);
-void	runtime·memclr(byte*, uintptr);
-void	runtime·setcallerpc(void*, void*);
-void*	runtime·getcallerpc(void*);
-void	runtime·printbool(bool);
-void	runtime·printbyte(int8);
-void	runtime·printfloat(float64);
-void	runtime·printint(int64);
-void	runtime·printiface(Iface);
-void	runtime·printeface(Eface);
-void	runtime·printstring(String);
-void	runtime·printpc(void*);
-void	runtime·printpointer(void*);
-void	runtime·printuint(uint64);
-void	runtime·printhex(uint64);
-void	runtime·printslice(Slice);
-void	runtime·printcomplex(Complex128);
-
-/*
- * runtime go-called
- */
-void	runtime·gopanic(Eface);
-void	runtime·panicindex(void);
-void	runtime·panicslice(void);
-void	runtime·panicdivide(void);
-
-/*
- * runtime c-called (but written in Go)
- */
-void	runtime·printany(Eface);
-void	runtime·newTypeAssertionError(String*, String*, String*, String*, Eface*);
-void	runtime·fadd64c(uint64, uint64, uint64*);
-void	runtime·fsub64c(uint64, uint64, uint64*);
-void	runtime·fmul64c(uint64, uint64, uint64*);
-void	runtime·fdiv64c(uint64, uint64, uint64*);
-void	runtime·fneg64c(uint64, uint64*);
-void	runtime·f32to64c(uint32, uint64*);
-void	runtime·f64to32c(uint64, uint32*);
-void	runtime·fcmp64c(uint64, uint64, int32*, bool*);
-void	runtime·fintto64c(int64, uint64*);
-void	runtime·f64tointc(uint64, int64*, bool*);
-
-/*
- * wrapped for go users
- */
-float64	runtime·Inf(int32 sign);
-float64	runtime·NaN(void);
-float32	runtime·float32frombits(uint32 i);
-uint32	runtime·float32tobits(float32 f);
-float64	runtime·float64frombits(uint64 i);
-uint64	runtime·float64tobits(float64 f);
-float64	runtime·frexp(float64 d, int32 *ep);
-bool	runtime·isInf(float64 f, int32 sign);
-bool	runtime·isNaN(float64 f);
-float64	runtime·ldexp(float64 d, int32 e);
-float64	runtime·modf(float64 d, float64 *ip);
-void	runtime·semacquire(uint32*, bool);
-void	runtime·semrelease(uint32*);
-int32	runtime·gomaxprocsfunc(int32 n);
-void	runtime·procyield(uint32);
-void	runtime·osyield(void);
-void	runtime·lockOSThread(void);
-void	runtime·unlockOSThread(void);
-
-void	runtime·writebarrierptr_nostore(void*, void*);
-
-bool	runtime·showframe(Func*, G*);
-void	runtime·printcreatedby(G*);
-
-void	runtime·ifaceE2I(InterfaceType*, Eface, Iface*);
-bool	runtime·ifaceE2I2(InterfaceType*, Eface, Iface*);
-uintptr	runtime·memlimit(void);
-
-// float.c
-extern float64 runtime·nan;
-extern float64 runtime·posinf;
-extern float64 runtime·neginf;
-extern uint64 ·nan;
-extern uint64 ·posinf;
-extern uint64 ·neginf;
-#define ISNAN(f) ((f) != (f))
-
-enum
-{
-	UseSpanType = 1,
-};
diff --git a/src/runtime/runtime1.go b/src/runtime/runtime1.go
new file mode 100644
index 0000000..15dea01
--- /dev/null
+++ b/src/runtime/runtime1.go
@@ -0,0 +1,417 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Keep a cached value to make gotraceback fast,
+// since we call it on every call to gentraceback.
+// The cached value is a uint32 in which the low bit
+// is the "crash" setting and the top 31 bits are the
+// gotraceback value.
+var traceback_cache uint32 = 2 << 1
+
+// The GOTRACEBACK environment variable controls the
+// behavior of a Go program that is crashing and exiting.
+//	GOTRACEBACK=0   suppress all tracebacks
+//	GOTRACEBACK=1   default behavior - show tracebacks but exclude runtime frames
+//	GOTRACEBACK=2   show tracebacks including runtime frames
+//	GOTRACEBACK=crash   show tracebacks including runtime frames, then crash (core dump etc)
+//go:nosplit
+func gotraceback(crash *bool) int32 {
+	_g_ := getg()
+	if crash != nil {
+		*crash = false
+	}
+	if _g_.m.traceback != 0 {
+		return int32(_g_.m.traceback)
+	}
+	if crash != nil {
+		*crash = traceback_cache&1 != 0
+	}
+	return int32(traceback_cache >> 1)
+}
+
+var (
+	argc int32
+	argv **byte
+)
+
+// nosplit for use in linux/386 startup linux_setup_vdso
+//go:nosplit
+func argv_index(argv **byte, i int32) *byte {
+	return *(**byte)(add(unsafe.Pointer(argv), uintptr(i)*ptrSize))
+}
+
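+// args records argc/argv as passed by the OS and lets the OS-specific
+// sysargs routine inspect them before they are converted to Go strings.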
+func args(c int32, v **byte) {
+	argc = c
+	argv = v
+	sysargs(c, v)
+}
+
+var (
+	// TODO: Retire in favor of GOOS== checks.
+	isplan9   int32
+	issolaris int32
+	iswindows int32
+)
+
+// Information about what cpu features are available.
+// Set on startup in asm_{x86/amd64}.s.
+var (
+//cpuid_ecx uint32
+//cpuid_edx uint32
+)
+
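+// goargs converts the C argv into the Go argslice.
+// On Windows the command line is retrieved by the os package instead.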
+func goargs() {
+	if GOOS == "windows" {
+		return
+	}
+
+	argslice = make([]string, argc)
+	for i := int32(0); i < argc; i++ {
+		argslice[i] = gostringnocopy(argv_index(argv, i))
+	}
+}
+
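+// goenvs_unix copies the environment strings, which follow argv in memory,
+// into envs.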
+func goenvs_unix() {
+	n := int32(0)
+	for argv_index(argv, argc+1+n) != nil {
+		n++
+	}
+
+	envs = make([]string, n)
+	for i := int32(0); i < n; i++ {
+		envs[i] = gostringnocopy(argv_index(argv, argc+1+i))
+	}
+}
+
+func environ() []string {
+	return envs
+}
+
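+// testAtomic64 sanity-checks the 64-bit atomic operations at startup.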
+func testAtomic64() {
+	var z64, x64 uint64
+
+	z64 = 42
+	x64 = 0
+	// TODO: PREFETCH((unsafe.Pointer)(&z64))
+	if cas64(&z64, x64, 1) {
+		gothrow("cas64 failed")
+	}
+	if x64 != 0 {
+		gothrow("cas64 failed")
+	}
+	x64 = 42
+	if !cas64(&z64, x64, 1) {
+		gothrow("cas64 failed")
+	}
+	if x64 != 42 || z64 != 1 {
+		gothrow("cas64 failed")
+	}
+	if atomicload64(&z64) != 1 {
+		gothrow("load64 failed")
+	}
+	atomicstore64(&z64, (1<<40)+1)
+	if atomicload64(&z64) != (1<<40)+1 {
+		gothrow("store64 failed")
+	}
+	if xadd64(&z64, (1<<40)+1) != (2<<40)+2 {
+		gothrow("xadd64 failed")
+	}
+	if atomicload64(&z64) != (2<<40)+2 {
+		gothrow("xadd64 failed")
+	}
+	if xchg64(&z64, (3<<40)+3) != (2<<40)+2 {
+		gothrow("xchg64 failed")
+	}
+	if atomicload64(&z64) != (3<<40)+3 {
+		gothrow("xchg64 failed")
+	}
+}
+
+func check() {
+	var (
+		a     int8
+		b     uint8
+		c     int16
+		d     uint16
+		e     int32
+		f     uint32
+		g     int64
+		h     uint64
+		i, i1 float32
+		j, j1 float64
+		k, k1 unsafe.Pointer
+		l     *uint16
+		m     [4]byte
+	)
+	type x1t struct {
+		x uint8
+	}
+	type y1t struct {
+		x1 x1t
+		y  uint8
+	}
+	var x1 x1t
+	var y1 y1t
+
+	if unsafe.Sizeof(a) != 1 {
+		gothrow("bad a")
+	}
+	if unsafe.Sizeof(b) != 1 {
+		gothrow("bad b")
+	}
+	if unsafe.Sizeof(c) != 2 {
+		gothrow("bad c")
+	}
+	if unsafe.Sizeof(d) != 2 {
+		gothrow("bad d")
+	}
+	if unsafe.Sizeof(e) != 4 {
+		gothrow("bad e")
+	}
+	if unsafe.Sizeof(f) != 4 {
+		gothrow("bad f")
+	}
+	if unsafe.Sizeof(g) != 8 {
+		gothrow("bad g")
+	}
+	if unsafe.Sizeof(h) != 8 {
+		gothrow("bad h")
+	}
+	if unsafe.Sizeof(i) != 4 {
+		gothrow("bad i")
+	}
+	if unsafe.Sizeof(j) != 8 {
+		gothrow("bad j")
+	}
+	if unsafe.Sizeof(k) != ptrSize {
+		gothrow("bad k")
+	}
+	if unsafe.Sizeof(l) != ptrSize {
+		gothrow("bad l")
+	}
+	if unsafe.Sizeof(x1) != 1 {
+		gothrow("bad unsafe.Sizeof x1")
+	}
+	if unsafe.Offsetof(y1.y) != 1 {
+		gothrow("bad offsetof y1.y")
+	}
+	if unsafe.Sizeof(y1) != 2 {
+		gothrow("bad unsafe.Sizeof y1")
+	}
+
+	if timediv(12345*1000000000+54321, 1000000000, &e) != 12345 || e != 54321 {
+		gothrow("bad timediv")
+	}
+
+	var z uint32
+	z = 1
+	if !cas(&z, 1, 2) {
+		gothrow("cas1")
+	}
+	if z != 2 {
+		gothrow("cas2")
+	}
+
+	z = 4
+	if cas(&z, 5, 6) {
+		gothrow("cas3")
+	}
+	if z != 4 {
+		gothrow("cas4")
+	}
+
+	z = 0xffffffff
+	if !cas(&z, 0xffffffff, 0xfffffffe) {
+		gothrow("cas5")
+	}
+	if z != 0xfffffffe {
+		gothrow("cas6")
+	}
+
+	k = unsafe.Pointer(uintptr(0xfedcb123))
+	if ptrSize == 8 {
+		k = unsafe.Pointer(uintptr(unsafe.Pointer(k)) << 10)
+	}
+	if casp(&k, nil, nil) {
+		gothrow("casp1")
+	}
+	k1 = add(k, 1)
+	if !casp(&k, k, k1) {
+		gothrow("casp2")
+	}
+	if k != k1 {
+		gothrow("casp3")
+	}
+
+	m = [4]byte{1, 1, 1, 1}
+	atomicor8(&m[1], 0xf0)
+	if m[0] != 1 || m[1] != 0xf1 || m[2] != 1 || m[3] != 1 {
+		gothrow("atomicor8")
+	}
+
+	*(*uint64)(unsafe.Pointer(&j)) = ^uint64(0)
+	if j == j {
+		gothrow("float64nan")
+	}
+	if !(j != j) {
+		gothrow("float64nan1")
+	}
+
+	*(*uint64)(unsafe.Pointer(&j1)) = ^uint64(1)
+	if j == j1 {
+		gothrow("float64nan2")
+	}
+	if !(j != j1) {
+		gothrow("float64nan3")
+	}
+
+	*(*uint32)(unsafe.Pointer(&i)) = ^uint32(0)
+	if i == i {
+		gothrow("float32nan")
+	}
+	if i == i {
+		gothrow("float32nan1")
+	}
+
+	*(*uint32)(unsafe.Pointer(&i1)) = ^uint32(1)
+	if i == i1 {
+		gothrow("float32nan2")
+	}
+	if i == i1 {
+		gothrow("float32nan3")
+	}
+
+	testAtomic64()
+
+	if _FixedStack != round2(_FixedStack) {
+		gothrow("FixedStack is not power-of-2")
+	}
+}
+
+type dbgVar struct {
+	name  string
+	value *int32
+}
+
+// Do we report invalid pointers found during stack or heap scans?
+//var invalidptr int32 = 1
+
+var dbgvars = []dbgVar{
+	{"allocfreetrace", &debug.allocfreetrace},
+	{"invalidptr", &invalidptr},
+	{"efence", &debug.efence},
+	{"gctrace", &debug.gctrace},
+	{"gcdead", &debug.gcdead},
+	{"scheddetail", &debug.scheddetail},
+	{"schedtrace", &debug.schedtrace},
+	{"scavenge", &debug.scavenge},
+}
+
+func parsedebugvars() {
+	for p := gogetenv("GODEBUG"); p != ""; {
+		field := ""
+		i := index(p, ",")
+		if i < 0 {
+			field, p = p, ""
+		} else {
+			field, p = p[:i], p[i+1:]
+		}
+		i = index(field, "=")
+		if i < 0 {
+			continue
+		}
+		key, value := field[:i], field[i+1:]
+		for _, v := range dbgvars {
+			if v.name == key {
+				*v.value = int32(goatoi(value))
+			}
+		}
+	}
+
+	switch p := gogetenv("GOTRACEBACK"); p {
+	case "":
+		traceback_cache = 1 << 1
+	case "crash":
+		traceback_cache = 2<<1 | 1
+	default:
+		traceback_cache = uint32(goatoi(p)) << 1
+	}
+}
+
+// Poor man's 64-bit division.
+// This is a very special function; do not use it unless you are sure what you are doing.
+// int64 division is lowered into a _divv() call on 386, which does not fit into nosplit functions.
+// Handles overflow in a time-specific manner.
+//go:nosplit
+func timediv(v int64, div int32, rem *int32) int32 {
+	res := int32(0)
+	for bit := 30; bit >= 0; bit-- {
+		if v >= int64(div)<<uint(bit) {
+			v = v - (int64(div) << uint(bit))
+			res += 1 << uint(bit)
+		}
+	}
+	if v >= int64(div) {
+		if rem != nil {
+			*rem = 0
+		}
+		return 0x7fffffff
+	}
+	if rem != nil {
+		*rem = int32(v)
+	}
+	return res
+}
+
+// Helpers for Go. Must be NOSPLIT, must only call NOSPLIT functions, and must not block.
+
+//go:nosplit
+func acquirem() *m {
+	_g_ := getg()
+	_g_.m.locks++
+	return _g_.m
+}
+
+//go:nosplit
+func releasem(mp *m) {
+	_g_ := getg()
+	mp.locks--
+	if mp.locks == 0 && _g_.preempt {
+		// restore the preemption request in case we've cleared it in newstack
+		_g_.stackguard0 = stackPreempt
+	}
+}
+
+//go:nosplit
+func gomcache() *mcache {
+	return getg().m.mcache
+}
+
+var typelink, etypelink [0]byte
+
+//go:nosplit
+func typelinks() []*_type {
+	var ret []*_type
+	sp := (*slice)(unsafe.Pointer(&ret))
+	sp.array = (*byte)(unsafe.Pointer(&typelink))
+	sp.len = uint((uintptr(unsafe.Pointer(&etypelink)) - uintptr(unsafe.Pointer(&typelink))) / unsafe.Sizeof(ret[0]))
+	sp.cap = sp.len
+	return ret
+}
+
+// TODO: move back into mgc0.c when converted to Go
+func readgogc() int32 {
+	p := gogetenv("GOGC")
+	if p == "" {
+		return 100
+	}
+	if p == "off" {
+		return -1
+	}
+	return int32(goatoi(p))
+}
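For reference, a minimal standalone sketch (not part of this change) of the encoding described by the traceback_cache comment and the GOTRACEBACK switch in parsedebugvars above: the crash flag lives in the low bit and the traceback level in the remaining bits. The pack/unpack names below are illustrative only.

    package main

    import "fmt"

    // pack stores the traceback level in the upper bits and the crash flag
    // in the low bit, mirroring the layout described for traceback_cache.
    func pack(level uint32, crash bool) uint32 {
    	v := level << 1
    	if crash {
    		v |= 1
    	}
    	return v
    }

    // unpack reverses pack.
    func unpack(v uint32) (level uint32, crash bool) {
    	return v >> 1, v&1 != 0
    }

    func main() {
    	v := pack(2, true) // analogous to GOTRACEBACK=crash
    	level, crash := unpack(v)
    	fmt.Println(level, crash) // 2 true
    }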
diff --git a/src/runtime/runtime2.go b/src/runtime/runtime2.go
new file mode 100644
index 0000000..7625a2d
--- /dev/null
+++ b/src/runtime/runtime2.go
@@ -0,0 +1,613 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+/*
+ * defined constants
+ */
+const (
+	// G status
+	//
+	// If you add to this list, add to the list
+	// of "okay during garbage collection" status
+	// in mgc0.c too.
+	_Gidle            = iota // 0
+	_Grunnable               // 1 runnable and on a run queue
+	_Grunning                // 2
+	_Gsyscall                // 3
+	_Gwaiting                // 4
+	_Gmoribund_unused        // 5 currently unused, but hardcoded in gdb scripts
+	_Gdead                   // 6
+	_Genqueue                // 7 Only the Gscanenqueue is used.
+	_Gcopystack              // 8 in this state when newstack is moving the stack
+	// the following encode that the GC is scanning the stack and what to do when it is done
+	_Gscan = 0x1000 // atomicstatus&~Gscan = the non-scan state,
+	// _Gscanidle =     _Gscan + _Gidle,      // Not used. Gidle only used with newly malloced gs
+	_Gscanrunnable = _Gscan + _Grunnable //  0x1001 When scanning completes make it Grunnable (it is already on run queue)
+	_Gscanrunning  = _Gscan + _Grunning  //  0x1002 Used to tell preemption newstack routine to scan preempted stack.
+	_Gscansyscall  = _Gscan + _Gsyscall  //  0x1003 When scanning completes make it Gsyscall
+	_Gscanwaiting  = _Gscan + _Gwaiting  //  0x1004 When scanning completes make it Gwaiting
+	// _Gscanmoribund_unused,               //  not possible
+	// _Gscandead,                          //  not possible
+	_Gscanenqueue = _Gscan + _Genqueue //  When scanning completes make it Grunnable and put on runqueue
+)
+
+const (
+	// P status
+	_Pidle = iota
+	_Prunning
+	_Psyscall
+	_Pgcstop
+	_Pdead
+)
+
+// XXX inserting below here
+
+type mutex struct {
+	// Futex-based impl treats it as uint32 key,
+	// while sema-based impl as M* waitm.
+	// Used to be a union, but unions break precise GC.
+	key uintptr
+}
+
+type note struct {
+	// Futex-based impl treats it as uint32 key,
+	// while sema-based impl as M* waitm.
+	// Used to be a union, but unions break precise GC.
+	key uintptr
+}
+
+type _string struct {
+	str *byte
+	len int
+}
+
+type funcval struct {
+	fn uintptr
+	// variable-size, fn-specific data here
+}
+
+type iface struct {
+	tab  *itab
+	data unsafe.Pointer
+}
+
+type eface struct {
+	_type *_type
+	data  unsafe.Pointer
+}
+
+type slice struct {
+	array *byte // actual data
+	len   uint  // number of elements
+	cap   uint  // allocated number of elements
+}
+
+type gobuf struct {
+	// The offsets of sp, pc, and g are known to (hard-coded in) libmach.
+	sp   uintptr
+	pc   uintptr
+	g    *g
+	ctxt unsafe.Pointer // this has to be a pointer so that gc scans it
+	ret  uintreg
+	lr   uintptr
+}
+
+// Known to compiler.
+// Changes here must also be made in src/cmd/gc/select.c's selecttype.
+type sudog struct {
+	g           *g
+	selectdone  *uint32
+	next        *sudog
+	prev        *sudog
+	elem        unsafe.Pointer // data element
+	releasetime int64
+	nrelease    int32  // -1 for acquire
+	waitlink    *sudog // g.waiting list
+}
+
+type gcstats struct {
+	// the struct must consist of only uint64's,
+	// because it is casted to uint64[].
+	nhandoff    uint64
+	nhandoffcnt uint64
+	nprocyield  uint64
+	nosyield    uint64
+	nsleep      uint64
+}
+
+type libcall struct {
+	fn   uintptr
+	n    uintptr // number of parameters
+	args uintptr // parameters
+	r1   uintptr // return values
+	r2   uintptr
+	err  uintptr // error number
+}
+
+// describes how to handle callback
+type wincallbackcontext struct {
+	gobody       unsafe.Pointer // go function to call
+	argsize      uintptr        // callback arguments size (in bytes)
+	restorestack uintptr        // adjust stack on return by (in bytes) (386 only)
+	cleanstack   bool
+}
+
+// Stack describes a Go execution stack.
+// The bounds of the stack are exactly [lo, hi),
+// with no implicit data structures on either side.
+type stack struct {
+	lo uintptr
+	hi uintptr
+}
+
+type g struct {
+	// Stack parameters.
+	// stack describes the actual stack memory: [stack.lo, stack.hi).
+	// stackguard0 is the stack pointer compared in the Go stack growth prologue.
+	// It is stack.lo+StackGuard normally, but can be StackPreempt to trigger a preemption.
+	// stackguard1 is the stack pointer compared in the C stack growth prologue.
+	// It is stack.lo+StackGuard on g0 and gsignal stacks.
+	// It is ~0 on other goroutine stacks, to trigger a call to morestackc (and crash).
+	stack       stack   // offset known to runtime/cgo
+	stackguard0 uintptr // offset known to liblink
+	stackguard1 uintptr // offset known to liblink
+
+	_panic       *_panic // innermost panic - offset known to liblink
+	_defer       *_defer // innermost defer
+	sched        gobuf
+	syscallsp    uintptr        // if status==gsyscall, syscallsp = sched.sp to use during gc
+	syscallpc    uintptr        // if status==gsyscall, syscallpc = sched.pc to use during gc
+	param        unsafe.Pointer // passed parameter on wakeup
+	atomicstatus uint32
+	goid         int64
+	waitsince    int64  // approx time when the g became blocked
+	waitreason   string // if status==gwaiting
+	schedlink    *g
+	issystem     bool // do not output in stack dump, ignore in deadlock detector
+	preempt      bool // preemption signal, duplicates stackguard0 = stackpreempt
+	paniconfault bool // panic (instead of crash) on unexpected fault address
+	preemptscan  bool // preempted g does scan for gc
+	gcworkdone   bool // debug: cleared at beginning of gc work phase cycle, set by gcphasework, tested at end of cycle
+	throwsplit   bool // must not split stack
+	raceignore   int8 // ignore race detection events
+	m            *m   // for debuggers, but offset not hard-coded
+	lockedm      *m
+	sig          uint32
+	writebuf     []byte
+	sigcode0     uintptr
+	sigcode1     uintptr
+	sigpc        uintptr
+	gopc         uintptr // pc of go statement that created this goroutine
+	racectx      uintptr
+	waiting      *sudog // sudog structures this g is waiting on (that have a valid elem ptr)
+	end          [0]byte
+}
+
+type mts struct {
+	tv_sec  int64
+	tv_nsec int64
+}
+
+type mscratch struct {
+	v [6]uintptr
+}
+
+type m struct {
+	g0      *g    // goroutine with scheduling stack
+	morebuf gobuf // gobuf arg to morestack
+
+	// Fields not known to debuggers.
+	procid        uint64         // for debuggers, but offset not hard-coded
+	gsignal       *g             // signal-handling g
+	tls           [4]uintptr     // thread-local storage (for x86 extern register)
+	mstartfn      unsafe.Pointer // todo go func()
+	curg          *g             // current running goroutine
+	caughtsig     *g             // goroutine running during fatal signal
+	p             *p             // attached p for executing go code (nil if not executing go code)
+	nextp         *p
+	id            int32
+	mallocing     int32
+	throwing      int32
+	gcing         int32
+	locks         int32
+	softfloat     int32
+	dying         int32
+	profilehz     int32
+	helpgc        int32
+	spinning      bool // m is out of work and is actively looking for work
+	blocked       bool // m is blocked on a note
+	inwb          bool // m is executing a write barrier
+	printlock     int8
+	fastrand      uint32
+	ncgocall      uint64 // number of cgo calls in total
+	ncgo          int32  // number of cgo calls currently in progress
+	cgomal        *cgomal
+	park          note
+	alllink       *m // on allm
+	schedlink     *m
+	machport      uint32 // return address for mach ipc (os x)
+	mcache        *mcache
+	lockedg       *g
+	createstack   [32]uintptr // stack that created this thread.
+	freglo        [16]uint32  // d[i] lsb and f[i]
+	freghi        [16]uint32  // d[i] msb and f[i+16]
+	fflag         uint32      // floating point compare flags
+	locked        uint32      // tracking for lockosthread
+	nextwaitm     *m          // next m waiting for lock
+	waitsema      uintptr     // semaphore for parking on locks
+	waitsemacount uint32
+	waitsemalock  uint32
+	gcstats       gcstats
+	needextram    bool
+	traceback     uint8
+	waitunlockf   unsafe.Pointer // todo go func(*g, unsafe.pointer) bool
+	waitlock      unsafe.Pointer
+	//#ifdef GOOS_windows
+	thread uintptr // thread handle
+	// these are here because they are too large to be on the stack
+	// of low-level NOSPLIT functions.
+	libcall   libcall
+	libcallpc uintptr // for cpu profiler
+	libcallsp uintptr
+	libcallg  *g
+	//#endif
+	//#ifdef GOOS_solaris
+	perrno *int32 // pointer to tls errno
+	// these are here because they are too large to be on the stack
+	// of low-level NOSPLIT functions.
+	//LibCall	libcall;
+	ts      mts
+	scratch mscratch
+	//#endif
+	//#ifdef GOOS_plan9
+	notesig *int8
+	errstr  *byte
+	//#endif
+	end [0]byte
+}
+
+type p struct {
+	lock mutex
+
+	id          int32
+	status      uint32 // one of pidle/prunning/...
+	link        *p
+	schedtick   uint32 // incremented on every scheduler call
+	syscalltick uint32 // incremented on every system call
+	m           *m     // back-link to associated m (nil if idle)
+	mcache      *mcache
+	deferpool   [5]*_defer // pool of available defer structs of different sizes (see panic.c)
+
+	// Cache of goroutine ids, amortizes accesses to runtime·sched.goidgen.
+	goidcache    uint64
+	goidcacheend uint64
+
+	// Queue of runnable goroutines.
+	runqhead uint32
+	runqtail uint32
+	runq     [256]*g
+
+	// Available G's (status == Gdead)
+	gfree    *g
+	gfreecnt int32
+
+	pad [64]byte
+}
+
+const (
+	// The max value of GOMAXPROCS.
+	// There are no fundamental restrictions on the value.
+	_MaxGomaxprocs = 1 << 8
+)
+
+type schedt struct {
+	lock mutex
+
+	goidgen uint64
+
+	midle        *m    // idle m's waiting for work
+	nmidle       int32 // number of idle m's waiting for work
+	nmidlelocked int32 // number of locked m's waiting for work
+	mcount       int32 // number of m's that have been created
+	maxmcount    int32 // maximum number of m's allowed (or die)
+
+	pidle      *p // idle p's
+	npidle     uint32
+	nmspinning uint32
+
+	// Global runnable queue.
+	runqhead *g
+	runqtail *g
+	runqsize int32
+
+	// Global cache of dead G's.
+	gflock mutex
+	gfree  *g
+	ngfree int32
+
+	gcwaiting  uint32 // gc is waiting to run
+	stopwait   int32
+	stopnote   note
+	sysmonwait uint32
+	sysmonnote note
+	lastpoll   uint64
+
+	profilehz int32 // cpu profiling rate
+}
+
+// The m->locked word holds two pieces of state counting active calls to LockOSThread/lockOSThread.
+// The low bit (LockExternal) is a boolean reporting whether any LockOSThread call is active.
+// External locks are not recursive; a second lock is silently ignored.
+// The upper bits of m->locked record the nesting depth of calls to lockOSThread
+// (counting up by LockInternal), popped by unlockOSThread (counting down by LockInternal).
+// Internal locks can be recursive. For instance, a lock for cgo can occur while the main
+// goroutine is holding the lock during the initialization phase.
+const (
+	_LockExternal = 1
+	_LockInternal = 2
+)
+
+type sigtabtt struct {
+	flags int32
+	name  *int8
+}
+
+const (
+	_SigNotify   = 1 << 0 // let signal.Notify have signal, even if from kernel
+	_SigKill     = 1 << 1 // if signal.Notify doesn't take it, exit quietly
+	_SigThrow    = 1 << 2 // if signal.Notify doesn't take it, exit loudly
+	_SigPanic    = 1 << 3 // if the signal is from the kernel, panic
+	_SigDefault  = 1 << 4 // if the signal isn't explicitly requested, don't monitor it
+	_SigHandling = 1 << 5 // our signal handler is registered
+	_SigIgnored  = 1 << 6 // the signal was ignored before we registered for it
+	_SigGoExit   = 1 << 7 // cause all runtime procs to exit (only used on Plan 9).
+)
+
+// Layout of in-memory per-function information prepared by linker
+// See http://golang.org/s/go12symtab.
+// Keep in sync with linker and with ../../libmach/sym.c
+// and with package debug/gosym and with symtab.go in package runtime.
+type _func struct {
+	entry   uintptr // start pc
+	nameoff int32   // function name
+
+	args  int32 // in/out args size
+	frame int32 // legacy frame size; use pcsp if possible
+
+	pcsp      int32
+	pcfile    int32
+	pcln      int32
+	npcdata   int32
+	nfuncdata int32
+}
+
+// layout of Itab known to compilers
+// allocated in non-garbage-collected memory
+type itab struct {
+	inter  *interfacetype
+	_type  *_type
+	link   *itab
+	bad    int32
+	unused int32
+	fun    [0]uintptr
+}
+
+const (
+	// TODO: Generate in cmd/dist.
+	_NaCl    = 0
+	_Windows = 0
+	_Solaris = 0
+	_Plan9   = 0
+)
+
+// Lock-free stack node.
+// Also known to export_test.go.
+type lfnode struct {
+	next    uint64
+	pushcnt uintptr
+}
+
+// Parallel for descriptor.
+type parfor struct {
+	body    unsafe.Pointer // go func(*parfor, uint32), executed for each element
+	done    uint32         // number of idle threads
+	nthr    uint32         // total number of threads
+	nthrmax uint32         // maximum number of threads
+	thrseq  uint32         // thread id sequencer
+	cnt     uint32         // iteration space [0, cnt)
+	ctx     unsafe.Pointer // arbitrary user context
+	wait    bool           // if true, wait while all threads finish processing,
+	// otherwise parfor may return while other threads are still working
+	thr *parforthread // array of thread descriptors
+	pad uint32        // to align parforthread.pos for 64-bit atomic operations
+	// stats
+	nsteal     uint64
+	nstealcnt  uint64
+	nprocyield uint64
+	nosyield   uint64
+	nsleep     uint64
+}
+
+// Track memory allocated by code not written in Go during a cgo call,
+// so that the garbage collector can see it.
+type cgomal struct {
+	next  *cgomal
+	alloc unsafe.Pointer
+}
+
+// Holds variables parsed from GODEBUG env var.
+type debugvars struct {
+	allocfreetrace int32
+	efence         int32
+	gctrace        int32
+	gcdead         int32
+	scheddetail    int32
+	schedtrace     int32
+	scavenge       int32
+}
+
+// Indicates the write barrier and synchronization task to perform.
+const (
+	_GCoff             = iota // GC not running, write barrier disabled
+	_GCquiesce                // unused state
+	_GCstw                    // unused state
+	_GCscan                   // GC collecting roots into workbufs, write barrier disabled
+	_GCmark                   // GC marking from workbufs, write barrier ENABLED
+	_GCmarktermination        // GC mark termination: allocate black, P's help GC, write barrier ENABLED
+	_GCsweep                  // GC mark completed; sweeping in background, write barrier disabled
+)
+
+type forcegcstate struct {
+	lock mutex
+	g    *g
+	idle uint32
+}
+
+var gcphase uint32
+
+/*
+ * known to compiler
+ */
+const (
+	_Structrnd = regSize
+)
+
+var startup_random_data *byte
+var startup_random_data_len uint32
+
+var invalidptr int32
+
+const (
+	// hashinit wants this many random bytes
+	_HashRandomBytes = 32
+)
+
+/*
+ * deferred subroutine calls
+ */
+type _defer struct {
+	siz     int32
+	started bool
+	argp    uintptr // where args were copied from
+	pc      uintptr
+	fn      *funcval
+	_panic  *_panic // panic that is running defer
+	link    *_defer
+}
+
+/*
+ * panics
+ */
+type _panic struct {
+	argp      unsafe.Pointer // pointer to arguments of deferred call run during panic; cannot move - known to liblink
+	arg       interface{}    // argument to panic
+	link      *_panic        // link to earlier panic
+	recovered bool           // whether this panic is over
+	aborted   bool           // the panic was aborted
+}
+
+/*
+ * stack traces
+ */
+
+type stkframe struct {
+	fn       *_func     // function being run
+	pc       uintptr    // program counter within fn
+	continpc uintptr    // program counter where execution can continue, or 0 if not
+	lr       uintptr    // program counter at caller aka link register
+	sp       uintptr    // stack pointer at pc
+	fp       uintptr    // stack pointer at caller aka frame pointer
+	varp     uintptr    // top of local variables
+	argp     uintptr    // pointer to function arguments
+	arglen   uintptr    // number of bytes at argp
+	argmap   *bitvector // force use of this argmap
+}
+
+const (
+	_TraceRuntimeFrames = 1 << 0 // include frames for internal runtime functions.
+	_TraceTrap          = 1 << 1 // the initial PC, SP are from a trap, not a return PC from a call
+)
+
+const (
+	// The maximum number of frames we print for a traceback
+	_TracebackMaxFrames = 100
+)
+
+var (
+	emptystring string
+	allg        **g
+	allglen     uintptr
+	lastg       *g
+	allm        *m
+	allp        [_MaxGomaxprocs + 1]*p
+	gomaxprocs  int32
+	needextram  uint32
+	panicking   uint32
+	goos        *int8
+	ncpu        int32
+	iscgo       bool
+	cpuid_ecx   uint32
+	cpuid_edx   uint32
+	debug       debugvars
+	signote     note
+	forcegc     forcegcstate
+	sched       schedt
+	newprocs    int32
+)
+
+/*
+ * mutual exclusion locks.  in the uncontended case,
+ * as fast as spin locks (just a few user-level instructions),
+ * but on the contention path they sleep in the kernel.
+ * a zeroed Mutex is unlocked (no need to initialize each lock).
+ */
+
+/*
+ * sleep and wakeup on one-time events.
+ * before any calls to notesleep or notewakeup,
+ * must call noteclear to initialize the Note.
+ * then, exactly one thread can call notesleep
+ * and exactly one thread can call notewakeup (once).
+ * once notewakeup has been called, the notesleep
+ * will return.  future notesleep will return immediately.
+ * subsequent noteclear must be called only after
+ * previous notesleep has returned, e.g. it's disallowed
+ * to call noteclear straight after notewakeup.
+ *
+ * notetsleep is like notesleep but wakes up after
+ * a given number of nanoseconds even if the event
+ * has not yet happened.  if a goroutine uses notetsleep to
+ * wake up early, it must wait to call noteclear until it
+ * can be sure that no other goroutine is calling
+ * notewakeup.
+ *
+ * notesleep/notetsleep are generally called on g0,
+ * notetsleepg is similar to notetsleep but is called on user g.
+ */
+// bool	runtime·notetsleep(Note*, int64);  // false - timeout
+// bool	runtime·notetsleepg(Note*, int64);  // false - timeout
+
+/*
+ * Lock-free stack.
+ * Initialize uint64 head to 0, compare with 0 to test for emptiness.
+ * The stack does not keep pointers to nodes,
+ * so they can be garbage collected if there are no other pointers to nodes.
+ */
+
+/*
+ * Parallel for over [0, n).
+ * body() is executed for each iteration.
+ * nthr - total number of worker threads.
+ * ctx - arbitrary user context.
+ * if wait=true, threads return from parfor() when all work is done;
+ * otherwise, threads can return while other threads are still finishing processing.
+ */
+
+// for mmap, we only pass the lower 32 bits of file offset to the
+// assembly routine; the higher bits (if required) should be provided
+// by the assembly routine as 0.
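As an aside, here is a small self-contained illustration of the _Gscan composition defined earlier in runtime2.go: a scan status is the base status plus the Gscan bit, so masking the bit off recovers the non-scan status. The lowercase constant names are local stand-ins, not the runtime's identifiers.

    package main

    import "fmt"

    const (
    	gIdle = iota // stand-ins for _Gidle, _Grunnable, ...
    	gRunnable
    	gRunning
    	gSyscall
    	gWaiting
    )

    const gScan = 0x1000 // stand-in for _Gscan

    func main() {
    	scanRunnable := gScan + gRunnable
    	fmt.Printf("%#x\n", scanRunnable)             // 0x1001, like _Gscanrunnable
    	fmt.Println(scanRunnable&^gScan == gRunnable) // true: clearing the bit yields the base status
    }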
diff --git a/src/runtime/runtime2_windows.go b/src/runtime/runtime2_windows.go
new file mode 100644
index 0000000..80fc386
--- /dev/null
+++ b/src/runtime/runtime2_windows.go
@@ -0,0 +1,8 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+// TODO(brainman): move generation of zsys_windows_*.s out from cmd/dist/buildruntime.c and into here
+const cb_max = 2000 // maximum number of windows callbacks allowed (must be in sync with MAXWINCB from cmd/dist/buildruntime.c)
diff --git a/src/runtime/select.go b/src/runtime/select.go
index d703e1d..e918b73 100644
--- a/src/runtime/select.go
+++ b/src/runtime/select.go
@@ -167,8 +167,8 @@
 	}
 }
 
-func selparkcommit(gp *g, sel *_select) bool {
-	selunlock(sel)
+func selparkcommit(gp *g, sel unsafe.Pointer) bool {
+	selunlock((*_select)(sel))
 	return true
 }
 
@@ -363,7 +363,7 @@
 
 	// wait for someone to wake us up
 	gp.param = nil
-	gopark(unsafe.Pointer(funcPC(selparkcommit)), unsafe.Pointer(sel), "select")
+	gopark(selparkcommit, unsafe.Pointer(sel), "select")
 
 	// someone woke us up
 	sellock(sel)
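The select.go hunk above switches gopark from taking a raw function PC to taking a Go function value whose second argument is an opaque unsafe.Pointer. A rough sketch of that callback shape, under the assumption that the park routine simply invokes the commit function (which must release the lock) before suspending; the names park and unlockFn are made up for the sketch.

    package main

    import (
    	"fmt"
    	"sync"
    	"unsafe"
    )

    type unlockFn func(gp interface{}, lock unsafe.Pointer) bool

    // park models only the control flow: run the commit callback,
    // which releases the lock, then "sleep".
    func park(fn unlockFn, lock unsafe.Pointer, reason string) {
    	if fn(nil, lock) {
    		fmt.Println("parked:", reason)
    	}
    }

    func main() {
    	var mu sync.Mutex
    	mu.Lock()
    	park(func(_ interface{}, p unsafe.Pointer) bool {
    		(*sync.Mutex)(p).Unlock() // like selparkcommit unlocking the select
    		return true
    	}, unsafe.Pointer(&mu), "select")
    }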
diff --git a/src/runtime/signal.c b/src/runtime/signal.c
deleted file mode 100644
index 0674bfb..0000000
--- a/src/runtime/signal.c
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2014 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-
-void
-runtime·sigenable_m(void)
-{
-	uint32 s;
-	
-	s = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-	runtime·sigenable(s);
-}
-
-void
-runtime·sigdisable_m(void)
-{
-	uint32 s;
-	
-	s = g->m->scalararg[0];
-	g->m->scalararg[0] = 0;
-	runtime·sigdisable(s);
-}
diff --git a/src/runtime/signal1_unix.go b/src/runtime/signal1_unix.go
new file mode 100644
index 0000000..25f01e0
--- /dev/null
+++ b/src/runtime/signal1_unix.go
@@ -0,0 +1,111 @@
+// Copyright 2012 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux netbsd openbsd solaris
+
+package runtime
+
+const (
+	_SIG_DFL uintptr = 0
+	_SIG_IGN uintptr = 1
+)
+
+func initsig() {
+	// _NSIG is the number of signals on this operating system.
+	// sigtable should describe what to do for all the possible signals.
+	if len(sigtable) != _NSIG {
+		print("runtime: len(sigtable)=", len(sigtable), " _NSIG=", _NSIG, "\n")
+		gothrow("initsig")
+	}
+
+	// First call: basic setup.
+	for i := int32(0); i < _NSIG; i++ {
+		t := &sigtable[i]
+		if t.flags == 0 || t.flags&_SigDefault != 0 {
+			continue
+		}
+
+		// For some signals, we respect an inherited SIG_IGN handler
+		// rather than insist on installing our own default handler.
+		// Even these signals can be fetched using the os/signal package.
+		switch i {
+		case _SIGHUP, _SIGINT:
+			if getsig(i) == _SIG_IGN {
+				t.flags = _SigNotify | _SigIgnored
+				continue
+			}
+		}
+
+		t.flags |= _SigHandling
+		setsig(i, funcPC(sighandler), true)
+	}
+}
+
+func sigenable(sig uint32) {
+	if sig >= uint32(len(sigtable)) {
+		return
+	}
+
+	t := &sigtable[sig]
+	if t.flags&_SigNotify != 0 && t.flags&_SigHandling == 0 {
+		t.flags |= _SigHandling
+		if getsig(int32(sig)) == _SIG_IGN {
+			t.flags |= _SigIgnored
+		}
+		setsig(int32(sig), funcPC(sighandler), true)
+	}
+}
+
+func sigdisable(sig uint32) {
+	if sig >= uint32(len(sigtable)) {
+		return
+	}
+
+	t := &sigtable[sig]
+	if t.flags&_SigNotify != 0 && t.flags&_SigHandling != 0 {
+		t.flags &^= _SigHandling
+		if t.flags&_SigIgnored != 0 {
+			setsig(int32(sig), _SIG_IGN, true)
+		} else {
+			setsig(int32(sig), _SIG_DFL, true)
+		}
+	}
+}
+
+func resetcpuprofiler(hz int32) {
+	var it itimerval
+	if hz == 0 {
+		setitimer(_ITIMER_PROF, &it, nil)
+	} else {
+		it.it_interval.tv_sec = 0
+		it.it_interval.set_usec(1000000 / hz)
+		it.it_value = it.it_interval
+		setitimer(_ITIMER_PROF, &it, nil)
+	}
+	_g_ := getg()
+	_g_.m.profilehz = hz
+}
+
+func sigpipe() {
+	setsig(_SIGPIPE, _SIG_DFL, false)
+	raise(_SIGPIPE)
+}
+
+func crash() {
+	if GOOS == "darwin" {
+		// OS X core dumps are linear dumps of the mapped memory,
+		// from the first virtual byte to the last, with zeros in the gaps.
+		// Because of the way we arrange the address space on 64-bit systems,
+		// this means the OS X core file will be >128 GB and even on a zippy
+		// workstation can take OS X well over an hour to write (uninterruptible).
+		// Save users from making that mistake.
+		if ptrSize == 8 {
+			return
+		}
+	}
+
+	unblocksignals()
+	setsig(_SIGABRT, _SIG_DFL, false)
+	raise(_SIGABRT)
+}
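A compact sketch (not runtime code) of the flag arithmetic that sigenable and sigdisable above perform: a handler is installed only when SigNotify is set and SigHandling is not, and disabling clears SigHandling and restores either SIG_IGN or SIG_DFL depending on SigIgnored. The lowercase constants are local stand-ins for the _Sig* values.

    package main

    import "fmt"

    const (
    	sigNotify   = 1 << 0
    	sigHandling = 1 << 5
    	sigIgnored  = 1 << 6
    )

    func main() {
    	flags := sigNotify | sigIgnored // signal was inherited as ignored

    	// enable: install our handler once
    	if flags&sigNotify != 0 && flags&sigHandling == 0 {
    		flags |= sigHandling
    		fmt.Println("install sighandler")
    	}

    	// disable: put back whatever was there before
    	if flags&sigNotify != 0 && flags&sigHandling != 0 {
    		flags &^= sigHandling
    		if flags&sigIgnored != 0 {
    			fmt.Println("restore SIG_IGN")
    		} else {
    			fmt.Println("restore SIG_DFL")
    		}
    	}
    }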
diff --git a/src/runtime/signal_386.c b/src/runtime/signal_386.c
deleted file mode 100644
index 30a7488..0000000
--- a/src/runtime/signal_386.c
+++ /dev/null
@@ -1,122 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin dragonfly freebsd linux nacl netbsd openbsd
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_GOOS_GOARCH.h"
-#include "signals_GOOS.h"
-
-void
-runtime·dumpregs(Siginfo *info, void *ctxt)
-{
-	USED(info);
-	USED(ctxt);
-	
-	runtime·printf("eax     %x\n", SIG_EAX(info, ctxt));
-	runtime·printf("ebx     %x\n", SIG_EBX(info, ctxt));
-	runtime·printf("ecx     %x\n", SIG_ECX(info, ctxt));
-	runtime·printf("edx     %x\n", SIG_EDX(info, ctxt));
-	runtime·printf("edi     %x\n", SIG_EDI(info, ctxt));
-	runtime·printf("esi     %x\n", SIG_ESI(info, ctxt));
-	runtime·printf("ebp     %x\n", SIG_EBP(info, ctxt));
-	runtime·printf("esp     %x\n", SIG_ESP(info, ctxt));
-	runtime·printf("eip     %x\n", SIG_EIP(info, ctxt));
-	runtime·printf("eflags  %x\n", SIG_EFLAGS(info, ctxt));
-	runtime·printf("cs      %x\n", SIG_CS(info, ctxt));
-	runtime·printf("fs      %x\n", SIG_FS(info, ctxt));
-	runtime·printf("gs      %x\n", SIG_GS(info, ctxt));
-}
-
-void
-runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
-{
-	uintptr *sp;
-	SigTab *t;
-	bool crash;
-
-	if(sig == SIGPROF) {
-		runtime·sigprof((byte*)SIG_EIP(info, ctxt), (byte*)SIG_ESP(info, ctxt), nil, gp, g->m);
-		return;
-	}
-
-	t = &runtime·sigtab[sig];
-	if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
-		// Make it look like a call to the signal func.
-		// Have to pass arguments out of band since
-		// augmenting the stack frame would break
-		// the unwinding code.
-		gp->sig = sig;
-		gp->sigcode0 = SIG_CODE0(info, ctxt);
-		gp->sigcode1 = SIG_CODE1(info, ctxt);
-		gp->sigpc = SIG_EIP(info, ctxt);
-
-#ifdef GOOS_darwin
-		// Work around Leopard bug that doesn't set FPE_INTDIV.
-		// Look at instruction to see if it is a divide.
-		// Not necessary in Snow Leopard (si_code will be != 0).
-		if(sig == SIGFPE && gp->sigcode0 == 0) {
-			byte *pc;
-			pc = (byte*)gp->sigpc;
-			if(pc[0] == 0x66)	// 16-bit instruction prefix
-				pc++;
-			if(pc[0] == 0xF6 || pc[0] == 0xF7)
-				gp->sigcode0 = FPE_INTDIV;
-		}
-#endif
-
-		// Only push runtime·sigpanic if eip != 0.
-		// If eip == 0, probably panicked because of a
-		// call to a nil func.  Not pushing that onto sp will
-		// make the trace look like a call to runtime·sigpanic instead.
-		// (Otherwise the trace will end at runtime·sigpanic and we
-		// won't get to see who faulted.)
-		if(SIG_EIP(info, ctxt) != 0) {
-			sp = (uintptr*)SIG_ESP(info, ctxt);
-			*--sp = SIG_EIP(info, ctxt);
-			SIG_ESP(info, ctxt) = (uintptr)sp;
-		}
-		SIG_EIP(info, ctxt) = (uintptr)runtime·sigpanic;
-		return;
-	}
-
-	if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
-		if(runtime·sigsend(sig))
-			return;
-	if(t->flags & SigKill)
-		runtime·exit(2);
-	if(!(t->flags & SigThrow))
-		return;
-
-	g->m->throwing = 1;
-	g->m->caughtsig = gp;
-	runtime·startpanic();
-
-	if(sig < 0 || sig >= NSIG)
-		runtime·printf("Signal %d\n", sig);
-	else
-		runtime·printf("%s\n", runtime·sigtab[sig].name);
-
-	runtime·printf("PC=%x\n", SIG_EIP(info, ctxt));
-	if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
-		runtime·printf("signal arrived during cgo execution\n");
-		gp = g->m->lockedg;
-	}
-	runtime·printf("\n");
-
-	if(runtime·gotraceback(&crash)){
-		runtime·goroutineheader(gp);
-		runtime·tracebacktrap(SIG_EIP(info, ctxt), SIG_ESP(info, ctxt), 0, gp);
-		runtime·tracebackothers(gp);
-		runtime·printf("\n");
-		runtime·dumpregs(info, ctxt);
-	}
-	
-	if(crash)
-		runtime·crash();
-
-	runtime·exit(2);
-}
diff --git a/src/runtime/signal_386.go b/src/runtime/signal_386.go
new file mode 100644
index 0000000..5336a43
--- /dev/null
+++ b/src/runtime/signal_386.go
@@ -0,0 +1,131 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd
+
+package runtime
+
+import "unsafe"
+
+func dumpregs(c *sigctxt) {
+	print("eax    ", hex(c.eax()), "\n")
+	print("ebx    ", hex(c.ebx()), "\n")
+	print("ecx    ", hex(c.ecx()), "\n")
+	print("edx    ", hex(c.edx()), "\n")
+	print("edi    ", hex(c.edi()), "\n")
+	print("esi    ", hex(c.esi()), "\n")
+	print("ebp    ", hex(c.ebp()), "\n")
+	print("esp    ", hex(c.esp()), "\n")
+	print("eip    ", hex(c.eip()), "\n")
+	print("eflags ", hex(c.eflags()), "\n")
+	print("cs     ", hex(c.cs()), "\n")
+	print("fs     ", hex(c.fs()), "\n")
+	print("gs     ", hex(c.gs()), "\n")
+}
+
+func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
+	_g_ := getg()
+	c := &sigctxt{info, ctxt}
+
+	if sig == _SIGPROF {
+		sigprof((*byte)(unsafe.Pointer(uintptr(c.eip()))), (*byte)(unsafe.Pointer(uintptr(c.esp()))), nil, gp, _g_.m)
+		return
+	}
+
+	flags := int32(_SigThrow)
+	if sig < uint32(len(sigtable)) {
+		flags = sigtable[sig].flags
+	}
+	if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+		// Make it look like a call to the signal func.
+		// Have to pass arguments out of band since
+		// augmenting the stack frame would break
+		// the unwinding code.
+		gp.sig = sig
+		gp.sigcode0 = uintptr(c.sigcode())
+		gp.sigcode1 = uintptr(c.sigaddr())
+		gp.sigpc = uintptr(c.eip())
+
+		if GOOS == "darwin" {
+			// Work around Leopard bug that doesn't set FPE_INTDIV.
+			// Look at instruction to see if it is a divide.
+			// Not necessary in Snow Leopard (si_code will be != 0).
+			if sig == _SIGFPE && gp.sigcode0 == 0 {
+				pc := (*[4]byte)(unsafe.Pointer(gp.sigpc))
+				i := 0
+				if pc[i] == 0x66 { // 16-bit instruction prefix
+					i++
+				}
+				if pc[i] == 0xF6 || pc[i] == 0xF7 {
+					gp.sigcode0 = _FPE_INTDIV
+				}
+			}
+		}
+
+		// Only push runtime.sigpanic if eip != 0.
+		// If eip == 0, probably panicked because of a
+		// call to a nil func.  Not pushing that onto sp will
+		// make the trace look like a call to runtime.sigpanic instead.
+		// (Otherwise the trace will end at runtime.sigpanic and we
+		// won't get to see who faulted.)
+		if c.eip() != 0 {
+			sp := c.esp()
+			if regSize > ptrSize {
+				sp -= ptrSize
+				*(*uintptr)(unsafe.Pointer(uintptr(sp))) = 0
+			}
+			sp -= ptrSize
+			*(*uintptr)(unsafe.Pointer(uintptr(sp))) = uintptr(c.eip())
+			c.set_esp(sp)
+		}
+		c.set_eip(uint32(funcPC(sigpanic)))
+		return
+	}
+
+	if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+		if sigsend(sig) {
+			return
+		}
+	}
+
+	if flags&_SigKill != 0 {
+		exit(2)
+	}
+
+	if flags&_SigThrow == 0 {
+		return
+	}
+
+	_g_.m.throwing = 1
+	_g_.m.caughtsig = gp
+	startpanic()
+
+	if sig < uint32(len(sigtable)) {
+		print(sigtable[sig].name, "\n")
+	} else {
+		print("Signal ", sig, "\n")
+	}
+
+	print("PC=", hex(c.eip()), "\n")
+	if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+		print("signal arrived during cgo execution\n")
+		gp = _g_.m.lockedg
+	}
+	print("\n")
+
+	var docrash bool
+	if gotraceback(&docrash) > 0 {
+		goroutineheader(gp)
+		tracebacktrap(uintptr(c.eip()), uintptr(c.esp()), 0, gp)
+		tracebackothers(gp)
+		print("\n")
+		dumpregs(c)
+	}
+
+	if docrash {
+		crash()
+	}
+
+	exit(2)
+}
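Purely as an illustration of the "make it look like a call" trick in the handler above: the faulting PC is pushed where a return address would go and execution resumes at sigpanic, so the unwinder sees an ordinary call frame. The sketch below models the stack with a slice and uses made-up addresses; it does not touch real registers.

    package main

    import "fmt"

    func main() {
    	faultPC := uintptr(0x08049f00)    // hypothetical eip at the time of the fault
    	sigpanicPC := uintptr(0x08051000) // hypothetical address of sigpanic

    	var stack []uintptr
    	stack = append(stack, faultPC) // the equivalent of: sp -= ptrSize; *sp = eip
    	resumePC := sigpanicPC         // the equivalent of: c.set_eip(funcPC(sigpanic))

    	fmt.Printf("return address on stack %#x, resuming at %#x\n", stack[0], resumePC)
    }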
diff --git a/src/runtime/signal_amd64x.c b/src/runtime/signal_amd64x.c
deleted file mode 100644
index feb4afc..0000000
--- a/src/runtime/signal_amd64x.c
+++ /dev/null
@@ -1,156 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64 amd64p32
-// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_GOOS_GOARCH.h"
-#include "signals_GOOS.h"
-
-void
-runtime·dumpregs(Siginfo *info, void *ctxt)
-{
-	USED(info);
-	USED(ctxt);
-	
-	runtime·printf("rax     %X\n", SIG_RAX(info, ctxt));
-	runtime·printf("rbx     %X\n", SIG_RBX(info, ctxt));
-	runtime·printf("rcx     %X\n", SIG_RCX(info, ctxt));
-	runtime·printf("rdx     %X\n", SIG_RDX(info, ctxt));
-	runtime·printf("rdi     %X\n", SIG_RDI(info, ctxt));
-	runtime·printf("rsi     %X\n", SIG_RSI(info, ctxt));
-	runtime·printf("rbp     %X\n", SIG_RBP(info, ctxt));
-	runtime·printf("rsp     %X\n", SIG_RSP(info, ctxt));
-	runtime·printf("r8      %X\n", SIG_R8(info, ctxt) );
-	runtime·printf("r9      %X\n", SIG_R9(info, ctxt) );
-	runtime·printf("r10     %X\n", SIG_R10(info, ctxt));
-	runtime·printf("r11     %X\n", SIG_R11(info, ctxt));
-	runtime·printf("r12     %X\n", SIG_R12(info, ctxt));
-	runtime·printf("r13     %X\n", SIG_R13(info, ctxt));
-	runtime·printf("r14     %X\n", SIG_R14(info, ctxt));
-	runtime·printf("r15     %X\n", SIG_R15(info, ctxt));
-	runtime·printf("rip     %X\n", SIG_RIP(info, ctxt));
-	runtime·printf("rflags  %X\n", SIG_RFLAGS(info, ctxt));
-	runtime·printf("cs      %X\n", SIG_CS(info, ctxt));
-	runtime·printf("fs      %X\n", SIG_FS(info, ctxt));
-	runtime·printf("gs      %X\n", SIG_GS(info, ctxt));
-}
-
-void
-runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
-{
-	uintptr *sp;
-	SigTab *t;
-	bool crash;
-
-	if(sig == SIGPROF) {
-		runtime·sigprof((byte*)SIG_RIP(info, ctxt), (byte*)SIG_RSP(info, ctxt), nil, gp, g->m);
-		return;
-	}
-
-#ifdef GOOS_darwin
-	// x86-64 has 48-bit virtual addresses. The top 16 bits must echo bit 47.
-	// The hardware delivers a different kind of fault for a malformed address
-	// than it does for an attempt to access a valid but unmapped address.
-	// OS X 10.9.2 mishandles the malformed address case, making it look like
-	// a user-generated signal (like someone ran kill -SEGV ourpid).
-	// We pass user-generated signals to os/signal, or else ignore them.
-	// Doing that here - and returning to the faulting code - results in an
-	// infinite loop. It appears the best we can do is rewrite what the kernel
-	// delivers into something more like the truth. The address used below
-	// has very little chance of being the one that caused the fault, but it is
-	// malformed, it is clearly not a real pointer, and if it does get printed
-	// in real life, people will probably search for it and find this code.
-	// There are no Google hits for b01dfacedebac1e or 0xb01dfacedebac1e
-	// as I type this comment.
-	if(sig == SIGSEGV && SIG_CODE0(info, ctxt) == SI_USER) {
-		SIG_CODE0(info, ctxt) = SI_USER+1;
-		info->si_addr = (void*)(uintptr)0xb01dfacedebac1eULL;
-	}
-#endif
-
-	t = &runtime·sigtab[sig];
-	if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
-		// Make it look like a call to the signal func.
-		// Have to pass arguments out of band since
-		// augmenting the stack frame would break
-		// the unwinding code.
-		gp->sig = sig;
-		gp->sigcode0 = SIG_CODE0(info, ctxt);
-		gp->sigcode1 = SIG_CODE1(info, ctxt);
-		gp->sigpc = SIG_RIP(info, ctxt);
-
-#ifdef GOOS_darwin
-		// Work around Leopard bug that doesn't set FPE_INTDIV.
-		// Look at instruction to see if it is a divide.
-		// Not necessary in Snow Leopard (si_code will be != 0).
-		if(sig == SIGFPE && gp->sigcode0 == 0) {
-			byte *pc;
-			pc = (byte*)gp->sigpc;
-			if((pc[0]&0xF0) == 0x40)	// 64-bit REX prefix
-				pc++;
-			else if(pc[0] == 0x66)	// 16-bit instruction prefix
-				pc++;
-			if(pc[0] == 0xF6 || pc[0] == 0xF7)
-				gp->sigcode0 = FPE_INTDIV;
-		}
-#endif
-
-		// Only push runtime·sigpanic if rip != 0.
-		// If rip == 0, probably panicked because of a
-		// call to a nil func.  Not pushing that onto sp will
-		// make the trace look like a call to runtime·sigpanic instead.
-		// (Otherwise the trace will end at runtime·sigpanic and we
-		// won't get to see who faulted.)
-		if(SIG_RIP(info, ctxt) != 0) {
-			sp = (uintptr*)SIG_RSP(info, ctxt);
-			if(sizeof(uintreg) > sizeof(uintptr))
-				*--sp = 0;
-			*--sp = SIG_RIP(info, ctxt);
-			SIG_RSP(info, ctxt) = (uintptr)sp;
-		}
-		SIG_RIP(info, ctxt) = (uintptr)runtime·sigpanic;
-		return;
-	}
-
-	if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
-		if(runtime·sigsend(sig))
-			return;
-	if(t->flags & SigKill)
-		runtime·exit(2);
-	if(!(t->flags & SigThrow))
-		return;
-
-	g->m->throwing = 1;
-	g->m->caughtsig = gp;
-	runtime·startpanic();
-
-	if(sig < 0 || sig >= NSIG)
-		runtime·printf("Signal %d\n", sig);
-	else
-		runtime·printf("%s\n", runtime·sigtab[sig].name);
-
-	runtime·printf("PC=%X\n", SIG_RIP(info, ctxt));
-	if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
-		runtime·printf("signal arrived during cgo execution\n");
-		gp = g->m->lockedg;
-	}
-	runtime·printf("\n");
-
-	if(runtime·gotraceback(&crash)){
-		runtime·goroutineheader(gp);
-		runtime·tracebacktrap(SIG_RIP(info, ctxt), SIG_RSP(info, ctxt), 0, gp);
-		runtime·tracebackothers(gp);
-		runtime·printf("\n");
-		runtime·dumpregs(info, ctxt);
-	}
-	
-	if(crash)
-		runtime·crash();
-
-	runtime·exit(2);
-}
diff --git a/src/runtime/signal_amd64x.go b/src/runtime/signal_amd64x.go
new file mode 100644
index 0000000..de88d93
--- /dev/null
+++ b/src/runtime/signal_amd64x.go
@@ -0,0 +1,163 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd solaris
+
+package runtime
+
+import "unsafe"
+
+func dumpregs(c *sigctxt) {
+	print("rax    ", hex(c.rax()), "\n")
+	print("rbx    ", hex(c.rbx()), "\n")
+	print("rcx    ", hex(c.rcx()), "\n")
+	print("rdx    ", hex(c.rdx()), "\n")
+	print("rdi    ", hex(c.rdi()), "\n")
+	print("rsi    ", hex(c.rsi()), "\n")
+	print("rbp    ", hex(c.rbp()), "\n")
+	print("rsp    ", hex(c.rsp()), "\n")
+	print("r8     ", hex(c.r8()), "\n")
+	print("r9     ", hex(c.r9()), "\n")
+	print("r10    ", hex(c.r10()), "\n")
+	print("r11    ", hex(c.r11()), "\n")
+	print("r12    ", hex(c.r12()), "\n")
+	print("r13    ", hex(c.r13()), "\n")
+	print("r14    ", hex(c.r14()), "\n")
+	print("r15    ", hex(c.r15()), "\n")
+	print("rip    ", hex(c.rip()), "\n")
+	print("rflags ", hex(c.rflags()), "\n")
+	print("cs     ", hex(c.cs()), "\n")
+	print("fs     ", hex(c.fs()), "\n")
+	print("gs     ", hex(c.gs()), "\n")
+}
+
+func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
+	_g_ := getg()
+	c := &sigctxt{info, ctxt}
+
+	if sig == _SIGPROF {
+		sigprof((*byte)(unsafe.Pointer(uintptr(c.rip()))), (*byte)(unsafe.Pointer(uintptr(c.rsp()))), nil, gp, _g_.m)
+		return
+	}
+
+	if GOOS == "darwin" {
+		// x86-64 has 48-bit virtual addresses. The top 16 bits must echo bit 47.
+		// The hardware delivers a different kind of fault for a malformed address
+		// than it does for an attempt to access a valid but unmapped address.
+		// OS X 10.9.2 mishandles the malformed address case, making it look like
+		// a user-generated signal (like someone ran kill -SEGV ourpid).
+		// We pass user-generated signals to os/signal, or else ignore them.
+		// Doing that here - and returning to the faulting code - results in an
+		// infinite loop. It appears the best we can do is rewrite what the kernel
+		// delivers into something more like the truth. The address used below
+		// has very little chance of being the one that caused the fault, but it is
+		// malformed, it is clearly not a real pointer, and if it does get printed
+		// in real life, people will probably search for it and find this code.
+		// There are no Google hits for b01dfacedebac1e or 0xb01dfacedebac1e
+		// as I type this comment.
+		if sig == _SIGSEGV && c.sigcode() == _SI_USER {
+			c.set_sigcode(_SI_USER + 1)
+			c.set_sigaddr(0xb01dfacedebac1e)
+		}
+	}
+
+	flags := int32(_SigThrow)
+	if sig < uint32(len(sigtable)) {
+		flags = sigtable[sig].flags
+	}
+	if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+		// Make it look like a call to the signal func.
+		// Have to pass arguments out of band since
+		// augmenting the stack frame would break
+		// the unwinding code.
+		gp.sig = sig
+		gp.sigcode0 = uintptr(c.sigcode())
+		gp.sigcode1 = uintptr(c.sigaddr())
+		gp.sigpc = uintptr(c.rip())
+
+		if GOOS == "darwin" {
+			// Work around Leopard bug that doesn't set FPE_INTDIV.
+			// Look at instruction to see if it is a divide.
+			// Not necessary in Snow Leopard (si_code will be != 0).
+			if sig == _SIGFPE && gp.sigcode0 == 0 {
+				pc := (*[4]byte)(unsafe.Pointer(gp.sigpc))
+				i := 0
+				if pc[i]&0xF0 == 0x40 { // 64-bit REX prefix
+					i++
+				} else if pc[i] == 0x66 { // 16-bit instruction prefix
+					i++
+				}
+				if pc[i] == 0xF6 || pc[i] == 0xF7 {
+					gp.sigcode0 = _FPE_INTDIV
+				}
+			}
+		}
+
+		// Only push runtime.sigpanic if rip != 0.
+		// If rip == 0, probably panicked because of a
+		// call to a nil func.  Not pushing that onto sp will
+		// make the trace look like a call to runtime.sigpanic instead.
+		// (Otherwise the trace will end at runtime.sigpanic and we
+		// won't get to see who faulted.)
+		if c.rip() != 0 {
+			sp := c.rsp()
+			if regSize > ptrSize {
+				sp -= ptrSize
+				*(*uintptr)(unsafe.Pointer(uintptr(sp))) = 0
+			}
+			sp -= ptrSize
+			*(*uintptr)(unsafe.Pointer(uintptr(sp))) = uintptr(c.rip())
+			c.set_rsp(sp)
+		}
+		c.set_rip(uint64(funcPC(sigpanic)))
+		return
+	}
+
+	if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+		if sigsend(sig) {
+			return
+		}
+	}
+
+	if flags&_SigKill != 0 {
+		exit(2)
+	}
+
+	if flags&_SigThrow == 0 {
+		return
+	}
+
+	_g_.m.throwing = 1
+	_g_.m.caughtsig = gp
+	startpanic()
+
+	if sig < uint32(len(sigtable)) {
+		print(sigtable[sig].name, "\n")
+	} else {
+		print("Signal ", sig, "\n")
+	}
+
+	print("PC=", hex(c.rip()), "\n")
+	if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+		print("signal arrived during cgo execution\n")
+		gp = _g_.m.lockedg
+	}
+	print("\n")
+
+	var docrash bool
+	if gotraceback(&docrash) > 0 {
+		goroutineheader(gp)
+		tracebacktrap(uintptr(c.rip()), uintptr(c.rsp()), 0, gp)
+		tracebackothers(gp)
+		print("\n")
+		dumpregs(c)
+	}
+
+	if docrash {
+		crash()
+	}
+
+	exit(2)
+}
diff --git a/src/runtime/signal_arm.c b/src/runtime/signal_arm.c
deleted file mode 100644
index afad5e7..0000000
--- a/src/runtime/signal_arm.c
+++ /dev/null
@@ -1,121 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin dragonfly freebsd linux nacl netbsd openbsd
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_GOOS_GOARCH.h"
-#include "signals_GOOS.h"
-
-void
-runtime·dumpregs(Siginfo *info, void *ctxt)
-{
-	USED(info);
-	USED(ctxt);
-
-	runtime·printf("trap    %x\n", SIG_TRAP(info, ctxt));
-	runtime·printf("error   %x\n", SIG_ERROR(info, ctxt));
-	runtime·printf("oldmask %x\n", SIG_OLDMASK(info, ctxt));
-	runtime·printf("r0      %x\n", SIG_R0(info, ctxt));
-	runtime·printf("r1      %x\n", SIG_R1(info, ctxt));
-	runtime·printf("r2      %x\n", SIG_R2(info, ctxt));
-	runtime·printf("r3      %x\n", SIG_R3(info, ctxt));
-	runtime·printf("r4      %x\n", SIG_R4(info, ctxt));
-	runtime·printf("r5      %x\n", SIG_R5(info, ctxt));
-	runtime·printf("r6      %x\n", SIG_R6(info, ctxt));
-	runtime·printf("r7      %x\n", SIG_R7(info, ctxt));
-	runtime·printf("r8      %x\n", SIG_R8(info, ctxt));
-	runtime·printf("r9      %x\n", SIG_R9(info, ctxt));
-	runtime·printf("r10     %x\n", SIG_R10(info, ctxt));
-	runtime·printf("fp      %x\n", SIG_FP(info, ctxt));
-	runtime·printf("ip      %x\n", SIG_IP(info, ctxt));
-	runtime·printf("sp      %x\n", SIG_SP(info, ctxt));
-	runtime·printf("lr      %x\n", SIG_LR(info, ctxt));
-	runtime·printf("pc      %x\n", SIG_PC(info, ctxt));
-	runtime·printf("cpsr    %x\n", SIG_CPSR(info, ctxt));
-	runtime·printf("fault   %x\n", SIG_FAULT(info, ctxt));
-}
-
-void
-runtime·sighandler(int32 sig, Siginfo *info, void *ctxt, G *gp)
-{
-	SigTab *t;
-	bool crash;
-
-	if(sig == SIGPROF) {
-		runtime·sigprof((uint8*)SIG_PC(info, ctxt), (uint8*)SIG_SP(info, ctxt), (uint8*)SIG_LR(info, ctxt), gp, g->m);
-		return;
-	}
-
-	t = &runtime·sigtab[sig];
-	if(SIG_CODE0(info, ctxt) != SI_USER && (t->flags & SigPanic)) {
-		// Make it look like a call to the signal func.
-		// Have to pass arguments out of band since
-		// augmenting the stack frame would break
-		// the unwinding code.
-		gp->sig = sig;
-		gp->sigcode0 = SIG_CODE0(info, ctxt);
-		gp->sigcode1 = SIG_FAULT(info, ctxt);
-		gp->sigpc = SIG_PC(info, ctxt);
-
-		// We arrange lr, and pc to pretend the panicking
-		// function calls sigpanic directly.
-		// Always save LR to stack so that panics in leaf
-		// functions are correctly handled. This smashes
-		// the stack frame but we're not going back there
-		// anyway.
-		SIG_SP(info, ctxt) -= 4;
-		*(uint32*)SIG_SP(info, ctxt) = SIG_LR(info, ctxt);
-		// Don't bother saving PC if it's zero, which is
-		// probably a call to a nil func: the old link register
-		// is more useful in the stack trace.
-		if(gp->sigpc != 0)
-			SIG_LR(info, ctxt) = gp->sigpc;
-		// In case we are panicking from external C code
-		SIG_R10(info, ctxt) = (uintptr)gp;
-		SIG_PC(info, ctxt) = (uintptr)runtime·sigpanic;
-		return;
-	}
-
-	if(SIG_CODE0(info, ctxt) == SI_USER || (t->flags & SigNotify))
-		if(runtime·sigsend(sig))
-			return;
-	if(t->flags & SigKill)
-		runtime·exit(2);
-	if(!(t->flags & SigThrow))
-		return;
-
-	g->m->throwing = 1;
-	g->m->caughtsig = gp;
-	if(runtime·panicking)	// traceback already printed
-		runtime·exit(2);
-	runtime·panicking = 1;
-
-	if(sig < 0 || sig >= NSIG)
-		runtime·printf("Signal %d\n", sig);
-	else
-		runtime·printf("%s\n", runtime·sigtab[sig].name);
-
-	runtime·printf("PC=%x\n", SIG_PC(info, ctxt));
-	if(g->m->lockedg != nil && g->m->ncgo > 0 && gp == g->m->g0) {
-		runtime·printf("signal arrived during cgo execution\n");
-		gp = g->m->lockedg;
-	}
-	runtime·printf("\n");
-
-	if(runtime·gotraceback(&crash)){
-		runtime·goroutineheader(gp);
-		runtime·tracebacktrap(SIG_PC(info, ctxt), SIG_SP(info, ctxt), SIG_LR(info, ctxt), gp);
-		runtime·tracebackothers(gp);
-		runtime·printf("\n");
-		runtime·dumpregs(info, ctxt);
-	}
-	
-	if(crash)
-		runtime·crash();
-
-	runtime·exit(2);
-}
diff --git a/src/runtime/signal_arm.go b/src/runtime/signal_arm.go
new file mode 100644
index 0000000..d224ce6
--- /dev/null
+++ b/src/runtime/signal_arm.go
@@ -0,0 +1,126 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build darwin dragonfly freebsd linux nacl netbsd openbsd
+
+package runtime
+
+import "unsafe"
+
+func dumpregs(c *sigctxt) {
+	print("trap    ", hex(c.trap()), "\n")
+	print("error   ", hex(c.error()), "\n")
+	print("oldmask ", hex(c.oldmask()), "\n")
+	print("r0      ", hex(c.r0()), "\n")
+	print("r1      ", hex(c.r1()), "\n")
+	print("r2      ", hex(c.r2()), "\n")
+	print("r3      ", hex(c.r3()), "\n")
+	print("r4      ", hex(c.r4()), "\n")
+	print("r5      ", hex(c.r5()), "\n")
+	print("r6      ", hex(c.r6()), "\n")
+	print("r7      ", hex(c.r7()), "\n")
+	print("r8      ", hex(c.r8()), "\n")
+	print("r9      ", hex(c.r9()), "\n")
+	print("r10     ", hex(c.r10()), "\n")
+	print("fp      ", hex(c.fp()), "\n")
+	print("ip      ", hex(c.ip()), "\n")
+	print("sp      ", hex(c.sp()), "\n")
+	print("lr      ", hex(c.lr()), "\n")
+	print("pc      ", hex(c.pc()), "\n")
+	print("cpsr    ", hex(c.cpsr()), "\n")
+	print("fault   ", hex(c.fault()), "\n")
+}
+
+func sighandler(sig uint32, info *siginfo, ctxt unsafe.Pointer, gp *g) {
+	_g_ := getg()
+	c := &sigctxt{info, ctxt}
+
+	if sig == _SIGPROF {
+		sigprof((*byte)(unsafe.Pointer(uintptr(c.pc()))), (*byte)(unsafe.Pointer(uintptr(c.sp()))), (*byte)(unsafe.Pointer(uintptr(c.lr()))), gp, _g_.m)
+		return
+	}
+
+	flags := int32(_SigThrow)
+	if sig < uint32(len(sigtable)) {
+		flags = sigtable[sig].flags
+	}
+	if c.sigcode() != _SI_USER && flags&_SigPanic != 0 {
+		// Make it look like a call to the signal func.
+		// Have to pass arguments out of band since
+		// augmenting the stack frame would break
+		// the unwinding code.
+		gp.sig = sig
+		gp.sigcode0 = uintptr(c.sigcode())
+		gp.sigcode1 = uintptr(c.fault())
+		gp.sigpc = uintptr(c.pc())
+
+		// We arrange lr, and pc to pretend the panicking
+		// function calls sigpanic directly.
+		// Always save LR to stack so that panics in leaf
+		// functions are correctly handled. This smashes
+		// the stack frame but we're not going back there
+		// anyway.
+		sp := c.sp() - 4
+		c.set_sp(sp)
+		*(*uint32)(unsafe.Pointer(uintptr(sp))) = c.lr()
+
+		// Don't bother saving PC if it's zero, which is
+		// probably a call to a nil func: the old link register
+		// is more useful in the stack trace.
+		if gp.sigpc != 0 {
+			c.set_lr(uint32(gp.sigpc))
+		}
+
+		// In case we are panicking from external C code
+		c.set_r10(uint32(uintptr(unsafe.Pointer(gp))))
+		c.set_pc(uint32(funcPC(sigpanic)))
+		return
+	}
+
+	if c.sigcode() == _SI_USER || flags&_SigNotify != 0 {
+		if sigsend(sig) {
+			return
+		}
+	}
+
+	if flags&_SigKill != 0 {
+		exit(2)
+	}
+
+	if flags&_SigThrow == 0 {
+		return
+	}
+
+	_g_.m.throwing = 1
+	_g_.m.caughtsig = gp
+	startpanic()
+
+	if sig < uint32(len(sigtable)) {
+		print(sigtable[sig].name, "\n")
+	} else {
+		print("Signal ", sig, "\n")
+	}
+
+	print("PC=", hex(c.pc()), "\n")
+	if _g_.m.lockedg != nil && _g_.m.ncgo > 0 && gp == _g_.m.g0 {
+		print("signal arrived during cgo execution\n")
+		gp = _g_.m.lockedg
+	}
+	print("\n")
+
+	var docrash bool
+	if gotraceback(&docrash) > 0 {
+		goroutineheader(gp)
+		tracebacktrap(uintptr(c.pc()), uintptr(c.sp()), uintptr(c.lr()), gp)
+		tracebackothers(gp)
+		print("\n")
+		dumpregs(c)
+	}
+
+	if docrash {
+		crash()
+	}
+
+	exit(2)
+}
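On ARM the same idea goes through the link register: LR is always spilled to the stack (so panics in leaf functions unwind correctly), the faulting PC becomes the new LR when it is nonzero, and PC is pointed at sigpanic. A toy model with made-up uint32 values standing in for the registers:

    package main

    import "fmt"

    func main() {
    	lr := uint32(0x00011000)         // hypothetical link register
    	pc := uint32(0x00022000)         // hypothetical faulting pc
    	sigpanicPC := uint32(0x00033000) // hypothetical address of sigpanic

    	var stack []uint32
    	stack = append(stack, lr) // sp -= 4; *sp = lr
    	if pc != 0 {
    		lr = pc // the faulting pc becomes the apparent caller
    	}
    	pc = sigpanicPC

    	fmt.Printf("stack[0]=%#x lr=%#x pc=%#x\n", stack[0], lr, pc)
    }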
diff --git a/src/runtime/signal_darwin.go b/src/runtime/signal_darwin.go
new file mode 100644
index 0000000..122648b
--- /dev/null
+++ b/src/runtime/signal_darwin.go
@@ -0,0 +1,45 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type sigTabT struct {
+	flags int32
+	name  string
+}
+
+var sigtable = [...]sigTabT{
+	/* 0 */ {0, "SIGNONE: no trap"},
+	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
+	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
+	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
+	/* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
+	/* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
+	/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
+	/* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/* 9 */ {0, "SIGKILL: kill"},
+	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
+	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
+	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
+	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
+	/* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+	/* 17 */ {0, "SIGSTOP: stop"},
+	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
+	/* 19 */ {0, "SIGCONT: continue after stop"},
+	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
+	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
+	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
+	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
+	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
+	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
+	/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
+}
diff --git a/src/runtime/signal_darwin_386.go b/src/runtime/signal_darwin_386.go
new file mode 100644
index 0000000..ccf30ef
--- /dev/null
+++ b/src/runtime/signal_darwin_386.go
@@ -0,0 +1,34 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
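+// sigctxt bundles the siginfo and ucontext handed to the signal handler so
+// the registers of the interrupted thread can be read and written through
+// typed accessors.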
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *regs32   { return &(*ucontext)(c.ctxt).uc_mcontext.ss }
+func (c *sigctxt) eax() uint32     { return c.regs().eax }
+func (c *sigctxt) ebx() uint32     { return c.regs().ebx }
+func (c *sigctxt) ecx() uint32     { return c.regs().ecx }
+func (c *sigctxt) edx() uint32     { return c.regs().edx }
+func (c *sigctxt) edi() uint32     { return c.regs().edi }
+func (c *sigctxt) esi() uint32     { return c.regs().esi }
+func (c *sigctxt) ebp() uint32     { return c.regs().ebp }
+func (c *sigctxt) esp() uint32     { return c.regs().esp }
+func (c *sigctxt) eip() uint32     { return c.regs().eip }
+func (c *sigctxt) eflags() uint32  { return c.regs().eflags }
+func (c *sigctxt) cs() uint32      { return c.regs().cs }
+func (c *sigctxt) fs() uint32      { return c.regs().fs }
+func (c *sigctxt) gs() uint32      { return c.regs().gs }
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint32 { return uint32(uintptr(unsafe.Pointer(c.info.si_addr))) }
+
+func (c *sigctxt) set_eip(x uint32)     { c.regs().eip = x }
+func (c *sigctxt) set_esp(x uint32)     { c.regs().esp = x }
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) { c.info.si_addr = (*byte)(unsafe.Pointer(uintptr(x))) }
diff --git a/src/runtime/signal_darwin_386.h b/src/runtime/signal_darwin_386.h
deleted file mode 100644
index 5459e10..0000000
--- a/src/runtime/signal_darwin_386.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext->ss)
-
-#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).eax)
-#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).ebx)
-#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).ecx)
-#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).edx)
-#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).edi)
-#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).esi)
-#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).ebp)
-#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).esp)
-#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).eip)
-#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).eflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_darwin_amd64.go b/src/runtime/signal_darwin_amd64.go
new file mode 100644
index 0000000..409bc6d
--- /dev/null
+++ b/src/runtime/signal_darwin_amd64.go
@@ -0,0 +1,42 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *regs64   { return &(*ucontext)(c.ctxt).uc_mcontext.ss }
+func (c *sigctxt) rax() uint64     { return c.regs().rax }
+func (c *sigctxt) rbx() uint64     { return c.regs().rbx }
+func (c *sigctxt) rcx() uint64     { return c.regs().rcx }
+func (c *sigctxt) rdx() uint64     { return c.regs().rdx }
+func (c *sigctxt) rdi() uint64     { return c.regs().rdi }
+func (c *sigctxt) rsi() uint64     { return c.regs().rsi }
+func (c *sigctxt) rbp() uint64     { return c.regs().rbp }
+func (c *sigctxt) rsp() uint64     { return c.regs().rsp }
+func (c *sigctxt) r8() uint64      { return c.regs().r8 }
+func (c *sigctxt) r9() uint64      { return c.regs().r9 }
+func (c *sigctxt) r10() uint64     { return c.regs().r10 }
+func (c *sigctxt) r11() uint64     { return c.regs().r11 }
+func (c *sigctxt) r12() uint64     { return c.regs().r12 }
+func (c *sigctxt) r13() uint64     { return c.regs().r13 }
+func (c *sigctxt) r14() uint64     { return c.regs().r14 }
+func (c *sigctxt) r15() uint64     { return c.regs().r15 }
+func (c *sigctxt) rip() uint64     { return c.regs().rip }
+func (c *sigctxt) rflags() uint64  { return c.regs().rflags }
+func (c *sigctxt) cs() uint64      { return c.regs().cs }
+func (c *sigctxt) fs() uint64      { return c.regs().fs }
+func (c *sigctxt) gs() uint64      { return c.regs().gs }
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return uint64(uintptr(unsafe.Pointer(c.info.si_addr))) }
+
+func (c *sigctxt) set_rip(x uint64)     { c.regs().rip = x }
+func (c *sigctxt) set_rsp(x uint64)     { c.regs().rsp = x }
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) { c.info.si_addr = (*byte)(unsafe.Pointer(uintptr(x))) }
diff --git a/src/runtime/signal_darwin_amd64.h b/src/runtime/signal_darwin_amd64.h
deleted file mode 100644
index e3da6de..0000000
--- a/src/runtime/signal_darwin_amd64.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext->ss)
-
-#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).rax)
-#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).rbx)
-#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).rcx)
-#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).rdx)
-#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).rdi)
-#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).rsi)
-#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).rbp)
-#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).rsp)
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).r8)
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).r9)
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).r10)
-#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).r11)
-#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).r12)
-#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).r13)
-#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).r14)
-#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).r15)
-#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).rip)
-#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).rflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_dragonfly.go b/src/runtime/signal_dragonfly.go
new file mode 100644
index 0000000..d37e11a
--- /dev/null
+++ b/src/runtime/signal_dragonfly.go
@@ -0,0 +1,46 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type sigTabT struct {
+	flags int32
+	name  string
+}
+
+var sigtable = [...]sigTabT{
+	/* 0 */ {0, "SIGNONE: no trap"},
+	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
+	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
+	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
+	/* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
+	/* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
+	/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
+	/* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/* 9 */ {0, "SIGKILL: kill"},
+	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
+	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
+	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
+	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
+	/* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+	/* 17 */ {0, "SIGSTOP: stop"},
+	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
+	/* 19 */ {0, "SIGCONT: continue after stop"},
+	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
+	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
+	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
+	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
+	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
+	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
+	/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
+	/* 32 */ {_SigNotify, "SIGTHR: reserved"},
+}
diff --git a/src/runtime/signal_dragonfly_amd64.go b/src/runtime/signal_dragonfly_amd64.go
new file mode 100644
index 0000000..740959c
--- /dev/null
+++ b/src/runtime/signal_dragonfly_amd64.go
@@ -0,0 +1,44 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *mcontext {
+	return (*mcontext)(unsafe.Pointer(&(*ucontext)(c.ctxt).uc_mcontext))
+}
+func (c *sigctxt) rax() uint64     { return c.regs().mc_rax }
+func (c *sigctxt) rbx() uint64     { return c.regs().mc_rbx }
+func (c *sigctxt) rcx() uint64     { return c.regs().mc_rcx }
+func (c *sigctxt) rdx() uint64     { return c.regs().mc_rdx }
+func (c *sigctxt) rdi() uint64     { return c.regs().mc_rdi }
+func (c *sigctxt) rsi() uint64     { return c.regs().mc_rsi }
+func (c *sigctxt) rbp() uint64     { return c.regs().mc_rbp }
+func (c *sigctxt) rsp() uint64     { return c.regs().mc_rsp }
+func (c *sigctxt) r8() uint64      { return c.regs().mc_r8 }
+func (c *sigctxt) r9() uint64      { return c.regs().mc_r9 }
+func (c *sigctxt) r10() uint64     { return c.regs().mc_r10 }
+func (c *sigctxt) r11() uint64     { return c.regs().mc_r11 }
+func (c *sigctxt) r12() uint64     { return c.regs().mc_r12 }
+func (c *sigctxt) r13() uint64     { return c.regs().mc_r13 }
+func (c *sigctxt) r14() uint64     { return c.regs().mc_r14 }
+func (c *sigctxt) r15() uint64     { return c.regs().mc_r15 }
+func (c *sigctxt) rip() uint64     { return c.regs().mc_rip }
+func (c *sigctxt) rflags() uint64  { return c.regs().mc_rflags }
+func (c *sigctxt) cs() uint64      { return uint64(c.regs().mc_cs) }
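+// As in the replaced signal_dragonfly_amd64.h, fs and gs report mc_ss.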
+func (c *sigctxt) fs() uint64      { return uint64(c.regs().mc_ss) }
+func (c *sigctxt) gs() uint64      { return uint64(c.regs().mc_ss) }
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return uint64(c.info.si_addr) }
+
+func (c *sigctxt) set_rip(x uint64)     { c.regs().mc_rip = x }
+func (c *sigctxt) set_rsp(x uint64)     { c.regs().mc_rsp = x }
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) { c.info.si_addr = x }
diff --git a/src/runtime/signal_dragonfly_amd64.h b/src/runtime/signal_dragonfly_amd64.h
deleted file mode 100644
index 5b4f977..0000000
--- a/src/runtime/signal_dragonfly_amd64.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
-
-#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).mc_rax)
-#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).mc_rbx)
-#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).mc_rcx)
-#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).mc_rdx)
-#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).mc_rdi)
-#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).mc_rsi)
-#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).mc_rbp)
-#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).mc_rsp)
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).mc_r8)
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).mc_r9)
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).mc_r10)
-#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).mc_r11)
-#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).mc_r12)
-#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).mc_r13)
-#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).mc_r14)
-#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).mc_r15)
-#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).mc_rip)
-#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_rflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_ss)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_ss)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_freebsd.go b/src/runtime/signal_freebsd.go
new file mode 100644
index 0000000..1dbdb1b
--- /dev/null
+++ b/src/runtime/signal_freebsd.go
@@ -0,0 +1,46 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type sigTabT struct {
+	flags int32
+	name  string
+}
+
+var sigtable = [...]sigTabT{
+	/* 0 */ {0, "SIGNONE: no trap"},
+	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
+	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
+	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
+	/* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
+	/* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
+	/* 7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
+	/* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/* 9 */ {0, "SIGKILL: kill"},
+	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 12 */ {_SigNotify, "SIGSYS: bad system call"},
+	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
+	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
+	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
+	/* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+	/* 17 */ {0, "SIGSTOP: stop"},
+	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
+	/* 19 */ {0, "SIGCONT: continue after stop"},
+	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
+	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
+	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
+	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
+	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
+	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
+	/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
+	/* 32 */ {_SigNotify, "SIGTHR: reserved"},
+}
diff --git a/src/runtime/signal_freebsd_386.go b/src/runtime/signal_freebsd_386.go
new file mode 100644
index 0000000..a0fec13
--- /dev/null
+++ b/src/runtime/signal_freebsd_386.go
@@ -0,0 +1,34 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *mcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+func (c *sigctxt) eax() uint32     { return c.regs().mc_eax }
+func (c *sigctxt) ebx() uint32     { return c.regs().mc_ebx }
+func (c *sigctxt) ecx() uint32     { return c.regs().mc_ecx }
+func (c *sigctxt) edx() uint32     { return c.regs().mc_edx }
+func (c *sigctxt) edi() uint32     { return c.regs().mc_edi }
+func (c *sigctxt) esi() uint32     { return c.regs().mc_esi }
+func (c *sigctxt) ebp() uint32     { return c.regs().mc_ebp }
+func (c *sigctxt) esp() uint32     { return c.regs().mc_esp }
+func (c *sigctxt) eip() uint32     { return c.regs().mc_eip }
+func (c *sigctxt) eflags() uint32  { return c.regs().mc_eflags }
+func (c *sigctxt) cs() uint32      { return uint32(c.regs().mc_cs) }
+func (c *sigctxt) fs() uint32      { return uint32(c.regs().mc_fs) }
+func (c *sigctxt) gs() uint32      { return uint32(c.regs().mc_gs) }
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint32 { return uint32(c.info.si_addr) }
+
+func (c *sigctxt) set_eip(x uint32)     { c.regs().mc_eip = x }
+func (c *sigctxt) set_esp(x uint32)     { c.regs().mc_esp = x }
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) { c.info.si_addr = uintptr(x) }
diff --git a/src/runtime/signal_freebsd_386.h b/src/runtime/signal_freebsd_386.h
deleted file mode 100644
index a24f1ee..0000000
--- a/src/runtime/signal_freebsd_386.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
-
-#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).mc_eax)
-#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).mc_ebx)
-#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).mc_ecx)
-#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).mc_edx)
-#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).mc_edi)
-#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).mc_esi)
-#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).mc_ebp)
-#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).mc_esp)
-#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).mc_eip)
-#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_eflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_freebsd_amd64.go b/src/runtime/signal_freebsd_amd64.go
new file mode 100644
index 0000000..d10c883
--- /dev/null
+++ b/src/runtime/signal_freebsd_amd64.go
@@ -0,0 +1,44 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *mcontext {
+	return (*mcontext)(unsafe.Pointer(&(*ucontext)(c.ctxt).uc_mcontext))
+}
+func (c *sigctxt) rax() uint64     { return c.regs().mc_rax }
+func (c *sigctxt) rbx() uint64     { return c.regs().mc_rbx }
+func (c *sigctxt) rcx() uint64     { return c.regs().mc_rcx }
+func (c *sigctxt) rdx() uint64     { return c.regs().mc_rdx }
+func (c *sigctxt) rdi() uint64     { return c.regs().mc_rdi }
+func (c *sigctxt) rsi() uint64     { return c.regs().mc_rsi }
+func (c *sigctxt) rbp() uint64     { return c.regs().mc_rbp }
+func (c *sigctxt) rsp() uint64     { return c.regs().mc_rsp }
+func (c *sigctxt) r8() uint64      { return c.regs().mc_r8 }
+func (c *sigctxt) r9() uint64      { return c.regs().mc_r9 }
+func (c *sigctxt) r10() uint64     { return c.regs().mc_r10 }
+func (c *sigctxt) r11() uint64     { return c.regs().mc_r11 }
+func (c *sigctxt) r12() uint64     { return c.regs().mc_r12 }
+func (c *sigctxt) r13() uint64     { return c.regs().mc_r13 }
+func (c *sigctxt) r14() uint64     { return c.regs().mc_r14 }
+func (c *sigctxt) r15() uint64     { return c.regs().mc_r15 }
+func (c *sigctxt) rip() uint64     { return c.regs().mc_rip }
+func (c *sigctxt) rflags() uint64  { return c.regs().mc_rflags }
+func (c *sigctxt) cs() uint64      { return uint64(c.regs().mc_cs) }
+func (c *sigctxt) fs() uint64      { return uint64(c.regs().mc_fs) }
+func (c *sigctxt) gs() uint64      { return uint64(c.regs().mc_gs) }
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return uint64(c.info.si_addr) }
+
+func (c *sigctxt) set_rip(x uint64)     { c.regs().mc_rip = x }
+func (c *sigctxt) set_rsp(x uint64)     { c.regs().mc_rsp = x }
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) { c.info.si_addr = x }
diff --git a/src/runtime/signal_freebsd_amd64.h b/src/runtime/signal_freebsd_amd64.h
deleted file mode 100644
index 7d35b7f..0000000
--- a/src/runtime/signal_freebsd_amd64.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
-
-#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).mc_rax)
-#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).mc_rbx)
-#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).mc_rcx)
-#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).mc_rdx)
-#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).mc_rdi)
-#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).mc_rsi)
-#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).mc_rbp)
-#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).mc_rsp)
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).mc_r8)
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).mc_r9)
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).mc_r10)
-#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).mc_r11)
-#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).mc_r12)
-#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).mc_r13)
-#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).mc_r14)
-#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).mc_r15)
-#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).mc_rip)
-#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).mc_rflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).mc_cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).mc_fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).mc_gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) ((uintptr)(info)->si_addr)
diff --git a/src/runtime/signal_freebsd_arm.go b/src/runtime/signal_freebsd_arm.go
new file mode 100644
index 0000000..12de23d
--- /dev/null
+++ b/src/runtime/signal_freebsd_arm.go
@@ -0,0 +1,48 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *mcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+func (c *sigctxt) r0() uint32      { return c.regs().__gregs[0] }
+func (c *sigctxt) r1() uint32      { return c.regs().__gregs[1] }
+func (c *sigctxt) r2() uint32      { return c.regs().__gregs[2] }
+func (c *sigctxt) r3() uint32      { return c.regs().__gregs[3] }
+func (c *sigctxt) r4() uint32      { return c.regs().__gregs[4] }
+func (c *sigctxt) r5() uint32      { return c.regs().__gregs[5] }
+func (c *sigctxt) r6() uint32      { return c.regs().__gregs[6] }
+func (c *sigctxt) r7() uint32      { return c.regs().__gregs[7] }
+func (c *sigctxt) r8() uint32      { return c.regs().__gregs[8] }
+func (c *sigctxt) r9() uint32      { return c.regs().__gregs[9] }
+func (c *sigctxt) r10() uint32     { return c.regs().__gregs[10] }
+func (c *sigctxt) fp() uint32      { return c.regs().__gregs[11] }
+func (c *sigctxt) ip() uint32      { return c.regs().__gregs[12] }
+func (c *sigctxt) sp() uint32      { return c.regs().__gregs[13] }
+func (c *sigctxt) lr() uint32      { return c.regs().__gregs[14] }
+func (c *sigctxt) pc() uint32      { return c.regs().__gregs[15] }
+func (c *sigctxt) cpsr() uint32    { return c.regs().__gregs[16] }
+func (c *sigctxt) fault() uint32   { return uint32(c.info.si_addr) }
+func (c *sigctxt) trap() uint32    { return 0 }
+func (c *sigctxt) error() uint32   { return 0 }
+func (c *sigctxt) oldmask() uint32 { return 0 }
+
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint32 { return uint32(c.info.si_addr) }
+
+func (c *sigctxt) set_pc(x uint32)  { c.regs().__gregs[15] = x }
+func (c *sigctxt) set_sp(x uint32)  { c.regs().__gregs[13] = x }
+func (c *sigctxt) set_lr(x uint32)  { c.regs().__gregs[14] = x }
+func (c *sigctxt) set_r10(x uint32) { c.regs().__gregs[10] = x }
+
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) {
+	c.info.si_addr = uintptr(x)
+}
diff --git a/src/runtime/signal_freebsd_arm.h b/src/runtime/signal_freebsd_arm.h
deleted file mode 100644
index 87a45aa..0000000
--- a/src/runtime/signal_freebsd_arm.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
-
-#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).__gregs[0])
-#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).__gregs[1])
-#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).__gregs[2])
-#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).__gregs[3])
-#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).__gregs[4])
-#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).__gregs[5])
-#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).__gregs[6])
-#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).__gregs[7])
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).__gregs[8])
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).__gregs[9])
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).__gregs[10])
-#define SIG_FP(info, ctxt) (SIG_REGS(ctxt).__gregs[11])
-#define SIG_IP(info, ctxt) (SIG_REGS(ctxt).__gregs[12])
-#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).__gregs[13])
-#define SIG_LR(info, ctxt) (SIG_REGS(ctxt).__gregs[14])
-#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).__gregs[15])
-#define SIG_CPSR(info, ctxt) (SIG_REGS(ctxt).__gregs[16])
-#define SIG_FAULT(info, ctxt) ((uintptr)(info)->si_addr)
-#define SIG_TRAP(info, ctxt) (0)
-#define SIG_ERROR(info, ctxt) (0)
-#define SIG_OLDMASK(info, ctxt) (0)
-#define SIG_CODE0(info, ctxt) ((uintptr)(info)->si_code)
diff --git a/src/runtime/signal_linux.go b/src/runtime/signal_linux.go
new file mode 100644
index 0000000..1c3d687
--- /dev/null
+++ b/src/runtime/signal_linux.go
@@ -0,0 +1,78 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type sigTabT struct {
+	flags int32
+	name  string
+}
+
+var sigtable = [...]sigTabT{
+	/* 0 */ {0, "SIGNONE: no trap"},
+	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
+	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
+	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
+	/* 4 */ {_SigThrow, "SIGILL: illegal instruction"},
+	/* 5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
+	/* 7 */ {_SigPanic, "SIGBUS: bus error"},
+	/* 8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/* 9 */ {0, "SIGKILL: kill"},
+	/* 10 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
+	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 12 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
+	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
+	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
+	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
+	/* 16 */ {_SigThrow, "SIGSTKFLT: stack fault"},
+	/* 17 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 18 */ {0, "SIGCONT: continue"},
+	/* 19 */ {0, "SIGSTOP: stop, unblockable"},
+	/* 20 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
+	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
+	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+	/* 23 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
+	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
+	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
+	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
+	/* 29 */ {_SigNotify, "SIGIO: i/o now possible"},
+	/* 30 */ {_SigNotify, "SIGPWR: power failure restart"},
+	/* 31 */ {_SigNotify, "SIGSYS: bad system call"},
+	/* 32 */ {0, "signal 32"}, /* SIGCANCEL; see issue 6997 */
+	/* 33 */ {0, "signal 33"}, /* SIGSETXID; see issue 3871 */
+	/* 34 */ {_SigNotify, "signal 34"},
+	/* 35 */ {_SigNotify, "signal 35"},
+	/* 36 */ {_SigNotify, "signal 36"},
+	/* 37 */ {_SigNotify, "signal 37"},
+	/* 38 */ {_SigNotify, "signal 38"},
+	/* 39 */ {_SigNotify, "signal 39"},
+	/* 40 */ {_SigNotify, "signal 40"},
+	/* 41 */ {_SigNotify, "signal 41"},
+	/* 42 */ {_SigNotify, "signal 42"},
+	/* 43 */ {_SigNotify, "signal 43"},
+	/* 44 */ {_SigNotify, "signal 44"},
+	/* 45 */ {_SigNotify, "signal 45"},
+	/* 46 */ {_SigNotify, "signal 46"},
+	/* 47 */ {_SigNotify, "signal 47"},
+	/* 48 */ {_SigNotify, "signal 48"},
+	/* 49 */ {_SigNotify, "signal 49"},
+	/* 50 */ {_SigNotify, "signal 50"},
+	/* 51 */ {_SigNotify, "signal 51"},
+	/* 52 */ {_SigNotify, "signal 52"},
+	/* 53 */ {_SigNotify, "signal 53"},
+	/* 54 */ {_SigNotify, "signal 54"},
+	/* 55 */ {_SigNotify, "signal 55"},
+	/* 56 */ {_SigNotify, "signal 56"},
+	/* 57 */ {_SigNotify, "signal 57"},
+	/* 58 */ {_SigNotify, "signal 58"},
+	/* 59 */ {_SigNotify, "signal 59"},
+	/* 60 */ {_SigNotify, "signal 60"},
+	/* 61 */ {_SigNotify, "signal 61"},
+	/* 62 */ {_SigNotify, "signal 62"},
+	/* 63 */ {_SigNotify, "signal 63"},
+	/* 64 */ {_SigNotify, "signal 64"},
+}
diff --git a/src/runtime/signal_linux_386.go b/src/runtime/signal_linux_386.go
new file mode 100644
index 0000000..41eae80
--- /dev/null
+++ b/src/runtime/signal_linux_386.go
@@ -0,0 +1,36 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *sigcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+func (c *sigctxt) eax() uint32       { return c.regs().eax }
+func (c *sigctxt) ebx() uint32       { return c.regs().ebx }
+func (c *sigctxt) ecx() uint32       { return c.regs().ecx }
+func (c *sigctxt) edx() uint32       { return c.regs().edx }
+func (c *sigctxt) edi() uint32       { return c.regs().edi }
+func (c *sigctxt) esi() uint32       { return c.regs().esi }
+func (c *sigctxt) ebp() uint32       { return c.regs().ebp }
+func (c *sigctxt) esp() uint32       { return c.regs().esp }
+func (c *sigctxt) eip() uint32       { return c.regs().eip }
+func (c *sigctxt) eflags() uint32    { return c.regs().eflags }
+func (c *sigctxt) cs() uint32        { return uint32(c.regs().cs) }
+func (c *sigctxt) fs() uint32        { return uint32(c.regs().fs) }
+func (c *sigctxt) gs() uint32        { return uint32(c.regs().gs) }
+func (c *sigctxt) sigcode() uint32   { return uint32(c.info.si_code) }
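+// si_addr is read at a fixed offset into siginfo, mirroring the old
+// SIG_CODE1 macro: ((uintptr*)(info))[2].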
+func (c *sigctxt) sigaddr() uint32   { return uint32(*(*uintptr)(add(unsafe.Pointer(c.info), 2*ptrSize))) }
+
+func (c *sigctxt) set_eip(x uint32)     { c.regs().eip = x }
+func (c *sigctxt) set_esp(x uint32)     { c.regs().esp = x }
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) {
+	*(*uintptr)(add(unsafe.Pointer(c.info), 2*ptrSize)) = uintptr(x)
+}
diff --git a/src/runtime/signal_linux_386.h b/src/runtime/signal_linux_386.h
deleted file mode 100644
index f77f1c9..0000000
--- a/src/runtime/signal_linux_386.h
+++ /dev/null
@@ -1,24 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext))
-
-#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).eax)
-#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).ebx)
-#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).ecx)
-#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).edx)
-#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).edi)
-#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).esi)
-#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).ebp)
-#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).esp)
-#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).eip)
-#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).eflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) (((uintptr*)(info))[2])
-
diff --git a/src/runtime/signal_linux_amd64.go b/src/runtime/signal_linux_amd64.go
new file mode 100644
index 0000000..d94b191
--- /dev/null
+++ b/src/runtime/signal_linux_amd64.go
@@ -0,0 +1,46 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *sigcontext {
+	return (*sigcontext)(unsafe.Pointer(&(*ucontext)(c.ctxt).uc_mcontext))
+}
+func (c *sigctxt) rax() uint64     { return c.regs().rax }
+func (c *sigctxt) rbx() uint64     { return c.regs().rbx }
+func (c *sigctxt) rcx() uint64     { return c.regs().rcx }
+func (c *sigctxt) rdx() uint64     { return c.regs().rdx }
+func (c *sigctxt) rdi() uint64     { return c.regs().rdi }
+func (c *sigctxt) rsi() uint64     { return c.regs().rsi }
+func (c *sigctxt) rbp() uint64     { return c.regs().rbp }
+func (c *sigctxt) rsp() uint64     { return c.regs().rsp }
+func (c *sigctxt) r8() uint64      { return c.regs().r8 }
+func (c *sigctxt) r9() uint64      { return c.regs().r9 }
+func (c *sigctxt) r10() uint64     { return c.regs().r10 }
+func (c *sigctxt) r11() uint64     { return c.regs().r11 }
+func (c *sigctxt) r12() uint64     { return c.regs().r12 }
+func (c *sigctxt) r13() uint64     { return c.regs().r13 }
+func (c *sigctxt) r14() uint64     { return c.regs().r14 }
+func (c *sigctxt) r15() uint64     { return c.regs().r15 }
+func (c *sigctxt) rip() uint64     { return c.regs().rip }
+func (c *sigctxt) rflags() uint64  { return c.regs().eflags }
+func (c *sigctxt) cs() uint64      { return uint64(c.regs().cs) }
+func (c *sigctxt) fs() uint64      { return uint64(c.regs().fs) }
+func (c *sigctxt) gs() uint64      { return uint64(c.regs().gs) }
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 { return uint64(*(*uintptr)(add(unsafe.Pointer(c.info), 2*ptrSize))) }
+
+func (c *sigctxt) set_rip(x uint64)     { c.regs().rip = x }
+func (c *sigctxt) set_rsp(x uint64)     { c.regs().rsp = x }
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) {
+	*(*uintptr)(add(unsafe.Pointer(c.info), 2*ptrSize)) = uintptr(x)
+}
diff --git a/src/runtime/signal_linux_amd64.h b/src/runtime/signal_linux_amd64.h
deleted file mode 100644
index 5a9a3e5..0000000
--- a/src/runtime/signal_linux_amd64.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext))
-
-#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).rax)
-#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).rbx)
-#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).rcx)
-#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).rdx)
-#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).rdi)
-#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).rsi)
-#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).rbp)
-#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).rsp)
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).r8)
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).r9)
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).r10)
-#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).r11)
-#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).r12)
-#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).r13)
-#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).r14)
-#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).r15)
-#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).rip)
-#define SIG_RFLAGS(info, ctxt) ((uint64)SIG_REGS(ctxt).eflags)
-
-#define SIG_CS(info, ctxt) ((uint64)SIG_REGS(ctxt).cs)
-#define SIG_FS(info, ctxt) ((uint64)SIG_REGS(ctxt).fs)
-#define SIG_GS(info, ctxt) ((uint64)SIG_REGS(ctxt).gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) (((uintptr*)(info))[2])
-
diff --git a/src/runtime/signal_linux_arm.go b/src/runtime/signal_linux_arm.go
new file mode 100644
index 0000000..4a5670e
--- /dev/null
+++ b/src/runtime/signal_linux_arm.go
@@ -0,0 +1,48 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *sigcontext { return &(*ucontext)(c.ctxt).uc_mcontext }
+func (c *sigctxt) r0() uint32        { return c.regs().r0 }
+func (c *sigctxt) r1() uint32        { return c.regs().r1 }
+func (c *sigctxt) r2() uint32        { return c.regs().r2 }
+func (c *sigctxt) r3() uint32        { return c.regs().r3 }
+func (c *sigctxt) r4() uint32        { return c.regs().r4 }
+func (c *sigctxt) r5() uint32        { return c.regs().r5 }
+func (c *sigctxt) r6() uint32        { return c.regs().r6 }
+func (c *sigctxt) r7() uint32        { return c.regs().r7 }
+func (c *sigctxt) r8() uint32        { return c.regs().r8 }
+func (c *sigctxt) r9() uint32        { return c.regs().r9 }
+func (c *sigctxt) r10() uint32       { return c.regs().r10 }
+func (c *sigctxt) fp() uint32        { return c.regs().fp }
+func (c *sigctxt) ip() uint32        { return c.regs().ip }
+func (c *sigctxt) sp() uint32        { return c.regs().sp }
+func (c *sigctxt) lr() uint32        { return c.regs().lr }
+func (c *sigctxt) pc() uint32        { return c.regs().pc }
+func (c *sigctxt) cpsr() uint32      { return c.regs().cpsr }
+func (c *sigctxt) fault() uint32     { return c.regs().fault_address }
+func (c *sigctxt) trap() uint32      { return c.regs().trap_no }
+func (c *sigctxt) error() uint32     { return c.regs().error_code }
+func (c *sigctxt) oldmask() uint32   { return c.regs().oldmask }
+
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint32 { return uint32(*(*uintptr)(add(unsafe.Pointer(c.info), 2*ptrSize))) }
+
+func (c *sigctxt) set_pc(x uint32)  { c.regs().pc = x }
+func (c *sigctxt) set_sp(x uint32)  { c.regs().sp = x }
+func (c *sigctxt) set_lr(x uint32)  { c.regs().lr = x }
+func (c *sigctxt) set_r10(x uint32) { c.regs().r10 = x }
+
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) {
+	*(*uintptr)(add(unsafe.Pointer(c.info), 2*ptrSize)) = uintptr(x)
+}
diff --git a/src/runtime/signal_linux_arm.h b/src/runtime/signal_linux_arm.h
deleted file mode 100644
index a674c0d..0000000
--- a/src/runtime/signal_linux_arm.h
+++ /dev/null
@@ -1,28 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (*((Sigcontext*)&((Ucontext*)(ctxt))->uc_mcontext))
-
-#define SIG_R0(info, ctxt) (SIG_REGS(ctxt).arm_r0)
-#define SIG_R1(info, ctxt) (SIG_REGS(ctxt).arm_r1)
-#define SIG_R2(info, ctxt) (SIG_REGS(ctxt).arm_r2)
-#define SIG_R3(info, ctxt) (SIG_REGS(ctxt).arm_r3)
-#define SIG_R4(info, ctxt) (SIG_REGS(ctxt).arm_r4)
-#define SIG_R5(info, ctxt) (SIG_REGS(ctxt).arm_r5)
-#define SIG_R6(info, ctxt) (SIG_REGS(ctxt).arm_r6)
-#define SIG_R7(info, ctxt) (SIG_REGS(ctxt).arm_r7)
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).arm_r8)
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).arm_r9)
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).arm_r10)
-#define SIG_FP(info, ctxt) (SIG_REGS(ctxt).arm_fp)
-#define SIG_IP(info, ctxt) (SIG_REGS(ctxt).arm_ip)
-#define SIG_SP(info, ctxt) (SIG_REGS(ctxt).arm_sp)
-#define SIG_LR(info, ctxt) (SIG_REGS(ctxt).arm_lr)
-#define SIG_PC(info, ctxt) (SIG_REGS(ctxt).arm_pc)
-#define SIG_CPSR(info, ctxt) (SIG_REGS(ctxt).arm_cpsr)
-#define SIG_FAULT(info, ctxt) (SIG_REGS(ctxt).fault_address)
-#define SIG_TRAP(info, ctxt) (SIG_REGS(ctxt).trap_no)
-#define SIG_ERROR(info, ctxt) (SIG_REGS(ctxt).error_code)
-#define SIG_OLDMASK(info, ctxt) (SIG_REGS(ctxt).oldmask)
-#define SIG_CODE0(info, ctxt) ((uintptr)(info)->si_code)
diff --git a/src/runtime/signal_openbsd.go b/src/runtime/signal_openbsd.go
new file mode 100644
index 0000000..78afc59
--- /dev/null
+++ b/src/runtime/signal_openbsd.go
@@ -0,0 +1,46 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type sigTabT struct {
+	flags int32
+	name  string
+}
+
+var sigtable = [...]sigTabT{
+	/*  0 */ {0, "SIGNONE: no trap"},
+	/*  1 */ {_SigNotify + _SigKill, "SIGHUP: terminal line hangup"},
+	/*  2 */ {_SigNotify + _SigKill, "SIGINT: interrupt"},
+	/*  3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit"},
+	/*  4 */ {_SigThrow, "SIGILL: illegal instruction"},
+	/*  5 */ {_SigThrow, "SIGTRAP: trace trap"},
+	/*  6 */ {_SigNotify + _SigThrow, "SIGABRT: abort"},
+	/*  7 */ {_SigThrow, "SIGEMT: emulate instruction executed"},
+	/*  8 */ {_SigPanic, "SIGFPE: floating-point exception"},
+	/*  9 */ {0, "SIGKILL: kill"},
+	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 12 */ {_SigThrow, "SIGSYS: bad system call"},
+	/* 13 */ {_SigNotify, "SIGPIPE: write to broken pipe"},
+	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
+	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: termination"},
+	/* 16 */ {_SigNotify, "SIGURG: urgent condition on socket"},
+	/* 17 */ {0, "SIGSTOP: stop"},
+	/* 18 */ {_SigNotify + _SigDefault, "SIGTSTP: keyboard stop"},
+	/* 19 */ {0, "SIGCONT: continue after stop"},
+	/* 20 */ {_SigNotify, "SIGCHLD: child status has changed"},
+	/* 21 */ {_SigNotify + _SigDefault, "SIGTTIN: background read from tty"},
+	/* 22 */ {_SigNotify + _SigDefault, "SIGTTOU: background write to tty"},
+	/* 23 */ {_SigNotify, "SIGIO: i/o now possible"},
+	/* 24 */ {_SigNotify, "SIGXCPU: cpu limit exceeded"},
+	/* 25 */ {_SigNotify, "SIGXFSZ: file size limit exceeded"},
+	/* 26 */ {_SigNotify, "SIGVTALRM: virtual alarm clock"},
+	/* 27 */ {_SigNotify, "SIGPROF: profiling alarm clock"},
+	/* 28 */ {_SigNotify, "SIGWINCH: window size change"},
+	/* 29 */ {_SigNotify, "SIGINFO: status request from keyboard"},
+	/* 30 */ {_SigNotify, "SIGUSR1: user-defined signal 1"},
+	/* 31 */ {_SigNotify, "SIGUSR2: user-defined signal 2"},
+	/* 32 */ {_SigNotify, "SIGTHR: reserved"},
+}
diff --git a/src/runtime/signal_openbsd_386.go b/src/runtime/signal_openbsd_386.go
new file mode 100644
index 0000000..c582a449
--- /dev/null
+++ b/src/runtime/signal_openbsd_386.go
@@ -0,0 +1,41 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *sigcontext {
+	return (*sigcontext)(c.ctxt)
+}
+
+func (c *sigctxt) eax() uint32     { return c.regs().sc_eax }
+func (c *sigctxt) ebx() uint32     { return c.regs().sc_ebx }
+func (c *sigctxt) ecx() uint32     { return c.regs().sc_ecx }
+func (c *sigctxt) edx() uint32     { return c.regs().sc_edx }
+func (c *sigctxt) edi() uint32     { return c.regs().sc_edi }
+func (c *sigctxt) esi() uint32     { return c.regs().sc_esi }
+func (c *sigctxt) ebp() uint32     { return c.regs().sc_ebp }
+func (c *sigctxt) esp() uint32     { return c.regs().sc_esp }
+func (c *sigctxt) eip() uint32     { return c.regs().sc_eip }
+func (c *sigctxt) eflags() uint32  { return c.regs().sc_eflags }
+func (c *sigctxt) cs() uint32      { return c.regs().sc_cs }
+func (c *sigctxt) fs() uint32      { return c.regs().sc_fs }
+func (c *sigctxt) gs() uint32      { return c.regs().sc_gs }
+func (c *sigctxt) sigcode() uint32 { return uint32(c.info.si_code) }
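+// si_addr sits 12 bytes into siginfo here, as in the old SIG_CODE1 macro.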
+func (c *sigctxt) sigaddr() uint32 {
+	return *(*uint32)(add(unsafe.Pointer(c.info), 12))
+}
+
+func (c *sigctxt) set_eip(x uint32)     { c.regs().sc_eip = x }
+func (c *sigctxt) set_esp(x uint32)     { c.regs().sc_esp = x }
+func (c *sigctxt) set_sigcode(x uint32) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint32) {
+	*(*uint32)(add(unsafe.Pointer(c.info), 12)) = x
+}
diff --git a/src/runtime/signal_openbsd_386.h b/src/runtime/signal_openbsd_386.h
deleted file mode 100644
index 6742db8..0000000
--- a/src/runtime/signal_openbsd_386.h
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (*(Sigcontext*)(ctxt))
-
-#define SIG_EAX(info, ctxt) (SIG_REGS(ctxt).sc_eax)
-#define SIG_EBX(info, ctxt) (SIG_REGS(ctxt).sc_ebx)
-#define SIG_ECX(info, ctxt) (SIG_REGS(ctxt).sc_ecx)
-#define SIG_EDX(info, ctxt) (SIG_REGS(ctxt).sc_edx)
-#define SIG_EDI(info, ctxt) (SIG_REGS(ctxt).sc_edi)
-#define SIG_ESI(info, ctxt) (SIG_REGS(ctxt).sc_esi)
-#define SIG_EBP(info, ctxt) (SIG_REGS(ctxt).sc_ebp)
-#define SIG_ESP(info, ctxt) (SIG_REGS(ctxt).sc_esp)
-#define SIG_EIP(info, ctxt) (SIG_REGS(ctxt).sc_eip)
-#define SIG_EFLAGS(info, ctxt) (SIG_REGS(ctxt).sc_eflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).sc_cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).sc_fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).sc_gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) (*(uintptr*)((byte*)info + 12))
diff --git a/src/runtime/signal_openbsd_amd64.go b/src/runtime/signal_openbsd_amd64.go
new file mode 100644
index 0000000..4f0d19d
--- /dev/null
+++ b/src/runtime/signal_openbsd_amd64.go
@@ -0,0 +1,49 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *sigcontext {
+	return (*sigcontext)(c.ctxt)
+}
+
+func (c *sigctxt) rax() uint64     { return c.regs().sc_rax }
+func (c *sigctxt) rbx() uint64     { return c.regs().sc_rbx }
+func (c *sigctxt) rcx() uint64     { return c.regs().sc_rcx }
+func (c *sigctxt) rdx() uint64     { return c.regs().sc_rdx }
+func (c *sigctxt) rdi() uint64     { return c.regs().sc_rdi }
+func (c *sigctxt) rsi() uint64     { return c.regs().sc_rsi }
+func (c *sigctxt) rbp() uint64     { return c.regs().sc_rbp }
+func (c *sigctxt) rsp() uint64     { return c.regs().sc_rsp }
+func (c *sigctxt) r8() uint64      { return c.regs().sc_r8 }
+func (c *sigctxt) r9() uint64      { return c.regs().sc_r9 }
+func (c *sigctxt) r10() uint64     { return c.regs().sc_r10 }
+func (c *sigctxt) r11() uint64     { return c.regs().sc_r11 }
+func (c *sigctxt) r12() uint64     { return c.regs().sc_r12 }
+func (c *sigctxt) r13() uint64     { return c.regs().sc_r13 }
+func (c *sigctxt) r14() uint64     { return c.regs().sc_r14 }
+func (c *sigctxt) r15() uint64     { return c.regs().sc_r15 }
+func (c *sigctxt) rip() uint64     { return c.regs().sc_rip }
+func (c *sigctxt) rflags() uint64  { return c.regs().sc_rflags }
+func (c *sigctxt) cs() uint64      { return c.regs().sc_cs }
+func (c *sigctxt) fs() uint64      { return c.regs().sc_fs }
+func (c *sigctxt) gs() uint64      { return c.regs().sc_gs }
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
+func (c *sigctxt) sigaddr() uint64 {
+	return *(*uint64)(add(unsafe.Pointer(c.info), 16))
+}
+
+func (c *sigctxt) set_rip(x uint64)     { c.regs().sc_rip = x }
+func (c *sigctxt) set_rsp(x uint64)     { c.regs().sc_rsp = x }
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) {
+	*(*uint64)(add(unsafe.Pointer(c.info), 16)) = x
+}
diff --git a/src/runtime/signal_openbsd_amd64.h b/src/runtime/signal_openbsd_amd64.h
deleted file mode 100644
index b46a5df..0000000
--- a/src/runtime/signal_openbsd_amd64.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (*(Sigcontext*)(ctxt))
-
-#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).sc_rax)
-#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).sc_rbx)
-#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).sc_rcx)
-#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).sc_rdx)
-#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).sc_rdi)
-#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).sc_rsi)
-#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).sc_rbp)
-#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).sc_rsp)
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).sc_r8)
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).sc_r9)
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).sc_r10)
-#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).sc_r11)
-#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).sc_r12)
-#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).sc_r13)
-#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).sc_r14)
-#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).sc_r15)
-#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).sc_rip)
-#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).sc_rflags)
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).sc_cs)
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).sc_fs)
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).sc_gs)
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) (*(uintptr*)((byte*)(info) + 16))
diff --git a/src/runtime/signal_solaris.go b/src/runtime/signal_solaris.go
new file mode 100644
index 0000000..2986c5a
--- /dev/null
+++ b/src/runtime/signal_solaris.go
@@ -0,0 +1,88 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+type sigTabT struct {
+	flags int32
+	name  string
+}
+
+var sigtable = [...]sigTabT{
+	/* 0 */ {0, "SIGNONE: no trap"},
+	/* 1 */ {_SigNotify + _SigKill, "SIGHUP: hangup"},
+	/* 2 */ {_SigNotify + _SigKill, "SIGINT: interrupt (rubout)"},
+	/* 3 */ {_SigNotify + _SigThrow, "SIGQUIT: quit (ASCII FS)"},
+	/* 4 */ {_SigThrow, "SIGILL: illegal instruction (not reset when caught)"},
+	/* 5 */ {_SigThrow, "SIGTRAP: trace trap (not reset when caught)"},
+	/* 6 */ {_SigNotify + _SigThrow, "SIGABRT: used by abort, replace SIGIOT in the future"},
+	/* 7 */ {_SigThrow, "SIGEMT: EMT instruction"},
+	/* 8 */ {_SigPanic, "SIGFPE: floating point exception"},
+	/* 9 */ {0, "SIGKILL: kill (cannot be caught or ignored)"},
+	/* 10 */ {_SigPanic, "SIGBUS: bus error"},
+	/* 11 */ {_SigPanic, "SIGSEGV: segmentation violation"},
+	/* 12 */ {_SigThrow, "SIGSYS: bad argument to system call"},
+	/* 13 */ {_SigNotify, "SIGPIPE: write on a pipe with no one to read it"},
+	/* 14 */ {_SigNotify, "SIGALRM: alarm clock"},
+	/* 15 */ {_SigNotify + _SigKill, "SIGTERM: software termination signal from kill"},
+	/* 16 */ {_SigNotify, "SIGUSR1: user defined signal 1"},
+	/* 17 */ {_SigNotify, "SIGUSR2: user defined signal 2"},
+	/* 18 */ {_SigNotify, "SIGCHLD: child status change alias (POSIX)"},
+	/* 19 */ {_SigNotify, "SIGPWR: power-fail restart"},
+	/* 20 */ {_SigNotify, "SIGWINCH: window size change"},
+	/* 21 */ {_SigNotify, "SIGURG: urgent socket condition"},
+	/* 22 */ {_SigNotify, "SIGPOLL: pollable event occurred"},
+	/* 23 */ {_SigNotify + _SigDefault, "SIGSTOP: stop (cannot be caught or ignored)"},
+	/* 24 */ {0, "SIGTSTP: user stop requested from tty"},
+	/* 25 */ {0, "SIGCONT: stopped process has been continued"},
+	/* 26 */ {_SigNotify + _SigDefault, "SIGTTIN: background tty read attempted"},
+	/* 27 */ {_SigNotify + _SigDefault, "SIGTTOU: background tty write attempted"},
+	/* 28 */ {_SigNotify, "SIGVTALRM: virtual timer expired"},
+	/* 29 */ {_SigNotify, "SIGPROF: profiling timer expired"},
+	/* 30 */ {_SigNotify, "SIGXCPU: exceeded cpu limit"},
+	/* 31 */ {_SigNotify, "SIGXFSZ: exceeded file size limit"},
+	/* 32 */ {_SigNotify, "SIGWAITING: reserved signal no longer used by"},
+	/* 33 */ {_SigNotify, "SIGLWP: reserved signal no longer used by"},
+	/* 34 */ {_SigNotify, "SIGFREEZE: special signal used by CPR"},
+	/* 35 */ {_SigNotify, "SIGTHAW: special signal used by CPR"},
+	/* 36 */ {0, "SIGCANCEL: reserved signal for thread cancellation"},
+	/* 37 */ {_SigNotify, "SIGLOST: resource lost (eg, record-lock lost)"},
+	/* 38 */ {_SigNotify, "SIGXRES: resource control exceeded"},
+	/* 39 */ {_SigNotify, "SIGJVM1: reserved signal for Java Virtual Machine"},
+	/* 40 */ {_SigNotify, "SIGJVM2: reserved signal for Java Virtual Machine"},
+
+	/* TODO(aram): what should we do about these signals? _SigDefault or _SigNotify? is this set static? */
+	/* 41 */ {_SigNotify, "real time signal"},
+	/* 42 */ {_SigNotify, "real time signal"},
+	/* 43 */ {_SigNotify, "real time signal"},
+	/* 44 */ {_SigNotify, "real time signal"},
+	/* 45 */ {_SigNotify, "real time signal"},
+	/* 46 */ {_SigNotify, "real time signal"},
+	/* 47 */ {_SigNotify, "real time signal"},
+	/* 48 */ {_SigNotify, "real time signal"},
+	/* 49 */ {_SigNotify, "real time signal"},
+	/* 50 */ {_SigNotify, "real time signal"},
+	/* 51 */ {_SigNotify, "real time signal"},
+	/* 52 */ {_SigNotify, "real time signal"},
+	/* 53 */ {_SigNotify, "real time signal"},
+	/* 54 */ {_SigNotify, "real time signal"},
+	/* 55 */ {_SigNotify, "real time signal"},
+	/* 56 */ {_SigNotify, "real time signal"},
+	/* 57 */ {_SigNotify, "real time signal"},
+	/* 58 */ {_SigNotify, "real time signal"},
+	/* 59 */ {_SigNotify, "real time signal"},
+	/* 60 */ {_SigNotify, "real time signal"},
+	/* 61 */ {_SigNotify, "real time signal"},
+	/* 62 */ {_SigNotify, "real time signal"},
+	/* 63 */ {_SigNotify, "real time signal"},
+	/* 64 */ {_SigNotify, "real time signal"},
+	/* 65 */ {_SigNotify, "real time signal"},
+	/* 66 */ {_SigNotify, "real time signal"},
+	/* 67 */ {_SigNotify, "real time signal"},
+	/* 68 */ {_SigNotify, "real time signal"},
+	/* 69 */ {_SigNotify, "real time signal"},
+	/* 70 */ {_SigNotify, "real time signal"},
+	/* 71 */ {_SigNotify, "real time signal"},
+	/* 72 */ {_SigNotify, "real time signal"},
+}
diff --git a/src/runtime/signal_solaris_amd64.go b/src/runtime/signal_solaris_amd64.go
new file mode 100644
index 0000000..a577c8c
--- /dev/null
+++ b/src/runtime/signal_solaris_amd64.go
@@ -0,0 +1,46 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+type sigctxt struct {
+	info *siginfo
+	ctxt unsafe.Pointer
+}
+
+func (c *sigctxt) regs() *mcontext {
+	return (*mcontext)(unsafe.Pointer(&(*ucontext)(c.ctxt).uc_mcontext))
+}
+func (c *sigctxt) rax() uint64     { return uint64(c.regs().gregs[_REG_RAX]) }
+func (c *sigctxt) rbx() uint64     { return uint64(c.regs().gregs[_REG_RBX]) }
+func (c *sigctxt) rcx() uint64     { return uint64(c.regs().gregs[_REG_RCX]) }
+func (c *sigctxt) rdx() uint64     { return uint64(c.regs().gregs[_REG_RDX]) }
+func (c *sigctxt) rdi() uint64     { return uint64(c.regs().gregs[_REG_RDI]) }
+func (c *sigctxt) rsi() uint64     { return uint64(c.regs().gregs[_REG_RSI]) }
+func (c *sigctxt) rbp() uint64     { return uint64(c.regs().gregs[_REG_RBP]) }
+func (c *sigctxt) rsp() uint64     { return uint64(c.regs().gregs[_REG_RSP]) }
+func (c *sigctxt) r8() uint64      { return uint64(c.regs().gregs[_REG_R8]) }
+func (c *sigctxt) r9() uint64      { return uint64(c.regs().gregs[_REG_R9]) }
+func (c *sigctxt) r10() uint64     { return uint64(c.regs().gregs[_REG_R10]) }
+func (c *sigctxt) r11() uint64     { return uint64(c.regs().gregs[_REG_R11]) }
+func (c *sigctxt) r12() uint64     { return uint64(c.regs().gregs[_REG_R12]) }
+func (c *sigctxt) r13() uint64     { return uint64(c.regs().gregs[_REG_R13]) }
+func (c *sigctxt) r14() uint64     { return uint64(c.regs().gregs[_REG_R14]) }
+func (c *sigctxt) r15() uint64     { return uint64(c.regs().gregs[_REG_R15]) }
+func (c *sigctxt) rip() uint64     { return uint64(c.regs().gregs[_REG_RIP]) }
+func (c *sigctxt) rflags() uint64  { return uint64(c.regs().gregs[_REG_RFLAGS]) }
+func (c *sigctxt) cs() uint64      { return uint64(c.regs().gregs[_REG_CS]) }
+func (c *sigctxt) fs() uint64      { return uint64(c.regs().gregs[_REG_FS]) }
+func (c *sigctxt) gs() uint64      { return uint64(c.regs().gregs[_REG_GS]) }
+func (c *sigctxt) sigcode() uint64 { return uint64(c.info.si_code) }
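+// sigaddr reads the fault address stored at the start of the siginfo __data payload.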
+func (c *sigctxt) sigaddr() uint64 { return *(*uint64)(unsafe.Pointer(&c.info.__data[0])) }
+
+func (c *sigctxt) set_rip(x uint64)     { c.regs().gregs[_REG_RIP] = int64(x) }
+func (c *sigctxt) set_rsp(x uint64)     { c.regs().gregs[_REG_RSP] = int64(x) }
+func (c *sigctxt) set_sigcode(x uint64) { c.info.si_code = int32(x) }
+func (c *sigctxt) set_sigaddr(x uint64) {
+	*(*uintptr)(unsafe.Pointer(&c.info.__data[0])) = uintptr(x)
+}
diff --git a/src/runtime/signal_solaris_amd64.h b/src/runtime/signal_solaris_amd64.h
deleted file mode 100644
index c2e0a15..0000000
--- a/src/runtime/signal_solaris_amd64.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#define SIG_REGS(ctxt) (((Ucontext*)(ctxt))->uc_mcontext)
-
-#define SIG_RAX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RAX])
-#define SIG_RBX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RBX])
-#define SIG_RCX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RCX])
-#define SIG_RDX(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RDX])
-#define SIG_RDI(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RDI])
-#define SIG_RSI(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RSI])
-#define SIG_RBP(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RBP])
-#define SIG_RSP(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RSP])
-#define SIG_R8(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R8])
-#define SIG_R9(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R9])
-#define SIG_R10(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R10])
-#define SIG_R11(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R11])
-#define SIG_R12(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R12])
-#define SIG_R13(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R13])
-#define SIG_R14(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R14])
-#define SIG_R15(info, ctxt) (SIG_REGS(ctxt).gregs[REG_R15])
-#define SIG_RIP(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RIP])
-#define SIG_RFLAGS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_RFLAGS])
-
-#define SIG_CS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_CS])
-#define SIG_FS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_FS])
-#define SIG_GS(info, ctxt) (SIG_REGS(ctxt).gregs[REG_GS])
-
-#define SIG_CODE0(info, ctxt) ((info)->si_code)
-#define SIG_CODE1(info, ctxt) (*(uintptr*)&(info)->__data[0])
diff --git a/src/runtime/signal_unix.c b/src/runtime/signal_unix.c
deleted file mode 100644
index 0e33ece..0000000
--- a/src/runtime/signal_unix.c
+++ /dev/null
@@ -1,119 +0,0 @@
-// Copyright 2012 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build darwin dragonfly freebsd linux netbsd openbsd solaris
-
-#include "runtime.h"
-#include "defs_GOOS_GOARCH.h"
-#include "os_GOOS.h"
-#include "signal_unix.h"
-
-extern SigTab runtime·sigtab[];
-
-void
-runtime·initsig(void)
-{
-	int32 i;
-	SigTab *t;
-
-	// First call: basic setup.
-	for(i = 0; i<NSIG; i++) {
-		t = &runtime·sigtab[i];
-		if((t->flags == 0) || (t->flags & SigDefault))
-			continue;
-
-		// For some signals, we respect an inherited SIG_IGN handler
-		// rather than insist on installing our own default handler.
-		// Even these signals can be fetched using the os/signal package.
-		switch(i) {
-		case SIGHUP:
-		case SIGINT:
-			if(runtime·getsig(i) == SIG_IGN) {
-				t->flags = SigNotify | SigIgnored;
-				continue;
-			}
-		}
-
-		t->flags |= SigHandling;
-		runtime·setsig(i, runtime·sighandler, true);
-	}
-}
-
-void
-runtime·sigenable(uint32 sig)
-{
-	SigTab *t;
-
-	if(sig >= NSIG)
-		return;
-
-	t = &runtime·sigtab[sig];
-	if((t->flags & SigNotify) && !(t->flags & SigHandling)) {
-		t->flags |= SigHandling;
-		if(runtime·getsig(sig) == SIG_IGN)
-			t->flags |= SigIgnored;
-		runtime·setsig(sig, runtime·sighandler, true);
-	}
-}
-
-void
-runtime·sigdisable(uint32 sig)
-{
-	SigTab *t;
-
-	if(sig >= NSIG)
-		return;
-
-	t = &runtime·sigtab[sig];
-	if((t->flags & SigNotify) && (t->flags & SigHandling)) {
-		t->flags &= ~SigHandling;
-		if(t->flags & SigIgnored)
-			runtime·setsig(sig, SIG_IGN, true);
-		else
-			runtime·setsig(sig, SIG_DFL, true);
-	}
-}
-
-void
-runtime·resetcpuprofiler(int32 hz)
-{
-	Itimerval it;
-
-	runtime·memclr((byte*)&it, sizeof it);
-	if(hz == 0) {
-		runtime·setitimer(ITIMER_PROF, &it, nil);
-	} else {
-		it.it_interval.tv_sec = 0;
-		it.it_interval.tv_usec = 1000000 / hz;
-		it.it_value = it.it_interval;
-		runtime·setitimer(ITIMER_PROF, &it, nil);
-	}
-	g->m->profilehz = hz;
-}
-
-void
-runtime·sigpipe(void)
-{
-	runtime·setsig(SIGPIPE, SIG_DFL, false);
-	runtime·raise(SIGPIPE);
-}
-
-void
-runtime·crash(void)
-{
-#ifdef GOOS_darwin
-	// OS X core dumps are linear dumps of the mapped memory,
-	// from the first virtual byte to the last, with zeros in the gaps.
-	// Because of the way we arrange the address space on 64-bit systems,
-	// this means the OS X core file will be >128 GB and even on a zippy
-	// workstation can take OS X well over an hour to write (uninterruptible).
-	// Save users from making that mistake.
-	if(sizeof(void*) == 8)
-		return;
-#endif
-
-	runtime·unblocksignals();
-	runtime·setsig(SIGABRT, SIG_DFL, false);
-	runtime·raise(SIGABRT);
-}
diff --git a/src/runtime/signal_unix.go b/src/runtime/signal_unix.go
index ba77b6e..c457083 100644
--- a/src/runtime/signal_unix.go
+++ b/src/runtime/signal_unix.go
@@ -6,8 +6,6 @@
 
 package runtime
 
-func sigpipe()
-
 func os_sigpipe() {
-	onM(sigpipe)
+	systemstack(sigpipe)
 }
diff --git a/src/runtime/signals_darwin.h b/src/runtime/signals_darwin.h
deleted file mode 100644
index 8761e1b..0000000
--- a/src/runtime/signals_darwin.h
+++ /dev/null
@@ -1,53 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#define N SigNotify
-#define K SigKill
-#define T SigThrow
-#define P SigPanic
-#define D SigDefault
-
-#pragma dataflag NOPTR
-SigTab runtime·sigtab[] = {
-	/* 0 */	0, "SIGNONE: no trap",
-	/* 1 */	N+K, "SIGHUP: terminal line hangup",
-	/* 2 */	N+K, "SIGINT: interrupt",
-	/* 3 */	N+T, "SIGQUIT: quit",
-	/* 4 */	T, "SIGILL: illegal instruction",
-	/* 5 */	T, "SIGTRAP: trace trap",
-	/* 6 */	N+T, "SIGABRT: abort",
-	/* 7 */	T, "SIGEMT: emulate instruction executed",
-	/* 8 */	P, "SIGFPE: floating-point exception",
-	/* 9 */	0, "SIGKILL: kill",
-	/* 10 */	P, "SIGBUS: bus error",
-	/* 11 */	P, "SIGSEGV: segmentation violation",
-	/* 12 */	T, "SIGSYS: bad system call",
-	/* 13 */	N, "SIGPIPE: write to broken pipe",
-	/* 14 */	N, "SIGALRM: alarm clock",
-	/* 15 */	N+K, "SIGTERM: termination",
-	/* 16 */	N, "SIGURG: urgent condition on socket",
-	/* 17 */	0, "SIGSTOP: stop",
-	/* 18 */	N+D, "SIGTSTP: keyboard stop",
-	/* 19 */	0, "SIGCONT: continue after stop",
-	/* 20 */	N, "SIGCHLD: child status has changed",
-	/* 21 */	N+D, "SIGTTIN: background read from tty",
-	/* 22 */	N+D, "SIGTTOU: background write to tty",
-	/* 23 */	N, "SIGIO: i/o now possible",
-	/* 24 */	N, "SIGXCPU: cpu limit exceeded",
-	/* 25 */	N, "SIGXFSZ: file size limit exceeded",
-	/* 26 */	N, "SIGVTALRM: virtual alarm clock",
-	/* 27 */	N, "SIGPROF: profiling alarm clock",
-	/* 28 */	N, "SIGWINCH: window size change",
-	/* 29 */	N, "SIGINFO: status request from keyboard",
-	/* 30 */	N, "SIGUSR1: user-defined signal 1",
-	/* 31 */	N, "SIGUSR2: user-defined signal 2",
-};
-
-#undef N
-#undef K
-#undef T
-#undef P
-#undef D
diff --git a/src/runtime/signals_dragonfly.h b/src/runtime/signals_dragonfly.h
deleted file mode 100644
index 07343a7..0000000
--- a/src/runtime/signals_dragonfly.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#define N SigNotify
-#define K SigKill
-#define T SigThrow
-#define P SigPanic
-#define D SigDefault
-
-#pragma dataflag NOPTR
-SigTab runtime·sigtab[] = {
-	/* 0 */	0, "SIGNONE: no trap",
-	/* 1 */	N+K, "SIGHUP: terminal line hangup",
-	/* 2 */	N+K, "SIGINT: interrupt",
-	/* 3 */	N+T, "SIGQUIT: quit",
-	/* 4 */	T, "SIGILL: illegal instruction",
-	/* 5 */	T, "SIGTRAP: trace trap",
-	/* 6 */	N+T, "SIGABRT: abort",
-	/* 7 */	T, "SIGEMT: emulate instruction executed",
-	/* 8 */	P, "SIGFPE: floating-point exception",
-	/* 9 */	0, "SIGKILL: kill",
-	/* 10 */	P, "SIGBUS: bus error",
-	/* 11 */	P, "SIGSEGV: segmentation violation",
-	/* 12 */	T, "SIGSYS: bad system call",
-	/* 13 */	N, "SIGPIPE: write to broken pipe",
-	/* 14 */	N, "SIGALRM: alarm clock",
-	/* 15 */	N+K, "SIGTERM: termination",
-	/* 16 */	N, "SIGURG: urgent condition on socket",
-	/* 17 */	0, "SIGSTOP: stop",
-	/* 18 */	N+D, "SIGTSTP: keyboard stop",
-	/* 19 */	0, "SIGCONT: continue after stop",
-	/* 20 */	N, "SIGCHLD: child status has changed",
-	/* 21 */	N+D, "SIGTTIN: background read from tty",
-	/* 22 */	N+D, "SIGTTOU: background write to tty",
-	/* 23 */	N, "SIGIO: i/o now possible",
-	/* 24 */	N, "SIGXCPU: cpu limit exceeded",
-	/* 25 */	N, "SIGXFSZ: file size limit exceeded",
-	/* 26 */	N, "SIGVTALRM: virtual alarm clock",
-	/* 27 */	N, "SIGPROF: profiling alarm clock",
-	/* 28 */	N, "SIGWINCH: window size change",
-	/* 29 */	N, "SIGINFO: status request from keyboard",
-	/* 30 */	N, "SIGUSR1: user-defined signal 1",
-	/* 31 */	N, "SIGUSR2: user-defined signal 2",
-	/* 32 */	N, "SIGTHR: reserved",
-};
-
-#undef N
-#undef K
-#undef T
-#undef P
-#undef D
diff --git a/src/runtime/signals_freebsd.h b/src/runtime/signals_freebsd.h
deleted file mode 100644
index 39e0a94..0000000
--- a/src/runtime/signals_freebsd.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#define N SigNotify
-#define K SigKill
-#define T SigThrow
-#define P SigPanic
-#define D SigDefault
-
-#pragma dataflag NOPTR
-SigTab runtime·sigtab[] = {
-	/* 0 */	0, "SIGNONE: no trap",
-	/* 1 */	N+K, "SIGHUP: terminal line hangup",
-	/* 2 */	N+K, "SIGINT: interrupt",
-	/* 3 */	N+T, "SIGQUIT: quit",
-	/* 4 */	T, "SIGILL: illegal instruction",
-	/* 5 */	T, "SIGTRAP: trace trap",
-	/* 6 */	N+T, "SIGABRT: abort",
-	/* 7 */	T, "SIGEMT: emulate instruction executed",
-	/* 8 */	P, "SIGFPE: floating-point exception",
-	/* 9 */	0, "SIGKILL: kill",
-	/* 10 */	P, "SIGBUS: bus error",
-	/* 11 */	P, "SIGSEGV: segmentation violation",
-	/* 12 */	N, "SIGSYS: bad system call",
-	/* 13 */	N, "SIGPIPE: write to broken pipe",
-	/* 14 */	N, "SIGALRM: alarm clock",
-	/* 15 */	N+K, "SIGTERM: termination",
-	/* 16 */	N, "SIGURG: urgent condition on socket",
-	/* 17 */	0, "SIGSTOP: stop",
-	/* 18 */	N+D, "SIGTSTP: keyboard stop",
-	/* 19 */	0, "SIGCONT: continue after stop",
-	/* 20 */	N, "SIGCHLD: child status has changed",
-	/* 21 */	N+D, "SIGTTIN: background read from tty",
-	/* 22 */	N+D, "SIGTTOU: background write to tty",
-	/* 23 */	N, "SIGIO: i/o now possible",
-	/* 24 */	N, "SIGXCPU: cpu limit exceeded",
-	/* 25 */	N, "SIGXFSZ: file size limit exceeded",
-	/* 26 */	N, "SIGVTALRM: virtual alarm clock",
-	/* 27 */	N, "SIGPROF: profiling alarm clock",
-	/* 28 */	N, "SIGWINCH: window size change",
-	/* 29 */	N, "SIGINFO: status request from keyboard",
-	/* 30 */	N, "SIGUSR1: user-defined signal 1",
-	/* 31 */	N, "SIGUSR2: user-defined signal 2",
-	/* 32 */	N, "SIGTHR: reserved",
-};
-
-#undef N
-#undef K
-#undef T
-#undef P
-#undef D
diff --git a/src/runtime/signals_linux.h b/src/runtime/signals_linux.h
deleted file mode 100644
index 3741076..0000000
--- a/src/runtime/signals_linux.h
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#define N SigNotify
-#define K SigKill
-#define T SigThrow
-#define P SigPanic
-#define D SigDefault
-
-#pragma dataflag NOPTR
-SigTab runtime·sigtab[] = {
-	/* 0 */	0, "SIGNONE: no trap",
-	/* 1 */	N+K, "SIGHUP: terminal line hangup",
-	/* 2 */	N+K, "SIGINT: interrupt",
-	/* 3 */	N+T, "SIGQUIT: quit",
-	/* 4 */	T, "SIGILL: illegal instruction",
-	/* 5 */	T, "SIGTRAP: trace trap",
-	/* 6 */	N+T, "SIGABRT: abort",
-	/* 7 */	P, "SIGBUS: bus error",
-	/* 8 */	P, "SIGFPE: floating-point exception",
-	/* 9 */	0, "SIGKILL: kill",
-	/* 10 */	N, "SIGUSR1: user-defined signal 1",
-	/* 11 */	P, "SIGSEGV: segmentation violation",
-	/* 12 */	N, "SIGUSR2: user-defined signal 2",
-	/* 13 */	N, "SIGPIPE: write to broken pipe",
-	/* 14 */	N, "SIGALRM: alarm clock",
-	/* 15 */	N+K, "SIGTERM: termination",
-	/* 16 */	T, "SIGSTKFLT: stack fault",
-	/* 17 */	N, "SIGCHLD: child status has changed",
-	/* 18 */	0, "SIGCONT: continue",
-	/* 19 */	0, "SIGSTOP: stop, unblockable",
-	/* 20 */	N+D, "SIGTSTP: keyboard stop",
-	/* 21 */	N+D, "SIGTTIN: background read from tty",
-	/* 22 */	N+D, "SIGTTOU: background write to tty",
-	/* 23 */	N, "SIGURG: urgent condition on socket",
-	/* 24 */	N, "SIGXCPU: cpu limit exceeded",
-	/* 25 */	N, "SIGXFSZ: file size limit exceeded",
-	/* 26 */	N, "SIGVTALRM: virtual alarm clock",
-	/* 27 */	N, "SIGPROF: profiling alarm clock",
-	/* 28 */	N, "SIGWINCH: window size change",
-	/* 29 */	N, "SIGIO: i/o now possible",
-	/* 30 */	N, "SIGPWR: power failure restart",
-	/* 31 */	N, "SIGSYS: bad system call",
-	/* 32 */	0, "signal 32", /* SIGCANCEL; see issue 6997 */
-	/* 33 */	0, "signal 33", /* SIGSETXID; see issue 3871 */
-	/* 34 */	N, "signal 34",
-	/* 35 */	N, "signal 35",
-	/* 36 */	N, "signal 36",
-	/* 37 */	N, "signal 37",
-	/* 38 */	N, "signal 38",
-	/* 39 */	N, "signal 39",
-	/* 40 */	N, "signal 40",
-	/* 41 */	N, "signal 41",
-	/* 42 */	N, "signal 42",
-	/* 43 */	N, "signal 43",
-	/* 44 */	N, "signal 44",
-	/* 45 */	N, "signal 45",
-	/* 46 */	N, "signal 46",
-	/* 47 */	N, "signal 47",
-	/* 48 */	N, "signal 48",
-	/* 49 */	N, "signal 49",
-	/* 50 */	N, "signal 50",
-	/* 51 */	N, "signal 51",
-	/* 52 */	N, "signal 52",
-	/* 53 */	N, "signal 53",
-	/* 54 */	N, "signal 54",
-	/* 55 */	N, "signal 55",
-	/* 56 */	N, "signal 56",
-	/* 57 */	N, "signal 57",
-	/* 58 */	N, "signal 58",
-	/* 59 */	N, "signal 59",
-	/* 60 */	N, "signal 60",
-	/* 61 */	N, "signal 61",
-	/* 62 */	N, "signal 62",
-	/* 63 */	N, "signal 63",
-	/* 64 */	N, "signal 64",
-};
-
-#undef N
-#undef K
-#undef T
-#undef P
-#undef D
diff --git a/src/runtime/signals_openbsd.h b/src/runtime/signals_openbsd.h
deleted file mode 100644
index 950a2fe..0000000
--- a/src/runtime/signals_openbsd.h
+++ /dev/null
@@ -1,54 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#define N SigNotify
-#define K SigKill
-#define T SigThrow
-#define P SigPanic
-#define D SigDefault
-
-#pragma dataflag NOPTR
-SigTab runtime·sigtab[] = {
-	/*  0 */	0, "SIGNONE: no trap",
-	/*  1 */	N+K, "SIGHUP: terminal line hangup",
-	/*  2 */	N+K, "SIGINT: interrupt",
-	/*  3 */	N+T, "SIGQUIT: quit",
-	/*  4 */	T, "SIGILL: illegal instruction",
-	/*  5 */	T, "SIGTRAP: trace trap",
-	/*  6 */	N+T, "SIGABRT: abort",
-	/*  7 */	T, "SIGEMT: emulate instruction executed",
-	/*  8 */	P, "SIGFPE: floating-point exception",
-	/*  9 */	0, "SIGKILL: kill",
-	/* 10 */	P, "SIGBUS: bus error",
-	/* 11 */	P, "SIGSEGV: segmentation violation",
-	/* 12 */	T, "SIGSYS: bad system call",
-	/* 13 */	N, "SIGPIPE: write to broken pipe",
-	/* 14 */	N, "SIGALRM: alarm clock",
-	/* 15 */	N+K, "SIGTERM: termination",
-	/* 16 */	N, "SIGURG: urgent condition on socket",
-	/* 17 */	0, "SIGSTOP: stop",
-	/* 18 */	N+D, "SIGTSTP: keyboard stop",
-	/* 19 */	0, "SIGCONT: continue after stop",
-	/* 20 */	N, "SIGCHLD: child status has changed",
-	/* 21 */	N+D, "SIGTTIN: background read from tty",
-	/* 22 */	N+D, "SIGTTOU: background write to tty",
-	/* 23 */	N, "SIGIO: i/o now possible",
-	/* 24 */	N, "SIGXCPU: cpu limit exceeded",
-	/* 25 */	N, "SIGXFSZ: file size limit exceeded",
-	/* 26 */	N, "SIGVTALRM: virtual alarm clock",
-	/* 27 */	N, "SIGPROF: profiling alarm clock",
-	/* 28 */	N, "SIGWINCH: window size change",
-	/* 29 */	N, "SIGINFO: status request from keyboard",
-	/* 30 */	N, "SIGUSR1: user-defined signal 1",
-	/* 31 */	N, "SIGUSR2: user-defined signal 2",
-	/* 32 */	N, "SIGTHR: reserved",
-};
-
-#undef N
-#undef K
-#undef T
-#undef P
-#undef D
diff --git a/src/runtime/signals_solaris.h b/src/runtime/signals_solaris.h
deleted file mode 100644
index 1f0a65e..0000000
--- a/src/runtime/signals_solaris.h
+++ /dev/null
@@ -1,97 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "textflag.h"
-
-#define N SigNotify
-#define K SigKill
-#define T SigThrow
-#define P SigPanic
-#define D SigDefault
-
-#pragma dataflag NOPTR
-SigTab runtime·sigtab[] = {
-	/* 0 */		0, "SIGNONE: no trap",
-	/* 1 */		N+K, "SIGHUP: hangup",
-	/* 2 */		N+K, "SIGINT: interrupt (rubout)",
-	/* 3 */		N+T, "SIGQUIT: quit (ASCII FS)",
-	/* 4 */		T, "SIGILL: illegal instruction (not reset when caught)",
-	/* 5 */		T, "SIGTRAP: trace trap (not reset when caught)",
-	/* 6 */		N+T, "SIGABRT: used by abort, replace SIGIOT in the future",
-	/* 7 */		T, "SIGEMT: EMT instruction",
-	/* 8 */		P, "SIGFPE: floating point exception",
-	/* 9 */		0, "SIGKILL: kill (cannot be caught or ignored)",
-	/* 10 */	P, "SIGBUS: bus error",
-	/* 11 */	P, "SIGSEGV: segmentation violation",
-	/* 12 */	T, "SIGSYS: bad argument to system call",
-	/* 13 */	N, "SIGPIPE: write on a pipe with no one to read it",
-	/* 14 */	N, "SIGALRM: alarm clock",
-	/* 15 */	N+K, "SIGTERM: software termination signal from kill",
-	/* 16 */	N, "SIGUSR1: user defined signal 1",
-	/* 17 */	N, "SIGUSR2: user defined signal 2",
-	/* 18 */	N, "SIGCLD: child status change",
-	/* 18 */	N, "SIGCHLD: child status change alias (POSIX)",
-	/* 19 */	N, "SIGPWR: power-fail restart",
-	/* 20 */	N, "SIGWINCH: window size change",
-	/* 21 */	N, "SIGURG: urgent socket condition",
-	/* 22 */	N, "SIGPOLL: pollable event occured",
-	/* 23 */	N+D, "SIGSTOP: stop (cannot be caught or ignored)",
-	/* 24 */	0, "SIGTSTP: user stop requested from tty",
-	/* 25 */	0, "SIGCONT: stopped process has been continued",
-	/* 26 */	N+D, "SIGTTIN: background tty read attempted",
-	/* 27 */	N+D, "SIGTTOU: background tty write attempted",
-	/* 28 */	N, "SIGVTALRM: virtual timer expired",
-	/* 29 */	N, "SIGPROF: profiling timer expired",
-	/* 30 */	N, "SIGXCPU: exceeded cpu limit",
-	/* 31 */	N, "SIGXFSZ: exceeded file size limit",
-	/* 32 */	N, "SIGWAITING: reserved signal no longer used by",
-	/* 33 */	N, "SIGLWP: reserved signal no longer used by",
-	/* 34 */	N, "SIGFREEZE: special signal used by CPR",
-	/* 35 */	N, "SIGTHAW: special signal used by CPR",
-	/* 36 */	0, "SIGCANCEL: reserved signal for thread cancellation",
-	/* 37 */	N, "SIGLOST: resource lost (eg, record-lock lost)",
-	/* 38 */	N, "SIGXRES: resource control exceeded",
-	/* 39 */	N, "SIGJVM1: reserved signal for Java Virtual Machine",
-	/* 40 */	N, "SIGJVM2: reserved signal for Java Virtual Machine",
-
-	/* TODO(aram): what should be do about these signals? D or N? is this set static? */
-	/* 41 */	N, "real time signal",
-	/* 42 */	N, "real time signal",
-	/* 43 */	N, "real time signal",
-	/* 44 */	N, "real time signal",
-	/* 45 */	N, "real time signal",
-	/* 46 */	N, "real time signal",
-	/* 47 */	N, "real time signal",
-	/* 48 */	N, "real time signal",
-	/* 49 */	N, "real time signal",
-	/* 50 */	N, "real time signal",
-	/* 51 */	N, "real time signal",
-	/* 52 */	N, "real time signal",
-	/* 53 */	N, "real time signal",
-	/* 54 */	N, "real time signal",
-	/* 55 */	N, "real time signal",
-	/* 56 */	N, "real time signal",
-	/* 57 */	N, "real time signal",
-	/* 58 */	N, "real time signal",
-	/* 59 */	N, "real time signal",
-	/* 60 */	N, "real time signal",
-	/* 61 */	N, "real time signal",
-	/* 62 */	N, "real time signal",
-	/* 63 */	N, "real time signal",
-	/* 64 */	N, "real time signal",
-	/* 65 */	N, "real time signal",
-	/* 66 */	N, "real time signal",
-	/* 67 */	N, "real time signal",
-	/* 68 */	N, "real time signal",
-	/* 69 */	N, "real time signal",
-	/* 70 */	N, "real time signal",
-	/* 71 */	N, "real time signal",
-	/* 72 */	N, "real time signal",
-};
-
-#undef N
-#undef K
-#undef T
-#undef P
-#undef D
diff --git a/src/runtime/sigpanic_unix.go b/src/runtime/sigpanic_unix.go
index 6807985..7bf2c15 100644
--- a/src/runtime/sigpanic_unix.go
+++ b/src/runtime/sigpanic_unix.go
@@ -6,8 +6,6 @@
 
 package runtime
 
-func signame(int32) *byte
-
 func sigpanic() {
 	g := getg()
 	if !canpanic(g) {
@@ -36,5 +34,10 @@
 		}
 		panicfloat()
 	}
-	panic(errorString(gostringnocopy(signame(g.sig))))
+
+	if g.sig >= uint32(len(sigtable)) {
+		// can't happen: we looked up g.sig in sigtable to decide to call sigpanic
+		gothrow("unexpected signal value")
+	}
+	panic(errorString(sigtable[g.sig].name))
 }
diff --git a/src/runtime/sigqueue.go b/src/runtime/sigqueue.go
index 2d9c24d..82ead22 100644
--- a/src/runtime/sigqueue.go
+++ b/src/runtime/sigqueue.go
@@ -45,7 +45,7 @@
 
 // Called from sighandler to send a signal back out of the signal handling thread.
 // Reports whether the signal was sent. If not, the caller typically crashes the program.
-func sigsend(s int32) bool {
+func sigsend(s uint32) bool {
 	bit := uint32(1) << uint(s&31)
 	if !sig.inuse || s < 0 || int(s) >= 32*len(sig.wanted) || sig.wanted[s/32]&bit == 0 {
 		return false
@@ -139,7 +139,7 @@
 		return
 	}
 	sig.wanted[s/32] |= 1 << (s & 31)
-	sigenable_go(s)
+	sigenable(s)
 }
 
 // Must only be called from a single goroutine at a time.
@@ -148,7 +148,7 @@
 		return
 	}
 	sig.wanted[s/32] &^= 1 << (s & 31)
-	sigdisable_go(s)
+	sigdisable(s)
 }
 
 // This runs on a foreign stack, without an m or a g.  No stack split.
@@ -156,18 +156,3 @@
 func badsignal(sig uintptr) {
 	cgocallback(unsafe.Pointer(funcPC(sigsend)), noescape(unsafe.Pointer(&sig)), unsafe.Sizeof(sig))
 }
-
-func sigenable_m()
-func sigdisable_m()
-
-func sigenable_go(s uint32) {
-	g := getg()
-	g.m.scalararg[0] = uintptr(s)
-	onM(sigenable_m)
-}
-
-func sigdisable_go(s uint32) {
-	g := getg()
-	g.m.scalararg[0] = uintptr(s)
-	onM(sigdisable_m)
-}
diff --git a/src/runtime/slice.go b/src/runtime/slice.go
index 171087d..93cea5c 100644
--- a/src/runtime/slice.go
+++ b/src/runtime/slice.go
@@ -22,11 +22,11 @@
 	// but since the cap is only being supplied implicitly, saying len is clearer.
 	// See issue 4085.
 	len := int(len64)
-	if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > maxmem/uintptr(t.elem.size) {
+	if len64 < 0 || int64(len) != len64 || t.elem.size > 0 && uintptr(len) > _MaxMem/uintptr(t.elem.size) {
 		panic(errorString("makeslice: len out of range"))
 	}
 	cap := int(cap64)
-	if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > maxmem/uintptr(t.elem.size) {
+	if cap < len || int64(cap) != cap64 || t.elem.size > 0 && uintptr(cap) > _MaxMem/uintptr(t.elem.size) {
 		panic(errorString("makeslice: cap out of range"))
 	}
 	p := newarray(t.elem, uintptr(cap))
@@ -42,7 +42,7 @@
 	cap64 := int64(old.cap) + n
 	cap := int(cap64)
 
-	if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > maxmem/uintptr(t.elem.size) {
+	if int64(cap) != cap64 || cap < old.cap || t.elem.size > 0 && uintptr(cap) > _MaxMem/uintptr(t.elem.size) {
 		panic(errorString("growslice: cap out of range"))
 	}
 
@@ -72,7 +72,7 @@
 		}
 	}
 
-	if uintptr(newcap) >= maxmem/uintptr(et.size) {
+	if uintptr(newcap) >= _MaxMem/uintptr(et.size) {
 		panic(errorString("growslice: cap out of range"))
 	}
 	lenmem := uintptr(old.len) * uintptr(et.size)
diff --git a/src/runtime/softfloat64.go b/src/runtime/softfloat64.go
index 4fcf8f2..c157a14 100644
--- a/src/runtime/softfloat64.go
+++ b/src/runtime/softfloat64.go
@@ -340,7 +340,7 @@
 	return fpack64(fs64, uint64(fm)<<d, fe, 0)
 }
 
-func fcmp64(f, g uint64) (cmp int, isnan bool) {
+func fcmp64(f, g uint64) (cmp int32, isnan bool) {
 	fs, fm, _, fi, fn := funpack64(f)
 	gs, gm, _, gi, gn := funpack64(g)
 
@@ -486,13 +486,13 @@
 
 // callable from C
 
-func fadd64c(f, g uint64, ret *uint64)            { *ret = fadd64(f, g) }
-func fsub64c(f, g uint64, ret *uint64)            { *ret = fsub64(f, g) }
-func fmul64c(f, g uint64, ret *uint64)            { *ret = fmul64(f, g) }
-func fdiv64c(f, g uint64, ret *uint64)            { *ret = fdiv64(f, g) }
-func fneg64c(f uint64, ret *uint64)               { *ret = fneg64(f) }
-func f32to64c(f uint32, ret *uint64)              { *ret = f32to64(f) }
-func f64to32c(f uint64, ret *uint32)              { *ret = f64to32(f) }
-func fcmp64c(f, g uint64, ret *int, retnan *bool) { *ret, *retnan = fcmp64(f, g) }
-func fintto64c(val int64, ret *uint64)            { *ret = fintto64(val) }
-func f64tointc(f uint64, ret *int64, retok *bool) { *ret, *retok = f64toint(f) }
+func fadd64c(f, g uint64, ret *uint64)              { *ret = fadd64(f, g) }
+func fsub64c(f, g uint64, ret *uint64)              { *ret = fsub64(f, g) }
+func fmul64c(f, g uint64, ret *uint64)              { *ret = fmul64(f, g) }
+func fdiv64c(f, g uint64, ret *uint64)              { *ret = fdiv64(f, g) }
+func fneg64c(f uint64, ret *uint64)                 { *ret = fneg64(f) }
+func f32to64c(f uint32, ret *uint64)                { *ret = f32to64(f) }
+func f64to32c(f uint64, ret *uint32)                { *ret = f64to32(f) }
+func fcmp64c(f, g uint64, ret *int32, retnan *bool) { *ret, *retnan = fcmp64(f, g) }
+func fintto64c(val int64, ret *uint64)              { *ret = fintto64(val) }
+func f64tointc(f uint64, ret *int64, retok *bool)   { *ret, *retok = f64toint(f) }
diff --git a/src/runtime/softfloat64_test.go b/src/runtime/softfloat64_test.go
index df63010..e108872 100644
--- a/src/runtime/softfloat64_test.go
+++ b/src/runtime/softfloat64_test.go
@@ -182,7 +182,7 @@
 func testcmp(t *testing.T, f, g float64) {
 	hcmp, hisnan := hwcmp(f, g)
 	scmp, sisnan := Fcmp64(math.Float64bits(f), math.Float64bits(g))
-	if hcmp != scmp || hisnan != sisnan {
+	if int32(hcmp) != scmp || hisnan != sisnan {
 		err(t, "cmp(%g, %g) = sw %v, %v, hw %v, %v\n", f, g, scmp, sisnan, hcmp, hisnan)
 	}
 }
diff --git a/src/runtime/softfloat_arm.c b/src/runtime/softfloat_arm.c
deleted file mode 100644
index 3f3f33a..0000000
--- a/src/runtime/softfloat_arm.c
+++ /dev/null
@@ -1,687 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Software floating point interpretaton of ARM 7500 FP instructions.
-// The interpretation is not bit compatible with the 7500.
-// It uses true little-endian doubles, while the 7500 used mixed-endian.
-
-#include "runtime.h"
-#include "textflag.h"
-
-#define CPSR 14
-#define FLAGS_N (1U << 31)
-#define FLAGS_Z (1U << 30)
-#define FLAGS_C (1U << 29)
-#define FLAGS_V (1U << 28)
-
-void	runtime·abort(void);
-void	runtime·sqrtC(uint64, uint64*);
-
-static	uint32	trace = 0;
-
-static void
-fabort(void)
-{
-	if (1) {
-		runtime·printf("Unsupported floating point instruction\n");
-		runtime·abort();
-	}
-}
-
-static void
-putf(uint32 reg, uint32 val)
-{
-	g->m->freglo[reg] = val;
-}
-
-static void
-putd(uint32 reg, uint64 val)
-{
-	g->m->freglo[reg] = (uint32)val;
-	g->m->freghi[reg] = (uint32)(val>>32);
-}
-
-static uint64
-getd(uint32 reg)
-{
-	return (uint64)g->m->freglo[reg] | ((uint64)g->m->freghi[reg]<<32);
-}
-
-static void
-fprint(void)
-{
-	uint32 i;
-	for (i = 0; i < 16; i++) {
-		runtime·printf("\tf%d:\t%X %X\n", i, g->m->freghi[i], g->m->freglo[i]);
-	}
-}
-
-static uint32
-d2f(uint64 d)
-{
-	uint32 x;
-
-	runtime·f64to32c(d, &x);
-	return x;
-}
-
-static uint64
-f2d(uint32 f)
-{
-	uint64 x;
-
-	runtime·f32to64c(f, &x);
-	return x;
-}
-
-static uint32
-fstatus(bool nan, int32 cmp)
-{
-	if(nan)
-		return FLAGS_C | FLAGS_V;
-	if(cmp == 0)
-		return FLAGS_Z | FLAGS_C;
-	if(cmp < 0)
-		return FLAGS_N;
-	return FLAGS_C;
-}
-
-// conditions array record the required CPSR cond field for the
-// first 5 pairs of conditional execution opcodes
-// higher 4 bits are must set, lower 4 bits are must clear
-#pragma dataflag NOPTR
-static const uint8 conditions[10/2] = {
-	[0/2] = (FLAGS_Z >> 24) | 0, // 0: EQ (Z set), 1: NE (Z clear)
-	[2/2] = (FLAGS_C >> 24) | 0, // 2: CS/HS (C set), 3: CC/LO (C clear)
-	[4/2] = (FLAGS_N >> 24) | 0, // 4: MI (N set), 5: PL (N clear)
-	[6/2] = (FLAGS_V >> 24) | 0, // 6: VS (V set), 7: VC (V clear)
-	[8/2] = (FLAGS_C >> 24) | 
-	        (FLAGS_Z >> 28),     // 8: HI (C set and Z clear), 9: LS (C clear and Z set)
-};
-
-#define FAULT (0x80000000U) // impossible PC offset
-
-// returns number of words that the fp instruction
-// is occupying, 0 if next instruction isn't float.
-static uint32
-stepflt(uint32 *pc, uint32 *regs)
-{
-	uint32 i, opc, regd, regm, regn, cpsr;
-	int32 delta;
-	uint32 *addr;
-	uint64 uval;
-	int64 sval;
-	bool nan, ok;
-	int32 cmp;
-	M *m;
-
-	// m is locked in vlop_arm.s, so g->m cannot change during this function call,
-	// so caching it in a local variable is safe.
-	m = g->m;
-	i = *pc;
-
-	if(trace)
-		runtime·printf("stepflt %p %x (cpsr %x)\n", pc, i, regs[CPSR] >> 28);
-
-	opc = i >> 28;
-	if(opc == 14) // common case first
-		goto execute;
-	cpsr = regs[CPSR] >> 28;
-	switch(opc) {
-	case 0: case 1: case 2: case 3: case 4: 
-	case 5: case 6: case 7: case 8: case 9:
-		if(((cpsr & (conditions[opc/2] >> 4)) == (conditions[opc/2] >> 4)) &&
-		   ((cpsr & (conditions[opc/2] & 0xf)) == 0)) {
-			if(opc & 1) return 1;
-		} else {
-			if(!(opc & 1)) return 1;
-		}
-		break;
-	case 10: // GE (N == V)
-	case 11: // LT (N != V)
-		if((cpsr & (FLAGS_N >> 28)) == (cpsr & (FLAGS_V >> 28))) {
-			if(opc & 1) return 1;
-		} else {
-			if(!(opc & 1)) return 1;
-		}
-		break;
-	case 12: // GT (N == V and Z == 0)
-	case 13: // LE (N != V or Z == 1)
-		if((cpsr & (FLAGS_N >> 28)) == (cpsr & (FLAGS_V >> 28)) &&
-		   (cpsr & (FLAGS_Z >> 28)) == 0) {
-			if(opc & 1) return 1;
-		} else {
-			if(!(opc & 1)) return 1;
-		}
-		break;
-	case 14: // AL
-		break;
-	case 15: // shouldn't happen
-		return 0;
-	}
-	if(trace)
-		runtime·printf("conditional %x (cpsr %x) pass\n", opc, cpsr);
-	i = (0xeU << 28) | (i & 0xfffffff);
-
-execute:
-	// special cases
-	if((i&0xfffff000) == 0xe59fb000) {
-		// load r11 from pc-relative address.
-		// might be part of a floating point move
-		// (or might not, but no harm in simulating
-		// one instruction too many).
-		addr = (uint32*)((uint8*)pc + (i&0xfff) + 8);
-		regs[11] = addr[0];
-
-		if(trace)
-			runtime·printf("*** cpu R[%d] = *(%p) %x\n",
-				11, addr, regs[11]);
-		return 1;
-	}
-	if(i == 0xe08bb00d) {
-		// add sp to r11.
-		// might be part of a large stack offset address
-		// (or might not, but again no harm done).
-		regs[11] += regs[13];
-
-		if(trace)
-			runtime·printf("*** cpu R[%d] += R[%d] %x\n",
-				11, 13, regs[11]);
-		return 1;
-	}
-	if(i == 0xeef1fa10) {
-		regs[CPSR] = (regs[CPSR]&0x0fffffff) | m->fflag;
-
-		if(trace)
-			runtime·printf("*** fpsr R[CPSR] = F[CPSR] %x\n", regs[CPSR]);
-		return 1;
-	}
-	if((i&0xff000000) == 0xea000000) {
-		// unconditional branch
-		// can happen in the middle of floating point
-		// if the linker decides it is time to lay down
-		// a sequence of instruction stream constants.
-		delta = i&0xffffff;
-		delta = (delta<<8) >> 8;	// sign extend
-
-		if(trace)
-			runtime·printf("*** cpu PC += %x\n", (delta+2)*4);
-		return delta+2;
-	}
-
-	goto stage1;
-
-stage1:	// load/store regn is cpureg, regm is 8bit offset
-	regd = i>>12 & 0xf;
-	regn = i>>16 & 0xf;
-	regm = (i & 0xff) << 2;	// PLUS or MINUS ??
-
-	switch(i & 0xfff00f00) {
-	default:
-		goto stage2;
-
-	case 0xed900a00:	// single load
-		addr = (uint32*)(regs[regn] + regm);
-		if((uintptr)addr < 4096) {
-			if(trace)
-				runtime·printf("*** load @%p => fault\n", addr);
-			return FAULT;
-		}
-		m->freglo[regd] = addr[0];
-
-		if(trace)
-			runtime·printf("*** load F[%d] = %x\n",
-				regd, m->freglo[regd]);
-		break;
-
-	case 0xed900b00:	// double load
-		addr = (uint32*)(regs[regn] + regm);
-		if((uintptr)addr < 4096) {
-			if(trace)
-				runtime·printf("*** double load @%p => fault\n", addr);
-			return FAULT;
-		}
-		m->freglo[regd] = addr[0];
-		m->freghi[regd] = addr[1];
-
-		if(trace)
-			runtime·printf("*** load D[%d] = %x-%x\n",
-				regd, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xed800a00:	// single store
-		addr = (uint32*)(regs[regn] + regm);
-		if((uintptr)addr < 4096) {
-			if(trace)
-				runtime·printf("*** store @%p => fault\n", addr);
-			return FAULT;
-		}
-		addr[0] = m->freglo[regd];
-
-		if(trace)
-			runtime·printf("*** *(%p) = %x\n",
-				addr, addr[0]);
-		break;
-
-	case 0xed800b00:	// double store
-		addr = (uint32*)(regs[regn] + regm);
-		if((uintptr)addr < 4096) {
-			if(trace)
-				runtime·printf("*** double store @%p => fault\n", addr);
-			return FAULT;
-		}
-		addr[0] = m->freglo[regd];
-		addr[1] = m->freghi[regd];
-
-		if(trace)
-			runtime·printf("*** *(%p) = %x-%x\n",
-				addr, addr[1], addr[0]);
-		break;
-	}
-	return 1;
-
-stage2:	// regd, regm, regn are 4bit variables
-	regm = i>>0 & 0xf;
-	switch(i & 0xfff00ff0) {
-	default:
-		goto stage3;
-
-	case 0xf3000110:	// veor
-		m->freglo[regd] = m->freglo[regm]^m->freglo[regn];
-		m->freghi[regd] = m->freghi[regm]^m->freghi[regn];
-
-		if(trace)
-			runtime·printf("*** veor D[%d] = %x-%x\n",
-				regd, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb00b00:	// D[regd] = const(regn,regm)
-		regn = (regn<<4) | regm;
-		regm = 0x40000000UL;
-		if(regn & 0x80)
-			regm |= 0x80000000UL;
-		if(regn & 0x40)
-			regm ^= 0x7fc00000UL;
-		regm |= (regn & 0x3f) << 16;
-		m->freglo[regd] = 0;
-		m->freghi[regd] = regm;
-
-		if(trace)
-			runtime·printf("*** immed D[%d] = %x-%x\n",
-				regd, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb00a00:	// F[regd] = const(regn,regm)
-		regn = (regn<<4) | regm;
-		regm = 0x40000000UL;
-		if(regn & 0x80)
-			regm |= 0x80000000UL;
-		if(regn & 0x40)
-			regm ^= 0x7e000000UL;
-		regm |= (regn & 0x3f) << 19;
-		m->freglo[regd] = regm;
-
-		if(trace)
-			runtime·printf("*** immed D[%d] = %x\n",
-				regd, m->freglo[regd]);
-		break;
-
-	case 0xee300b00:	// D[regd] = D[regn]+D[regm]
-		runtime·fadd64c(getd(regn), getd(regm), &uval);
-		putd(regd, uval);
-
-		if(trace)
-			runtime·printf("*** add D[%d] = D[%d]+D[%d] %x-%x\n",
-				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xee300a00:	// F[regd] = F[regn]+F[regm]
-		runtime·fadd64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
-		m->freglo[regd] = d2f(uval);
-
-		if(trace)
-			runtime·printf("*** add F[%d] = F[%d]+F[%d] %x\n",
-				regd, regn, regm, m->freglo[regd]);
-		break;
-
-	case 0xee300b40:	// D[regd] = D[regn]-D[regm]
-		runtime·fsub64c(getd(regn), getd(regm), &uval);
-		putd(regd, uval);
-
-		if(trace)
-			runtime·printf("*** sub D[%d] = D[%d]-D[%d] %x-%x\n",
-				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xee300a40:	// F[regd] = F[regn]-F[regm]
-		runtime·fsub64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
-		m->freglo[regd] = d2f(uval);
-
-		if(trace)
-			runtime·printf("*** sub F[%d] = F[%d]-F[%d] %x\n",
-				regd, regn, regm, m->freglo[regd]);
-		break;
-
-	case 0xee200b00:	// D[regd] = D[regn]*D[regm]
-		runtime·fmul64c(getd(regn), getd(regm), &uval);
-		putd(regd, uval);
-
-		if(trace)
-			runtime·printf("*** mul D[%d] = D[%d]*D[%d] %x-%x\n",
-				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xee200a00:	// F[regd] = F[regn]*F[regm]
-		runtime·fmul64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
-		m->freglo[regd] = d2f(uval);
-
-		if(trace)
-			runtime·printf("*** mul F[%d] = F[%d]*F[%d] %x\n",
-				regd, regn, regm, m->freglo[regd]);
-		break;
-
-	case 0xee800b00:	// D[regd] = D[regn]/D[regm]
-		runtime·fdiv64c(getd(regn), getd(regm), &uval);
-		putd(regd, uval);
-
-		if(trace)
-			runtime·printf("*** div D[%d] = D[%d]/D[%d] %x-%x\n",
-				regd, regn, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xee800a00:	// F[regd] = F[regn]/F[regm]
-		runtime·fdiv64c(f2d(m->freglo[regn]), f2d(m->freglo[regm]), &uval);
-		m->freglo[regd] = d2f(uval);
-
-		if(trace)
-			runtime·printf("*** div F[%d] = F[%d]/F[%d] %x\n",
-				regd, regn, regm, m->freglo[regd]);
-		break;
-
-	case 0xee000b10:	// S[regn] = R[regd] (MOVW) (regm ignored)
-		m->freglo[regn] = regs[regd];
-
-		if(trace)
-			runtime·printf("*** cpy S[%d] = R[%d] %x\n",
-				regn, regd, m->freglo[regn]);
-		break;
-
-	case 0xee100b10:	// R[regd] = S[regn] (MOVW) (regm ignored)
-		regs[regd] = m->freglo[regn];
-
-		if(trace)
-			runtime·printf("*** cpy R[%d] = S[%d] %x\n",
-				regd, regn, regs[regd]);
-		break;
-	}
-	return 1;
-
-stage3:	// regd, regm are 4bit variables
-	switch(i & 0xffff0ff0) {
-	default:
-		goto done;
-
-	case 0xeeb00a40:	// F[regd] = F[regm] (MOVF)
-		m->freglo[regd] = m->freglo[regm];
-
-		if(trace)
-			runtime·printf("*** F[%d] = F[%d] %x\n",
-				regd, regm, m->freglo[regd]);
-		break;
-
-	case 0xeeb00b40:	// D[regd] = D[regm] (MOVD)
-		m->freglo[regd] = m->freglo[regm];
-		m->freghi[regd] = m->freghi[regm];
-
-		if(trace)
-			runtime·printf("*** D[%d] = D[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb10bc0:	// D[regd] = sqrt D[regm]
-		runtime·sqrtC(getd(regm), &uval);
-		putd(regd, uval);
-
-		if(trace)
-			runtime·printf("*** D[%d] = sqrt D[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb00bc0:	// D[regd] = abs D[regm]
-		m->freglo[regd] = m->freglo[regm];
-		m->freghi[regd] = m->freghi[regm] & ((1<<31)-1);
-
-		if(trace)
-			runtime·printf("*** D[%d] = abs D[%d] %x-%x\n",
-					regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb00ac0:	// F[regd] = abs F[regm]
-		m->freglo[regd] = m->freglo[regm] & ((1<<31)-1);
-
-		if(trace)
-			runtime·printf("*** F[%d] = abs F[%d] %x\n",
-					regd, regm, m->freglo[regd]);
-		break;
-
-	case 0xeeb40bc0:	// D[regd] :: D[regm] (CMPD)
-		runtime·fcmp64c(getd(regd), getd(regm), &cmp, &nan);
-		m->fflag = fstatus(nan, cmp);
-
-		if(trace)
-			runtime·printf("*** cmp D[%d]::D[%d] %x\n",
-				regd, regm, m->fflag);
-		break;
-
-	case 0xeeb40ac0:	// F[regd] :: F[regm] (CMPF)
-		runtime·fcmp64c(f2d(m->freglo[regd]), f2d(m->freglo[regm]), &cmp, &nan);
-		m->fflag = fstatus(nan, cmp);
-
-		if(trace)
-			runtime·printf("*** cmp F[%d]::F[%d] %x\n",
-				regd, regm, m->fflag);
-		break;
-
-	case 0xeeb70ac0:	// D[regd] = F[regm] (MOVFD)
-		putd(regd, f2d(m->freglo[regm]));
-
-		if(trace)
-			runtime·printf("*** f2d D[%d]=F[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb70bc0:	// F[regd] = D[regm] (MOVDF)
-		m->freglo[regd] = d2f(getd(regm));
-
-		if(trace)
-			runtime·printf("*** d2f F[%d]=D[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeebd0ac0:	// S[regd] = F[regm] (MOVFW)
-		runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
-		if(!ok || (int32)sval != sval)
-			sval = 0;
-		m->freglo[regd] = sval;
-
-		if(trace)
-			runtime·printf("*** fix S[%d]=F[%d] %x\n",
-				regd, regm, m->freglo[regd]);
-		break;
-
-	case 0xeebc0ac0:	// S[regd] = F[regm] (MOVFW.U)
-		runtime·f64tointc(f2d(m->freglo[regm]), &sval, &ok);
-		if(!ok || (uint32)sval != sval)
-			sval = 0;
-		m->freglo[regd] = sval;
-
-		if(trace)
-			runtime·printf("*** fix unsigned S[%d]=F[%d] %x\n",
-				regd, regm, m->freglo[regd]);
-		break;
-
-	case 0xeebd0bc0:	// S[regd] = D[regm] (MOVDW)
-		runtime·f64tointc(getd(regm), &sval, &ok);
-		if(!ok || (int32)sval != sval)
-			sval = 0;
-		m->freglo[regd] = sval;
-
-		if(trace)
-			runtime·printf("*** fix S[%d]=D[%d] %x\n",
-				regd, regm, m->freglo[regd]);
-		break;
-
-	case 0xeebc0bc0:	// S[regd] = D[regm] (MOVDW.U)
-		runtime·f64tointc(getd(regm), &sval, &ok);
-		if(!ok || (uint32)sval != sval)
-			sval = 0;
-		m->freglo[regd] = sval;
-
-		if(trace)
-			runtime·printf("*** fix unsigned S[%d]=D[%d] %x\n",
-				regd, regm, m->freglo[regd]);
-		break;
-
-	case 0xeeb80ac0:	// D[regd] = S[regm] (MOVWF)
-		cmp = m->freglo[regm];
-		if(cmp < 0) {
-			runtime·fintto64c(-cmp, &uval);
-			putf(regd, d2f(uval));
-			m->freglo[regd] ^= 0x80000000;
-		} else {
-			runtime·fintto64c(cmp, &uval);
-			putf(regd, d2f(uval));
-		}
-
-		if(trace)
-			runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb80a40:	// D[regd] = S[regm] (MOVWF.U)
-		runtime·fintto64c(m->freglo[regm], &uval);
-		putf(regd, d2f(uval));
-
-		if(trace)
-			runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb80bc0:	// D[regd] = S[regm] (MOVWD)
-		cmp = m->freglo[regm];
-		if(cmp < 0) {
-			runtime·fintto64c(-cmp, &uval);
-			putd(regd, uval);
-			m->freghi[regd] ^= 0x80000000;
-		} else {
-			runtime·fintto64c(cmp, &uval);
-			putd(regd, uval);
-		}
-
-		if(trace)
-			runtime·printf("*** float D[%d]=S[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-
-	case 0xeeb80b40:	// D[regd] = S[regm] (MOVWD.U)
-		runtime·fintto64c(m->freglo[regm], &uval);
-		putd(regd, uval);
-
-		if(trace)
-			runtime·printf("*** float unsigned D[%d]=S[%d] %x-%x\n",
-				regd, regm, m->freghi[regd], m->freglo[regd]);
-		break;
-	}
-	return 1;
-
-done:
-	if((i&0xff000000) == 0xee000000 ||
-	   (i&0xff000000) == 0xed000000) {
-		runtime·printf("stepflt %p %x\n", pc, i);
-		fabort();
-	}
-	return 0;
-}
-
-typedef struct Sfregs Sfregs;
-
-// NOTE: These are all recorded as pointers because they are possibly live registers,
-// and we don't know what they contain. Recording them as pointers should be
-// safer than not.
-struct Sfregs
-{
-	uint32 *r0;
-	uint32 *r1;
-	uint32 *r2;
-	uint32 *r3;
-	uint32 *r4;
-	uint32 *r5;
-	uint32 *r6;
-	uint32 *r7;
-	uint32 *r8;
-	uint32 *r9;
-	uint32 *r10;
-	uint32 *r11;
-	uint32 *r12;
-	uint32 *r13;
-	uint32 cspr;
-};
-
-static void sfloat2(void);
-void _sfloatpanic(void);
-
-#pragma textflag NOSPLIT
-uint32*
-runtime·_sfloat2(uint32 *pc, Sfregs regs)
-{
-	void (*fn)(void);
-	
-	g->m->ptrarg[0] = pc;
-	g->m->ptrarg[1] = &regs;
-	fn = sfloat2;
-	runtime·onM(&fn);
-	pc = g->m->ptrarg[0];
-	g->m->ptrarg[0] = nil;
-	return pc;
-}
-
-static void
-sfloat2(void)
-{
-	uint32 *pc;
-	G *curg;
-	Sfregs *regs;
-	int32 skip;
-	bool first;
-	
-	pc = g->m->ptrarg[0];
-	regs = g->m->ptrarg[1];
-	g->m->ptrarg[0] = nil;
-	g->m->ptrarg[1] = nil;
-
-	first = true;
-	while(skip = stepflt(pc, (uint32*)&regs->r0)) {
-		first = false;
-		if(skip == FAULT) {
-			// Encountered bad address in store/load.
-			// Record signal information and return to assembly
-			// trampoline that fakes the call.
-			enum { SIGSEGV = 11 };
-			curg = g->m->curg;
-			curg->sig = SIGSEGV;
-			curg->sigcode0 = 0;
-			curg->sigcode1 = 0;
-			curg->sigpc = (uint32)pc;
-			pc = (uint32*)_sfloatpanic;
-			break;
-		}
-		pc += skip;
-	}
-	if(first) {
-		runtime·printf("sfloat2 %p %x\n", pc, *pc);
-		fabort(); // not ok to fail first instruction
-	}
-		
-	g->m->ptrarg[0] = pc;
-}
diff --git a/src/runtime/softfloat_arm.go b/src/runtime/softfloat_arm.go
new file mode 100644
index 0000000..746b9ea
--- /dev/null
+++ b/src/runtime/softfloat_arm.go
@@ -0,0 +1,644 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Software floating point interpretation of ARM 7500 FP instructions.
+// The interpretation is not bit compatible with the 7500.
+// It uses true little-endian doubles, while the 7500 used mixed-endian.
+
+package runtime
+
+import "unsafe"
+
+const (
+	_CPSR    = 14
+	_FLAGS_N = 1 << 31
+	_FLAGS_Z = 1 << 30
+	_FLAGS_C = 1 << 29
+	_FLAGS_V = 1 << 28
+)
+
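+// fptrace enables debug printing of each simulated floating point
+// instruction when set to a nonzero value.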
+var fptrace = 0
+
+func fabort() {
+	gothrow("unsupported floating point instruction")
+}
+
+func fputf(reg uint32, val uint32) {
+	_g_ := getg()
+	_g_.m.freglo[reg] = val
+}
+
+func fputd(reg uint32, val uint64) {
+	_g_ := getg()
+	_g_.m.freglo[reg] = uint32(val)
+	_g_.m.freghi[reg] = uint32(val >> 32)
+}
+
+func fgetd(reg uint32) uint64 {
+	_g_ := getg()
+	return uint64(_g_.m.freglo[reg]) | uint64(_g_.m.freghi[reg])<<32
+}
+
+func fprintregs() {
+	_g_ := getg()
+	for i := range _g_.m.freglo {
+		print("\tf", i, ":\t", hex(_g_.m.freghi[i]), " ", hex(_g_.m.freglo[i]), "\n")
+	}
+}
+
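+// fstatus maps a floating point comparison result to ARM CPSR condition
+// flags: unordered (NaN) sets C and V, equal sets Z and C, less than sets N,
+// and greater than sets C.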
+func fstatus(nan bool, cmp int32) uint32 {
+	if nan {
+		return _FLAGS_C | _FLAGS_V
+	}
+	if cmp == 0 {
+		return _FLAGS_Z | _FLAGS_C
+	}
+	if cmp < 0 {
+		return _FLAGS_N
+	}
+	return _FLAGS_C
+}
+
+// The conditions array records the required CPSR cond field for the
+// first 5 pairs of conditional execution opcodes.
+// In each entry the higher 4 bits are flags that must be set and the
+// lower 4 bits are flags that must be clear.
+var conditions = [10 / 2]uint32{
+	0 / 2: _FLAGS_Z>>24 | 0, // 0: EQ (Z set), 1: NE (Z clear)
+	2 / 2: _FLAGS_C>>24 | 0, // 2: CS/HS (C set), 3: CC/LO (C clear)
+	4 / 2: _FLAGS_N>>24 | 0, // 4: MI (N set), 5: PL (N clear)
+	6 / 2: _FLAGS_V>>24 | 0, // 6: VS (V set), 7: VC (V clear)
+	8 / 2: _FLAGS_C>>24 |
+		_FLAGS_Z>>28, // 8: HI (C set and Z clear), 9: LS (C clear and Z set)
+}
+
+const _FAULT = 0x80000000 // impossible PC offset
+
+// stepflt returns the number of words that the fp instruction at pc
+// occupies, or 0 if the next instruction isn't a floating point instruction.
+func stepflt(pc *uint32, regs *[15]uint32) uint32 {
+	var (
+		i, opc, regd, regm, regn, cpsr uint32
+		cmp, delta                     int32
+		uval                           uint64
+		sval                           int64
+		nan, ok                        bool
+	)
+
+	// m is locked in vlop_arm.s, so g.m cannot change during this function call,
+	// so caching it in a local variable is safe.
+	m := getg().m
+	i = *pc
+
+	if fptrace > 0 {
+		print("stepflt ", pc, " ", hex(i), " (cpsr ", hex(regs[_CPSR]>>28), ")\n")
+	}
+
+	opc = i >> 28
+	if opc == 14 { // common case first
+		goto execute
+	}
+
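+	// The instruction is conditional: evaluate its ARM condition code against
+	// the saved CPSR flags.  If the condition is not met, the instruction is
+	// skipped, so report it as occupying a single word.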
+	cpsr = regs[_CPSR] >> 28
+	switch opc {
+	case 0, 1, 2, 3, 4, 5, 6, 7, 8, 9:
+		if cpsr&(conditions[opc/2]>>4) == conditions[opc/2]>>4 &&
+			cpsr&(conditions[opc/2]&0xf) == 0 {
+			if opc&1 != 0 {
+				return 1
+			}
+		} else {
+			if opc&1 == 0 {
+				return 1
+			}
+		}
+
+	case 10, 11: // GE (N == V), LT (N != V)
+		if cpsr&(_FLAGS_N>>28) == cpsr&(_FLAGS_V>>28) {
+			if opc&1 != 0 {
+				return 1
+			}
+		} else {
+			if opc&1 == 0 {
+				return 1
+			}
+		}
+
+	case 12, 13: // GT (N == V and Z == 0), LE (N != V or Z == 1)
+		if cpsr&(_FLAGS_N>>28) == cpsr&(_FLAGS_V>>28) &&
+			cpsr&(_FLAGS_Z>>28) == 0 {
+			if opc&1 != 0 {
+				return 1
+			}
+		} else {
+			if opc&1 == 0 {
+				return 1
+			}
+		}
+
+	case 14: // AL
+		// ok
+
+	case 15: // shouldn't happen
+		return 0
+	}
+
+	if fptrace > 0 {
+		print("conditional ", hex(opc), " (cpsr ", hex(cpsr), ") pass\n")
+	}
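+	// The condition holds: rewrite the cond field to AL (always) and execute.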
+	i = 0xe<<28 | i&(1<<28-1)
+
+execute:
+	// special cases
+	if i&0xfffff000 == 0xe59fb000 {
+		// load r11 from pc-relative address.
+		// might be part of a floating point move
+		// (or might not, but no harm in simulating
+		// one instruction too many).
+		addr := (*[1]uint32)(add(unsafe.Pointer(pc), uintptr(i&0xfff+8)))
+		regs[11] = addr[0]
+
+		if fptrace > 0 {
+			print("*** cpu R[11] = *(", addr, ") ", hex(regs[11]), "\n")
+		}
+		return 1
+	}
+	if i == 0xe08bb00d {
+		// add sp to r11.
+		// might be part of a large stack offset address
+		// (or might not, but again no harm done).
+		regs[11] += regs[13]
+
+		if fptrace > 0 {
+			print("*** cpu R[11] += R[13] ", hex(regs[11]), "\n")
+		}
+		return 1
+	}
+	if i == 0xeef1fa10 {
+		regs[_CPSR] = regs[_CPSR]&0x0fffffff | m.fflag
+
+		if fptrace > 0 {
+			print("*** fpsr R[CPSR] = F[CPSR] ", hex(regs[_CPSR]), "\n")
+		}
+		return 1
+	}
+	if i&0xff000000 == 0xea000000 {
+		// unconditional branch
+		// can happen in the middle of floating point
+		// if the linker decides it is time to lay down
+		// a sequence of instruction stream constants.
+		delta = int32(i&0xffffff) << 8 >> 8 // sign extend
+
+		if fptrace > 0 {
+			print("*** cpu PC += ", hex((delta+2)*4), "\n")
+		}
+		return uint32(delta + 2)
+	}
+
+	goto stage1
+
+stage1: // load/store regn is cpureg, regm is 8bit offset
+	regd = i >> 12 & 0xf
+	regn = i >> 16 & 0xf
+	regm = i & 0xff << 2 // PLUS or MINUS ??
+
+	switch i & 0xfff00f00 {
+	default:
+		goto stage2
+
+	case 0xed900a00: // single load
+		uaddr := uintptr(regs[regn] + regm)
+		if uaddr < 4096 {
+			if fptrace > 0 {
+				print("*** load @", hex(uaddr), " => fault\n")
+			}
+			return _FAULT
+		}
+		addr := (*[1]uint32)(unsafe.Pointer(uaddr))
+		m.freglo[regd] = addr[0]
+
+		if fptrace > 0 {
+			print("*** load F[", regd, "] = ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xed900b00: // double load
+		uaddr := uintptr(regs[regn] + regm)
+		if uaddr < 4096 {
+			if fptrace > 0 {
+				print("*** double load @", hex(uaddr), " => fault\n")
+			}
+			return _FAULT
+		}
+		addr := (*[2]uint32)(unsafe.Pointer(uaddr))
+		m.freglo[regd] = addr[0]
+		m.freghi[regd] = addr[1]
+
+		if fptrace > 0 {
+			print("*** load D[", regd, "] = ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xed800a00: // single store
+		uaddr := uintptr(regs[regn] + regm)
+		if uaddr < 4096 {
+			if fptrace > 0 {
+				print("*** store @", hex(uaddr), " => fault\n")
+			}
+			return _FAULT
+		}
+		addr := (*[1]uint32)(unsafe.Pointer(uaddr))
+		addr[0] = m.freglo[regd]
+
+		if fptrace > 0 {
+			print("*** *(", addr, ") = ", hex(addr[0]), "\n")
+		}
+		break
+
+	case 0xed800b00: // double store
+		uaddr := uintptr(regs[regn] + regm)
+		if uaddr < 4096 {
+			if fptrace > 0 {
+				print("*** double store @", hex(uaddr), " => fault\n")
+			}
+			return _FAULT
+		}
+		addr := (*[2]uint32)(unsafe.Pointer(uaddr))
+		addr[0] = m.freglo[regd]
+		addr[1] = m.freghi[regd]
+
+		if fptrace > 0 {
+			print("*** *(", addr, ") = ", hex(addr[1]), "-", hex(addr[0]), "\n")
+		}
+		break
+	}
+	return 1
+
+stage2: // regd, regm, regn are 4bit variables
+	regm = i >> 0 & 0xf
+	switch i & 0xfff00ff0 {
+	default:
+		goto stage3
+
+	case 0xf3000110: // veor
+		m.freglo[regd] = m.freglo[regm] ^ m.freglo[regn]
+		m.freghi[regd] = m.freghi[regm] ^ m.freghi[regn]
+
+		if fptrace > 0 {
+			print("*** veor D[", regd, "] = ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb00b00: // D[regd] = const(regn,regm)
+		regn = regn<<4 | regm
+		regm = 0x40000000
+		if regn&0x80 != 0 {
+			regm |= 0x80000000
+		}
+		if regn&0x40 != 0 {
+			regm ^= 0x7fc00000
+		}
+		regm |= regn & 0x3f << 16
+		m.freglo[regd] = 0
+		m.freghi[regd] = regm
+
+		if fptrace > 0 {
+			print("*** immed D[", regd, "] = ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb00a00: // F[regd] = const(regn,regm)
+		regn = regn<<4 | regm
+		regm = 0x40000000
+		if regn&0x80 != 0 {
+			regm |= 0x80000000
+		}
+		if regn&0x40 != 0 {
+			regm ^= 0x7e000000
+		}
+		regm |= regn & 0x3f << 19
+		m.freglo[regd] = regm
+
+		if fptrace > 0 {
+			print("*** immed D[", regd, "] = ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee300b00: // D[regd] = D[regn]+D[regm]
+		fadd64c(fgetd(regn), fgetd(regm), &uval)
+		fputd(regd, uval)
+
+		if fptrace > 0 {
+			print("*** add D[", regd, "] = D[", regn, "]+D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee300a00: // F[regd] = F[regn]+F[regm]
+		fadd64c(f32to64(m.freglo[regn]), f32to64(m.freglo[regm]), &uval)
+		m.freglo[regd] = f64to32(uval)
+
+		if fptrace > 0 {
+			print("*** add F[", regd, "] = F[", regn, "]+F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee300b40: // D[regd] = D[regn]-D[regm]
+		fsub64c(fgetd(regn), fgetd(regm), &uval)
+		fputd(regd, uval)
+
+		if fptrace > 0 {
+			print("*** sub D[", regd, "] = D[", regn, "]-D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee300a40: // F[regd] = F[regn]-F[regm]
+		fsub64c(f32to64(m.freglo[regn]), f32to64(m.freglo[regm]), &uval)
+		m.freglo[regd] = f64to32(uval)
+
+		if fptrace > 0 {
+			print("*** sub F[", regd, "] = F[", regn, "]-F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee200b00: // D[regd] = D[regn]*D[regm]
+		fmul64c(fgetd(regn), fgetd(regm), &uval)
+		fputd(regd, uval)
+
+		if fptrace > 0 {
+			print("*** mul D[", regd, "] = D[", regn, "]*D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee200a00: // F[regd] = F[regn]*F[regm]
+		fmul64c(f32to64(m.freglo[regn]), f32to64(m.freglo[regm]), &uval)
+		m.freglo[regd] = f64to32(uval)
+
+		if fptrace > 0 {
+			print("*** mul F[", regd, "] = F[", regn, "]*F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee800b00: // D[regd] = D[regn]/D[regm]
+		fdiv64c(fgetd(regn), fgetd(regm), &uval)
+		fputd(regd, uval)
+
+		if fptrace > 0 {
+			print("*** div D[", regd, "] = D[", regn, "]/D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee800a00: // F[regd] = F[regn]/F[regm]
+		fdiv64c(f32to64(m.freglo[regn]), f32to64(m.freglo[regm]), &uval)
+		m.freglo[regd] = f64to32(uval)
+
+		if fptrace > 0 {
+			print("*** div F[", regd, "] = F[", regn, "]/F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xee000b10: // S[regn] = R[regd] (MOVW) (regm ignored)
+		m.freglo[regn] = regs[regd]
+
+		if fptrace > 0 {
+			print("*** cpy S[", regn, "] = R[", regd, "] ", hex(m.freglo[regn]), "\n")
+		}
+		break
+
+	case 0xee100b10: // R[regd] = S[regn] (MOVW) (regm ignored)
+		regs[regd] = m.freglo[regn]
+
+		if fptrace > 0 {
+			print("*** cpy R[", regd, "] = S[", regn, "] ", hex(regs[regd]), "\n")
+		}
+		break
+	}
+	return 1
+
+stage3: // regd, regm are 4bit variables
+	switch i & 0xffff0ff0 {
+	default:
+		goto done
+
+	case 0xeeb00a40: // F[regd] = F[regm] (MOVF)
+		m.freglo[regd] = m.freglo[regm]
+
+		if fptrace > 0 {
+			print("*** F[", regd, "] = F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb00b40: // D[regd] = D[regm] (MOVD)
+		m.freglo[regd] = m.freglo[regm]
+		m.freghi[regd] = m.freghi[regm]
+
+		if fptrace > 0 {
+			print("*** D[", regd, "] = D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb10bc0: // D[regd] = sqrt D[regm]
+		uval = float64bits(sqrt(float64frombits(fgetd(regm))))
+		fputd(regd, uval)
+
+		if fptrace > 0 {
+			print("*** D[", regd, "] = sqrt D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb00bc0: // D[regd] = abs D[regm]
+		m.freglo[regd] = m.freglo[regm]
+		m.freghi[regd] = m.freghi[regm] & (1<<31 - 1)
+
+		if fptrace > 0 {
+			print("*** D[", regd, "] = abs D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb00ac0: // F[regd] = abs F[regm]
+		m.freglo[regd] = m.freglo[regm] & (1<<31 - 1)
+
+		if fptrace > 0 {
+			print("*** F[", regd, "] = abs F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb40bc0: // D[regd] :: D[regm] (CMPD)
+		fcmp64c(fgetd(regd), fgetd(regm), &cmp, &nan)
+		m.fflag = fstatus(nan, cmp)
+
+		if fptrace > 0 {
+			print("*** cmp D[", regd, "]::D[", regm, "] ", hex(m.fflag), "\n")
+		}
+		break
+
+	case 0xeeb40ac0: // F[regd] :: F[regm] (CMPF)
+		fcmp64c(f32to64(m.freglo[regd]), f32to64(m.freglo[regm]), &cmp, &nan)
+		m.fflag = fstatus(nan, cmp)
+
+		if fptrace > 0 {
+			print("*** cmp F[", regd, "]::F[", regm, "] ", hex(m.fflag), "\n")
+		}
+		break
+
+	case 0xeeb70ac0: // D[regd] = F[regm] (MOVFD)
+		fputd(regd, f32to64(m.freglo[regm]))
+
+		if fptrace > 0 {
+			print("*** f2d D[", regd, "]=F[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb70bc0: // F[regd] = D[regm] (MOVDF)
+		m.freglo[regd] = f64to32(fgetd(regm))
+
+		if fptrace > 0 {
+			print("*** d2f F[", regd, "]=D[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeebd0ac0: // S[regd] = F[regm] (MOVFW)
+		f64tointc(f32to64(m.freglo[regm]), &sval, &ok)
+		if !ok || int64(int32(sval)) != sval {
+			sval = 0
+		}
+		m.freglo[regd] = uint32(sval)
+		if fptrace > 0 {
+			print("*** fix S[", regd, "]=F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeebc0ac0: // S[regd] = F[regm] (MOVFW.U)
+		f64tointc(f32to64(m.freglo[regm]), &sval, &ok)
+		if !ok || int64(uint32(sval)) != sval {
+			sval = 0
+		}
+		m.freglo[regd] = uint32(sval)
+
+		if fptrace > 0 {
+			print("*** fix unsigned S[", regd, "]=F[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeebd0bc0: // S[regd] = D[regm] (MOVDW)
+		f64tointc(fgetd(regm), &sval, &ok)
+		if !ok || int64(int32(sval)) != sval {
+			sval = 0
+		}
+		m.freglo[regd] = uint32(sval)
+
+		if fptrace > 0 {
+			print("*** fix S[", regd, "]=D[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeebc0bc0: // S[regd] = D[regm] (MOVDW.U)
+		f64tointc(fgetd(regm), &sval, &ok)
+		if !ok || int64(uint32(sval)) != sval {
+			sval = 0
+		}
+		m.freglo[regd] = uint32(sval)
+
+		if fptrace > 0 {
+			print("*** fix unsigned S[", regd, "]=D[", regm, "] ", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb80ac0: // D[regd] = S[regm] (MOVWF)
+		cmp = int32(m.freglo[regm])
+		if cmp < 0 {
+			fintto64c(int64(-cmp), &uval)
+			fputf(regd, f64to32(uval))
+			m.freglo[regd] ^= 0x80000000
+		} else {
+			fintto64c(int64(cmp), &uval)
+			fputf(regd, f64to32(uval))
+		}
+
+		if fptrace > 0 {
+			print("*** float D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb80a40: // D[regd] = S[regm] (MOVWF.U)
+		fintto64c(int64(m.freglo[regm]), &uval)
+		fputf(regd, f64to32(uval))
+
+		if fptrace > 0 {
+			print("*** float unsigned D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb80bc0: // D[regd] = S[regm] (MOVWD)
+		cmp = int32(m.freglo[regm])
+		if cmp < 0 {
+			fintto64c(int64(-cmp), &uval)
+			fputd(regd, uval)
+			m.freghi[regd] ^= 0x80000000
+		} else {
+			fintto64c(int64(cmp), &uval)
+			fputd(regd, uval)
+		}
+
+		if fptrace > 0 {
+			print("*** float D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+
+	case 0xeeb80b40: // D[regd] = S[regm] (MOVWD.U)
+		fintto64c(int64(m.freglo[regm]), &uval)
+		fputd(regd, uval)
+
+		if fptrace > 0 {
+			print("*** float unsigned D[", regd, "]=S[", regm, "] ", hex(m.freghi[regd]), "-", hex(m.freglo[regd]), "\n")
+		}
+		break
+	}
+	return 1
+
+done:
+	if i&0xff000000 == 0xee000000 ||
+		i&0xff000000 == 0xed000000 {
+		print("stepflt ", pc, " ", hex(i), "\n")
+		fabort()
+	}
+	return 0
+}
+
+//go:nosplit
+func _sfloat2(pc uint32, regs *[15]uint32) {
+	systemstack(func() {
+		pc = sfloat2(pc, regs)
+	})
+}
+
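+// _sfloatpanic has no Go body; it is implemented in assembly.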
+func _sfloatpanic()
+
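+// sfloat2 emulates floating-point instructions one at a time, starting at pc,
+// until stepflt reports an instruction it cannot handle; it returns the pc at
+// which ordinary execution should resume.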
+func sfloat2(pc uint32, regs *[15]uint32) uint32 {
+	first := true
+	for {
+		skip := stepflt((*uint32)(unsafe.Pointer(uintptr(pc))), regs)
+		if skip == 0 {
+			break
+		}
+		first = false
+		if skip == _FAULT {
+			// Encountered bad address in store/load.
+			// Record signal information and return to assembly
+			// trampoline that fakes the call.
+			const SIGSEGV = 11
+			curg := getg().m.curg
+			curg.sig = SIGSEGV
+			curg.sigcode0 = 0
+			curg.sigcode1 = 0
+			curg.sigpc = uintptr(pc)
+			pc = uint32(funcPC(_sfloatpanic))
+			break
+		}
+		pc += 4 * uint32(skip)
+	}
+	if first {
+		print("sfloat2 ", pc, " ", hex(*(*uint32)(unsafe.Pointer(uintptr(pc)))), "\n")
+		fabort() // not ok to fail first instruction
+	}
+	return pc
+}
diff --git a/src/runtime/sqrt.go b/src/runtime/sqrt.go
index 34a8c380..e3a2701 100644
--- a/src/runtime/sqrt.go
+++ b/src/runtime/sqrt.go
@@ -86,9 +86,6 @@
 // Notes:  Rounding mode detection omitted.
 
 const (
-	uvnan      = 0x7FF8000000000001
-	uvinf      = 0x7FF0000000000000
-	uvneginf   = 0xFFF0000000000000
 	mask       = 0x7FF
 	shift      = 64 - 11 - 1
 	bias       = 1023
@@ -104,7 +101,7 @@
 	case x == 0 || x != x || x > maxFloat64:
 		return x
 	case x < 0:
-		return nan
+		return nan()
 	}
 	ix := float64bits(x)
 	// normalize x
@@ -144,7 +141,3 @@
 	ix = q>>1 + uint64(exp-1+bias)<<shift // significand + biased exponent
 	return float64frombits(ix)
 }
-
-func sqrtC(f float64, r *float64) {
-	*r = sqrt(f)
-}
diff --git a/src/runtime/stack.c b/src/runtime/stack.c
deleted file mode 100644
index ffae73a..0000000
--- a/src/runtime/stack.c
+++ /dev/null
@@ -1,874 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "stack.h"
-#include "funcdata.h"
-#include "typekind.h"
-#include "type.h"
-#include "race.h"
-#include "mgc0.h"
-#include "textflag.h"
-
-enum
-{
-	// StackDebug == 0: no logging
-	//            == 1: logging of per-stack operations
-	//            == 2: logging of per-frame operations
-	//            == 3: logging of per-word updates
-	//            == 4: logging of per-word reads
-	StackDebug = 0,
-	StackFromSystem = 0,	// allocate stacks from system memory instead of the heap
-	StackFaultOnFree = 0,	// old stacks are mapped noaccess to detect use after free
-	StackPoisonCopy = 0,	// fill stack that should not be accessed with garbage, to detect bad dereferences during copy
-
-	StackCache = 1,
-};
-
-// Global pool of spans that have free stacks.
-// Stacks are assigned an order according to size.
-//     order = log_2(size/FixedStack)
-// There is a free list for each order.
-MSpan runtime·stackpool[NumStackOrders];
-Mutex runtime·stackpoolmu;
-// TODO: one lock per order?
-
-static Stack stackfreequeue;
-
-void
-runtime·stackinit(void)
-{
-	int32 i;
-
-	if((StackCacheSize & PageMask) != 0)
-		runtime·throw("cache size must be a multiple of page size");
-
-	for(i = 0; i < NumStackOrders; i++)
-		runtime·MSpanList_Init(&runtime·stackpool[i]);
-}
-
-// Allocates a stack from the free pool.  Must be called with
-// stackpoolmu held.
-static MLink*
-poolalloc(uint8 order)
-{
-	MSpan *list;
-	MSpan *s;
-	MLink *x;
-	uintptr i;
-
-	list = &runtime·stackpool[order];
-	s = list->next;
-	if(s == list) {
-		// no free stacks.  Allocate another span worth.
-		s = runtime·MHeap_AllocStack(&runtime·mheap, StackCacheSize >> PageShift);
-		if(s == nil)
-			runtime·throw("out of memory");
-		if(s->ref != 0)
-			runtime·throw("bad ref");
-		if(s->freelist != nil)
-			runtime·throw("bad freelist");
-		for(i = 0; i < StackCacheSize; i += FixedStack << order) {
-			x = (MLink*)((s->start << PageShift) + i);
-			x->next = s->freelist;
-			s->freelist = x;
-		}
-		runtime·MSpanList_Insert(list, s);
-	}
-	x = s->freelist;
-	if(x == nil)
-		runtime·throw("span has no free stacks");
-	s->freelist = x->next;
-	s->ref++;
-	if(s->freelist == nil) {
-		// all stacks in s are allocated.
-		runtime·MSpanList_Remove(s);
-	}
-	return x;
-}
-
-// Adds stack x to the free pool.  Must be called with stackpoolmu held.
-static void
-poolfree(MLink *x, uint8 order)
-{
-	MSpan *s;
-
-	s = runtime·MHeap_Lookup(&runtime·mheap, x);
-	if(s->state != MSpanStack)
-		runtime·throw("freeing stack not in a stack span");
-	if(s->freelist == nil) {
-		// s will now have a free stack
-		runtime·MSpanList_Insert(&runtime·stackpool[order], s);
-	}
-	x->next = s->freelist;
-	s->freelist = x;
-	s->ref--;
-	if(s->ref == 0) {
-		// span is completely free - return to heap
-		runtime·MSpanList_Remove(s);
-		s->freelist = nil;
-		runtime·MHeap_FreeStack(&runtime·mheap, s);
-	}
-}
-
-// stackcacherefill/stackcacherelease implement a global pool of stack segments.
-// The pool is required to prevent unlimited growth of per-thread caches.
-static void
-stackcacherefill(MCache *c, uint8 order)
-{
-	MLink *x, *list;
-	uintptr size;
-
-	if(StackDebug >= 1)
-		runtime·printf("stackcacherefill order=%d\n", order);
-
-	// Grab some stacks from the global cache.
-	// Grab half of the allowed capacity (to prevent thrashing).
-	list = nil;
-	size = 0;
-	runtime·lock(&runtime·stackpoolmu);
-	while(size < StackCacheSize/2) {
-		x = poolalloc(order);
-		x->next = list;
-		list = x;
-		size += FixedStack << order;
-	}
-	runtime·unlock(&runtime·stackpoolmu);
-
-	c->stackcache[order].list = list;
-	c->stackcache[order].size = size;
-}
-
-static void
-stackcacherelease(MCache *c, uint8 order)
-{
-	MLink *x, *y;
-	uintptr size;
-
-	if(StackDebug >= 1)
-		runtime·printf("stackcacherelease order=%d\n", order);
-	x = c->stackcache[order].list;
-	size = c->stackcache[order].size;
-	runtime·lock(&runtime·stackpoolmu);
-	while(size > StackCacheSize/2) {
-		y = x->next;
-		poolfree(x, order);
-		x = y;
-		size -= FixedStack << order;
-	}
-	runtime·unlock(&runtime·stackpoolmu);
-	c->stackcache[order].list = x;
-	c->stackcache[order].size = size;
-}
-
-void
-runtime·stackcache_clear(MCache *c)
-{
-	uint8 order;
-	MLink *x, *y;
-
-	if(StackDebug >= 1)
-		runtime·printf("stackcache clear\n");
-	runtime·lock(&runtime·stackpoolmu);
-	for(order = 0; order < NumStackOrders; order++) {
-		x = c->stackcache[order].list;
-		while(x != nil) {
-			y = x->next;
-			poolfree(x, order);
-			x = y;
-		}
-		c->stackcache[order].list = nil;
-		c->stackcache[order].size = 0;
-	}
-	runtime·unlock(&runtime·stackpoolmu);
-}
-
-Stack
-runtime·stackalloc(uint32 n)
-{
-	uint8 order;
-	uint32 n2;
-	void *v;
-	MLink *x;
-	MSpan *s;
-	MCache *c;
-
-	// Stackalloc must be called on scheduler stack, so that we
-	// never try to grow the stack during the code that stackalloc runs.
-	// Doing so would cause a deadlock (issue 1547).
-	if(g != g->m->g0)
-		runtime·throw("stackalloc not on scheduler stack");
-	if((n & (n-1)) != 0)
-		runtime·throw("stack size not a power of 2");
-	if(StackDebug >= 1)
-		runtime·printf("stackalloc %d\n", n);
-
-	if(runtime·debug.efence || StackFromSystem) {
-		v = runtime·sysAlloc(ROUND(n, PageSize), &mstats.stacks_sys);
-		if(v == nil)
-			runtime·throw("out of memory (stackalloc)");
-		return (Stack){(uintptr)v, (uintptr)v+n};
-	}
-
-	// Small stacks are allocated with a fixed-size free-list allocator.
-	// If we need a stack of a bigger size, we fall back on allocating
-	// a dedicated span.
-	if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) {
-		order = 0;
-		n2 = n;
-		while(n2 > FixedStack) {
-			order++;
-			n2 >>= 1;
-		}
-		c = g->m->mcache;
-		if(c == nil || g->m->gcing || g->m->helpgc) {
-			// c == nil can happen in the guts of exitsyscall or
-			// procresize. Just get a stack from the global pool.
-			// Also don't touch stackcache during gc
-			// as it's flushed concurrently.
-			runtime·lock(&runtime·stackpoolmu);
-			x = poolalloc(order);
-			runtime·unlock(&runtime·stackpoolmu);
-		} else {
-			x = c->stackcache[order].list;
-			if(x == nil) {
-				stackcacherefill(c, order);
-				x = c->stackcache[order].list;
-			}
-			c->stackcache[order].list = x->next;
-			c->stackcache[order].size -= n;
-		}
-		v = (byte*)x;
-	} else {
-		s = runtime·MHeap_AllocStack(&runtime·mheap, ROUND(n, PageSize) >> PageShift);
-		if(s == nil)
-			runtime·throw("out of memory");
-		v = (byte*)(s->start<<PageShift);
-	}
-	
-	if(raceenabled)
-		runtime·racemalloc(v, n);
-	if(StackDebug >= 1)
-		runtime·printf("  allocated %p\n", v);
-	return (Stack){(uintptr)v, (uintptr)v+n};
-}
-
-void
-runtime·stackfree(Stack stk)
-{
-	uint8 order;
-	uintptr n, n2;
-	MSpan *s;
-	MLink *x;
-	MCache *c;
-	void *v;
-	
-	n = stk.hi - stk.lo;
-	v = (void*)stk.lo;
-	if(n & (n-1))
-		runtime·throw("stack not a power of 2");
-	if(StackDebug >= 1) {
-		runtime·printf("stackfree %p %d\n", v, (int32)n);
-		runtime·memclr(v, n); // for testing, clobber stack data
-	}
-	if(runtime·debug.efence || StackFromSystem) {
-		if(runtime·debug.efence || StackFaultOnFree)
-			runtime·SysFault(v, n);
-		else
-			runtime·SysFree(v, n, &mstats.stacks_sys);
-		return;
-	}
-	if(StackCache && n < FixedStack << NumStackOrders && n < StackCacheSize) {
-		order = 0;
-		n2 = n;
-		while(n2 > FixedStack) {
-			order++;
-			n2 >>= 1;
-		}
-		x = (MLink*)v;
-		c = g->m->mcache;
-		if(c == nil || g->m->gcing || g->m->helpgc) {
-			runtime·lock(&runtime·stackpoolmu);
-			poolfree(x, order);
-			runtime·unlock(&runtime·stackpoolmu);
-		} else {
-			if(c->stackcache[order].size >= StackCacheSize)
-				stackcacherelease(c, order);
-			x->next = c->stackcache[order].list;
-			c->stackcache[order].list = x;
-			c->stackcache[order].size += n;
-		}
-	} else {
-		s = runtime·MHeap_Lookup(&runtime·mheap, v);
-		if(s->state != MSpanStack) {
-			runtime·printf("%p %p\n", s->start<<PageShift, v);
-			runtime·throw("bad span state");
-		}
-		runtime·MHeap_FreeStack(&runtime·mheap, s);
-	}
-}
-
-uintptr runtime·maxstacksize = 1<<20; // enough until runtime.main sets it for real
-
-static uint8*
-mapnames[] = {
-	(uint8*)"---",
-	(uint8*)"scalar",
-	(uint8*)"ptr",
-	(uint8*)"multi",
-};
-
-// Stack frame layout
-//
-// (x86)
-// +------------------+
-// | args from caller |
-// +------------------+ <- frame->argp
-// |  return address  |
-// +------------------+ <- frame->varp
-// |     locals       |
-// +------------------+
-// |  args to callee  |
-// +------------------+ <- frame->sp
-//
-// (arm)
-// +------------------+
-// | args from caller |
-// +------------------+ <- frame->argp
-// | caller's retaddr |
-// +------------------+ <- frame->varp
-// |     locals       |
-// +------------------+
-// |  args to callee  |
-// +------------------+
-// |  return address  |
-// +------------------+ <- frame->sp
-
-void runtime·main(void);
-void runtime·switchtoM(void(*)(void));
-
-typedef struct AdjustInfo AdjustInfo;
-struct AdjustInfo {
-	Stack old;
-	uintptr delta;  // ptr distance from old to new stack (newbase - oldbase)
-};
-
-// Adjustpointer checks whether *vpp is in the old stack described by adjinfo.
-// If so, it rewrites *vpp to point into the new stack.
-static void
-adjustpointer(AdjustInfo *adjinfo, void *vpp)
-{
-	byte **pp, *p;
-	
-	pp = vpp;
-	p = *pp;
-	if(StackDebug >= 4)
-		runtime·printf("        %p:%p\n", pp, p);
-	if(adjinfo->old.lo <= (uintptr)p && (uintptr)p < adjinfo->old.hi) {
-		*pp = p + adjinfo->delta;
-		if(StackDebug >= 3)
-			runtime·printf("        adjust ptr %p: %p -> %p\n", pp, p, *pp);
-	}
-}
-
-// bv describes the memory starting at address scanp.
-// Adjust any pointers contained therein.
-static void
-adjustpointers(byte **scanp, BitVector *bv, AdjustInfo *adjinfo, Func *f)
-{
-	uintptr delta;
-	int32 num, i;
-	byte *p, *minp, *maxp;
-	
-	minp = (byte*)adjinfo->old.lo;
-	maxp = (byte*)adjinfo->old.hi;
-	delta = adjinfo->delta;
-	num = bv->n / BitsPerPointer;
-	for(i = 0; i < num; i++) {
-		if(StackDebug >= 4)
-			runtime·printf("        %p:%s:%p\n", &scanp[i], mapnames[bv->bytedata[i / (8 / BitsPerPointer)] >> (i * BitsPerPointer & 7) & 3], scanp[i]);
-		switch(bv->bytedata[i / (8 / BitsPerPointer)] >> (i * BitsPerPointer & 7) & 3) {
-		case BitsDead:
-			if(runtime·debug.gcdead)
-				scanp[i] = (byte*)PoisonStack;
-			break;
-		case BitsScalar:
-			break;
-		case BitsPointer:
-			p = scanp[i];
-			if(f != nil && (byte*)0 < p && (p < (byte*)PageSize && runtime·invalidptr || (uintptr)p == PoisonGC || (uintptr)p == PoisonStack)) {
-				// Looks like a junk value in a pointer slot.
-				// Live analysis wrong?
-				g->m->traceback = 2;
-				runtime·printf("runtime: bad pointer in frame %s at %p: %p\n", runtime·funcname(f), &scanp[i], p);
-				runtime·throw("invalid stack pointer");
-			}
-			if(minp <= p && p < maxp) {
-				if(StackDebug >= 3)
-					runtime·printf("adjust ptr %p %s\n", p, runtime·funcname(f));
-				scanp[i] = p + delta;
-			}
-			break;
-		case BitsMultiWord:
-			runtime·throw("adjustpointers: unexpected garbage collection bits");
-		}
-	}
-}
-
-// Note: the argument/return area is adjusted by the callee.
-static bool
-adjustframe(Stkframe *frame, void *arg)
-{
-	AdjustInfo *adjinfo;
-	Func *f;
-	StackMap *stackmap;
-	int32 pcdata;
-	BitVector bv;
-	uintptr targetpc, size, minsize;
-
-	adjinfo = arg;
-	targetpc = frame->continpc;
-	if(targetpc == 0) {
-		// Frame is dead.
-		return true;
-	}
-	f = frame->fn;
-	if(StackDebug >= 2)
-		runtime·printf("    adjusting %s frame=[%p,%p] pc=%p continpc=%p\n", runtime·funcname(f), frame->sp, frame->fp, frame->pc, frame->continpc);
-	if(f->entry == (uintptr)runtime·switchtoM) {
-		// A special routine at the bottom of stack of a goroutine that does an onM call.
-		// We will allow it to be copied even though we don't
-		// have full GC info for it (because it is written in asm).
-		return true;
-	}
-	if(targetpc != f->entry)
-		targetpc--;
-	pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc);
-	if(pcdata == -1)
-		pcdata = 0; // in prologue
-
-	// Adjust local variables if stack frame has been allocated.
-	size = frame->varp - frame->sp;
-	if(thechar != '6' && thechar != '8')
-		minsize = sizeof(uintptr);
-	else
-		minsize = 0;
-	if(size > minsize) {
-		stackmap = runtime·funcdata(f, FUNCDATA_LocalsPointerMaps);
-		if(stackmap == nil || stackmap->n <= 0) {
-			runtime·printf("runtime: frame %s untyped locals %p+%p\n", runtime·funcname(f), (byte*)(frame->varp-size), size);
-			runtime·throw("missing stackmap");
-		}
-		// Locals bitmap information, scan just the pointers in locals.
-		if(pcdata < 0 || pcdata >= stackmap->n) {
-			// don't know where we are
-			runtime·printf("runtime: pcdata is %d and %d locals stack map entries for %s (targetpc=%p)\n",
-				pcdata, stackmap->n, runtime·funcname(f), targetpc);
-			runtime·throw("bad symbol table");
-		}
-		bv = runtime·stackmapdata(stackmap, pcdata);
-		size = (bv.n * PtrSize) / BitsPerPointer;
-		if(StackDebug >= 3)
-			runtime·printf("      locals\n");
-		adjustpointers((byte**)(frame->varp - size), &bv, adjinfo, f);
-	}
-	
-	// Adjust arguments.
-	if(frame->arglen > 0) {
-		if(frame->argmap != nil) {
-			bv = *frame->argmap;
-		} else {
-			stackmap = runtime·funcdata(f, FUNCDATA_ArgsPointerMaps);
-			if(stackmap == nil || stackmap->n <= 0) {
-				runtime·printf("runtime: frame %s untyped args %p+%p\n", runtime·funcname(f), frame->argp, (uintptr)frame->arglen);
-				runtime·throw("missing stackmap");
-			}
-			if(pcdata < 0 || pcdata >= stackmap->n) {
-				// don't know where we are
-				runtime·printf("runtime: pcdata is %d and %d args stack map entries for %s (targetpc=%p)\n",
-					pcdata, stackmap->n, runtime·funcname(f), targetpc);
-				runtime·throw("bad symbol table");
-			}
-			bv = runtime·stackmapdata(stackmap, pcdata);
-		}
-		if(StackDebug >= 3)
-			runtime·printf("      args\n");
-		adjustpointers((byte**)frame->argp, &bv, adjinfo, nil);
-	}
-	
-	return true;
-}
-
-static void
-adjustctxt(G *gp, AdjustInfo *adjinfo)
-{
-	adjustpointer(adjinfo, &gp->sched.ctxt);
-}
-
-static void
-adjustdefers(G *gp, AdjustInfo *adjinfo)
-{
-	Defer *d;
-	bool (*cb)(Stkframe*, void*);
-
-	// Adjust defer argument blocks the same way we adjust active stack frames.
-	cb = adjustframe;
-	runtime·tracebackdefers(gp, &cb, adjinfo);
-
-	// Adjust pointers in the Defer structs.
-	// Defer structs themselves are never on the stack.
-	for(d = gp->defer; d != nil; d = d->link) {
-		adjustpointer(adjinfo, &d->fn);
-		adjustpointer(adjinfo, &d->argp);
-		adjustpointer(adjinfo, &d->panic);
-	}
-}
-
-static void
-adjustpanics(G *gp, AdjustInfo *adjinfo)
-{
-	// Panics are on stack and already adjusted.
-	// Update pointer to head of list in G.
-	adjustpointer(adjinfo, &gp->panic);
-}
-
-static void
-adjustsudogs(G *gp, AdjustInfo *adjinfo)
-{
-	SudoG *s;
-
-	// the data elements pointed to by a SudoG structure
-	// might be in the stack.
-	for(s = gp->waiting; s != nil; s = s->waitlink) {
-		adjustpointer(adjinfo, &s->elem);
-		adjustpointer(adjinfo, &s->selectdone);
-	}
-}
-
-// Copies gp's stack to a new stack of a different size.
-// Caller must have changed gp status to Gcopystack.
-static void
-copystack(G *gp, uintptr newsize)
-{
-	Stack old, new;
-	uintptr used;
-	AdjustInfo adjinfo;
-	bool (*cb)(Stkframe*, void*);
-	byte *p, *ep;
-
-	if(gp->syscallsp != 0)
-		runtime·throw("stack growth not allowed in system call");
-	old = gp->stack;
-	if(old.lo == 0)
-		runtime·throw("nil stackbase");
-	used = old.hi - gp->sched.sp;
-
-	// allocate new stack
-	new = runtime·stackalloc(newsize);
-	if(StackPoisonCopy) {
-		p = (byte*)new.lo;
-		ep = (byte*)new.hi;
-		while(p < ep)
-			*p++ = 0xfd;
-	}
-
-	if(StackDebug >= 1)
-		runtime·printf("copystack gp=%p [%p %p %p]/%d -> [%p %p %p]/%d\n", gp, old.lo, old.hi-used, old.hi, (int32)(old.hi-old.lo), new.lo, new.hi-used, new.hi, (int32)newsize);
-	
-	// adjust pointers in the to-be-copied frames
-	adjinfo.old = old;
-	adjinfo.delta = new.hi - old.hi;
-	cb = adjustframe;
-	runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, 0, nil, 0x7fffffff, &cb, &adjinfo, 0);
-	
-	// adjust other miscellaneous things that have pointers into stacks.
-	adjustctxt(gp, &adjinfo);
-	adjustdefers(gp, &adjinfo);
-	adjustpanics(gp, &adjinfo);
-	adjustsudogs(gp, &adjinfo);
-	
-	// copy the stack to the new location
-	if(StackPoisonCopy) {
-		p = (byte*)new.lo;
-		ep = (byte*)new.hi;
-		while(p < ep)
-			*p++ = 0xfb;
-	}
-	runtime·memmove((byte*)new.hi - used, (byte*)old.hi - used, used);
-
-	// Swap out old stack for new one
-	gp->stack = new;
-	gp->stackguard0 = new.lo + StackGuard; // NOTE: might clobber a preempt request
-	gp->sched.sp = new.hi - used;
-
-	// free old stack
-	if(StackPoisonCopy) {
-		p = (byte*)old.lo;
-		ep = (byte*)old.hi;
-		while(p < ep)
-			*p++ = 0xfc;
-	}
-	if(newsize > old.hi-old.lo) {
-		// growing, free stack immediately
-		runtime·stackfree(old);
-	} else {
-		// shrinking, queue up free operation.  We can't actually free the stack
-		// just yet because we might run into the following situation:
-		// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
-		// 2) The stack that pointer points to is shrunk
-		// 3) The old stack is freed
-		// 4) The containing span is marked free
-		// 5) GC attempts to mark the SudoG.elem pointer.  The marking fails because
-		//    the pointer looks like a pointer into a free span.
-		// By not freeing, we prevent step #4 until GC is done.
-		runtime·lock(&runtime·stackpoolmu);
-		*(Stack*)old.lo = stackfreequeue;
-		stackfreequeue = old;
-		runtime·unlock(&runtime·stackpoolmu);
-	}
-}
-
-// round x up to a power of 2.
-int32
-runtime·round2(int32 x)
-{
-	int32 s;
-
-	s = 0;
-	while((1 << s) < x)
-		s++;
-	return 1 << s;
-}
-
-// Called from runtime·morestack when more stack is needed.
-// Allocate larger stack and relocate to new stack.
-// Stack growth is multiplicative, for constant amortized cost.
-//
-// g->atomicstatus will be Grunning or Gscanrunning upon entry. 
-// If the GC is trying to stop this g then it will set preemptscan to true.
-void
-runtime·newstack(void)
-{
-	int32 oldsize, newsize;
-	uint32 oldstatus;
-	uintptr sp;
-	G *gp;
-	Gobuf morebuf;
-
-	if(g->m->morebuf.g->stackguard0 == (uintptr)StackFork)
-		runtime·throw("stack growth after fork");
-	if(g->m->morebuf.g != g->m->curg) {
-		runtime·printf("runtime: newstack called from g=%p\n"
-			"\tm=%p m->curg=%p m->g0=%p m->gsignal=%p\n",
-			g->m->morebuf.g, g->m, g->m->curg, g->m->g0, g->m->gsignal);
-		morebuf = g->m->morebuf;
-		runtime·traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g);
-		runtime·throw("runtime: wrong goroutine in newstack");
-	}
-	if(g->m->curg->throwsplit)
-		runtime·throw("runtime: stack split at bad time");
-
-	// The goroutine must be executing in order to call newstack,
-	// so it must be Grunning or Gscanrunning.
-
-	gp = g->m->curg;
-	morebuf = g->m->morebuf;
-	g->m->morebuf.pc = (uintptr)nil;
-	g->m->morebuf.lr = (uintptr)nil;
-	g->m->morebuf.sp = (uintptr)nil;
-	g->m->morebuf.g = (G*)nil;
-
-	runtime·casgstatus(gp, Grunning, Gwaiting);
-	gp->waitreason = runtime·gostringnocopy((byte*)"stack growth");
-
-	runtime·rewindmorestack(&gp->sched);
-
-	if(gp->stack.lo == 0)
-		runtime·throw("missing stack in newstack");
-	sp = gp->sched.sp;
-	if(thechar == '6' || thechar == '8') {
-		// The call to morestack cost a word.
-		sp -= sizeof(uintreg);
-	}
-	if(StackDebug >= 1 || sp < gp->stack.lo) {
-		runtime·printf("runtime: newstack sp=%p stack=[%p, %p]\n"
-			"\tmorebuf={pc:%p sp:%p lr:%p}\n"
-			"\tsched={pc:%p sp:%p lr:%p ctxt:%p}\n",
-			sp, gp->stack.lo, gp->stack.hi,
-			g->m->morebuf.pc, g->m->morebuf.sp, g->m->morebuf.lr,
-			gp->sched.pc, gp->sched.sp, gp->sched.lr, gp->sched.ctxt);
-	}
-	if(sp < gp->stack.lo) {
-		runtime·printf("runtime: gp=%p, gp->status=%d\n ", (void*)gp, runtime·readgstatus(gp));
-		runtime·printf("runtime: split stack overflow: %p < %p\n", sp, gp->stack.lo);
-		runtime·throw("runtime: split stack overflow");
-	}
-	
-	if(gp->sched.ctxt != nil) {
-		// morestack wrote sched.ctxt on its way in here,
-		// without a write barrier. Run the write barrier now.
-		// It is not possible to be preempted between then
-		// and now, so it's okay.
-		runtime·writebarrierptr_nostore(&gp->sched.ctxt, gp->sched.ctxt);
-	}
-
-	if(gp->stackguard0 == (uintptr)StackPreempt) {
-		if(gp == g->m->g0)
-			runtime·throw("runtime: preempt g0");
-		if(g->m->p == nil && g->m->locks == 0)
-			runtime·throw("runtime: g is running but p is not");
-		if(gp->preemptscan) {
-			runtime·gcphasework(gp);
-			runtime·casgstatus(gp, Gwaiting, Grunning);
-			gp->stackguard0 = gp->stack.lo + StackGuard;
-			gp->preempt = false; 
-			gp->preemptscan = false;        // Tells the GC premption was successful.
-			runtime·gogo(&gp->sched);	// never return 
-		}
-
-		// Be conservative about where we preempt.
-		// We are interested in preempting user Go code, not runtime code.
-		if(g->m->locks || g->m->mallocing || g->m->gcing || g->m->p->status != Prunning) {
-			// Let the goroutine keep running for now.
-			// gp->preempt is set, so it will be preempted next time.
-			gp->stackguard0 = gp->stack.lo + StackGuard;
-			runtime·casgstatus(gp, Gwaiting, Grunning);
-			runtime·gogo(&gp->sched);	// never return
-		}
-		// Act like goroutine called runtime.Gosched.
-		runtime·casgstatus(gp, Gwaiting, Grunning);
-		runtime·gosched_m(gp);	// never return
-	}
-
-	// Allocate a bigger segment and move the stack.
-	oldsize = gp->stack.hi - gp->stack.lo;
-	newsize = oldsize * 2;
-	if(newsize > runtime·maxstacksize) {
-		runtime·printf("runtime: goroutine stack exceeds %D-byte limit\n", (uint64)runtime·maxstacksize);
-		runtime·throw("stack overflow");
-	}
-
-	oldstatus = runtime·readgstatus(gp);
-	oldstatus &= ~Gscan;
-	runtime·casgstatus(gp, oldstatus, Gcopystack); // oldstatus is Gwaiting or Grunnable
-	// The concurrent GC will not scan the stack while we are doing the copy since
-	// the gp is in a Gcopystack status.
-	copystack(gp, newsize);
-	if(StackDebug >= 1)
-		runtime·printf("stack grow done\n");
-	runtime·casgstatus(gp, Gcopystack, Grunning);
-	runtime·gogo(&gp->sched);
-}
-
-#pragma textflag NOSPLIT
-void
-runtime·nilfunc(void)
-{
-	*(byte*)0 = 0;
-}
-
-// adjust Gobuf as if it executed a call to fn
-// and then did an immediate gosave.
-void
-runtime·gostartcallfn(Gobuf *gobuf, FuncVal *fv)
-{
-	void *fn;
-
-	if(fv != nil)
-		fn = fv->fn;
-	else
-		fn = runtime·nilfunc;
-	runtime·gostartcall(gobuf, fn, fv);
-}
-
-// Maybe shrink the stack being used by gp.
-// Called at garbage collection time.
-void
-runtime·shrinkstack(G *gp)
-{
-	uintptr used, oldsize, newsize;
-	uint32 oldstatus;
-
-	if(runtime·readgstatus(gp) == Gdead) {
-		if(gp->stack.lo != 0) {
-			// Free whole stack - it will get reallocated
-			// if G is used again.
-			runtime·stackfree(gp->stack);
-			gp->stack.lo = 0;
-			gp->stack.hi = 0;
-		}
-		return;
-	}
-	if(gp->stack.lo == 0)
-		runtime·throw("missing stack in shrinkstack");
-
-	oldsize = gp->stack.hi - gp->stack.lo;
-	newsize = oldsize / 2;
-	if(newsize < FixedStack)
-		return; // don't shrink below the minimum-sized stack
-	used = gp->stack.hi - gp->sched.sp;
-	if(used >= oldsize / 4)
-		return; // still using at least 1/4 of the segment.
-
-	// We can't copy the stack if we're in a syscall.
-	// The syscall might have pointers into the stack.
-	if(gp->syscallsp != 0)
-		return;
-
-#ifdef GOOS_windows
-	if(gp->m != nil && gp->m->libcallsp != 0)
-		return;
-#endif
-	if(StackDebug > 0)
-		runtime·printf("shrinking stack %D->%D\n", (uint64)oldsize, (uint64)newsize);
-	// This is being done in a Gscan state and was initiated by the GC so no need to move to
-	// the Gcopystate.
-	// The world is stopped, so the goroutine must be Gwaiting or Grunnable,
-	// and what it is is not changing underfoot.
-
-	oldstatus = runtime·readgstatus(gp);
-	oldstatus &= ~Gscan;
-	if(oldstatus != Gwaiting && oldstatus != Grunnable)
-		runtime·throw("status is not Gwaiting or Grunnable");
-	runtime·casgstatus(gp, oldstatus, Gcopystack);
-	copystack(gp, newsize);
-	runtime·casgstatus(gp, Gcopystack, oldstatus);
- }
-
-// Do any delayed stack freeing that was queued up during GC.
-void
-runtime·shrinkfinish(void)
-{
-	Stack s, t;
-
-	runtime·lock(&runtime·stackpoolmu);
-	s = stackfreequeue;
-	stackfreequeue = (Stack){0,0};
-	runtime·unlock(&runtime·stackpoolmu);
-	while(s.lo != 0) {
-		t = *(Stack*)s.lo;
-		runtime·stackfree(s);
-		s = t;
-	}
-}
-
-static void badc(void);
-
-#pragma textflag NOSPLIT
-void
-runtime·morestackc(void)
-{
-	void (*fn)(void);
-	
-	fn = badc;
-	runtime·onM(&fn);
-}
-
-static void
-badc(void)
-{
-	runtime·throw("attempt to execute C code on Go stack");
-}
diff --git a/src/runtime/stack.go b/src/runtime/stack.go
deleted file mode 100644
index f1b7d32..0000000
--- a/src/runtime/stack.go
+++ /dev/null
@@ -1,13 +0,0 @@
-// Copyright 2011 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package runtime
-
-const (
-	// Goroutine preemption request.
-	// Stored into g->stackguard0 to cause split stack check failure.
-	// Must be greater than any real sp.
-	// 0xfffffade in hex.
-	stackPreempt = ^uintptr(1313)
-)
diff --git a/src/runtime/stack.h b/src/runtime/stack.h
index f97dc4e..0099d05 100644
--- a/src/runtime/stack.h
+++ b/src/runtime/stack.h
@@ -2,117 +2,24 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-/*
-Stack layout parameters.
-Included both by runtime (compiled via 6c) and linkers (compiled via gcc).
-
-The per-goroutine g->stackguard is set to point StackGuard bytes
-above the bottom of the stack.  Each function compares its stack
-pointer against g->stackguard to check for overflow.  To cut one
-instruction from the check sequence for functions with tiny frames,
-the stack is allowed to protrude StackSmall bytes below the stack
-guard.  Functions with large frames don't bother with the check and
-always call morestack.  The sequences are (for amd64, others are
-similar):
- 
-	guard = g->stackguard
-	frame = function's stack frame size
-	argsize = size of function arguments (call + return)
-
-	stack frame size <= StackSmall:
-		CMPQ guard, SP
-		JHI 3(PC)
-		MOVQ m->morearg, $(argsize << 32)
-		CALL morestack(SB)
-
-	stack frame size > StackSmall but < StackBig
-		LEAQ (frame-StackSmall)(SP), R0
-		CMPQ guard, R0
-		JHI 3(PC)
-		MOVQ m->morearg, $(argsize << 32)
-		CALL morestack(SB)
-
-	stack frame size >= StackBig:
-		MOVQ m->morearg, $((argsize << 32) | frame)
-		CALL morestack(SB)
-
-The bottom StackGuard - StackSmall bytes are important: there has
-to be enough room to execute functions that refuse to check for
-stack overflow, either because they need to be adjacent to the
-actual caller's frame (deferproc) or because they handle the imminent
-stack overflow (morestack).
-
-For example, deferproc might call malloc, which does one of the
-above checks (without allocating a full frame), which might trigger
-a call to morestack.  This sequence needs to fit in the bottom
-section of the stack.  On amd64, morestack's frame is 40 bytes, and
-deferproc's frame is 56 bytes.  That fits well within the
-StackGuard - StackSmall bytes at the bottom.  
-The linkers explore all possible call traces involving non-splitting
-functions to make sure that this limit cannot be violated.
- */
+// For the linkers. Must match Go definitions.
+// TODO(rsc): Share Go definitions with linkers directly.
 
 enum {
-	// StackSystem is a number of additional bytes to add
-	// to each stack below the usual guard area for OS-specific
-	// purposes like signal handling. Used on Windows and on
-	// Plan 9 because they do not use a separate stack.
 #ifdef GOOS_windows
 	StackSystem = 512 * sizeof(uintptr),
 #else
 #ifdef GOOS_plan9
-	// The size of the note handler frame varies among architectures,
-	// but 512 bytes should be enough for every implementation.
 	StackSystem = 512,
 #else
 	StackSystem = 0,
 #endif	// Plan 9
 #endif	// Windows
 
-	// The minimum size of stack used by Go code
-	StackMin = 2048,
-
-	// The minimum stack size to allocate.
-	// The hackery here rounds FixedStack0 up to a power of 2.
-	FixedStack0 = StackMin + StackSystem,
-	FixedStack1 = FixedStack0 - 1,
-	FixedStack2 = FixedStack1 | (FixedStack1 >> 1),
-	FixedStack3 = FixedStack2 | (FixedStack2 >> 2),
-	FixedStack4 = FixedStack3 | (FixedStack3 >> 4),
-	FixedStack5 = FixedStack4 | (FixedStack4 >> 8),
-	FixedStack6 = FixedStack5 | (FixedStack5 >> 16),
-	FixedStack = FixedStack6 + 1,
-
-	// Functions that need frames bigger than this use an extra
-	// instruction to do the stack split check, to avoid overflow
-	// in case SP - framesize wraps below zero.
-	// This value can be no bigger than the size of the unmapped
-	// space at zero.
 	StackBig = 4096,
-
-	// The stack guard is a pointer this many bytes above the
-	// bottom of the stack.
 	StackGuard = 512 + StackSystem,
-
-	// After a stack split check the SP is allowed to be this
-	// many bytes below the stack guard.  This saves an instruction
-	// in the checking sequence for tiny frames.
 	StackSmall = 128,
-
-	// The maximum number of bytes that a chain of NOSPLIT
-	// functions can use.
 	StackLimit = StackGuard - StackSystem - StackSmall,
 };
 
-// Goroutine preemption request.
-// Stored into g->stackguard0 to cause split stack check failure.
-// Must be greater than any real sp.
-// 0xfffffade in hex.
 #define StackPreempt ((uint64)-1314)
-/*c2go
-enum
-{
-	StackPreempt = -1314,
-};
-*/
-#define StackFork ((uint64)-1234)
diff --git a/src/runtime/stack1.go b/src/runtime/stack1.go
new file mode 100644
index 0000000..963f4fa
--- /dev/null
+++ b/src/runtime/stack1.go
@@ -0,0 +1,818 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+const (
+	// StackDebug == 0: no logging
+	//            == 1: logging of per-stack operations
+	//            == 2: logging of per-frame operations
+	//            == 3: logging of per-word updates
+	//            == 4: logging of per-word reads
+	stackDebug       = 0
+	stackFromSystem  = 0 // allocate stacks from system memory instead of the heap
+	stackFaultOnFree = 0 // old stacks are mapped noaccess to detect use after free
+	stackPoisonCopy  = 0 // fill stack that should not be accessed with garbage, to detect bad dereferences during copy
+
+	stackCache = 1
+)
+
+const (
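+	// uintptrMask has every bit of a uintptr set; applying it truncates the
+	// sentinel values below to the platform's pointer width.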
+	uintptrMask = 1<<(8*ptrSize) - 1
+	poisonGC    = uintptrMask & 0xf969696969696969
+	poisonStack = uintptrMask & 0x6868686868686868
+
+	// Goroutine preemption request.
+	// Stored into g->stackguard0 to cause split stack check failure.
+	// Must be greater than any real sp.
+	// 0xfffffade in hex.
+	stackPreempt = uintptrMask & -1314
+
+	// Thread is forking.
+	// Stored into g->stackguard0 to cause split stack check failure.
+	// Must be greater than any real sp.
+	stackFork = uintptrMask & -1234
+)
+
+// Global pool of spans that have free stacks.
+// Stacks are assigned an order according to size.
+//     order = log_2(size/FixedStack)
+// There is a free list for each order.
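+// For example, on systems where StackSystem is 0, _FixedStack is 2048 bytes,
+// so a 2 KB stack has order 0, a 4 KB stack order 1, and an 8 KB stack order 2.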
+// TODO: one lock per order?
+var stackpool [_NumStackOrders]mspan
+var stackpoolmu mutex
+
+var stackfreequeue stack
+
+func stackinit() {
+	if _StackCacheSize&_PageMask != 0 {
+		gothrow("cache size must be a multiple of page size")
+	}
+	for i := range stackpool {
+		mSpanList_Init(&stackpool[i])
+	}
+}
+
+// Allocates a stack from the free pool.  Must be called with
+// stackpoolmu held.
+func stackpoolalloc(order uint8) *mlink {
+	list := &stackpool[order]
+	s := list.next
+	if s == list {
+		// no free stacks.  Allocate another span worth.
+		s = mHeap_AllocStack(&mheap_, _StackCacheSize>>_PageShift)
+		if s == nil {
+			gothrow("out of memory")
+		}
+		if s.ref != 0 {
+			gothrow("bad ref")
+		}
+		if s.freelist != nil {
+			gothrow("bad freelist")
+		}
+		for i := uintptr(0); i < _StackCacheSize; i += _FixedStack << order {
+			x := (*mlink)(unsafe.Pointer(uintptr(s.start)<<_PageShift + i))
+			x.next = s.freelist
+			s.freelist = x
+		}
+		mSpanList_Insert(list, s)
+	}
+	x := s.freelist
+	if x == nil {
+		gothrow("span has no free stacks")
+	}
+	s.freelist = x.next
+	s.ref++
+	if s.freelist == nil {
+		// all stacks in s are allocated.
+		mSpanList_Remove(s)
+	}
+	return x
+}
+
+// Adds stack x to the free pool.  Must be called with stackpoolmu held.
+func stackpoolfree(x *mlink, order uint8) {
+	s := mHeap_Lookup(&mheap_, (unsafe.Pointer)(x))
+	if s.state != _MSpanStack {
+		gothrow("freeing stack not in a stack span")
+	}
+	if s.freelist == nil {
+		// s will now have a free stack
+		mSpanList_Insert(&stackpool[order], s)
+	}
+	x.next = s.freelist
+	s.freelist = x
+	s.ref--
+	if s.ref == 0 {
+		// span is completely free - return to heap
+		mSpanList_Remove(s)
+		s.freelist = nil
+		mHeap_FreeStack(&mheap_, s)
+	}
+}
+
+// stackcacherefill/stackcacherelease implement a global pool of stack segments.
+// The pool is required to prevent unlimited growth of per-thread caches.
+func stackcacherefill(c *mcache, order uint8) {
+	if stackDebug >= 1 {
+		print("stackcacherefill order=", order, "\n")
+	}
+
+	// Grab some stacks from the global cache.
+	// Grab half of the allowed capacity (to prevent thrashing).
+	var list *mlink
+	var size uintptr
+	lock(&stackpoolmu)
+	for size < _StackCacheSize/2 {
+		x := stackpoolalloc(order)
+		x.next = list
+		list = x
+		size += _FixedStack << order
+	}
+	unlock(&stackpoolmu)
+	c.stackcache[order].list = list
+	c.stackcache[order].size = size
+}
+
+func stackcacherelease(c *mcache, order uint8) {
+	if stackDebug >= 1 {
+		print("stackcacherelease order=", order, "\n")
+	}
+	x := c.stackcache[order].list
+	size := c.stackcache[order].size
+	lock(&stackpoolmu)
+	for size > _StackCacheSize/2 {
+		y := x.next
+		stackpoolfree(x, order)
+		x = y
+		size -= _FixedStack << order
+	}
+	unlock(&stackpoolmu)
+	c.stackcache[order].list = x
+	c.stackcache[order].size = size
+}
+
+func stackcache_clear(c *mcache) {
+	if stackDebug >= 1 {
+		print("stackcache clear\n")
+	}
+	lock(&stackpoolmu)
+	for order := uint8(0); order < _NumStackOrders; order++ {
+		x := c.stackcache[order].list
+		for x != nil {
+			y := x.next
+			stackpoolfree(x, order)
+			x = y
+		}
+		c.stackcache[order].list = nil
+		c.stackcache[order].size = 0
+	}
+	unlock(&stackpoolmu)
+}
+
+func stackalloc(n uint32) stack {
+	// Stackalloc must be called on scheduler stack, so that we
+	// never try to grow the stack during the code that stackalloc runs.
+	// Doing so would cause a deadlock (issue 1547).
+	thisg := getg()
+	if thisg != thisg.m.g0 {
+		gothrow("stackalloc not on scheduler stack")
+	}
+	if n&(n-1) != 0 {
+		gothrow("stack size not a power of 2")
+	}
+	if stackDebug >= 1 {
+		print("stackalloc ", n, "\n")
+	}
+
+	if debug.efence != 0 || stackFromSystem != 0 {
+		v := sysAlloc(round(uintptr(n), _PageSize), &memstats.stacks_sys)
+		if v == nil {
+			gothrow("out of memory (stackalloc)")
+		}
+		return stack{uintptr(v), uintptr(v) + uintptr(n)}
+	}
+
+	// Small stacks are allocated with a fixed-size free-list allocator.
+	// If we need a stack of a bigger size, we fall back on allocating
+	// a dedicated span.
+	var v unsafe.Pointer
+	if stackCache != 0 && n < _FixedStack<<_NumStackOrders && n < _StackCacheSize {
+		order := uint8(0)
+		n2 := n
+		for n2 > _FixedStack {
+			order++
+			n2 >>= 1
+		}
+		var x *mlink
+		c := thisg.m.mcache
+		if c == nil || thisg.m.gcing != 0 || thisg.m.helpgc != 0 {
+			// c == nil can happen in the guts of exitsyscall or
+			// procresize. Just get a stack from the global pool.
+			// Also don't touch stackcache during gc
+			// as it's flushed concurrently.
+			lock(&stackpoolmu)
+			x = stackpoolalloc(order)
+			unlock(&stackpoolmu)
+		} else {
+			x = c.stackcache[order].list
+			if x == nil {
+				stackcacherefill(c, order)
+				x = c.stackcache[order].list
+			}
+			c.stackcache[order].list = x.next
+			c.stackcache[order].size -= uintptr(n)
+		}
+		v = (unsafe.Pointer)(x)
+	} else {
+		s := mHeap_AllocStack(&mheap_, round(uintptr(n), _PageSize)>>_PageShift)
+		if s == nil {
+			gothrow("out of memory")
+		}
+		v = (unsafe.Pointer)(s.start << _PageShift)
+	}
+
+	if raceenabled {
+		racemalloc(v, uintptr(n))
+	}
+	if stackDebug >= 1 {
+		print("  allocated ", v, "\n")
+	}
+	return stack{uintptr(v), uintptr(v) + uintptr(n)}
+}
+
+func stackfree(stk stack) {
+	gp := getg()
+	n := stk.hi - stk.lo
+	v := (unsafe.Pointer)(stk.lo)
+	if n&(n-1) != 0 {
+		gothrow("stack not a power of 2")
+	}
+	if stackDebug >= 1 {
+		println("stackfree", v, n)
+		memclr(v, n) // for testing, clobber stack data
+	}
+	if debug.efence != 0 || stackFromSystem != 0 {
+		if debug.efence != 0 || stackFaultOnFree != 0 {
+			sysFault(v, n)
+		} else {
+			sysFree(v, n, &memstats.stacks_sys)
+		}
+		return
+	}
+	if stackCache != 0 && n < _FixedStack<<_NumStackOrders && n < _StackCacheSize {
+		order := uint8(0)
+		n2 := n
+		for n2 > _FixedStack {
+			order++
+			n2 >>= 1
+		}
+		x := (*mlink)(v)
+		c := gp.m.mcache
+		if c == nil || gp.m.gcing != 0 || gp.m.helpgc != 0 {
+			lock(&stackpoolmu)
+			stackpoolfree(x, order)
+			unlock(&stackpoolmu)
+		} else {
+			if c.stackcache[order].size >= _StackCacheSize {
+				stackcacherelease(c, order)
+			}
+			x.next = c.stackcache[order].list
+			c.stackcache[order].list = x
+			c.stackcache[order].size += n
+		}
+	} else {
+		s := mHeap_Lookup(&mheap_, v)
+		if s.state != _MSpanStack {
+			println(hex(s.start<<_PageShift), v)
+			gothrow("bad span state")
+		}
+		mHeap_FreeStack(&mheap_, s)
+	}
+}
+
+var maxstacksize uintptr = 1 << 20 // enough until runtime.main sets it for real
+
+var mapnames = []string{
+	_BitsDead:    "---",
+	_BitsScalar:  "scalar",
+	_BitsPointer: "ptr",
+}
+
+// Stack frame layout
+//
+// (x86)
+// +------------------+
+// | args from caller |
+// +------------------+ <- frame->argp
+// |  return address  |
+// +------------------+ <- frame->varp
+// |     locals       |
+// +------------------+
+// |  args to callee  |
+// +------------------+ <- frame->sp
+//
+// (arm)
+// +------------------+
+// | args from caller |
+// +------------------+ <- frame->argp
+// | caller's retaddr |
+// +------------------+ <- frame->varp
+// |     locals       |
+// +------------------+
+// |  args to callee  |
+// +------------------+
+// |  return address  |
+// +------------------+ <- frame->sp
+
+type adjustinfo struct {
+	old   stack
+	delta uintptr // ptr distance from old to new stack (newbase - oldbase)
+}
+
+// Adjustpointer checks whether *vpp is in the old stack described by adjinfo.
+// If so, it rewrites *vpp to point into the new stack.
+func adjustpointer(adjinfo *adjustinfo, vpp unsafe.Pointer) {
+	pp := (*unsafe.Pointer)(vpp)
+	p := *pp
+	if stackDebug >= 4 {
+		print("        ", pp, ":", p, "\n")
+	}
+	if adjinfo.old.lo <= uintptr(p) && uintptr(p) < adjinfo.old.hi {
+		*pp = add(p, adjinfo.delta)
+		if stackDebug >= 3 {
+			print("        adjust ptr ", pp, ":", p, " -> ", *pp, "\n")
+		}
+	}
+}
+
+type gobitvector struct {
+	n        uintptr
+	bytedata []uint8
+}
+
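+// gobv converts a runtime bitvector into a gobitvector whose bitmap bytes can
+// be indexed as an ordinary Go slice.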
+func gobv(bv bitvector) gobitvector {
+	return gobitvector{
+		uintptr(bv.n),
+		(*[1 << 30]byte)(unsafe.Pointer(bv.bytedata))[:(bv.n+7)/8],
+	}
+}
+
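+// ptrbits returns the 2-bit pointer-map entry for word i; four entries are
+// packed into each bitmap byte.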
+func ptrbits(bv *gobitvector, i uintptr) uint8 {
+	return (bv.bytedata[i/4] >> ((i & 3) * 2)) & 3
+}
+
+// bv describes the memory starting at address scanp.
+// Adjust any pointers contained therein.
+func adjustpointers(scanp unsafe.Pointer, cbv *bitvector, adjinfo *adjustinfo, f *_func) {
+	bv := gobv(*cbv)
+	minp := adjinfo.old.lo
+	maxp := adjinfo.old.hi
+	delta := adjinfo.delta
+	num := uintptr(bv.n / _BitsPerPointer)
+	for i := uintptr(0); i < num; i++ {
+		if stackDebug >= 4 {
+			print("        ", add(scanp, i*ptrSize), ":", mapnames[ptrbits(&bv, i)], ":", hex(*(*uintptr)(add(scanp, i*ptrSize))), " # ", i, " ", bv.bytedata[i/4], "\n")
+		}
+		switch ptrbits(&bv, i) {
+		default:
+			gothrow("unexpected pointer bits")
+		case _BitsDead:
+			if debug.gcdead != 0 {
+				*(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(uintptr(poisonStack))
+			}
+		case _BitsScalar:
+			// ok
+		case _BitsPointer:
+			p := *(*unsafe.Pointer)(add(scanp, i*ptrSize))
+			up := uintptr(p)
+			if f != nil && 0 < up && up < _PageSize && invalidptr != 0 || up == poisonGC || up == poisonStack {
+				// Looks like a junk value in a pointer slot.
+				// Live analysis wrong?
+				getg().m.traceback = 2
+				print("runtime: bad pointer in frame ", gofuncname(f), " at ", add(scanp, i*ptrSize), ": ", p, "\n")
+				gothrow("invalid stack pointer")
+			}
+			if minp <= up && up < maxp {
+				if stackDebug >= 3 {
+					print("adjust ptr ", p, " ", gofuncname(f), "\n")
+				}
+				*(*unsafe.Pointer)(add(scanp, i*ptrSize)) = unsafe.Pointer(up + delta)
+			}
+		}
+	}
+}
+
+// Note: the argument/return area is adjusted by the callee.
+func adjustframe(frame *stkframe, arg unsafe.Pointer) bool {
+	adjinfo := (*adjustinfo)(arg)
+	targetpc := frame.continpc
+	if targetpc == 0 {
+		// Frame is dead.
+		return true
+	}
+	f := frame.fn
+	if stackDebug >= 2 {
+		print("    adjusting ", funcname(f), " frame=[", hex(frame.sp), ",", hex(frame.fp), "] pc=", hex(frame.pc), " continpc=", hex(frame.continpc), "\n")
+	}
+	if f.entry == systemstack_switchPC {
+		// A special routine at the bottom of the stack of a goroutine that does a systemstack call.
+		// We will allow it to be copied even though we don't
+		// have full GC info for it (because it is written in asm).
+		return true
+	}
+	if targetpc != f.entry {
+		targetpc--
+	}
+	pcdata := pcdatavalue(f, _PCDATA_StackMapIndex, targetpc)
+	if pcdata == -1 {
+		pcdata = 0 // in prologue
+	}
+
+	// Adjust local variables if stack frame has been allocated.
+	size := frame.varp - frame.sp
+	var minsize uintptr
+	if thechar != '6' && thechar != '8' {
+		minsize = ptrSize
+	} else {
+		minsize = 0
+	}
+	if size > minsize {
+		var bv bitvector
+		stackmap := (*stackmap)(funcdata(f, _FUNCDATA_LocalsPointerMaps))
+		if stackmap == nil || stackmap.n <= 0 {
+			print("runtime: frame ", funcname(f), " untyped locals ", hex(frame.varp-size), "+", hex(size), "\n")
+			gothrow("missing stackmap")
+		}
+		// Locals bitmap information, scan just the pointers in locals.
+		if pcdata < 0 || pcdata >= stackmap.n {
+			// don't know where we are
+			print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " locals stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
+			gothrow("bad symbol table")
+		}
+		bv = stackmapdata(stackmap, pcdata)
+		size = (uintptr(bv.n) * ptrSize) / _BitsPerPointer
+		if stackDebug >= 3 {
+			print("      locals ", pcdata, "/", stackmap.n, " ", size/ptrSize, " words ", bv.bytedata, "\n")
+		}
+		adjustpointers(unsafe.Pointer(frame.varp-size), &bv, adjinfo, f)
+	}
+
+	// Adjust arguments.
+	if frame.arglen > 0 {
+		var bv bitvector
+		if frame.argmap != nil {
+			bv = *frame.argmap
+		} else {
+			stackmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
+			if stackmap == nil || stackmap.n <= 0 {
+				print("runtime: frame ", funcname(f), " untyped args ", frame.argp, "+", uintptr(frame.arglen), "\n")
+				gothrow("missing stackmap")
+			}
+			if pcdata < 0 || pcdata >= stackmap.n {
+				// don't know where we are
+				print("runtime: pcdata is ", pcdata, " and ", stackmap.n, " args stack map entries for ", funcname(f), " (targetpc=", targetpc, ")\n")
+				gothrow("bad symbol table")
+			}
+			bv = stackmapdata(stackmap, pcdata)
+		}
+		if stackDebug >= 3 {
+			print("      args\n")
+		}
+		adjustpointers(unsafe.Pointer(frame.argp), &bv, adjinfo, nil)
+	}
+	return true
+}
+
+func adjustctxt(gp *g, adjinfo *adjustinfo) {
+	adjustpointer(adjinfo, (unsafe.Pointer)(&gp.sched.ctxt))
+}
+
+func adjustdefers(gp *g, adjinfo *adjustinfo) {
+	// Adjust defer argument blocks the same way we adjust active stack frames.
+	tracebackdefers(gp, adjustframe, noescape(unsafe.Pointer(adjinfo)))
+
+	// Adjust pointers in the Defer structs.
+	// Defer structs themselves are never on the stack.
+	for d := gp._defer; d != nil; d = d.link {
+		adjustpointer(adjinfo, (unsafe.Pointer)(&d.fn))
+		adjustpointer(adjinfo, (unsafe.Pointer)(&d.argp))
+		adjustpointer(adjinfo, (unsafe.Pointer)(&d._panic))
+	}
+}
+
+func adjustpanics(gp *g, adjinfo *adjustinfo) {
+	// Panics are on stack and already adjusted.
+	// Update pointer to head of list in G.
+	adjustpointer(adjinfo, (unsafe.Pointer)(&gp._panic))
+}
+
+func adjustsudogs(gp *g, adjinfo *adjustinfo) {
+	// the data elements pointed to by a SudoG structure
+	// might be in the stack.
+	for s := gp.waiting; s != nil; s = s.waitlink {
+		adjustpointer(adjinfo, (unsafe.Pointer)(&s.elem))
+		adjustpointer(adjinfo, (unsafe.Pointer)(&s.selectdone))
+	}
+}
+
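+// fillstack fills every byte of the stack range with b. It is only used by
+// the stackPoisonCopy debugging mode in copystack.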
+func fillstack(stk stack, b byte) {
+	for p := stk.lo; p < stk.hi; p++ {
+		*(*byte)(unsafe.Pointer(p)) = b
+	}
+}
+
+// Copies gp's stack to a new stack of a different size.
+// Caller must have changed gp status to Gcopystack.
+func copystack(gp *g, newsize uintptr) {
+	if gp.syscallsp != 0 {
+		gothrow("stack growth not allowed in system call")
+	}
+	old := gp.stack
+	if old.lo == 0 {
+		gothrow("nil stackbase")
+	}
+	used := old.hi - gp.sched.sp
+
+	// allocate new stack
+	new := stackalloc(uint32(newsize))
+	if stackPoisonCopy != 0 {
+		fillstack(new, 0xfd)
+	}
+	if stackDebug >= 1 {
+		print("copystack gp=", gp, " [", hex(old.lo), " ", hex(old.hi-used), " ", hex(old.hi), "]/", old.hi-old.lo, " -> [", hex(new.lo), " ", hex(new.hi-used), " ", hex(new.hi), "]/", newsize, "\n")
+	}
+
+	// adjust pointers in the to-be-copied frames
+	var adjinfo adjustinfo
+	adjinfo.old = old
+	adjinfo.delta = new.hi - old.hi
+	gentraceback(^uintptr(0), ^uintptr(0), 0, gp, 0, nil, 0x7fffffff, adjustframe, noescape(unsafe.Pointer(&adjinfo)), 0)
+
+	// adjust other miscellaneous things that have pointers into stacks.
+	adjustctxt(gp, &adjinfo)
+	adjustdefers(gp, &adjinfo)
+	adjustpanics(gp, &adjinfo)
+	adjustsudogs(gp, &adjinfo)
+
+	// copy the stack to the new location
+	if stackPoisonCopy != 0 {
+		fillstack(new, 0xfb)
+	}
+	memmove(unsafe.Pointer(new.hi-used), unsafe.Pointer(old.hi-used), used)
+
+	// Swap out old stack for new one
+	gp.stack = new
+	gp.stackguard0 = new.lo + _StackGuard // NOTE: might clobber a preempt request
+	gp.sched.sp = new.hi - used
+
+	// free old stack
+	if stackPoisonCopy != 0 {
+		fillstack(old, 0xfc)
+	}
+	if newsize > old.hi-old.lo {
+		// growing, free stack immediately
+		stackfree(old)
+	} else {
+		// shrinking, queue up free operation.  We can't actually free the stack
+		// just yet because we might run into the following situation:
+		// 1) GC starts, scans a SudoG but does not yet mark the SudoG.elem pointer
+		// 2) The stack that pointer points to is shrunk
+		// 3) The old stack is freed
+		// 4) The containing span is marked free
+		// 5) GC attempts to mark the SudoG.elem pointer.  The marking fails because
+		//    the pointer looks like a pointer into a free span.
+		// By not freeing, we prevent step #4 until GC is done.
+		lock(&stackpoolmu)
+		*(*stack)(unsafe.Pointer(old.lo)) = stackfreequeue
+		stackfreequeue = old
+		unlock(&stackpoolmu)
+	}
+}
+
+// round x up to a power of 2.
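+// For example, round2(5) == 8; powers of two are returned unchanged.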
+func round2(x int32) int32 {
+	s := uint(0)
+	for 1<<s < x {
+		s++
+	}
+	return 1 << s
+}
+
+// Called from runtime·morestack when more stack is needed.
+// Allocate larger stack and relocate to new stack.
+// Stack growth is multiplicative, for constant amortized cost.
+//
+// g->atomicstatus will be Grunning or Gscanrunning upon entry.
+// If the GC is trying to stop this g then it will set preemptscan to true.
+func newstack() {
+	thisg := getg()
+	// TODO: double check all gp. shouldn't be getg().
+	if thisg.m.morebuf.g.stackguard0 == stackFork {
+		gothrow("stack growth after fork")
+	}
+	if thisg.m.morebuf.g != thisg.m.curg {
+		print("runtime: newstack called from g=", thisg.m.morebuf.g, "\n"+"\tm=", thisg.m, " m->curg=", thisg.m.curg, " m->g0=", thisg.m.g0, " m->gsignal=", thisg.m.gsignal, "\n")
+		morebuf := thisg.m.morebuf
+		traceback(morebuf.pc, morebuf.sp, morebuf.lr, morebuf.g)
+		gothrow("runtime: wrong goroutine in newstack")
+	}
+	if thisg.m.curg.throwsplit {
+		gp := thisg.m.curg
+		// Update syscallsp, syscallpc in case traceback uses them.
+		morebuf := thisg.m.morebuf
+		gp.syscallsp = morebuf.sp
+		gp.syscallpc = morebuf.pc
+		print("runtime: newstack sp=", hex(gp.sched.sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
+			"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
+			"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
+		gothrow("runtime: stack split at bad time")
+	}
+
+	// The goroutine must be executing in order to call newstack,
+	// so it must be Grunning or Gscanrunning.
+
+	gp := thisg.m.curg
+	morebuf := thisg.m.morebuf
+	thisg.m.morebuf.pc = 0
+	thisg.m.morebuf.lr = 0
+	thisg.m.morebuf.sp = 0
+	thisg.m.morebuf.g = nil
+
+	casgstatus(gp, _Grunning, _Gwaiting)
+	gp.waitreason = "stack growth"
+
+	rewindmorestack(&gp.sched)
+
+	if gp.stack.lo == 0 {
+		gothrow("missing stack in newstack")
+	}
+	sp := gp.sched.sp
+	if thechar == '6' || thechar == '8' {
+		// The call to morestack cost a word.
+		sp -= ptrSize
+	}
+	if stackDebug >= 1 || sp < gp.stack.lo {
+		print("runtime: newstack sp=", hex(sp), " stack=[", hex(gp.stack.lo), ", ", hex(gp.stack.hi), "]\n",
+			"\tmorebuf={pc:", hex(morebuf.pc), " sp:", hex(morebuf.sp), " lr:", hex(morebuf.lr), "}\n",
+			"\tsched={pc:", hex(gp.sched.pc), " sp:", hex(gp.sched.sp), " lr:", hex(gp.sched.lr), " ctxt:", gp.sched.ctxt, "}\n")
+	}
+	if sp < gp.stack.lo {
+		print("runtime: gp=", gp, ", gp->status=", hex(readgstatus(gp)), "\n ")
+		print("runtime: split stack overflow: ", hex(sp), " < ", hex(gp.stack.lo), "\n")
+		gothrow("runtime: split stack overflow")
+	}
+
+	if gp.sched.ctxt != nil {
+		// morestack wrote sched.ctxt on its way in here,
+		// without a write barrier. Run the write barrier now.
+		// It is not possible to be preempted between then
+		// and now, so it's okay.
+		writebarrierptr_nostore((*uintptr)(unsafe.Pointer(&gp.sched.ctxt)), uintptr(gp.sched.ctxt))
+	}
+
+	if gp.stackguard0 == stackPreempt {
+		if gp == thisg.m.g0 {
+			gothrow("runtime: preempt g0")
+		}
+		if thisg.m.p == nil && thisg.m.locks == 0 {
+			gothrow("runtime: g is running but p is not")
+		}
+		if gp.preemptscan {
+			gcphasework(gp)
+			casgstatus(gp, _Gwaiting, _Grunning)
+			gp.stackguard0 = gp.stack.lo + _StackGuard
+			gp.preempt = false
+			gp.preemptscan = false // Tells the GC that preemption was successful.
+			gogo(&gp.sched)        // never return
+		}
+
+		// Be conservative about where we preempt.
+		// We are interested in preempting user Go code, not runtime code.
+		if thisg.m.locks != 0 || thisg.m.mallocing != 0 || thisg.m.gcing != 0 || thisg.m.p.status != _Prunning {
+			// Let the goroutine keep running for now.
+			// gp->preempt is set, so it will be preempted next time.
+			gp.stackguard0 = gp.stack.lo + _StackGuard
+			casgstatus(gp, _Gwaiting, _Grunning)
+			gogo(&gp.sched) // never return
+		}
+
+		// Act like goroutine called runtime.Gosched.
+		casgstatus(gp, _Gwaiting, _Grunning)
+		gosched_m(gp) // never return
+	}
+
+	// Allocate a bigger segment and move the stack.
+	oldsize := int(gp.stack.hi - gp.stack.lo)
+	newsize := oldsize * 2
+	if uintptr(newsize) > maxstacksize {
+		print("runtime: goroutine stack exceeds ", maxstacksize, "-byte limit\n")
+		gothrow("stack overflow")
+	}
+
+	oldstatus := readgstatus(gp)
+	oldstatus &^= _Gscan
+	casgstatus(gp, oldstatus, _Gcopystack) // oldstatus is Gwaiting or Grunnable
+
+	// The concurrent GC will not scan the stack while we are doing the copy since
+	// the gp is in a Gcopystack status.
+	copystack(gp, uintptr(newsize))
+	if stackDebug >= 1 {
+		print("stack grow done\n")
+	}
+	casgstatus(gp, _Gcopystack, _Grunning)
+	gogo(&gp.sched)
+}
+
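+// nilfunc deliberately faults if it is ever executed; gostartcallfn below uses
+// its PC as a placeholder when no function value is supplied.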
+//go:nosplit
+func nilfunc() {
+	*(*uint8)(nil) = 0
+}
+
+// adjust Gobuf as if it executed a call to fn
+// and then did an immediate gosave.
+func gostartcallfn(gobuf *gobuf, fv *funcval) {
+	var fn unsafe.Pointer
+	if fv != nil {
+		fn = (unsafe.Pointer)(fv.fn)
+	} else {
+		fn = unsafe.Pointer(funcPC(nilfunc))
+	}
+	gostartcall(gobuf, fn, (unsafe.Pointer)(fv))
+}
+
+// Maybe shrink the stack being used by gp.
+// Called at garbage collection time.
+func shrinkstack(gp *g) {
+	if readgstatus(gp) == _Gdead {
+		if gp.stack.lo != 0 {
+			// Free whole stack - it will get reallocated
+			// if G is used again.
+			stackfree(gp.stack)
+			gp.stack.lo = 0
+			gp.stack.hi = 0
+		}
+		return
+	}
+	if gp.stack.lo == 0 {
+		gothrow("missing stack in shrinkstack")
+	}
+
+	oldsize := gp.stack.hi - gp.stack.lo
+	newsize := oldsize / 2
+	if newsize < _FixedStack {
+		return // don't shrink below the minimum-sized stack
+	}
+	used := gp.stack.hi - gp.sched.sp
+	if used >= oldsize/4 {
+		return // still using at least 1/4 of the segment.
+	}
+
+	// We can't copy the stack if we're in a syscall.
+	// The syscall might have pointers into the stack.
+	if gp.syscallsp != 0 {
+		return
+	}
+	if _Windows != 0 && gp.m != nil && gp.m.libcallsp != 0 {
+		return
+	}
+
+	if stackDebug > 0 {
+		print("shrinking stack ", oldsize, "->", newsize, "\n")
+	}
+
+	// This is being done in a Gscan state and was initiated by the GC, so there is
+	// no need to move to the Gcopystack state.
+	// The world is stopped, so the goroutine must be Gwaiting or Grunnable,
+	// and its status is not changing underfoot.
+	oldstatus := readgstatus(gp) &^ _Gscan
+	if oldstatus != _Gwaiting && oldstatus != _Grunnable {
+		gothrow("status is not Gwaiting or Grunnable")
+	}
+	casgstatus(gp, oldstatus, _Gcopystack)
+	copystack(gp, newsize)
+	casgstatus(gp, _Gcopystack, oldstatus)
+}
+
+// Do any delayed stack freeing that was queued up during GC.
+func shrinkfinish() {
+	lock(&stackpoolmu)
+	s := stackfreequeue
+	stackfreequeue = stack{}
+	unlock(&stackpoolmu)
+	for s.lo != 0 {
+		t := *(*stack)(unsafe.Pointer(s.lo))
+		stackfree(s)
+		s = t
+	}
+}
+
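+// morestackc is reached, presumably via the stack-split prologue emitted for
+// C code, when C code finds itself running on a goroutine stack; that is never
+// valid, so it throws.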
+//go:nosplit
+func morestackc() {
+	systemstack(func() {
+		gothrow("attempt to execute C code on Go stack")
+	})
+}
diff --git a/src/runtime/stack2.go b/src/runtime/stack2.go
new file mode 100644
index 0000000..c3718c2
--- /dev/null
+++ b/src/runtime/stack2.go
@@ -0,0 +1,106 @@
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+/*
+Stack layout parameters.
+Included both by runtime (compiled via 6c) and linkers (compiled via gcc).
+
+The per-goroutine g->stackguard is set to point StackGuard bytes
+above the bottom of the stack.  Each function compares its stack
+pointer against g->stackguard to check for overflow.  To cut one
+instruction from the check sequence for functions with tiny frames,
+the stack is allowed to protrude StackSmall bytes below the stack
+guard.  Functions with large frames don't bother with the check and
+always call morestack.  The sequences are (for amd64, others are
+similar):
+
+	guard = g->stackguard
+	frame = function's stack frame size
+	argsize = size of function arguments (call + return)
+
+	stack frame size <= StackSmall:
+		CMPQ guard, SP
+		JHI 3(PC)
+		MOVQ m->morearg, $(argsize << 32)
+		CALL morestack(SB)
+
+	stack frame size > StackSmall but < StackBig:
+		LEAQ (frame-StackSmall)(SP), R0
+		CMPQ guard, R0
+		JHI 3(PC)
+		MOVQ m->morearg, $(argsize << 32)
+		CALL morestack(SB)
+
+	stack frame size >= StackBig:
+		MOVQ m->morearg, $((argsize << 32) | frame)
+		CALL morestack(SB)
+
+The bottom StackGuard - StackSmall bytes are important: there has
+to be enough room to execute functions that refuse to check for
+stack overflow, either because they need to be adjacent to the
+actual caller's frame (deferproc) or because they handle the imminent
+stack overflow (morestack).
+
+For example, deferproc might call malloc, which does one of the
+above checks (without allocating a full frame), which might trigger
+a call to morestack.  This sequence needs to fit in the bottom
+section of the stack.  On amd64, morestack's frame is 40 bytes, and
+deferproc's frame is 56 bytes.  That fits well within the
+StackGuard - StackSmall bytes at the bottom.
+The linkers explore all possible call traces involving non-splitting
+functions to make sure that this limit cannot be violated.
+*/
+
+const (
+	// StackSystem is a number of additional bytes to add
+	// to each stack below the usual guard area for OS-specific
+	// purposes like signal handling. Used on Windows and on
+	// Plan 9 because they do not use a separate stack.
+	_StackSystem = _Windows*512*ptrSize + _Plan9*512
+
+	// The minimum size of stack used by Go code
+	_StackMin = 2048
+
+	// The minimum stack size to allocate.
+	// The hackery here rounds FixedStack0 up to a power of 2.
+	_FixedStack0 = _StackMin + _StackSystem
+	_FixedStack1 = _FixedStack0 - 1
+	_FixedStack2 = _FixedStack1 | (_FixedStack1 >> 1)
+	_FixedStack3 = _FixedStack2 | (_FixedStack2 >> 2)
+	_FixedStack4 = _FixedStack3 | (_FixedStack3 >> 4)
+	_FixedStack5 = _FixedStack4 | (_FixedStack4 >> 8)
+	_FixedStack6 = _FixedStack5 | (_FixedStack5 >> 16)
+	_FixedStack  = _FixedStack6 + 1
+
+	// Functions that need frames bigger than this use an extra
+	// instruction to do the stack split check, to avoid overflow
+	// in case SP - framesize wraps below zero.
+	// This value can be no bigger than the size of the unmapped
+	// space at zero.
+	_StackBig = 4096
+
+	// The stack guard is a pointer this many bytes above the
+	// bottom of the stack.
+	_StackGuard = 512 + _StackSystem
+
+	// After a stack split check the SP is allowed to be this
+	// many bytes below the stack guard.  This saves an instruction
+	// in the checking sequence for tiny frames.
+	_StackSmall = 128
+
+	// The maximum number of bytes that a chain of NOSPLIT
+	// functions can use.
+	_StackLimit = _StackGuard - _StackSystem - _StackSmall
+)
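+
+// Worked example of the _FixedStack rounding above: when _StackSystem is 0,
+// _FixedStack0 is 2048, already a power of two, so _FixedStack remains 2048.
+// On windows/amd64 (ptrSize = 8, so _StackSystem = 512*8 = 4096), _FixedStack0
+// is 6144 and the bit-smearing rounds it up to _FixedStack = 8192.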
+
+// Goroutine preemption request.
+// Stored into g->stackguard0 to cause split stack check failure.
+// Must be greater than any real sp.
+// 0xfffffade in hex.
+const (
+	_StackPreempt = uintptrMask & -1314
+	_StackFork    = uintptrMask & -1234
+)
diff --git a/src/runtime/string.c b/src/runtime/string.c
deleted file mode 100644
index 475ea2d..0000000
--- a/src/runtime/string.c
+++ /dev/null
@@ -1,226 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "arch_GOARCH.h"
-#include "malloc.h"
-#include "race.h"
-#include "textflag.h"
-
-String	runtime·emptystring;
-
-#pragma textflag NOSPLIT
-intgo
-runtime·findnull(byte *s)
-{
-	intgo l;
-
-	if(s == nil)
-		return 0;
-	for(l=0; s[l]!=0; l++)
-		;
-	return l;
-}
-
-intgo
-runtime·findnullw(uint16 *s)
-{
-	intgo l;
-
-	if(s == nil)
-		return 0;
-	for(l=0; s[l]!=0; l++)
-		;
-	return l;
-}
-
-uintptr runtime·maxstring = 256; // a hint for print
-
-#pragma textflag NOSPLIT
-String
-runtime·gostringnocopy(byte *str)
-{
-	String s;
-	uintptr ms;
-	
-	s.str = str;
-	s.len = runtime·findnull(str);
-	while(true) {
-		ms = runtime·maxstring;
-		if(s.len <= ms || runtime·casuintptr(&runtime·maxstring, ms, s.len))
-			return s;
-	}
-}
-
-// TODO: move this elsewhere
-enum
-{
-	Bit1	= 7,
-	Bitx	= 6,
-	Bit2	= 5,
-	Bit3	= 4,
-	Bit4	= 3,
-	Bit5	= 2,
-
-	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
-	T2	= ((1<<(Bit2+1))-1) ^ 0xFF,	/* 1100 0000 */
-	T3	= ((1<<(Bit3+1))-1) ^ 0xFF,	/* 1110 0000 */
-	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
-
-	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
-	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
-	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
-
-	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
-
-	Runeerror	= 0xFFFD,
-
-	SurrogateMin = 0xD800,
-	SurrogateMax = 0xDFFF,
-
-	Runemax	= 0x10FFFF,	/* maximum rune value */
-};
-
-static int32
-runetochar(byte *str, int32 rune)  /* note: in original, arg2 was pointer */
-{
-	/* Runes are signed, so convert to unsigned for range check. */
-	uint32 c;
-
-	/*
-	 * one character sequence
-	 *	00000-0007F => 00-7F
-	 */
-	c = rune;
-	if(c <= Rune1) {
-		str[0] = c;
-		return 1;
-	}
-
-	/*
-	 * two character sequence
-	 *	0080-07FF => T2 Tx
-	 */
-	if(c <= Rune2) {
-		str[0] = T2 | (c >> 1*Bitx);
-		str[1] = Tx | (c & Maskx);
-		return 2;
-	}
-
-	/*
-	 * If the Rune is out of range or a surrogate half, convert it to the error rune.
-	 * Do this test here because the error rune encodes to three bytes.
-	 * Doing it earlier would duplicate work, since an out of range
-	 * Rune wouldn't have fit in one or two bytes.
-	 */
-	if (c > Runemax)
-		c = Runeerror;
-	if (SurrogateMin <= c && c <= SurrogateMax)
-		c = Runeerror;
-
-	/*
-	 * three character sequence
-	 *	0800-FFFF => T3 Tx Tx
-	 */
-	if (c <= Rune3) {
-		str[0] = T3 |  (c >> 2*Bitx);
-		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
-		str[2] = Tx |  (c & Maskx);
-		return 3;
-	}
-
-	/*
-	 * four character sequence (21-bit value)
-	 *     10000-1FFFFF => T4 Tx Tx Tx
-	 */
-	str[0] = T4 | (c >> 3*Bitx);
-	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
-	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
-	str[3] = Tx | (c & Maskx);
-	return 4;
-}
-
-String runtime·gostringsize(intgo);
-
-String
-runtime·gostringw(uint16 *str)
-{
-	intgo n1, n2, i;
-	byte buf[8];
-	String s;
-
-	n1 = 0;
-	for(i=0; str[i]; i++)
-		n1 += runetochar(buf, str[i]);
-	s = runtime·gostringsize(n1+4);
-	n2 = 0;
-	for(i=0; str[i]; i++) {
-		// check for race
-		if(n2 >= n1)
-			break;
-		n2 += runetochar(s.str+n2, str[i]);
-	}
-	s.len = n2;
-	s.str[s.len] = 0;
-	return s;
-}
-
-int32
-runtime·strcmp(byte *s1, byte *s2)
-{
-	uintptr i;
-	byte c1, c2;
-
-	for(i=0;; i++) {
-		c1 = s1[i];
-		c2 = s2[i];
-		if(c1 < c2)
-			return -1;
-		if(c1 > c2)
-			return +1;
-		if(c1 == 0)
-			return 0;
-	}
-}
-
-int32
-runtime·strncmp(byte *s1, byte *s2, uintptr n)
-{
-	uintptr i;
-	byte c1, c2;
-
-	for(i=0; i<n; i++) {
-		c1 = s1[i];
-		c2 = s2[i];
-		if(c1 < c2)
-			return -1;
-		if(c1 > c2)
-			return +1;
-		if(c1 == 0)
-			break;
-	}
-	return 0;
-}
-
-byte*
-runtime·strstr(byte *s1, byte *s2)
-{
-	byte *sp1, *sp2;
-
-	if(*s2 == 0)
-		return s1;
-	for(; *s1; s1++) {
-		if(*s1 != *s2)
-			continue;
-		sp1 = s1;
-		sp2 = s2;
-		for(;;) {
-			if(*sp2 == 0)
-				return s1;
-			if(*sp1++ != *sp2++)
-				break;
-		}
-	}
-	return nil;
-}
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 8822816..e01bc3b 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -221,7 +221,7 @@
 
 // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
 func rawruneslice(size int) (b []rune) {
-	if uintptr(size) > maxmem/4 {
+	if uintptr(size) > _MaxMem/4 {
 		gothrow("out of memory")
 	}
 	mem := goroundupsize(uintptr(size) * 4)
@@ -251,9 +251,6 @@
 	return s
 }
 
-//go:noescape
-func findnull(*byte) int
-
 func gostring(p *byte) string {
 	l := findnull(p)
 	if l == 0 {
@@ -292,3 +289,12 @@
 func hasprefix(s, t string) bool {
 	return len(s) >= len(t) && s[:len(t)] == t
 }
+
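+// goatoi is a minimal decimal parser: it consumes leading ASCII digits and
+// stops at the first non-digit, so goatoi("12x") == 12; it does not handle
+// signs, and goatoi("") == 0.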
+func goatoi(s string) int {
+	n := 0
+	for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
+		n = n*10 + int(s[0]) - '0'
+		s = s[1:]
+	}
+	return n
+}
diff --git a/src/runtime/string1.go b/src/runtime/string1.go
new file mode 100644
index 0000000..35cde43
--- /dev/null
+++ b/src/runtime/string1.go
@@ -0,0 +1,108 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+//go:nosplit
+func findnull(s *byte) int {
+	if s == nil {
+		return 0
+	}
+	p := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s))
+	l := 0
+	for p[l] != 0 {
+		l++
+	}
+	return l
+}
+
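+// findnullw is the 16-bit analogue of findnull: it returns the number of
+// uint16 units before the terminating zero, or 0 if s is nil.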
+func findnullw(s *uint16) int {
+	if s == nil {
+		return 0
+	}
+	p := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(s))
+	l := 0
+	for p[l] != 0 {
+		l++
+	}
+	return l
+}
+
+var maxstring uintptr = 256 // a hint for print
+
+//go:nosplit
+func gostringnocopy(str *byte) string {
+	var s string
+	sp := (*stringStruct)(unsafe.Pointer(&s))
+	sp.str = unsafe.Pointer(str)
+	sp.len = findnull(str)
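+	// Keep maxstring (a hint for print) up to date: retry the CAS until the
+	// recorded maximum is at least len(s) or our update wins.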
+	for {
+		ms := maxstring
+		if uintptr(len(s)) <= ms || casuintptr(&maxstring, ms, uintptr(len(s))) {
+			break
+		}
+	}
+	return s
+}
+
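+// gostringw converts a NUL-terminated sequence of 16-bit code units (for
+// example, a Windows wide string) to a Go string, encoding each unit
+// independently; surrogate pairs are not combined.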
+func gostringw(strw *uint16) string {
+	var buf [8]byte
+	str := (*[_MaxMem/2/2 - 1]uint16)(unsafe.Pointer(strw))
+	n1 := 0
+	for i := 0; str[i] != 0; i++ {
+		n1 += runetochar(buf[:], rune(str[i]))
+	}
+	s, b := rawstring(n1 + 4)
+	n2 := 0
+	for i := 0; str[i] != 0; i++ {
+		// check for race
+		if n2 >= n1 {
+			break
+		}
+		n2 += runetochar(b[n2:], rune(str[i]))
+	}
+	b[n2] = 0 // for luck
+	return s[:n2]
+}
+
+func strcmp(s1, s2 *byte) int32 {
+	p1 := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s1))
+	p2 := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s2))
+
+	for i := uintptr(0); ; i++ {
+		c1 := p1[i]
+		c2 := p2[i]
+		if c1 < c2 {
+			return -1
+		}
+		if c1 > c2 {
+			return +1
+		}
+		if c1 == 0 {
+			return 0
+		}
+	}
+}
+
+func strncmp(s1, s2 *byte, n uintptr) int32 {
+	p1 := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s1))
+	p2 := (*[_MaxMem/2 - 1]byte)(unsafe.Pointer(s2))
+
+	for i := uintptr(0); i < n; i++ {
+		c1 := p1[i]
+		c2 := p2[i]
+		if c1 < c2 {
+			return -1
+		}
+		if c1 > c2 {
+			return +1
+		}
+		if c1 == 0 {
+			break
+		}
+	}
+	return 0
+}
diff --git a/src/runtime/stubs.go b/src/runtime/stubs.go
index 9889567..217307a 100644
--- a/src/runtime/stubs.go
+++ b/src/runtime/stubs.go
@@ -23,12 +23,7 @@
 	return unsafe.Pointer(uintptr(p) + delta)
 }
 
-// in runtime.c
 func getg() *g
-func acquirem() *m
-func releasem(mp *m)
-func gomcache() *mcache
-func readgstatus(*g) uint32 // proc.c
 
 // mcall switches from the g to the g0 stack and invokes fn(g),
 // where g is the goroutine that made the call.
@@ -43,95 +38,30 @@
 //go:noescape
 func mcall(fn func(*g))
 
-// onM switches from the g to the g0 stack and invokes fn().
-// When fn returns, onM switches back to the g and returns,
-// continuing execution on the g stack.
-// If arguments must be passed to fn, they can be written to
-// g->m->ptrarg (pointers) and g->m->scalararg (non-pointers)
-// before the call and then consulted during fn.
-// Similarly, fn can pass return values back in those locations.
-// If fn is written in Go, it can be a closure, which avoids the need for
-// ptrarg and scalararg entirely.
-// After reading values out of ptrarg and scalararg it is conventional
-// to zero them to avoid (memory or information) leaks.
+// systemstack runs fn on a system stack.
+// If systemstack is called from the per-OS-thread (g0) stack, or
+// if systemstack is called from the signal handling (gsignal) stack,
+// systemstack calls fn directly and returns.
+// Otherwise, systemstack is being called from the limited stack
+// of an ordinary goroutine. In this case, systemstack switches
+// to the per-OS-thread stack, calls fn, and switches back.
+// It is common to use a func literal as the argument, in order
+// to share inputs and outputs with the code around the call
+// to systemstack:
 //
-// If onM is called from a g0 stack, it invokes fn and returns,
-// without any stack switches.
-//
-// If onM is called from a gsignal stack, it crashes the program.
-// The implication is that functions used in signal handlers must
-// not use onM.
-//
-// NOTE(rsc): We could introduce a separate onMsignal that is
-// like onM but if called from a gsignal stack would just run fn on
-// that stack. The caller of onMsignal would be required to save the
-// old values of ptrarg/scalararg and restore them when the call
-// was finished, in case the signal interrupted an onM sequence
-// in progress on the g or g0 stacks. Until there is a clear need for this,
-// we just reject onM in signal handling contexts entirely.
+//	... set up y ...
+//	systemstack(func() {
+//		x = bigcall(y)
+//	})
+//	... use x ...
 //
 //go:noescape
-func onM(fn func())
+func systemstack(fn func())
 
-// onMsignal is like onM but is allowed to be used in code that
-// might run on the gsignal stack. Code running on a signal stack
-// may be interrupting an onM sequence on the main stack, so
-// if the onMsignal calling sequence writes to ptrarg/scalararg,
-// it must first save the old values and then restore them when
-// finished. As an exception to the rule, it is fine not to save and
-// restore the values if the program is trying to crash rather than
-// return from the signal handler.
-// Once all the runtime is written in Go, there will be no ptrarg/scalararg
-// and the distinction between onM and onMsignal (and perhaps mcall)
-// can go away.
-//
-// If onMsignal is called from a gsignal stack, it invokes fn directly,
-// without a stack switch. Otherwise onMsignal behaves like onM.
-//
-//go:noescape
-func onM_signalok(fn func())
-
-func badonm() {
-	gothrow("onM called from signal goroutine")
+func badsystemstack() {
+	gothrow("systemstack called from unexpected goroutine")
 }
 
-// C functions that run on the M stack.
-// Call using mcall.
-func gosched_m(*g)
-func park_m(*g)
-func recovery_m(*g)
-
-// More C functions that run on the M stack.
-// Call using onM.
-func mcacheRefill_m()
-func largeAlloc_m()
-func gc_m()
-func gcscan_m()
-func gcmark_m()
-func gccheckmark_m()
-func gccheckmarkenable_m()
-func gccheckmarkdisable_m()
-func gcinstallmarkwb_m()
-func gcinstalloffwb_m()
-func gcmarknewobject_m()
-func gcmarkwb_m()
-func finishsweep_m()
-func scavenge_m()
-func setFinalizer_m()
-func removeFinalizer_m()
-func markallocated_m()
-func unrollgcprog_m()
-func unrollgcproginplace_m()
-func setgcpercent_m()
-func setmaxthreads_m()
-func ready_m()
-func deferproc_m()
-func goexit_m()
-func startpanic_m()
-func dopanic_m()
-func readmemstats_m()
-func writeheapdump_m()
-
 // memclr clears n bytes starting at ptr.
 // in memclr_*.s
 //go:noescape
@@ -142,12 +72,6 @@
 //go:noescape
 func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr)
 
-func starttheworld()
-func stoptheworld()
-func newextram()
-func lockOSThread()
-func unlockOSThread()
-
 // exported value for testing
 var hashLoad = loadFactor
 
@@ -169,16 +93,9 @@
 	return unsafe.Pointer(x ^ 0)
 }
 
-func entersyscall()
-func reentersyscall(pc uintptr, sp unsafe.Pointer)
-func entersyscallblock()
-func exitsyscall()
-
 func cgocallback(fn, frame unsafe.Pointer, framesize uintptr)
 func gogo(buf *gobuf)
 func gosave(buf *gobuf)
-func read(fd int32, p unsafe.Pointer, n int32) int32
-func close(fd int32) int32
 func mincore(addr unsafe.Pointer, n uintptr, dst *byte) int32
 
 //go:noescape
@@ -186,33 +103,32 @@
 func exit1(code int32)
 func asminit()
 func setg(gg *g)
-func exit(code int32)
 func breakpoint()
-func nanotime() int64
-func usleep(usec uint32)
 
-// careful: cputicks is not guaranteed to be monotonic!  In particular, we have
-// noticed drift between cpus on certain os/arch combinations.  See issue 8976.
-func cputicks() int64
-
-func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
-func munmap(addr unsafe.Pointer, n uintptr)
-func madvise(addr unsafe.Pointer, n uintptr, flags int32)
 func reflectcall(fn, arg unsafe.Pointer, n uint32, retoffset uint32)
-func osyield()
 func procyield(cycles uint32)
 func cgocallback_gofunc(fv *funcval, frame unsafe.Pointer, framesize uintptr)
-func readgogc() int32
-func purgecachedstats(c *mcache)
-func gostringnocopy(b *byte) string
 func goexit()
 
 //go:noescape
-func write(fd uintptr, p unsafe.Pointer, n int32) int32
-
-//go:noescape
 func cas(ptr *uint32, old, new uint32) bool
 
+// casp cannot have a go:noescape annotation, because
+// while ptr and old do not escape, new does. If new is marked as
+// not escaping, the compiler will make incorrect escape analysis
+// decisions about the value being xchg'ed.
+// Instead, make casp a wrapper around the actual atomic.
+// When calling the wrapper we mark ptr as noescape explicitly.
+
+//go:nosplit
+func casp(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool {
+	return casp1((*unsafe.Pointer)(noescape(unsafe.Pointer(ptr))), noescape(old), new)
+}
+
+func casp1(ptr *unsafe.Pointer, old, new unsafe.Pointer) bool
+
+func nop() // call to prevent inlining of function body
+
 //go:noescape
 func casuintptr(ptr *uintptr, old, new uintptr) bool
 
@@ -268,18 +184,10 @@
 //go:noescape
 func asmcgocall_errno(fn, arg unsafe.Pointer) int32
 
-//go:noescape
-func open(name *byte, mode, perm int32) int32
-
-//go:noescape
-func gotraceback(*bool) int32
-
+// _NoArgs is the argp value used in Defer structs when there is no argp.
 const _NoArgs = ^uintptr(0)
 
-func newstack()
-func newproc()
 func morestack()
-func mstart()
 func rt0_go()
 
 // return0 is a stub used to return 0 from deferproc.
@@ -321,3 +229,5 @@
 func call268435456(fn, arg unsafe.Pointer, n, retoffset uint32)
 func call536870912(fn, arg unsafe.Pointer, n, retoffset uint32)
 func call1073741824(fn, arg unsafe.Pointer, n, retoffset uint32)
+
+func systemstack_switch()
diff --git a/src/runtime/stubs2.go b/src/runtime/stubs2.go
new file mode 100644
index 0000000..526b3c5
--- /dev/null
+++ b/src/runtime/stubs2.go
@@ -0,0 +1,27 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !solaris
+
+package runtime
+
+import "unsafe"
+
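+// Declarations for low-level OS helpers implemented in assembly on most
+// platforms.  Solaris is excluded by the build tag above, presumably because
+// it routes these calls through libc wrappers instead.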
+func read(fd int32, p unsafe.Pointer, n int32) int32
+func close(fd int32) int32
+
+func exit(code int32)
+func nanotime() int64
+func usleep(usec uint32)
+
+func mmap(addr unsafe.Pointer, n uintptr, prot, flags, fd int32, off uint32) unsafe.Pointer
+func munmap(addr unsafe.Pointer, n uintptr)
+
+//go:noescape
+func write(fd uintptr, p unsafe.Pointer, n int32) int32
+
+//go:noescape
+func open(name *byte, mode, perm int32) int32
+
+func madvise(addr unsafe.Pointer, n uintptr, flags int32)
diff --git a/src/runtime/symtab.go b/src/runtime/symtab.go
index 45d107b..749a289 100644
--- a/src/runtime/symtab.go
+++ b/src/runtime/symtab.go
@@ -22,8 +22,7 @@
 
 // funcdata.h
 const (
-	_PCDATA_ArgSize             = 0
-	_PCDATA_StackMapIndex       = 1
+	_PCDATA_StackMapIndex       = 0
 	_FUNCDATA_ArgsPointerMaps   = 0
 	_FUNCDATA_LocalsPointerMaps = 1
 	_FUNCDATA_DeadValueMaps     = 2
@@ -122,8 +121,8 @@
 func (f *Func) FileLine(pc uintptr) (file string, line int) {
 	// Pass strict=false here, because anyone can call this function,
 	// and they might just be wrong about targetpc belonging to f.
-	line = int(funcline1(f.raw(), pc, &file, false))
-	return file, line
+	file, line32 := funcline1(f.raw(), pc, false)
+	return file, int(line32)
 }
 
 func findfunc(pc uintptr) *_func {
@@ -208,20 +207,19 @@
 	return gostringnocopy(funcname(f))
 }
 
-func funcline1(f *_func, targetpc uintptr, file *string, strict bool) int32 {
-	*file = "?"
+func funcline1(f *_func, targetpc uintptr, strict bool) (file string, line int32) {
 	fileno := int(pcvalue(f, f.pcfile, targetpc, strict))
-	line := pcvalue(f, f.pcln, targetpc, strict)
+	line = pcvalue(f, f.pcln, targetpc, strict)
 	if fileno == -1 || line == -1 || fileno >= len(filetab) {
 		// print("looking for ", hex(targetpc), " in ", gofuncname(f), " got file=", fileno, " line=", lineno, "\n")
-		return 0
+		return "?", 0
 	}
-	*file = gostringnocopy(&pclntable[filetab[fileno]])
-	return line
+	file = gostringnocopy(&pclntable[filetab[fileno]])
+	return
 }
 
-func funcline(f *_func, targetpc uintptr, file *string) int32 {
-	return funcline1(f, targetpc, file, true)
+func funcline(f *_func, targetpc uintptr) (file string, line int32) {
+	return funcline1(f, targetpc, true)
 }
 
 func funcspdelta(f *_func, targetpc uintptr) int32 {
diff --git a/src/runtime/sys_arm.c b/src/runtime/sys_arm.c
deleted file mode 100644
index a65560e..0000000
--- a/src/runtime/sys_arm.c
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright 2013 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-
-// adjust Gobuf as if it executed a call to fn with context ctxt
-// and then did an immediate Gosave.
-void
-runtime·gostartcall(Gobuf *gobuf, void (*fn)(void), void *ctxt)
-{
-	if(gobuf->lr != 0)
-		runtime·throw("invalid use of gostartcall");
-	gobuf->lr = gobuf->pc;
-	gobuf->pc = (uintptr)fn;
-	gobuf->ctxt = ctxt;
-}
-
-// Called to rewind context saved during morestack back to beginning of function.
-// To help us, the linker emits a jmp back to the beginning right after the
-// call to morestack. We just have to decode and apply that jump.
-void
-runtime·rewindmorestack(Gobuf *gobuf)
-{
-	uint32 inst;
-
-	inst = *(uint32*)gobuf->pc;
-	if((gobuf->pc&3) == 0 && (inst>>24) == 0x9a) {
-		//runtime·printf("runtime: rewind pc=%p to pc=%p\n", gobuf->pc, gobuf->pc + ((int32)(inst<<8)>>6) + 8);
-		gobuf->pc += ((int32)(inst<<8)>>6) + 8;
-		return;
-	}
-	runtime·printf("runtime: pc=%p %x\n", gobuf->pc, inst);
-	runtime·throw("runtime: misuse of rewindmorestack");
-}
diff --git a/src/runtime/sys_arm.go b/src/runtime/sys_arm.go
new file mode 100644
index 0000000..81777c7
--- /dev/null
+++ b/src/runtime/sys_arm.go
@@ -0,0 +1,35 @@
+// Copyright 2013 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// adjust Gobuf as if it executed a call to fn with context ctxt
+// and then did an immediate Gosave.
+func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
+	if buf.lr != 0 {
+		gothrow("invalid use of gostartcall")
+	}
+	buf.lr = buf.pc
+	buf.pc = uintptr(fn)
+	buf.ctxt = ctxt
+}
+
+// Called to rewind context saved during morestack back to beginning of function.
+// To help us, the linker emits a jmp back to the beginning right after the
+// call to morestack. We just have to decode and apply that jump.
+func rewindmorestack(buf *gobuf) {
+	var inst uint32
+	if buf.pc&3 == 0 && buf.pc != 0 {
+		inst = *(*uint32)(unsafe.Pointer(buf.pc))
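+		// A top byte of 0x9a is a conditional PC-relative branch (B.LS);
+		// the low 24 bits hold a signed word offset.  Shifting left 8 then
+		// arithmetic-right 6 sign-extends the offset and scales it to bytes,
+		// and the +8 accounts for the ARM PC reading ahead.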
+		if inst>>24 == 0x9a {
+			buf.pc += uintptr(int32(inst<<8)>>6) + 8
+			return
+		}
+	}
+
+	print("runtime: pc=", hex(buf.pc), " ", hex(inst), "\n")
+	gothrow("runtime: misuse of rewindmorestack")
+}
diff --git a/src/runtime/sys_darwin_386.s b/src/runtime/sys_darwin_386.s
index 3bf8b1d..7cb5695 100644
--- a/src/runtime/sys_darwin_386.s
+++ b/src/runtime/sys_darwin_386.s
@@ -6,7 +6,8 @@
 // See http://fxr.watson.org/fxr/source/bsd/kern/syscalls.c?v=xnu-1228
 // or /usr/include/sys/syscall.h (on a Mac) for system call numbers.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // Exit the entire program (like C exit)
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
index 8a8928e..0a955f9 100644
--- a/src/runtime/sys_darwin_amd64.s
+++ b/src/runtime/sys_darwin_amd64.s
@@ -11,7 +11,8 @@
 // The high 8 bits specify the kind of system call: 1=Mach, 2=BSD, 3=Machine-Dependent.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // Exit the entire program (like C exit)
diff --git a/src/runtime/sys_dragonfly_386.s b/src/runtime/sys_dragonfly_386.s
index 71ece9e..bb4903e 100644
--- a/src/runtime/sys_dragonfly_386.s
+++ b/src/runtime/sys_dragonfly_386.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 	
 TEXT runtime·sys_umtx_sleep(SB),NOSPLIT,$-4
diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s
index 2c75601..db07ed7 100644
--- a/src/runtime/sys_dragonfly_amd64.s
+++ b/src/runtime/sys_dragonfly_amd64.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 	
 TEXT runtime·sys_umtx_sleep(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_freebsd_386.s b/src/runtime/sys_freebsd_386.s
index 66d03c2..d1f67c3 100644
--- a/src/runtime/sys_freebsd_386.s
+++ b/src/runtime/sys_freebsd_386.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 	
 TEXT runtime·sys_umtx_op(SB),NOSPLIT,$-4
diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s
index 65f8c1a..84cee51 100644
--- a/src/runtime/sys_freebsd_amd64.s
+++ b/src/runtime/sys_freebsd_amd64.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // FreeBSD 8, FreeBSD 9, and older versions that I have checked
diff --git a/src/runtime/sys_freebsd_arm.s b/src/runtime/sys_freebsd_arm.s
index d875138..198b427 100644
--- a/src/runtime/sys_freebsd_arm.s
+++ b/src/runtime/sys_freebsd_arm.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // for EABI, as we don't support OABI
@@ -362,7 +363,7 @@
 	SWI $0
 	RET
 
-TEXT runtime·casp(SB),NOSPLIT,$0
+TEXT runtime·casp1(SB),NOSPLIT,$0
 	B	runtime·cas(SB)
 
 // TODO(minux): this is only valid for ARMv6+
diff --git a/src/runtime/sys_linux_386.s b/src/runtime/sys_linux_386.s
index 0f6d4bb..1861f23 100644
--- a/src/runtime/sys_linux_386.s
+++ b/src/runtime/sys_linux_386.s
@@ -6,7 +6,8 @@
 // System calls and other sys.stuff for 386, Linux
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 TEXT runtime·exit(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index d8d86ff..6d4dfdb 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -6,7 +6,8 @@
 // System calls and other sys.stuff for AMD64, Linux
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 TEXT runtime·exit(SB),NOSPLIT,$0-4
diff --git a/src/runtime/sys_linux_arm.s b/src/runtime/sys_linux_arm.s
index 033a036..21d97fd 100644
--- a/src/runtime/sys_linux_arm.s
+++ b/src/runtime/sys_linux_arm.s
@@ -6,7 +6,8 @@
 // System calls and other sys.stuff for arm, Linux
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // for EABI, as we don't support OABI
@@ -391,7 +392,7 @@
 	MOVB	R0, ret+12(FP)
 	RET
 
-TEXT runtime·casp(SB),NOSPLIT,$0
+TEXT runtime·casp1(SB),NOSPLIT,$0
 	B	runtime·cas(SB)
 
 TEXT runtime·osyield(SB),NOSPLIT,$0
diff --git a/src/runtime/sys_nacl_386.s b/src/runtime/sys_nacl_386.s
index 16cd721..85c8175 100644
--- a/src/runtime/sys_nacl_386.s
+++ b/src/runtime/sys_nacl_386.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 #include "syscall_nacl.h"
 
diff --git a/src/runtime/sys_nacl_amd64p32.s b/src/runtime/sys_nacl_amd64p32.s
index 9cfbef6..f5624ca 100644
--- a/src/runtime/sys_nacl_amd64p32.s
+++ b/src/runtime/sys_nacl_amd64p32.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 #include "syscall_nacl.h"
 
diff --git a/src/runtime/sys_nacl_arm.s b/src/runtime/sys_nacl_arm.s
index 432dead..ded95a8 100644
--- a/src/runtime/sys_nacl_arm.s
+++ b/src/runtime/sys_nacl_arm.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 #include "syscall_nacl.h"
 
@@ -300,7 +301,7 @@
 TEXT runtime·nacl_sysinfo(SB),NOSPLIT,$16
 	RET
 
-TEXT runtime·casp(SB),NOSPLIT,$0
+TEXT runtime·casp1(SB),NOSPLIT,$0
 	B	runtime·cas(SB)
 
 // This is only valid for ARMv6+, however, NaCl/ARM is only defined
diff --git a/src/runtime/sys_netbsd_386.s b/src/runtime/sys_netbsd_386.s
index 23f2f6b..509d6d4 100644
--- a/src/runtime/sys_netbsd_386.s
+++ b/src/runtime/sys_netbsd_386.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // Exit the entire program (like C exit)
diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s
index eb9766d..e26d606 100644
--- a/src/runtime/sys_netbsd_amd64.s
+++ b/src/runtime/sys_netbsd_amd64.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // int32 lwp_create(void *context, uintptr flags, void *lwpid)
diff --git a/src/runtime/sys_netbsd_arm.s b/src/runtime/sys_netbsd_arm.s
index 039a083..fa9bc57 100644
--- a/src/runtime/sys_netbsd_arm.s
+++ b/src/runtime/sys_netbsd_arm.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // Exit the entire program (like C exit)
@@ -330,7 +331,7 @@
 	SWI $0xa0005c	// sys_fcntl
 	RET
 
-TEXT runtime·casp(SB),NOSPLIT,$0
+TEXT runtime·casp1(SB),NOSPLIT,$0
 	B	runtime·cas(SB)
 
 // TODO(minux): this is only valid for ARMv6+
diff --git a/src/runtime/sys_openbsd_386.s b/src/runtime/sys_openbsd_386.s
index b1ae5ec..9390757 100644
--- a/src/runtime/sys_openbsd_386.s
+++ b/src/runtime/sys_openbsd_386.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 #define	CLOCK_MONOTONIC	$3
diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s
index 4e9db23..9dc0fb6 100644
--- a/src/runtime/sys_openbsd_amd64.s
+++ b/src/runtime/sys_openbsd_amd64.s
@@ -6,7 +6,8 @@
 // /usr/src/sys/kern/syscalls.master for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 #define CLOCK_MONOTONIC	$3
diff --git a/src/runtime/sys_plan9_386.s b/src/runtime/sys_plan9_386.s
index a41b562..b9db8cb 100644
--- a/src/runtime/sys_plan9_386.s
+++ b/src/runtime/sys_plan9_386.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // setldt(int entry, int address, int limit)
diff --git a/src/runtime/sys_plan9_amd64.s b/src/runtime/sys_plan9_amd64.s
index 3a96c2b..02c7c87 100644
--- a/src/runtime/sys_plan9_amd64.s
+++ b/src/runtime/sys_plan9_amd64.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // setldt(int entry, int address, int limit)
diff --git a/src/runtime/sys_solaris_amd64.s b/src/runtime/sys_solaris_amd64.s
index 3981893..54aeaea 100644
--- a/src/runtime/sys_solaris_amd64.s
+++ b/src/runtime/sys_solaris_amd64.s
@@ -6,14 +6,15 @@
 // /usr/include/sys/syscall.h for syscall numbers.
 //
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // This is needed by asm_amd64.s
 TEXT runtime·settls(SB),NOSPLIT,$8
 	RET
 
-// void libc·miniterrno(void *(*___errno)(void));
+// void libc_miniterrno(void *(*___errno)(void));
 //
 // Set the TLS errno pointer in M.
 //
@@ -40,7 +41,7 @@
 	SUBQ	$64, SP	// 16 bytes will do, but who knows in the future?
 	MOVQ	$3, DI	// CLOCK_REALTIME from <sys/time_impl.h>
 	MOVQ	SP, SI
-	MOVQ	libc·clock_gettime(SB), AX
+	MOVQ	libc_clock_gettime(SB), AX
 	CALL	AX
 	MOVQ	(SP), AX	// tv_sec from struct timespec
 	IMULQ	$1000000000, AX	// multiply into nanoseconds
@@ -53,7 +54,7 @@
 TEXT runtime·pipe1(SB),NOSPLIT,$0
 	SUBQ	$16, SP // 8 bytes will do, but stack has to be 16-byte alligned
 	MOVQ	SP, DI
-	MOVQ	libc·pipe(SB), AX
+	MOVQ	libc_pipe(SB), AX
 	CALL	AX
 	MOVL	0(SP), AX
 	MOVL	4(SP), DX
@@ -132,7 +133,7 @@
 	MOVQ	AX, (g_stack+stack_hi)(DX)
 	SUBQ	$(0x100000), AX		// stack size
 	MOVQ	AX, (g_stack+stack_lo)(DX)
-	ADDQ	$const_StackGuard, AX
+	ADDQ	$const__StackGuard, AX
 	MOVQ	AX, g_stackguard0(DX)
 	MOVQ	AX, g_stackguard1(DX)
 
@@ -320,13 +321,13 @@
 
 // Runs on OS stack. duration (in µs units) is in DI.
 TEXT runtime·usleep2(SB),NOSPLIT,$0
-	MOVQ	libc·usleep(SB), AX
+	MOVQ	libc_usleep(SB), AX
 	CALL	AX
 	RET
 
 // Runs on OS stack, called from runtime·osyield.
 TEXT runtime·osyield1(SB),NOSPLIT,$0
-	MOVQ	libc·sched_yield(SB), AX
+	MOVQ	libc_sched_yield(SB), AX
 	CALL	AX
 	RET
 
diff --git a/src/runtime/sys_windows_386.s b/src/runtime/sys_windows_386.s
index 13fb5bd..2793e52 100644
--- a/src/runtime/sys_windows_386.s
+++ b/src/runtime/sys_windows_386.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // void runtime·asmstdcall(void *c);
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index 8b95f6d..5e5c2e7 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 // maxargs should be divisible by 2, as Windows stack
diff --git a/src/runtime/sys_x86.c b/src/runtime/sys_x86.c
deleted file mode 100644
index edbe47f..0000000
--- a/src/runtime/sys_x86.c
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright 2013 The Go Authors.  All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build amd64 amd64p32 386
-
-#include "runtime.h"
-
-// adjust Gobuf as it if executed a call to fn with context ctxt
-// and then did an immediate gosave.
-void
-runtime·gostartcall(Gobuf *gobuf, void (*fn)(void), void *ctxt)
-{
-	uintptr *sp;
-	
-	sp = (uintptr*)gobuf->sp;
-	if(sizeof(uintreg) > sizeof(uintptr))
-		*--sp = 0;
-	*--sp = (uintptr)gobuf->pc;
-	gobuf->sp = (uintptr)sp;
-	gobuf->pc = (uintptr)fn;
-	gobuf->ctxt = ctxt;
-	runtime·writebarrierptr_nostore(&gobuf->ctxt, ctxt);
-}
-
-// Called to rewind context saved during morestack back to beginning of function.
-// To help us, the linker emits a jmp back to the beginning right after the
-// call to morestack. We just have to decode and apply that jump.
-void
-runtime·rewindmorestack(Gobuf *gobuf)
-{
-	byte *pc;
-
-	pc = (byte*)gobuf->pc;
-	if(pc[0] == 0xe9) { // jmp 4-byte offset
-		gobuf->pc = gobuf->pc + 5 + *(int32*)(pc+1);
-		return;
-	}
-	if(pc[0] == 0xeb) { // jmp 1-byte offset
-		gobuf->pc = gobuf->pc + 2 + *(int8*)(pc+1);
-		return;
-	}
-	if(pc[0] == 0xcc) {
-		// This is a breakpoint inserted by gdb.  We could use
-		// runtime·findfunc to find the function.  But if we
-		// do that, then we will continue execution at the
-		// function entry point, and we will not hit the gdb
-		// breakpoint.  So for this case we don't change
-		// gobuf->pc, so that when we return we will execute
-		// the jump instruction and carry on.  This means that
-		// stack unwinding may not work entirely correctly
-		// (http://golang.org/issue/5723) but the user is
-		// running under gdb anyhow.
-		return;
-	}
-	runtime·printf("runtime: pc=%p %x %x %x %x %x\n", pc, pc[0], pc[1], pc[2], pc[3], pc[4]);
-	runtime·throw("runtime: misuse of rewindmorestack");
-}
diff --git a/src/runtime/sys_x86.go b/src/runtime/sys_x86.go
new file mode 100644
index 0000000..086af8f
--- /dev/null
+++ b/src/runtime/sys_x86.go
@@ -0,0 +1,54 @@
+// Copyright 2013 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build amd64 amd64p32 386
+
+package runtime
+
+import "unsafe"
+
+// adjust Gobuf as if it executed a call to fn with context ctxt
+// and then did an immediate gosave.
+func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
+	sp := buf.sp
+	if regSize > ptrSize {
+		sp -= ptrSize
+		*(*uintptr)(unsafe.Pointer(sp)) = 0
+	}
+	sp -= ptrSize
+	*(*uintptr)(unsafe.Pointer(sp)) = buf.pc
+	buf.sp = sp
+	buf.pc = uintptr(fn)
+	buf.ctxt = ctxt
+}
+
+// Called to rewind context saved during morestack back to beginning of function.
+// To help us, the linker emits a jmp back to the beginning right after the
+// call to morestack. We just have to decode and apply that jump.
+func rewindmorestack(buf *gobuf) {
+	pc := (*[8]byte)(unsafe.Pointer(buf.pc))
+	if pc[0] == 0xe9 { // jmp 4-byte offset
+		buf.pc = buf.pc + 5 + uintptr(int64(*(*int32)(unsafe.Pointer(&pc[1]))))
+		return
+	}
+	if pc[0] == 0xeb { // jmp 1-byte offset
+		buf.pc = buf.pc + 2 + uintptr(int64(*(*int8)(unsafe.Pointer(&pc[1]))))
+		return
+	}
+	if pc[0] == 0xcc {
+		// This is a breakpoint inserted by gdb.  We could use
+		// runtime·findfunc to find the function.  But if we
+		// do that, then we will continue execution at the
+		// function entry point, and we will not hit the gdb
+		// breakpoint.  So for this case we don't change
+		// buf.pc, so that when we return we will execute
+		// the jump instruction and carry on.  This means that
+		// stack unwinding may not work entirely correctly
+		// (http://golang.org/issue/5723) but the user is
+		// running under gdb anyhow.
+		return
+	}
+	print("runtime: pc=", pc, " ", hex(pc[0]), " ", hex(pc[1]), " ", hex(pc[2]), " ", hex(pc[3]), " ", hex(pc[4]), "\n")
+	gothrow("runtime: misuse of rewindmorestack")
+}
diff --git a/src/runtime/syscall2_solaris.go b/src/runtime/syscall2_solaris.go
new file mode 100644
index 0000000..f4ffa74
--- /dev/null
+++ b/src/runtime/syscall2_solaris.go
@@ -0,0 +1,47 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import _ "unsafe"
+
+//go:cgo_import_dynamic libc_chdir chdir "libc.so"
+//go:cgo_import_dynamic libc_chroot chroot "libc.so"
+//go:cgo_import_dynamic libc_close close "libc.so"
+//go:cgo_import_dynamic libc_dlclose dlclose "libc.so"
+//go:cgo_import_dynamic libc_dlopen dlopen "libc.so"
+//go:cgo_import_dynamic libc_dlsym dlsym "libc.so"
+//go:cgo_import_dynamic libc_execve execve "libc.so"
+//go:cgo_import_dynamic libc_fcntl fcntl "libc.so"
+//go:cgo_import_dynamic libc_gethostname gethostname "libc.so"
+//go:cgo_import_dynamic libc_ioctl ioctl "libc.so"
+//go:cgo_import_dynamic libc_pipe pipe "libc.so"
+//go:cgo_import_dynamic libc_setgid setgid "libc.so"
+//go:cgo_import_dynamic libc_setgroups setgroups "libc.so"
+//go:cgo_import_dynamic libc_setsid setsid "libc.so"
+//go:cgo_import_dynamic libc_setuid setuid "libc.so"
+//go:cgo_import_dynamic libc_setpgid setsid "libc.so"
+//go:cgo_import_dynamic libc_syscall syscall "libc.so"
+//go:cgo_import_dynamic libc_forkx forkx "libc.so"
+//go:cgo_import_dynamic libc_wait4 wait4 "libc.so"
+
+//go:linkname libc_chdir libc_chdir
+//go:linkname libc_chroot libc_chroot
+//go:linkname libc_close libc_close
+//go:linkname libc_dlclose libc_dlclose
+//go:linkname libc_dlopen libc_dlopen
+//go:linkname libc_dlsym libc_dlsym
+//go:linkname libc_execve libc_execve
+//go:linkname libc_fcntl libc_fcntl
+//go:linkname libc_gethostname libc_gethostname
+//go:linkname libc_ioctl libc_ioctl
+//go:linkname libc_pipe libc_pipe
+//go:linkname libc_setgid libc_setgid
+//go:linkname libc_setgroups libc_setgroups
+//go:linkname libc_setsid libc_setsid
+//go:linkname libc_setuid libc_setuid
+//go:linkname libc_setpgid libc_setpgid
+//go:linkname libc_syscall libc_syscall
+//go:linkname libc_forkx libc_forkx
+//go:linkname libc_wait4 libc_wait4
diff --git a/src/runtime/syscall_solaris.c b/src/runtime/syscall_solaris.c
deleted file mode 100644
index 13ac31b..0000000
--- a/src/runtime/syscall_solaris.c
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#pragma dynimport libc·chdir chdir "libc.so"
-#pragma dynimport libc·chroot chroot "libc.so"
-#pragma dynimport libc·close close "libc.so"
-#pragma dynimport libc·dlclose dlclose "libc.so"
-#pragma dynimport libc·dlopen dlopen "libc.so"
-#pragma dynimport libc·dlsym dlsym "libc.so"
-#pragma dynimport libc·execve execve "libc.so"
-#pragma dynimport libc·fcntl fcntl "libc.so"
-#pragma dynimport libc·gethostname gethostname "libc.so"
-#pragma dynimport libc·ioctl ioctl "libc.so"
-#pragma dynimport libc·pipe pipe "libc.so"
-#pragma dynimport libc·setgid setgid "libc.so"
-#pragma dynimport libc·setgroups setgroups "libc.so"
-#pragma dynimport libc·setsid setsid "libc.so"
-#pragma dynimport libc·setuid setuid "libc.so"
-#pragma dynimport libc·setpgid setsid "libc.so"
-#pragma dynimport libc·syscall syscall "libc.so"
-#pragma dynimport libc·forkx forkx "libc.so"
-#pragma dynimport libc·wait4 wait4 "libc.so"
diff --git a/src/runtime/syscall_solaris.go b/src/runtime/syscall_solaris.go
index 50d3a1d..9b99716 100644
--- a/src/runtime/syscall_solaris.go
+++ b/src/runtime/syscall_solaris.go
@@ -9,12 +9,10 @@
 var (
 	libc_chdir,
 	libc_chroot,
-	libc_close,
 	libc_dlopen,
 	libc_dlclose,
 	libc_dlsym,
 	libc_execve,
-	libc_exit,
 	libc_fcntl,
 	libc_forkx,
 	libc_gethostname,
@@ -27,7 +25,6 @@
 	libc_setpgid,
 	libc_syscall,
 	libc_wait4,
-	libc_write,
 	pipe1 libcFunc
 )
 
@@ -38,9 +35,9 @@
 		n:    nargs,
 		args: uintptr(unsafe.Pointer(&a1)),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	return call.r1, call.r2, call.err
 }
 
@@ -62,7 +59,7 @@
 //go:nosplit
 func syscall_chdir(path uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_chdir)),
+		fn:   uintptr(unsafe.Pointer(libc_chdir)),
 		n:    1,
 		args: uintptr(unsafe.Pointer(&path)),
 	}
@@ -73,7 +70,7 @@
 //go:nosplit
 func syscall_chroot(path uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_chroot)),
+		fn:   uintptr(unsafe.Pointer(libc_chroot)),
 		n:    1,
 		args: uintptr(unsafe.Pointer(&path)),
 	}
@@ -84,18 +81,18 @@
 // like close, but must not split stack, for forkx.
 //go:nosplit
 func syscall_close(fd int32) int32 {
-	return int32(sysvicall1(&libc_close, uintptr(fd)))
+	return int32(sysvicall1(libc_close, uintptr(fd)))
 }
 
 func syscall_dlopen(name *byte, mode uintptr) (handle uintptr, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_dlopen)),
+		fn:   uintptr(unsafe.Pointer(libc_dlopen)),
 		n:    2,
 		args: uintptr(unsafe.Pointer(&name)),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	if call.r1 == 0 {
 		return call.r1, call.err
 	}
@@ -104,25 +101,25 @@
 
 func syscall_dlclose(handle uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_dlclose)),
+		fn:   uintptr(unsafe.Pointer(libc_dlclose)),
 		n:    1,
 		args: uintptr(unsafe.Pointer(&handle)),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	return call.r1
 }
 
 func syscall_dlsym(handle uintptr, name *byte) (proc uintptr, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_dlsym)),
+		fn:   uintptr(unsafe.Pointer(libc_dlsym)),
 		n:    2,
 		args: uintptr(unsafe.Pointer(&handle)),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	if call.r1 == 0 {
 		return call.r1, call.err
 	}
@@ -132,7 +129,7 @@
 //go:nosplit
 func syscall_execve(path, argv, envp uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_execve)),
+		fn:   uintptr(unsafe.Pointer(libc_execve)),
 		n:    3,
 		args: uintptr(unsafe.Pointer(&path)),
 	}
@@ -143,13 +140,13 @@
 // like exit, but must not split stack, for forkx.
 //go:nosplit
 func syscall_exit(code uintptr) {
-	sysvicall1(&libc_exit, code)
+	sysvicall1(libc_exit, code)
 }
 
 //go:nosplit
 func syscall_fcntl(fd, cmd, arg uintptr) (val, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_fcntl)),
+		fn:   uintptr(unsafe.Pointer(libc_fcntl)),
 		n:    3,
 		args: uintptr(unsafe.Pointer(&fd)),
 	}
@@ -160,7 +157,7 @@
 //go:nosplit
 func syscall_forkx(flags uintptr) (pid uintptr, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_forkx)),
+		fn:   uintptr(unsafe.Pointer(libc_forkx)),
 		n:    1,
 		args: uintptr(unsafe.Pointer(&flags)),
 	}
@@ -172,13 +169,13 @@
 	cname := new([_MAXHOSTNAMELEN]byte)
 	var args = [2]uintptr{uintptr(unsafe.Pointer(&cname[0])), _MAXHOSTNAMELEN}
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_gethostname)),
+		fn:   uintptr(unsafe.Pointer(libc_gethostname)),
 		n:    2,
 		args: uintptr(unsafe.Pointer(&args[0])),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	if call.r1 != 0 {
 		return "", call.err
 	}
@@ -189,7 +186,7 @@
 //go:nosplit
 func syscall_ioctl(fd, req, arg uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_ioctl)),
+		fn:   uintptr(unsafe.Pointer(libc_ioctl)),
 		n:    3,
 		args: uintptr(unsafe.Pointer(&fd)),
 	}
@@ -203,9 +200,9 @@
 		n:    0,
 		args: uintptr(unsafe.Pointer(&pipe1)), // it's unused but must be non-nil, otherwise crashes
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	return call.r1, call.r2, call.err
 }
 
@@ -217,7 +214,7 @@
 // TODO(aram): make this panic once we stop calling fcntl(2) in net using it.
 func syscall_rawsyscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_syscall)),
+		fn:   uintptr(unsafe.Pointer(libc_syscall)),
 		n:    4,
 		args: uintptr(unsafe.Pointer(&trap)),
 	}
@@ -228,7 +225,7 @@
 //go:nosplit
 func syscall_setgid(gid uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_setgid)),
+		fn:   uintptr(unsafe.Pointer(libc_setgid)),
 		n:    1,
 		args: uintptr(unsafe.Pointer(&gid)),
 	}
@@ -239,7 +236,7 @@
 //go:nosplit
 func syscall_setgroups(ngid, gid uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_setgroups)),
+		fn:   uintptr(unsafe.Pointer(libc_setgroups)),
 		n:    2,
 		args: uintptr(unsafe.Pointer(&ngid)),
 	}
@@ -250,9 +247,9 @@
 //go:nosplit
 func syscall_setsid() (pid, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_setsid)),
+		fn:   uintptr(unsafe.Pointer(libc_setsid)),
 		n:    0,
-		args: uintptr(unsafe.Pointer(&libc_setsid)), // it's unused but must be non-nil, otherwise crashes
+		args: uintptr(unsafe.Pointer(libc_setsid)), // it's unused but must be non-nil, otherwise crashes
 	}
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
 	return call.r1, call.err
@@ -261,7 +258,7 @@
 //go:nosplit
 func syscall_setuid(uid uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_setuid)),
+		fn:   uintptr(unsafe.Pointer(libc_setuid)),
 		n:    1,
 		args: uintptr(unsafe.Pointer(&uid)),
 	}
@@ -272,7 +269,7 @@
 //go:nosplit
 func syscall_setpgid(pid, pgid uintptr) (err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_setpgid)),
+		fn:   uintptr(unsafe.Pointer(libc_setpgid)),
 		n:    2,
 		args: uintptr(unsafe.Pointer(&pid)),
 	}
@@ -288,32 +285,32 @@
 // TODO(aram): make this panic once we stop calling fcntl(2) in net using it.
 func syscall_syscall(trap, a1, a2, a3 uintptr) (r1, r2, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_syscall)),
+		fn:   uintptr(unsafe.Pointer(libc_syscall)),
 		n:    4,
 		args: uintptr(unsafe.Pointer(&trap)),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	return call.r1, call.r2, call.err
 }
 
 func syscall_wait4(pid uintptr, wstatus *uint32, options uintptr, rusage unsafe.Pointer) (wpid int, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_wait4)),
+		fn:   uintptr(unsafe.Pointer(libc_wait4)),
 		n:    4,
 		args: uintptr(unsafe.Pointer(&pid)),
 	}
-	entersyscallblock()
+	entersyscallblock(0)
 	asmcgocall(unsafe.Pointer(&asmsysvicall6), unsafe.Pointer(&call))
-	exitsyscall()
+	exitsyscall(0)
 	return int(call.r1), call.err
 }
 
 //go:nosplit
 func syscall_write(fd, buf, nbyte uintptr) (n, err uintptr) {
 	call := libcall{
-		fn:   uintptr(unsafe.Pointer(&libc_write)),
+		fn:   uintptr(unsafe.Pointer(libc_write)),
 		n:    3,
 		args: uintptr(unsafe.Pointer(&fd)),
 	}
diff --git a/src/runtime/thunk.s b/src/runtime/thunk.s
index 1a5b655..241dd90 100644
--- a/src/runtime/thunk.s
+++ b/src/runtime/thunk.s
@@ -4,7 +4,6 @@
 
 // This file exposes various internal runtime functions to other packages in std lib.
 
-#include "zasm_GOOS_GOARCH.h"
 #include "textflag.h"
 
 #ifdef GOARCH_arm
@@ -187,3 +186,18 @@
 
 TEXT os·runtime_args(SB),NOSPLIT,$0-0
 	JMP	runtime·runtime_args(SB)
+
+TEXT sync·runtime_procUnpin(SB),NOSPLIT,$0-0
+	JMP	runtime·sync_procUnpin(SB)
+
+TEXT sync·runtime_procPin(SB),NOSPLIT,$0-0
+	JMP	runtime·sync_procPin(SB)
+
+TEXT syscall·runtime_BeforeFork(SB),NOSPLIT,$0-0
+	JMP	runtime·syscall_BeforeFork(SB)
+
+TEXT syscall·runtime_AfterFork(SB),NOSPLIT,$0-0
+	JMP	runtime·syscall_AfterFork(SB)
+
+TEXT reflect·typelinks(SB),NOSPLIT,$0-0
+	JMP	runtime·typelinks(SB)
diff --git a/src/runtime/thunk_solaris_amd64.s b/src/runtime/thunk_solaris_amd64.s
deleted file mode 100644
index f61188c..0000000
--- a/src/runtime/thunk_solaris_amd64.s
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file exposes various external library functions to Go code in the runtime.
-
-#include "zasm_GOOS_GOARCH.h"
-#include "textflag.h"
-
-TEXT runtime·libc_chdir(SB),NOSPLIT,$0
-	MOVQ	libc·chdir(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_chroot(SB),NOSPLIT,$0
-	MOVQ	libc·chroot(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_close(SB),NOSPLIT,$0
-	MOVQ	libc·close(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_dlopen(SB),NOSPLIT,$0
-	MOVQ	libc·dlopen(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_dlclose(SB),NOSPLIT,$0
-	MOVQ	libc·dlclose(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_dlsym(SB),NOSPLIT,$0
-	MOVQ	libc·dlsym(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_execve(SB),NOSPLIT,$0
-	MOVQ	libc·execve(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_exit(SB),NOSPLIT,$0
-	MOVQ	libc·exit(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_fcntl(SB),NOSPLIT,$0
-	MOVQ	libc·fcntl(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_forkx(SB),NOSPLIT,$0
-	MOVQ	libc·forkx(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_gethostname(SB),NOSPLIT,$0
-	MOVQ	libc·gethostname(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_ioctl(SB),NOSPLIT,$0
-	MOVQ	libc·ioctl(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_setgid(SB),NOSPLIT,$0
-	MOVQ	libc·setgid(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_setgroups(SB),NOSPLIT,$0
-	MOVQ	libc·setgroups(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_setsid(SB),NOSPLIT,$0
-	MOVQ	libc·setsid(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_setuid(SB),NOSPLIT,$0
-	MOVQ	libc·setuid(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_setpgid(SB),NOSPLIT,$0
-	MOVQ	libc·setpgid(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_syscall(SB),NOSPLIT,$0
-	MOVQ	libc·syscall(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_wait4(SB),NOSPLIT,$0
-	MOVQ	libc·wait4(SB), AX
-	JMP	AX
-
-TEXT runtime·libc_write(SB),NOSPLIT,$0
-	MOVQ	libc·write(SB), AX
-	JMP	AX
diff --git a/src/runtime/thunk_windows.s b/src/runtime/thunk_windows.s
index 7ccb98f..b1d5d57 100644
--- a/src/runtime/thunk_windows.s
+++ b/src/runtime/thunk_windows.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 TEXT syscall·Syscall(SB),NOSPLIT,$0-0
diff --git a/src/runtime/tls_arm.s b/src/runtime/tls_arm.s
index 85c3940..7c5c0e2 100644
--- a/src/runtime/tls_arm.s
+++ b/src/runtime/tls_arm.s
@@ -2,7 +2,8 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "funcdata.h"
 #include "textflag.h"
 
diff --git a/src/runtime/traceback.go b/src/runtime/traceback.go
index 1c6ce6e..e1cc912 100644
--- a/src/runtime/traceback.go
+++ b/src/runtime/traceback.go
@@ -32,15 +32,16 @@
 
 var (
 	// initialized in tracebackinit
-	deferprocPC uintptr
-	goexitPC    uintptr
-	jmpdeferPC  uintptr
-	mcallPC     uintptr
-	morestackPC uintptr
-	mstartPC    uintptr
-	newprocPC   uintptr
-	rt0_goPC    uintptr
-	sigpanicPC  uintptr
+	deferprocPC          uintptr
+	goexitPC             uintptr
+	jmpdeferPC           uintptr
+	mcallPC              uintptr
+	morestackPC          uintptr
+	mstartPC             uintptr
+	newprocPC            uintptr
+	rt0_goPC             uintptr
+	sigpanicPC           uintptr
+	systemstack_switchPC uintptr
 
 	externalthreadhandlerp uintptr // initialized elsewhere
 )
@@ -59,6 +60,7 @@
 	newprocPC = funcPC(newproc)
 	rt0_goPC = funcPC(rt0_go)
 	sigpanicPC = funcPC(sigpanic)
+	systemstack_switchPC = funcPC(systemstack_switch)
 }
 
 // Traceback over the deferred function calls.
@@ -335,8 +337,7 @@
 					print(hex(argp[i]))
 				}
 				print(")\n")
-				var file string
-				line := funcline(f, tracepc, &file)
+				file, line := funcline(f, tracepc)
 				print("\t", file, ":", line)
 				if frame.pc > f.entry {
 					print(" +", hex(frame.pc-f.entry))
@@ -480,8 +481,7 @@
 		if pc > f.entry {
 			tracepc -= _PCQuantum
 		}
-		var file string
-		line := funcline(f, tracepc, &file)
+		file, line := funcline(f, tracepc)
 		print("\t", file, ":", line)
 		if pc > f.entry {
 			print(" +", hex(pc-f.entry))
@@ -528,7 +528,7 @@
 	sp := getcallersp(unsafe.Pointer(&skip))
 	pc := uintptr(getcallerpc(unsafe.Pointer(&skip)))
 	var n int
-	onM(func() {
+	systemstack(func() {
 		n = gentraceback(pc, sp, 0, getg(), skip, pcbuf, m, nil, nil, 0)
 	})
 	return n
diff --git a/src/runtime/type.go b/src/runtime/type.go
new file mode 100644
index 0000000..cbd5c9e
--- /dev/null
+++ b/src/runtime/type.go
@@ -0,0 +1,99 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Runtime _type representation.
+
+package runtime
+
+import "unsafe"
+
+// Needs to be in sync with ../../cmd/ld/decodesym.c:/^commonsize and pkg/reflect/type.go:/type.
+type _type struct {
+	size       uintptr
+	hash       uint32
+	_unused    uint8
+	align      uint8
+	fieldalign uint8
+	kind       uint8
+	alg        unsafe.Pointer
+	// gc stores _type info required for garbage collector.
+	// If (kind&KindGCProg)==0, then gc[0] points at sparse GC bitmap
+	// (no indirection), 4 bits per word.
+	// If (kind&KindGCProg)!=0, then gc[1] points to a compiler-generated
+	// read-only GC program; and gc[0] points to BSS space for sparse GC bitmap.
+	// For huge _types (>MaxGCMask), runtime unrolls the program directly into
+	// GC bitmap and gc[0] is not used. For moderately-sized _types, runtime
+	// unrolls the program into gc[0] space on first use. The first byte of gc[0]
+	// (gc[0][0]) contains 'unroll' flag saying whether the program is already
+	// unrolled into gc[0] or not.
+	gc      [2]uintptr
+	_string *string
+	x       *uncommontype
+	ptrto   *_type
+	zero    *byte // ptr to the zero value for this _type
+}
+
+type method struct {
+	name    *string
+	pkgpath *string
+	mtyp    *_type
+	typ     *_type
+	ifn     unsafe.Pointer
+	tfn     unsafe.Pointer
+}
+
+type uncommontype struct {
+	name    *string
+	pkgpath *string
+	mhdr    []method
+	m       [0]method
+}
+
+type imethod struct {
+	name    *string
+	pkgpath *string
+	_type   *_type
+}
+
+type interfacetype struct {
+	typ  _type
+	mhdr []imethod
+	m    [0]imethod
+}
+
+type maptype struct {
+	typ           _type
+	key           *_type
+	elem          *_type
+	bucket        *_type // internal _type representing a hash bucket
+	hmap          *_type // internal _type representing a hmap
+	keysize       uint8  // size of key slot
+	indirectkey   bool   // store ptr to key instead of key itself
+	valuesize     uint8  // size of value slot
+	indirectvalue bool   // store ptr to value instead of value itself
+	bucketsize    uint16 // size of bucket
+}
+
+type chantype struct {
+	typ  _type
+	elem *_type
+	dir  uintptr
+}
+
+type slicetype struct {
+	typ  _type
+	elem *_type
+}
+
+type functype struct {
+	typ       _type
+	dotdotdot bool
+	in        slice
+	out       slice
+}
+
+type ptrtype struct {
+	typ  _type
+	elem *_type
+}
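
A hedged, standalone reading of the gc comment in _type above (not runtime code; kindGCProg here just mirrors _KindGCProg), showing which slot each case consults:

package main

import "fmt"

const kindGCProg = 1 << 6 // mirrors _KindGCProg

// describeGC is a schematic reading of the _type.gc comment:
// without KindGCProg, gc[0] points at the sparse 4-bit-per-word bitmap;
// with KindGCProg, gc[1] points at the read-only GC program and gc[0]
// at the BSS space it is unrolled into on first use.
func describeGC(kind uint8, gc [2]uintptr) string {
	if kind&kindGCProg == 0 {
		return fmt.Sprintf("bitmap at %#x (4 bits per word, no indirection)", gc[0])
	}
	return fmt.Sprintf("GC program at %#x, unroll space at %#x", gc[1], gc[0])
}

func main() {
	fmt.Println(describeGC(0, [2]uintptr{0x1000, 0}))
	fmt.Println(describeGC(kindGCProg, [2]uintptr{0x2000, 0x3000}))
}
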
diff --git a/src/runtime/type.h b/src/runtime/type.h
deleted file mode 100644
index f5b4f9d..0000000
--- a/src/runtime/type.h
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Runtime type representation.
-
-typedef struct Type Type;
-typedef struct UncommonType UncommonType;
-typedef struct InterfaceType InterfaceType;
-typedef struct Method Method;
-typedef struct IMethod IMethod;
-typedef struct SliceType SliceType;
-typedef struct FuncType FuncType;
-
-// Needs to be in sync with ../../cmd/ld/decodesym.c:/^commonsize and pkg/reflect/type.go:/type.
-struct Type
-{
-	uintptr size;
-	uint32 hash;
-	uint8 _unused;
-	uint8 align;
-	uint8 fieldAlign;
-	uint8 kind;
-	void* alg;
-	// gc stores type info required for garbage collector.
-	// If (kind&KindGCProg)==0, then gc[0] points at sparse GC bitmap
-	// (no indirection), 4 bits per word.
-	// If (kind&KindGCProg)!=0, then gc[1] points to a compiler-generated
-	// read-only GC program; and gc[0] points to BSS space for sparse GC bitmap.
-	// For huge types (>MaxGCMask), runtime unrolls the program directly into
-	// GC bitmap and gc[0] is not used. For moderately-sized types, runtime
-	// unrolls the program into gc[0] space on first use. The first byte of gc[0]
-	// (gc[0][0]) contains 'unroll' flag saying whether the program is already
-	// unrolled into gc[0] or not.
-	uintptr gc[2];
-	String *string;
-	UncommonType *x;
-	Type *ptrto;
-	byte *zero;  // ptr to the zero value for this type
-};
-
-struct Method
-{
-	String *name;
-	String *pkgPath;
-	Type	*mtyp;
-	Type *typ;
-	void (*ifn)(void);
-	void (*tfn)(void);
-};
-
-struct UncommonType
-{
-	String *name;
-	String *pkgPath;
-	Slice mhdr;
-	Method m[];
-};
-
-struct IMethod
-{
-	String *name;
-	String *pkgPath;
-	Type *type;
-};
-
-struct InterfaceType
-{
-	Type  typ;
-	Slice mhdr;
-	IMethod m[];
-};
-
-struct MapType
-{
-	Type typ;
-	Type *key;
-	Type *elem;
-	Type *bucket;		// internal type representing a hash bucket
-	Type *hmap;		// internal type representing a Hmap
-	uint8 keysize;		// size of key slot
-	bool indirectkey;	// store ptr to key instead of key itself
-	uint8 valuesize;	// size of value slot
-	bool indirectvalue;	// store ptr to value instead of value itself
-	uint16 bucketsize;	// size of bucket
-};
-
-struct ChanType
-{
-	Type typ;
-	Type *elem;
-	uintptr dir;
-};
-
-struct SliceType
-{
-	Type typ;
-	Type *elem;
-};
-
-struct FuncType
-{
-	Type typ;
-	bool dotdotdot;
-	Slice in;
-	Slice out;
-};
-
-struct PtrType
-{
-	Type typ;
-	Type *elem;
-};
diff --git a/src/runtime/typekind.h b/src/runtime/typekind.h
index e0fe177..39cd45c 100644
--- a/src/runtime/typekind.h
+++ b/src/runtime/typekind.h
@@ -2,6 +2,9 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
+// Must match runtime and reflect.
+// Included by cmd/gc.
+
 enum {
 	KindBool = 1,
 	KindInt,
@@ -30,9 +33,8 @@
 	KindStruct,
 	KindUnsafePointer,
 
-	KindDirectIface = 1<<5,
-	KindGCProg = 1<<6,	// Type.gc points to GC program
-	KindNoPointers = 1<<7,
-	KindMask = (1<<5)-1,
+	KindDirectIface = 1 << 5,
+	KindGCProg      = 1 << 6, // Type.gc points to GC program
+	KindNoPointers  = 1 << 7,
+	KindMask        = (1 << 5) - 1,
 };
-
diff --git a/src/runtime/typekind1.go b/src/runtime/typekind1.go
new file mode 100644
index 0000000..73028d6f4
--- /dev/null
+++ b/src/runtime/typekind1.go
@@ -0,0 +1,39 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+const (
+	_KindBool = 1 + iota
+	_KindInt
+	_KindInt8
+	_KindInt16
+	_KindInt32
+	_KindInt64
+	_KindUint
+	_KindUint8
+	_KindUint16
+	_KindUint32
+	_KindUint64
+	_KindUintptr
+	_KindFloat32
+	_KindFloat64
+	_KindComplex64
+	_KindComplex128
+	_KindArray
+	_KindChan
+	_KindFunc
+	_KindInterface
+	_KindMap
+	_KindPtr
+	_KindSlice
+	_KindString
+	_KindStruct
+	_KindUnsafePointer
+
+	_KindDirectIface = 1 << 5
+	_KindGCProg      = 1 << 6 // Type.gc points to GC program
+	_KindNoPointers  = 1 << 7
+	_KindMask        = (1 << 5) - 1
+)
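
The kind byte packs the kind number into the low five bits, with flag bits above it, and _KindMask strips the flags back off. A small standalone sketch using copies of the constants above:

package main

import "fmt"

// Copies of the constants above, for a standalone illustration.
const (
	kindPtr         = 22       // _KindPtr
	kindDirectIface = 1 << 5   // pointer-shaped, stored directly in interfaces
	kindNoPointers  = 1 << 7   // type contains no pointers
	kindMask        = 1<<5 - 1 // low five bits hold the kind number
)

func main() {
	k := uint8(kindPtr | kindDirectIface)
	fmt.Println("kind number:", k&kindMask)              // 22
	fmt.Println("direct iface:", k&kindDirectIface != 0) // true
	fmt.Println("no pointers:", k&kindNoPointers != 0)   // false
}
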
diff --git a/src/runtime/vdso_linux_amd64.c b/src/runtime/vdso_linux_amd64.c
deleted file mode 100644
index 681340c..0000000
--- a/src/runtime/vdso_linux_amd64.c
+++ /dev/null
@@ -1,371 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-#include "runtime.h"
-#include "textflag.h"
-
-// Look up symbols in the Linux vDSO.
-
-// This code was originally based on the sample Linux vDSO parser at
-// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/vDSO/parse_vdso.c
-
-// This implements the ELF dynamic linking spec at
-// http://sco.com/developers/gabi/latest/ch5.dynamic.html
-
-// The version section is documented at
-// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html
-
-#define AT_RANDOM 25
-#define AT_SYSINFO_EHDR 33
-#define AT_NULL	0    /* End of vector */
-#define PT_LOAD	1    /* Loadable program segment */
-#define PT_DYNAMIC 2 /* Dynamic linking information */
-#define DT_NULL 0    /* Marks end of dynamic section */
-#define DT_HASH 4    /* Dynamic symbol hash table */
-#define DT_STRTAB 5  /* Address of string table */
-#define DT_SYMTAB 6  /* Address of symbol table */
-#define DT_VERSYM 0x6ffffff0
-#define	DT_VERDEF 0x6ffffffc
-
-#define VER_FLG_BASE 0x1 /* Version definition of file itself */
-#define SHN_UNDEF 0      /* Undefined section */
-#define SHT_DYNSYM 11    /* Dynamic linker symbol table */
-#define STT_FUNC 2       /* Symbol is a code object */
-#define STB_GLOBAL 1     /* Global symbol */
-#define STB_WEAK 2       /* Weak symbol */
-
-/* How to extract and insert information held in the st_info field.  */
-#define ELF64_ST_BIND(val) (((byte) (val)) >> 4)
-#define ELF64_ST_TYPE(val) ((val) & 0xf)
-
-#define EI_NIDENT (16)
-
-typedef uint16 Elf64_Half;
-typedef uint32 Elf64_Word;
-typedef	int32  Elf64_Sword;
-typedef uint64 Elf64_Xword;
-typedef	int64  Elf64_Sxword;
-typedef uint64 Elf64_Addr;
-typedef uint64 Elf64_Off;
-typedef uint16 Elf64_Section;
-typedef Elf64_Half Elf64_Versym;
-
-
-typedef struct Elf64_Sym
-{
-	Elf64_Word st_name;
-	byte st_info;
-	byte st_other;
-	Elf64_Section st_shndx;
-	Elf64_Addr st_value;
-	Elf64_Xword st_size;
-} Elf64_Sym;
-
-typedef struct Elf64_Verdef
-{
-	Elf64_Half vd_version; /* Version revision */
-	Elf64_Half vd_flags;   /* Version information */
-	Elf64_Half vd_ndx;     /* Version Index */
-	Elf64_Half vd_cnt;     /* Number of associated aux entries */
-	Elf64_Word vd_hash;    /* Version name hash value */
-	Elf64_Word vd_aux;     /* Offset in bytes to verdaux array */
-	Elf64_Word vd_next;    /* Offset in bytes to next verdef entry */
-} Elf64_Verdef;
-
-typedef struct Elf64_Ehdr
-{
-	byte e_ident[EI_NIDENT]; /* Magic number and other info */
-	Elf64_Half e_type;       /* Object file type */
-	Elf64_Half e_machine;    /* Architecture */
-	Elf64_Word e_version;    /* Object file version */
-	Elf64_Addr e_entry;      /* Entry point virtual address */
-	Elf64_Off e_phoff;       /* Program header table file offset */
-	Elf64_Off e_shoff;       /* Section header table file offset */
-	Elf64_Word e_flags;      /* Processor-specific flags */
-	Elf64_Half e_ehsize;     /* ELF header size in bytes */
-	Elf64_Half e_phentsize;  /* Program header table entry size */
-	Elf64_Half e_phnum;      /* Program header table entry count */
-	Elf64_Half e_shentsize;  /* Section header table entry size */
-	Elf64_Half e_shnum;      /* Section header table entry count */
-	Elf64_Half e_shstrndx;   /* Section header string table index */
-} Elf64_Ehdr;
-
-typedef struct Elf64_Phdr
-{
-	Elf64_Word p_type;    /* Segment type */
-	Elf64_Word p_flags;   /* Segment flags */
-	Elf64_Off p_offset;   /* Segment file offset */
-	Elf64_Addr p_vaddr;   /* Segment virtual address */
-	Elf64_Addr p_paddr;   /* Segment physical address */
-	Elf64_Xword p_filesz; /* Segment size in file */
-	Elf64_Xword p_memsz;  /* Segment size in memory */
-	Elf64_Xword p_align;  /* Segment alignment */
-} Elf64_Phdr;
-
-typedef struct Elf64_Shdr
-{
-	Elf64_Word sh_name;       /* Section name (string tbl index) */
-	Elf64_Word sh_type;       /* Section type */
-	Elf64_Xword sh_flags;     /* Section flags */
-	Elf64_Addr sh_addr;       /* Section virtual addr at execution */
-	Elf64_Off sh_offset;      /* Section file offset */
-	Elf64_Xword sh_size;      /* Section size in bytes */
-	Elf64_Word sh_link;       /* Link to another section */
-	Elf64_Word sh_info;       /* Additional section information */
-	Elf64_Xword sh_addralign; /* Section alignment */
-	Elf64_Xword sh_entsize;   /* Entry size if section holds table */
-} Elf64_Shdr;
-
-typedef struct Elf64_Dyn
-{
-	Elf64_Sxword d_tag; /* Dynamic entry type */
-	union
-	{
-		Elf64_Xword d_val;  /* Integer value */
-		Elf64_Addr d_ptr;   /* Address value */
-	} d_un;
-} Elf64_Dyn;
-
-typedef struct Elf64_Verdaux
-{
-	Elf64_Word vda_name; /* Version or dependency names */
-	Elf64_Word vda_next; /* Offset in bytes to next verdaux entry */
-} Elf64_Verdaux;
-
-typedef struct Elf64_auxv_t
-{
-	uint64 a_type;        /* Entry type */
-	union
-	{
-		uint64 a_val; /* Integer value */
-	} a_un;
-} Elf64_auxv_t;
-
-
-typedef struct symbol_key {
-	byte* name;
-	int32 sym_hash;
-	void** var_ptr;
-} symbol_key;
-
-typedef struct version_key {
-	byte* version;
-	int32 ver_hash;
-} version_key;
-
-struct vdso_info {
-	bool valid;
-
-	/* Load information */
-	uintptr load_addr;
-	uintptr load_offset;  /* load_addr - recorded vaddr */
-
-	/* Symbol table */
-	Elf64_Sym *symtab;
-	const byte *symstrings;
-	Elf64_Word *bucket, *chain;
-	Elf64_Word nbucket, nchain;
-
-	/* Version table */
-	Elf64_Versym *versym;
-	Elf64_Verdef *verdef;
-};
-
-#pragma dataflag NOPTR
-static version_key linux26 = { (byte*)"LINUX_2.6", 0x3ae75f6 };
-
-// initialize with vsyscall fallbacks
-#pragma dataflag NOPTR
-void* runtime·__vdso_time_sym = (void*)0xffffffffff600400ULL;
-#pragma dataflag NOPTR
-void* runtime·__vdso_gettimeofday_sym = (void*)0xffffffffff600000ULL;
-#pragma dataflag NOPTR
-void* runtime·__vdso_clock_gettime_sym = (void*)0;
-
-#pragma dataflag NOPTR
-static symbol_key sym_keys[] = {
-	{ (byte*)"__vdso_time", 0xa33c485, &runtime·__vdso_time_sym },
-	{ (byte*)"__vdso_gettimeofday", 0x315ca59, &runtime·__vdso_gettimeofday_sym },
-	{ (byte*)"__vdso_clock_gettime", 0xd35ec75, &runtime·__vdso_clock_gettime_sym },
-};
-
-static void
-vdso_init_from_sysinfo_ehdr(struct vdso_info *vdso_info, Elf64_Ehdr* hdr)
-{
-	uint64 i;
-	bool found_vaddr = false;
-	Elf64_Phdr *pt;
-	Elf64_Dyn *dyn;
-	Elf64_Word *hash;
-
-	vdso_info->valid = false;
-	vdso_info->load_addr = (uintptr) hdr;
-
-	pt = (Elf64_Phdr*)(vdso_info->load_addr + hdr->e_phoff);
-	dyn = nil;
-
-	// We need two things from the segment table: the load offset
-	// and the dynamic table.
-	for(i=0; i<hdr->e_phnum; i++) {
-		if(pt[i].p_type == PT_LOAD && found_vaddr == false) {
-			found_vaddr = true;
-			vdso_info->load_offset =	(uintptr)hdr
-				+ (uintptr)pt[i].p_offset
-				- (uintptr)pt[i].p_vaddr;
-		} else if(pt[i].p_type == PT_DYNAMIC) {
-			dyn = (Elf64_Dyn*)((uintptr)hdr + pt[i].p_offset);
-		}
-	}
-
-	if(found_vaddr == false || dyn == nil)
-		return;  // Failed
-
-	// Fish out the useful bits of the dynamic table.
-	hash = nil;
-	vdso_info->symstrings = nil;
-	vdso_info->symtab = nil;
-	vdso_info->versym = nil;
-	vdso_info->verdef = nil;
-	for(i=0; dyn[i].d_tag!=DT_NULL; i++) {
-		switch(dyn[i].d_tag) {
-		case DT_STRTAB:
-			vdso_info->symstrings = (const byte *)
-				((uintptr)dyn[i].d_un.d_ptr
-				 + vdso_info->load_offset);
-			break;
-		case DT_SYMTAB:
-			vdso_info->symtab = (Elf64_Sym *)
-				((uintptr)dyn[i].d_un.d_ptr
-				 + vdso_info->load_offset);
-			break;
-		case DT_HASH:
-			hash = (Elf64_Word *)
-			  ((uintptr)dyn[i].d_un.d_ptr
-			   + vdso_info->load_offset);
-			break;
-		case DT_VERSYM:
-			vdso_info->versym = (Elf64_Versym *)
-				((uintptr)dyn[i].d_un.d_ptr
-				 + vdso_info->load_offset);
-			break;
-		case DT_VERDEF:
-			vdso_info->verdef = (Elf64_Verdef *)
-				((uintptr)dyn[i].d_un.d_ptr
-				 + vdso_info->load_offset);
-			break;
-		}
-	}
-	if(vdso_info->symstrings == nil || vdso_info->symtab == nil || hash == nil)
-		return;  // Failed
-
-	if(vdso_info->verdef == nil)
-		vdso_info->versym = 0;
-
-	// Parse the hash table header.
-	vdso_info->nbucket = hash[0];
-	vdso_info->nchain = hash[1];
-	vdso_info->bucket = &hash[2];
-	vdso_info->chain = &hash[vdso_info->nbucket + 2];
-
-	// That's all we need.
-	vdso_info->valid = true;
-}
-
-static int32
-vdso_find_version(struct vdso_info *vdso_info, version_key* ver)
-{
-	if(vdso_info->valid == false) {
-		return 0;
-	}
-	Elf64_Verdef *def = vdso_info->verdef;
-	while(true) {
-		if((def->vd_flags & VER_FLG_BASE) == 0) {
-			Elf64_Verdaux *aux = (Elf64_Verdaux*)((byte *)def + def->vd_aux);
-			if(def->vd_hash == ver->ver_hash &&
-				runtime·strcmp(ver->version, vdso_info->symstrings + aux->vda_name) == 0) {
-				return def->vd_ndx & 0x7fff;
-			}
-		}
-
-		if(def->vd_next == 0) {
-			break;
-		}
-		def = (Elf64_Verdef *)((byte *)def + def->vd_next);
-	}
-	return -1; // can not match any version
-}
-
-static void
-vdso_parse_symbols(struct vdso_info *vdso_info, int32 version)
-{
-	int32 i;
-	Elf64_Word chain;
-	Elf64_Sym *sym;
-
-	if(vdso_info->valid == false)
-		return;
-
-	for(i=0; i<nelem(sym_keys); i++) {
-		for(chain = vdso_info->bucket[sym_keys[i].sym_hash % vdso_info->nbucket];
-			chain != 0; chain = vdso_info->chain[chain]) {
-
-			sym = &vdso_info->symtab[chain];
-			if(ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
-				continue;
-			if(ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
-				 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
-				continue;
-			if(sym->st_shndx == SHN_UNDEF)
-				continue;
-			if(runtime·strcmp(sym_keys[i].name, vdso_info->symstrings + sym->st_name) != 0)
-				continue;
-
-			// Check symbol version.
-			if(vdso_info->versym != nil && version != 0
-				&& vdso_info->versym[chain] & 0x7fff != version)
-				continue;
-
-			*sym_keys[i].var_ptr = (void *)(vdso_info->load_offset + sym->st_value);
-			break;
-		}
-	}
-}
-
-static void
-runtime·linux_setup_vdso(int32 argc, uint8** argv)
-{
-	struct vdso_info vdso_info;
-
-	// skip argvc
-	byte **p = argv;
-	p = &p[argc+1];
-
-	// skip envp to get to ELF auxiliary vector.
-	for(; *p!=0; p++) {}
-
-	// skip NULL separator
-	p++;
-
-	// now, p points to auxv
-	Elf64_auxv_t *elf_auxv = (Elf64_auxv_t*) p;
-
-	for(int32 i=0; elf_auxv[i].a_type!=AT_NULL; i++) {
-		if(elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
-			if(elf_auxv[i].a_un.a_val == 0) {
-				// Something went wrong
-				continue;
-			}
-			vdso_init_from_sysinfo_ehdr(&vdso_info, (Elf64_Ehdr*)elf_auxv[i].a_un.a_val);
-			vdso_parse_symbols(&vdso_info, vdso_find_version(&vdso_info, &linux26));
-			continue;
-		}
-		if(elf_auxv[i].a_type == AT_RANDOM) {
-		        runtime·startup_random_data = (byte*)elf_auxv[i].a_un.a_val;
-		        runtime·startup_random_data_len = 16;
-			continue;
-		}
-	}
-}
-
-void (*runtime·sysargs)(int32, uint8**) = runtime·linux_setup_vdso;
diff --git a/src/runtime/vdso_linux_amd64.go b/src/runtime/vdso_linux_amd64.go
new file mode 100644
index 0000000..7eb6988
--- /dev/null
+++ b/src/runtime/vdso_linux_amd64.go
@@ -0,0 +1,328 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import "unsafe"
+
+// Look up symbols in the Linux vDSO.
+
+// This code was originally based on the sample Linux vDSO parser at
+// https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/vDSO/parse_vdso.c
+
+// This implements the ELF dynamic linking spec at
+// http://sco.com/developers/gabi/latest/ch5.dynamic.html
+
+// The version section is documented at
+// http://refspecs.linuxfoundation.org/LSB_3.2.0/LSB-Core-generic/LSB-Core-generic/symversion.html
+
+const (
+	_AT_RANDOM       = 25
+	_AT_SYSINFO_EHDR = 33
+	_AT_NULL         = 0 /* End of vector */
+
+	_PT_LOAD    = 1 /* Loadable program segment */
+	_PT_DYNAMIC = 2 /* Dynamic linking information */
+
+	_DT_NULL   = 0 /* Marks end of dynamic section */
+	_DT_HASH   = 4 /* Dynamic symbol hash table */
+	_DT_STRTAB = 5 /* Address of string table */
+	_DT_SYMTAB = 6 /* Address of symbol table */
+	_DT_VERSYM = 0x6ffffff0
+	_DT_VERDEF = 0x6ffffffc
+
+	_VER_FLG_BASE = 0x1 /* Version definition of file itself */
+
+	_SHN_UNDEF = 0 /* Undefined section */
+
+	_SHT_DYNSYM = 11 /* Dynamic linker symbol table */
+
+	_STT_FUNC = 2 /* Symbol is a code object */
+
+	_STB_GLOBAL = 1 /* Global symbol */
+	_STB_WEAK   = 2 /* Weak symbol */
+
+	_EI_NIDENT = 16
+)
+
+/* How to extract and insert information held in the st_info field.  */
+func _ELF64_ST_BIND(val byte) byte { return val >> 4 }
+func _ELF64_ST_TYPE(val byte) byte { return val & 0xf }
+
+type elf64Sym struct {
+	st_name  uint32
+	st_info  byte
+	st_other byte
+	st_shndx uint16
+	st_value uint64
+	st_size  uint64
+}
+
+type elf64Verdef struct {
+	vd_version uint16 /* Version revision */
+	vd_flags   uint16 /* Version information */
+	vd_ndx     uint16 /* Version Index */
+	vd_cnt     uint16 /* Number of associated aux entries */
+	vd_hash    uint32 /* Version name hash value */
+	vd_aux     uint32 /* Offset in bytes to verdaux array */
+	vd_next    uint32 /* Offset in bytes to next verdef entry */
+}
+
+type elf64Ehdr struct {
+	e_ident     [_EI_NIDENT]byte /* Magic number and other info */
+	e_type      uint16           /* Object file type */
+	e_machine   uint16           /* Architecture */
+	e_version   uint32           /* Object file version */
+	e_entry     uint64           /* Entry point virtual address */
+	e_phoff     uint64           /* Program header table file offset */
+	e_shoff     uint64           /* Section header table file offset */
+	e_flags     uint32           /* Processor-specific flags */
+	e_ehsize    uint16           /* ELF header size in bytes */
+	e_phentsize uint16           /* Program header table entry size */
+	e_phnum     uint16           /* Program header table entry count */
+	e_shentsize uint16           /* Section header table entry size */
+	e_shnum     uint16           /* Section header table entry count */
+	e_shstrndx  uint16           /* Section header string table index */
+}
+
+type elf64Phdr struct {
+	p_type   uint32 /* Segment type */
+	p_flags  uint32 /* Segment flags */
+	p_offset uint64 /* Segment file offset */
+	p_vaddr  uint64 /* Segment virtual address */
+	p_paddr  uint64 /* Segment physical address */
+	p_filesz uint64 /* Segment size in file */
+	p_memsz  uint64 /* Segment size in memory */
+	p_align  uint64 /* Segment alignment */
+}
+
+type elf64Shdr struct {
+	sh_name      uint32 /* Section name (string tbl index) */
+	sh_type      uint32 /* Section type */
+	sh_flags     uint64 /* Section flags */
+	sh_addr      uint64 /* Section virtual addr at execution */
+	sh_offset    uint64 /* Section file offset */
+	sh_size      uint64 /* Section size in bytes */
+	sh_link      uint32 /* Link to another section */
+	sh_info      uint32 /* Additional section information */
+	sh_addralign uint64 /* Section alignment */
+	sh_entsize   uint64 /* Entry size if section holds table */
+}
+
+type elf64Dyn struct {
+	d_tag int64  /* Dynamic entry type */
+	d_val uint64 /* Integer value */
+}
+
+type elf64Verdaux struct {
+	vda_name uint32 /* Version or dependency names */
+	vda_next uint32 /* Offset in bytes to next verdaux entry */
+}
+
+type elf64Auxv struct {
+	a_type uint64 /* Entry type */
+	a_val  uint64 /* Integer value */
+}
+
+type symbol_key struct {
+	name     string
+	sym_hash uint32
+	ptr      *uintptr
+}
+
+type version_key struct {
+	version  string
+	ver_hash uint32
+}
+
+type vdso_info struct {
+	valid bool
+
+	/* Load information */
+	load_addr   uintptr
+	load_offset uintptr /* load_addr - recorded vaddr */
+
+	/* Symbol table */
+	symtab     *[1 << 32]elf64Sym
+	symstrings *[1 << 32]byte
+	chain      []uint32
+	bucket     []uint32
+
+	/* Version table */
+	versym *[1 << 32]uint16
+	verdef *elf64Verdef
+}
+
+var linux26 = version_key{"LINUX_2.6", 0x3ae75f6}
+
+var sym_keys = []symbol_key{
+	{"__vdso_time", 0xa33c485, &__vdso_time_sym},
+	{"__vdso_gettimeofday", 0x315ca59, &__vdso_gettimeofday_sym},
+	{"__vdso_clock_gettime", 0xd35ec75, &__vdso_clock_gettime_sym},
+}
+
+// initialize with vsyscall fallbacks
+var (
+	__vdso_time_sym          uintptr = 0xffffffffff600400
+	__vdso_gettimeofday_sym  uintptr = 0xffffffffff600000
+	__vdso_clock_gettime_sym uintptr = 0
+)
+
+func vdso_init_from_sysinfo_ehdr(info *vdso_info, hdr *elf64Ehdr) {
+	info.valid = false
+	info.load_addr = uintptr(unsafe.Pointer(hdr))
+
+	pt := unsafe.Pointer(info.load_addr + uintptr(hdr.e_phoff))
+
+	// We need two things from the segment table: the load offset
+	// and the dynamic table.
+	var found_vaddr bool
+	var dyn *[1 << 20]elf64Dyn
+	for i := uint16(0); i < hdr.e_phnum; i++ {
+		pt := (*elf64Phdr)(add(pt, uintptr(i)*unsafe.Sizeof(elf64Phdr{})))
+		switch pt.p_type {
+		case _PT_LOAD:
+			if !found_vaddr {
+				found_vaddr = true
+				info.load_offset = info.load_addr + uintptr(pt.p_offset-pt.p_vaddr)
+			}
+
+		case _PT_DYNAMIC:
+			dyn = (*[1 << 20]elf64Dyn)(unsafe.Pointer(info.load_addr + uintptr(pt.p_offset)))
+		}
+	}
+
+	if !found_vaddr || dyn == nil {
+		return // Failed
+	}
+
+	// Fish out the useful bits of the dynamic table.
+
+	var hash *[1 << 30]uint32
+	hash = nil
+	info.symstrings = nil
+	info.symtab = nil
+	info.versym = nil
+	info.verdef = nil
+	for i := 0; dyn[i].d_tag != _DT_NULL; i++ {
+		dt := &dyn[i]
+		p := info.load_offset + uintptr(dt.d_val)
+		switch dt.d_tag {
+		case _DT_STRTAB:
+			info.symstrings = (*[1 << 32]byte)(unsafe.Pointer(p))
+		case _DT_SYMTAB:
+			info.symtab = (*[1 << 32]elf64Sym)(unsafe.Pointer(p))
+		case _DT_HASH:
+			hash = (*[1 << 30]uint32)(unsafe.Pointer(p))
+		case _DT_VERSYM:
+			info.versym = (*[1 << 32]uint16)(unsafe.Pointer(p))
+		case _DT_VERDEF:
+			info.verdef = (*elf64Verdef)(unsafe.Pointer(p))
+		}
+	}
+
+	if info.symstrings == nil || info.symtab == nil || hash == nil {
+		return // Failed
+	}
+
+	if info.verdef == nil {
+		info.versym = nil
+	}
+
+	// Parse the hash table header.
+	nbucket := hash[0]
+	nchain := hash[1]
+	info.bucket = hash[2 : 2+nbucket]
+	info.chain = hash[2+nbucket : 2+nbucket+nchain]
+
+	// That's all we need.
+	info.valid = true
+}
+
+func vdso_find_version(info *vdso_info, ver *version_key) int32 {
+	if !info.valid {
+		return 0
+	}
+
+	def := info.verdef
+	for {
+		if def.vd_flags&_VER_FLG_BASE == 0 {
+			aux := (*elf64Verdaux)(add(unsafe.Pointer(def), uintptr(def.vd_aux)))
+			if def.vd_hash == ver.ver_hash && ver.version == gostringnocopy(&info.symstrings[aux.vda_name]) {
+				return int32(def.vd_ndx & 0x7fff)
+			}
+		}
+
+		if def.vd_next == 0 {
+			break
+		}
+		def = (*elf64Verdef)(add(unsafe.Pointer(def), uintptr(def.vd_next)))
+	}
+
+	return -1 // can not match any version
+}
+
+func vdso_parse_symbols(info *vdso_info, version int32) {
+	if !info.valid {
+		return
+	}
+
+	for _, k := range sym_keys {
+		for chain := info.bucket[k.sym_hash%uint32(len(info.bucket))]; chain != 0; chain = info.chain[chain] {
+			sym := &info.symtab[chain]
+			typ := _ELF64_ST_TYPE(sym.st_info)
+			bind := _ELF64_ST_BIND(sym.st_info)
+			if typ != _STT_FUNC || bind != _STB_GLOBAL && bind != _STB_WEAK || sym.st_shndx == _SHN_UNDEF {
+				continue
+			}
+			if k.name != gostringnocopy(&info.symstrings[sym.st_name]) {
+				continue
+			}
+
+			// Check symbol version.
+			if info.versym != nil && version != 0 && int32(info.versym[chain]&0x7fff) != version {
+				continue
+			}
+
+			*k.ptr = info.load_offset + uintptr(sym.st_value)
+			break
+		}
+	}
+}
+
+func sysargs(argc int32, argv **byte) {
+	n := argc + 1
+
+	// skip envp to get to ELF auxiliary vector.
+	for argv_index(argv, n) != nil {
+		n++
+	}
+
+	// skip NULL separator
+	n++
+
+	// now argv+n is auxv
+	auxv := (*[1 << 32]elf64Auxv)(add(unsafe.Pointer(argv), uintptr(n)*ptrSize))
+
+	for i := 0; auxv[i].a_type != _AT_NULL; i++ {
+		av := &auxv[i]
+		switch av.a_type {
+		case _AT_SYSINFO_EHDR:
+			if av.a_val == 0 {
+				// Something went wrong
+				continue
+			}
+			var info vdso_info
+			// TODO(rsc): I don't understand why the compiler thinks info escapes
+			// when passed to the three functions below.
+			info1 := (*vdso_info)(noescape(unsafe.Pointer(&info)))
+			vdso_init_from_sysinfo_ehdr(info1, (*elf64Ehdr)(unsafe.Pointer(uintptr(av.a_val))))
+			vdso_parse_symbols(info1, vdso_find_version(info1, &linux26))
+
+		case _AT_RANDOM:
+			startup_random_data = (*byte)(unsafe.Pointer(uintptr(av.a_val)))
+			startup_random_data_len = 16
+		}
+	}
+}
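
The sym_keys constants are precomputed SysV ELF hashes of the symbol names, which is why the lookup starts at bucket[sym_hash % nbucket] in the DT_HASH table. A standalone sketch of that hash; the value it prints for "__vdso_time" matches the 0xa33c485 in the table above:

package main

import "fmt"

// elfHash is the classic SysV ELF hash used by DT_HASH sections.
func elfHash(name string) uint32 {
	var h uint32
	for i := 0; i < len(name); i++ {
		h = h<<4 + uint32(name[i])
		if g := h & 0xf0000000; g != 0 {
			h ^= g >> 24
			h &^= g
		}
	}
	return h
}

func main() {
	fmt.Printf("%#x\n", elfHash("__vdso_time")) // 0xa33c485
}
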
diff --git a/src/runtime/vdso_none.go b/src/runtime/vdso_none.go
new file mode 100644
index 0000000..6f83ecc
--- /dev/null
+++ b/src/runtime/vdso_none.go
@@ -0,0 +1,11 @@
+// Copyright 2014 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !linux !amd64
+// +build !linux !386
+
+package runtime
+
+func sysargs(argc int32, argv **byte) {
+}
diff --git a/src/runtime/vlop_arm.s b/src/runtime/vlop_arm.s
index b4b905b..5354bf9 100644
--- a/src/runtime/vlop_arm.s
+++ b/src/runtime/vlop_arm.s
@@ -23,7 +23,8 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 
-#include "zasm_GOOS_GOARCH.h"
+#include "go_asm.h"
+#include "go_tls.h"
 #include "textflag.h"
 
 arg=0
@@ -100,7 +101,7 @@
 // load the signal fault address into LR, and jump
 // to the real sigpanic.
 // This simulates what sighandler does for a memory fault.
-TEXT _sfloatpanic(SB),NOSPLIT,$-4
+TEXT runtime·_sfloatpanic(SB),NOSPLIT,$-4
 	MOVW	$0, R0
 	MOVW.W	R0, -4(R13)
 	MOVW	g_sigpc(g), LR
diff --git a/src/runtime/vlrt.c b/src/runtime/vlrt.c
deleted file mode 100644
index cb0d147..0000000
--- a/src/runtime/vlrt.c
+++ /dev/null
@@ -1,914 +0,0 @@
-// Inferno's libkern/vlrt-386.c
-// http://code.google.com/p/inferno-os/source/browse/libkern/vlrt-386.c
-//
-//         Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
-//         Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com).  All rights reserved.
-//         Portions Copyright 2009 The Go Authors. All rights reserved.
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-// The above copyright notice and this permission notice shall be included in
-// all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE.
-
-// +build arm 386
-
-#include "textflag.h"
-
-/*
- * C runtime for 64-bit divide, others.
- *
- * TODO(rsc): The simple functions are dregs--8c knows how
- * to generate the code directly now.  Find and remove.
- */
-
-void	runtime·panicdivide(void);
-
-typedef	unsigned long	ulong;
-typedef	unsigned int	uint;
-typedef	unsigned short	ushort;
-typedef	unsigned char	uchar;
-typedef	signed char	schar;
-
-#define	SIGN(n)	(1UL<<(n-1))
-
-typedef	struct	Vlong	Vlong;
-struct	Vlong
-{
-	ulong	lo;
-	ulong	hi;
-};
-
-typedef	union	Vlong64	Vlong64;
-union	Vlong64
-{
-	long long	v;
-	Vlong	v2;
-};
-
-void	runtime·abort(void);
-
-#pragma textflag NOSPLIT
-Vlong
-_addv(Vlong a, Vlong b)
-{
-	Vlong r;
-
-	r.lo = a.lo + b.lo;
-	r.hi = a.hi + b.hi;
-	if(r.lo < a.lo)
-		r.hi++;
-	return r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_subv(Vlong a, Vlong b)
-{
-	Vlong r;
-
-	r.lo = a.lo - b.lo;
-	r.hi = a.hi - b.hi;
-	if(r.lo > a.lo)
-		r.hi--;
-	return r;
-}
-
-Vlong
-_d2v(double d)
-{
-	union { double d; Vlong vl; } x;
-	ulong xhi, xlo, ylo, yhi;
-	int sh;
-	Vlong y;
-
-	x.d = d;
-
-	xhi = (x.vl.hi & 0xfffff) | 0x100000;
-	xlo = x.vl.lo;
-	sh = 1075 - ((x.vl.hi >> 20) & 0x7ff);
-
-	ylo = 0;
-	yhi = 0;
-	if(sh >= 0) {
-		/* v = (hi||lo) >> sh */
-		if(sh < 32) {
-			if(sh == 0) {
-				ylo = xlo;
-				yhi = xhi;
-			} else {
-				ylo = (xlo >> sh) | (xhi << (32-sh));
-				yhi = xhi >> sh;
-			}
-		} else {
-			if(sh == 32) {
-				ylo = xhi;
-			} else
-			if(sh < 64) {
-				ylo = xhi >> (sh-32);
-			}
-		}
-	} else {
-		/* v = (hi||lo) << -sh */
-		sh = -sh;
-		if(sh <= 10) { /* NOTE: sh <= 11 on ARM??? */
-			ylo = xlo << sh;
-			yhi = (xhi << sh) | (xlo >> (32-sh));
-		} else {
-			/* overflow */
-			yhi = d;	/* causes something awful */
-		}
-	}
-	if(x.vl.hi & SIGN(32)) {
-		if(ylo != 0) {
-			ylo = -ylo;
-			yhi = ~yhi;
-		} else
-			yhi = -yhi;
-	}
-
-	y.hi = yhi;
-	y.lo = ylo;
-	return y;
-}
-
-Vlong
-_f2v(float f)
-{
-	return _d2v(f);
-}
-
-double
-_ul2d(ulong u)
-{
-	// compensate for bug in c
-	if(u & SIGN(32)) {
-		u ^= SIGN(32);
-		return 2147483648. + u;
-	}
-	return u;
-}
-
-double
-_v2d(Vlong x)
-{
-	if(x.hi & SIGN(32)) {
-		if(x.lo) {
-			x.lo = -x.lo;
-			x.hi = ~x.hi;
-		} else
-			x.hi = -x.hi;
-		return -(_ul2d(x.hi)*4294967296. + _ul2d(x.lo));
-	}
-	return (long)x.hi*4294967296. + x.lo;
-}
-
-float
-_v2f(Vlong x)
-{
-	return _v2d(x);
-}
-
-ulong	runtime·_div64by32(Vlong, ulong, ulong*);
-int	runtime·_mul64by32(Vlong*, Vlong, ulong);
-
-static void
-slowdodiv(Vlong num, Vlong den, Vlong *q, Vlong *r)
-{
-	ulong numlo, numhi, denhi, denlo, quohi, quolo, t;
-	int i;
-
-	numhi = num.hi;
-	numlo = num.lo;
-	denhi = den.hi;
-	denlo = den.lo;
-
-	/*
-	 * get a divide by zero
-	 */
-	if(denlo==0 && denhi==0) {
-		runtime·panicdivide();
-	}
-
-	/*
-	 * set up the divisor and find the number of iterations needed
-	 */
-	if(numhi >= SIGN(32)) {
-		quohi = SIGN(32);
-		quolo = 0;
-	} else {
-		quohi = numhi;
-		quolo = numlo;
-	}
-	i = 0;
-	while(denhi < quohi || (denhi == quohi && denlo < quolo)) {
-		denhi = (denhi<<1) | (denlo>>31);
-		denlo <<= 1;
-		i++;
-	}
-
-	quohi = 0;
-	quolo = 0;
-	for(; i >= 0; i--) {
-		quohi = (quohi<<1) | (quolo>>31);
-		quolo <<= 1;
-		if(numhi > denhi || (numhi == denhi && numlo >= denlo)) {
-			t = numlo;
-			numlo -= denlo;
-			if(numlo > t)
-				numhi--;
-			numhi -= denhi;
-			quolo |= 1;
-		}
-		denlo = (denlo>>1) | (denhi<<31);
-		denhi >>= 1;
-	}
-
-	if(q) {
-		q->lo = quolo;
-		q->hi = quohi;
-	}
-	if(r) {
-		r->lo = numlo;
-		r->hi = numhi;
-	}
-}
-
-#ifdef GOARCH_arm
-static void
-dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
-{
-	slowdodiv(num, den, qp, rp);
-}
-#endif
-
-#ifdef GOARCH_386
-static void
-dodiv(Vlong num, Vlong den, Vlong *qp, Vlong *rp)
-{
-	ulong n;
-	Vlong x, q, r;
-	
-	if(den.hi > num.hi || (den.hi == num.hi && den.lo > num.lo)){
-		if(qp) {
-			qp->hi = 0;
-			qp->lo = 0;
-		}
-		if(rp) {
-			rp->hi = num.hi;
-			rp->lo = num.lo;
-		}
-		return;
-	}
-
-	if(den.hi != 0){
-		q.hi = 0;
-		n = num.hi/den.hi;
-		if(runtime·_mul64by32(&x, den, n) || x.hi > num.hi || (x.hi == num.hi && x.lo > num.lo))
-			slowdodiv(num, den, &q, &r);
-		else {
-			q.lo = n;
-			*(long long*)&r = *(long long*)&num - *(long long*)&x;
-		}
-	} else {
-		if(num.hi >= den.lo){
-			if(den.lo == 0)
-				runtime·panicdivide();
-			q.hi = n = num.hi/den.lo;
-			num.hi -= den.lo*n;
-		} else {
-			q.hi = 0;
-		}
-		q.lo = runtime·_div64by32(num, den.lo, &r.lo);
-		r.hi = 0;
-	}
-	if(qp) {
-		qp->lo = q.lo;
-		qp->hi = q.hi;
-	}
-	if(rp) {
-		rp->lo = r.lo;
-		rp->hi = r.hi;
-	}
-}
-#endif
-
-Vlong
-_divvu(Vlong n, Vlong d)
-{
-	Vlong q;
-
-	if(n.hi == 0 && d.hi == 0) {
-		if(d.lo == 0)
-			runtime·panicdivide();
-		q.hi = 0;
-		q.lo = n.lo / d.lo;
-		return q;
-	}
-	dodiv(n, d, &q, 0);
-	return q;
-}
-
-Vlong
-_modvu(Vlong n, Vlong d)
-{
-	Vlong r;
-
-	if(n.hi == 0 && d.hi == 0) {
-		if(d.lo == 0)
-			runtime·panicdivide();
-		r.hi = 0;
-		r.lo = n.lo % d.lo;
-		return r;
-	}
-	dodiv(n, d, 0, &r);
-	return r;
-}
-
-static void
-vneg(Vlong *v)
-{
-
-	if(v->lo == 0) {
-		v->hi = -v->hi;
-		return;
-	}
-	v->lo = -v->lo;
-	v->hi = ~v->hi;
-}
-
-Vlong
-_divv(Vlong n, Vlong d)
-{
-	long nneg, dneg;
-	Vlong q;
-
-	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
-		if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
-			// special case: 32-bit -0x80000000 / -1 causes divide error,
-			// but it's okay in this 64-bit context.
-			q.lo = 0x80000000;
-			q.hi = 0;
-			return q;
-		}
-		if(d.lo == 0)
-			runtime·panicdivide();
-		q.lo = (long)n.lo / (long)d.lo;
-		q.hi = ((long)q.lo) >> 31;
-		return q;
-	}
-	nneg = n.hi >> 31;
-	if(nneg)
-		vneg(&n);
-	dneg = d.hi >> 31;
-	if(dneg)
-		vneg(&d);
-	dodiv(n, d, &q, 0);
-	if(nneg != dneg)
-		vneg(&q);
-	return q;
-}
-
-Vlong
-_modv(Vlong n, Vlong d)
-{
-	long nneg, dneg;
-	Vlong r;
-
-	if(n.hi == (((long)n.lo)>>31) && d.hi == (((long)d.lo)>>31)) {
-		if((long)n.lo == -0x80000000 && (long)d.lo == -1) {
-			// special case: 32-bit -0x80000000 % -1 causes divide error,
-			// but it's okay in this 64-bit context.
-			r.lo = 0;
-			r.hi = 0;
-			return r;
-		}
-		if(d.lo == 0)
-			runtime·panicdivide();
-		r.lo = (long)n.lo % (long)d.lo;
-		r.hi = ((long)r.lo) >> 31;
-		return r;
-	}
-	nneg = n.hi >> 31;
-	if(nneg)
-		vneg(&n);
-	dneg = d.hi >> 31;
-	if(dneg)
-		vneg(&d);
-	dodiv(n, d, 0, &r);
-	if(nneg)
-		vneg(&r);
-	return r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_rshav(Vlong a, int b)
-{
-	long t;
-	Vlong r;
-
-	t = a.hi;
-	if(b >= 32) {
-		r.hi = t>>31;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r.lo = t>>31;
-			return r;
-		}
-		r.lo = t >> (b-32);
-		return r;
-	}
-	if(b <= 0) {
-		r.hi = t;
-		r.lo = a.lo;
-		return r;
-	}
-	r.hi = t >> b;
-	r.lo = (t << (32-b)) | (a.lo >> b);
-	return r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_rshlv(Vlong a, int b)
-{
-	ulong t;
-	Vlong r;
-
-	t = a.hi;
-	if(b >= 32) {
-		r.hi = 0;
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			r.lo = 0;
-			return r;
-		}
-		r.lo = t >> (b-32);
-		return r;
-	}
-	if(b <= 0) {
-		r.hi = t;
-		r.lo = a.lo;
-		return r;
-	}
-	r.hi = t >> b;
-	r.lo = (t << (32-b)) | (a.lo >> b);
-	return r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_lshv(Vlong a, int b)
-{
-	ulong t;
-
-	t = a.lo;
-	if(b >= 32) {
-		if(b >= 64) {
-			/* this is illegal re C standard */
-			return (Vlong){0, 0};
-		}
-		return (Vlong){0, t<<(b-32)};
-	}
-	if(b <= 0) {
-		return (Vlong){t, a.hi};
-	}
-	return (Vlong){t<<b, (t >> (32-b)) | (a.hi << b)};
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_andv(Vlong a, Vlong b)
-{
-	Vlong r;
-
-	r.hi = a.hi & b.hi;
-	r.lo = a.lo & b.lo;
-	return r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_orv(Vlong a, Vlong b)
-{
-	Vlong r;
-
-	r.hi = a.hi | b.hi;
-	r.lo = a.lo | b.lo;
-	return r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_xorv(Vlong a, Vlong b)
-{
-	Vlong r;
-
-	r.hi = a.hi ^ b.hi;
-	r.lo = a.lo ^ b.lo;
-	return r;
-}
-
-Vlong
-_vpp(Vlong *r)
-{
-	Vlong l;
-
-	l = *r;
-	r->lo++;
-	if(r->lo == 0)
-		r->hi++;
-	return l;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_vmm(Vlong *r)
-{
-	Vlong l;
-
-	l = *r;
-	if(r->lo == 0)
-		r->hi--;
-	r->lo--;
-	return l;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_ppv(Vlong *r)
-{
-
-	r->lo++;
-	if(r->lo == 0)
-		r->hi++;
-	return *r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_mmv(Vlong *r)
-{
-
-	if(r->lo == 0)
-		r->hi--;
-	r->lo--;
-	return *r;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_vasop(void *lv, Vlong fn(Vlong, Vlong), int type, Vlong rv)
-{
-	Vlong t, u;
-
-	u.lo = 0;
-	u.hi = 0;
-	switch(type) {
-	default:
-		runtime·abort();
-		break;
-
-	case 1:	/* schar */
-		t.lo = *(schar*)lv;
-		t.hi = t.lo >> 31;
-		u = fn(t, rv);
-		*(schar*)lv = u.lo;
-		break;
-
-	case 2:	/* uchar */
-		t.lo = *(uchar*)lv;
-		t.hi = 0;
-		u = fn(t, rv);
-		*(uchar*)lv = u.lo;
-		break;
-
-	case 3:	/* short */
-		t.lo = *(short*)lv;
-		t.hi = t.lo >> 31;
-		u = fn(t, rv);
-		*(short*)lv = u.lo;
-		break;
-
-	case 4:	/* ushort */
-		t.lo = *(ushort*)lv;
-		t.hi = 0;
-		u = fn(t, rv);
-		*(ushort*)lv = u.lo;
-		break;
-
-	case 9:	/* int */
-		t.lo = *(int*)lv;
-		t.hi = t.lo >> 31;
-		u = fn(t, rv);
-		*(int*)lv = u.lo;
-		break;
-
-	case 10:	/* uint */
-		t.lo = *(uint*)lv;
-		t.hi = 0;
-		u = fn(t, rv);
-		*(uint*)lv = u.lo;
-		break;
-
-	case 5:	/* long */
-		t.lo = *(long*)lv;
-		t.hi = t.lo >> 31;
-		u = fn(t, rv);
-		*(long*)lv = u.lo;
-		break;
-
-	case 6:	/* ulong */
-		t.lo = *(ulong*)lv;
-		t.hi = 0;
-		u = fn(t, rv);
-		*(ulong*)lv = u.lo;
-		break;
-
-	case 7:	/* vlong */
-	case 8:	/* uvlong */
-		if((void*)fn == _lshv || (void*)fn == _rshav || (void*)fn == _rshlv)
-			u = ((Vlong(*)(Vlong,int))fn)(*(Vlong*)lv, *(int*)&rv);
-		else
-			u = fn(*(Vlong*)lv, rv);
-		*(Vlong*)lv = u;
-		break;
-	}
-	return u;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_p2v(void *p)
-{
-	long t;
-	Vlong ret;
-
-	t = (ulong)p;
-	ret.lo = t;
-	ret.hi = 0;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_sl2v(long sl)
-{
-	long t;
-	Vlong ret;
-
-	t = sl;
-	ret.lo = t;
-	ret.hi = t >> 31;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_ul2v(ulong ul)
-{
-	long t;
-	Vlong ret;
-
-	t = ul;
-	ret.lo = t;
-	ret.hi = 0;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_si2v(int si)
-{
-	return (Vlong){si, si>>31};
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_ui2v(uint ui)
-{
-	long t;
-	Vlong ret;
-
-	t = ui;
-	ret.lo = t;
-	ret.hi = 0;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_sh2v(long sh)
-{
-	long t;
-	Vlong ret;
-
-	t = (sh << 16) >> 16;
-	ret.lo = t;
-	ret.hi = t >> 31;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_uh2v(ulong ul)
-{
-	long t;
-	Vlong ret;
-
-	t = ul & 0xffff;
-	ret.lo = t;
-	ret.hi = 0;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_sc2v(long uc)
-{
-	long t;
-	Vlong ret;
-
-	t = (uc << 24) >> 24;
-	ret.lo = t;
-	ret.hi = t >> 31;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-Vlong
-_uc2v(ulong ul)
-{
-	long t;
-	Vlong ret;
-
-	t = ul & 0xff;
-	ret.lo = t;
-	ret.hi = 0;
-	return ret;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2sc(Vlong rv)
-{
-	long t;
-
-	t = rv.lo & 0xff;
-	return (t << 24) >> 24;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2uc(Vlong rv)
-{
-
-	return rv.lo & 0xff;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2sh(Vlong rv)
-{
-	long t;
-
-	t = rv.lo & 0xffff;
-	return (t << 16) >> 16;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2uh(Vlong rv)
-{
-
-	return rv.lo & 0xffff;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2sl(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2ul(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2si(Vlong rv)
-{
-	return rv.lo;
-}
-
-#pragma textflag NOSPLIT
-long
-_v2ui(Vlong rv)
-{
-
-	return rv.lo;
-}
-
-#pragma textflag NOSPLIT
-int
-_testv(Vlong rv)
-{
-	return rv.lo || rv.hi;
-}
-
-#pragma textflag NOSPLIT
-int
-_eqv(Vlong lv, Vlong rv)
-{
-	return lv.lo == rv.lo && lv.hi == rv.hi;
-}
-
-#pragma textflag NOSPLIT
-int
-_nev(Vlong lv, Vlong rv)
-{
-	return lv.lo != rv.lo || lv.hi != rv.hi;
-}
-
-#pragma textflag NOSPLIT
-int
-_ltv(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi < (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo < rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_lev(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi < (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo <= rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_gtv(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi > (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo > rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_gev(Vlong lv, Vlong rv)
-{
-	return (long)lv.hi > (long)rv.hi ||
-		(lv.hi == rv.hi && lv.lo >= rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_lov(Vlong lv, Vlong rv)
-{
-	return lv.hi < rv.hi ||
-		(lv.hi == rv.hi && lv.lo < rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_lsv(Vlong lv, Vlong rv)
-{
-	return lv.hi < rv.hi ||
-		(lv.hi == rv.hi && lv.lo <= rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_hiv(Vlong lv, Vlong rv)
-{
-	return lv.hi > rv.hi ||
-		(lv.hi == rv.hi && lv.lo > rv.lo);
-}
-
-#pragma textflag NOSPLIT
-int
-_hsv(Vlong lv, Vlong rv)
-{
-	return lv.hi > rv.hi ||
-		(lv.hi == rv.hi && lv.lo >= rv.lo);
-}