runtime: size arena to fit in virtual address space limit

For Brad.
Now FreeBSD/386 binaries run on nearlyfreespeech.net.

Fixes #2302.

R=golang-dev, r
CC=golang-dev
https://golang.org/cl/5700060
diff --git a/src/pkg/runtime/malloc.goc b/src/pkg/runtime/malloc.goc
index 932e3d9..af03f80 100644
--- a/src/pkg/runtime/malloc.goc
+++ b/src/pkg/runtime/malloc.goc
@@ -262,6 +262,7 @@
 	uintptr arena_size, bitmap_size;
 	extern byte end[];
 	byte *want;
+	uintptr limit;
 
 	p = nil;
 	arena_size = 0;
@@ -274,10 +275,12 @@
 
 	runtime·InitSizes();
 
+	limit = runtime·memlimit();
+
 	// Set up the allocation arena, a contiguous area of memory where
 	// allocated data will be found.  The arena begins with a bitmap large
 	// enough to hold 4 bits per allocated word.
-	if(sizeof(void*) == 8) {
+	if(sizeof(void*) == 8 && (limit == 0 || limit > (1<<30))) {
 		// On a 64-bit machine, allocate from a single contiguous reservation.
 		// 16 GB should be big enough for now.
 		//
@@ -326,6 +329,10 @@
 		// of address space, which is probably too much in a 32-bit world.
 		bitmap_size = MaxArena32 / (sizeof(void*)*8/4);
 		arena_size = 512<<20;
+		if(limit > 0 && arena_size+bitmap_size > limit) {
+			bitmap_size = (limit / 9) & ~((1<<PageShift) - 1);
+			arena_size = bitmap_size * 8;
+		}
 		
 		// SysReserve treats the address we ask for, end, as a hint,
 		// not as an absolute requirement.  If we ask for the end
@@ -340,6 +347,8 @@
 		p = runtime·SysReserve(want, bitmap_size + arena_size);
 		if(p == nil)
 			runtime·throw("runtime: cannot reserve arena virtual address space");
+		if((uintptr)p & (((uintptr)1<<PageShift)-1))
+			runtime·printf("runtime: SysReserve returned unaligned address %p; asked for %p", p, bitmap_size+arena_size);
 	}
 	if((uintptr)p & (((uintptr)1<<PageShift)-1))
 		runtime·throw("runtime: SysReserve returned unaligned address");
diff --git a/src/pkg/runtime/os_freebsd.h b/src/pkg/runtime/os_freebsd.h
index 194d963..da1d8de 100644
--- a/src/pkg/runtime/os_freebsd.h
+++ b/src/pkg/runtime/os_freebsd.h
@@ -17,3 +17,11 @@
 
 #define	NSIG 33
 #define	SI_USER	0
+
+#define RLIMIT_AS 10
+typedef struct Rlimit Rlimit;
+struct Rlimit {
+	int64	rlim_cur;
+	int64	rlim_max;
+};
+int32	runtime·getrlimit(int32, Rlimit*);
diff --git a/src/pkg/runtime/os_linux.h b/src/pkg/runtime/os_linux.h
index ab948dd..87daa3b 100644
--- a/src/pkg/runtime/os_linux.h
+++ b/src/pkg/runtime/os_linux.h
@@ -33,3 +33,11 @@
 };
 void	runtime·rtsigprocmask(int32, Sigset*, Sigset*, int32);
 #define SIG_SETMASK 2
+
+#define RLIMIT_AS 9
+typedef struct Rlimit Rlimit;
+struct Rlimit {
+	uintptr	rlim_cur;
+	uintptr	rlim_max;
+};
+int32	runtime·getrlimit(int32, Rlimit*);
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 1f4407a..3b0f505 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -729,3 +729,4 @@
 
 void	runtime·ifaceE2I(struct InterfaceType*, Eface, Iface*);
 
+uintptr	runtime·memlimit(void);
diff --git a/src/pkg/runtime/sys_freebsd_386.s b/src/pkg/runtime/sys_freebsd_386.s
index 0e03eac..aab4444 100644
--- a/src/pkg/runtime/sys_freebsd_386.s
+++ b/src/pkg/runtime/sys_freebsd_386.s
@@ -60,6 +60,11 @@
 	INT	$0x80
 	RET
 
+TEXT runtime·getrlimit(SB),7,$-4
+	MOVL	$194, AX
+	INT	$0x80
+	RET
+
 TEXT runtime·raisesigpipe(SB),7,$12
 	// thr_self(&8(SP))
 	LEAL	8(SP), AX
diff --git a/src/pkg/runtime/sys_freebsd_amd64.s b/src/pkg/runtime/sys_freebsd_amd64.s
index 8021a42..3984ef4 100644
--- a/src/pkg/runtime/sys_freebsd_amd64.s
+++ b/src/pkg/runtime/sys_freebsd_amd64.s
@@ -65,6 +65,13 @@
 	SYSCALL
 	RET
 
+TEXT runtime·getrlimit(SB),7,$-8
+	MOVL	8(SP), DI
+	MOVQ	16(SP), SI
+	MOVL	$194, AX
+	SYSCALL
+	RET
+
 TEXT runtime·raisesigpipe(SB),7,$16
 	// thr_self(&8(SP))
 	LEAQ	8(SP), DI	// arg 1 &8(SP)
diff --git a/src/pkg/runtime/sys_linux_386.s b/src/pkg/runtime/sys_linux_386.s
index 32a18ed..b4cefc5 100644
--- a/src/pkg/runtime/sys_linux_386.s
+++ b/src/pkg/runtime/sys_linux_386.s
@@ -52,6 +52,13 @@
 	CALL	*runtime·_vdso(SB)
 	RET
 
+TEXT runtime·getrlimit(SB),7,$0
+	MOVL	$191, AX		// syscall - ugetrlimit
+	MOVL	4(SP), BX
+	MOVL	8(SP), CX
+	CALL	*runtime·_vdso(SB)
+	RET
+
 TEXT runtime·usleep(SB),7,$8
 	MOVL	$0, DX
 	MOVL	usec+0(FP), AX
diff --git a/src/pkg/runtime/sys_linux_amd64.s b/src/pkg/runtime/sys_linux_amd64.s
index 84972b4..0de5b2a 100644
--- a/src/pkg/runtime/sys_linux_amd64.s
+++ b/src/pkg/runtime/sys_linux_amd64.s
@@ -50,6 +50,13 @@
 	SYSCALL
 	RET
 
+TEXT runtime·getrlimit(SB),7,$0-24
+	MOVL	8(SP), DI
+	MOVQ	16(SP), SI
+	MOVL	$97, AX			// syscall entry
+	SYSCALL
+	RET
+
 TEXT runtime·usleep(SB),7,$16
 	MOVL	$0, DX
 	MOVL	usec+0(FP), AX
diff --git a/src/pkg/runtime/sys_linux_arm.s b/src/pkg/runtime/sys_linux_arm.s
index 26101d7..439df3a 100644
--- a/src/pkg/runtime/sys_linux_arm.s
+++ b/src/pkg/runtime/sys_linux_arm.s
@@ -33,6 +33,7 @@
 #define SYS_tkill (SYS_BASE + 238)
 #define SYS_sched_yield (SYS_BASE + 158)
 #define SYS_select (SYS_BASE + 142) // newselect
+#define SYS_ugetrlimit (SYS_BASE + 191)
 
 #define ARM_BASE (SYS_BASE + 0x0f0000)
 #define SYS_ARM_cacheflush (ARM_BASE + 2)
@@ -72,6 +73,13 @@
 	SWI	$0
 	RET
 
+TEXT runtime·getrlimit(SB),7,$0
+	MOVW	0(FP), R0
+	MOVW	4(FP), R1
+	MOVW	$SYS_ugetrlimit, R7
+	SWI	$0
+	RET
+
 TEXT runtime·exit(SB),7,$-4
 	MOVW	0(FP), R0
 	MOVW	$SYS_exit_group, R7
diff --git a/src/pkg/runtime/thread_darwin.c b/src/pkg/runtime/thread_darwin.c
index 42fb770..d170dfb 100644
--- a/src/pkg/runtime/thread_darwin.c
+++ b/src/pkg/runtime/thread_darwin.c
@@ -424,3 +424,13 @@
 runtime·osyield(void)
 {
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	// NOTE(rsc): Could use getrlimit here,
+	// like on FreeBSD or Linux, but Darwin doesn't enforce
+	// ulimit -v, so it's unclear why we'd try to stay within
+	// the limit.
+	return 0;
+}
diff --git a/src/pkg/runtime/thread_freebsd.c b/src/pkg/runtime/thread_freebsd.c
index 04de037..7871827 100644
--- a/src/pkg/runtime/thread_freebsd.c
+++ b/src/pkg/runtime/thread_freebsd.c
@@ -161,3 +161,31 @@
 	}
 	runtime·panicstring(runtime·sigtab[g->sig].name);
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	Rlimit rl;
+	extern byte text[], end[];
+	uintptr used;
+	
+	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+		return 0;
+	if(rl.rlim_cur >= 0x7fffffff)
+		return 0;
+
+	// Estimate our VM footprint excluding the heap.
+	// Not an exact science: use size of binary plus
+	// some room for thread stacks.
+	used = end - text + (64<<20);
+	if(used >= rl.rlim_cur)
+		return 0;
+
+	// If there's not at least 16 MB left, we're probably
+	// not going to be able to do much.  Treat as no limit.
+	rl.rlim_cur -= used;
+	if(rl.rlim_cur < (16<<20))
+		return 0;
+
+	return rl.rlim_cur - used;
+}
diff --git a/src/pkg/runtime/thread_linux.c b/src/pkg/runtime/thread_linux.c
index 005fb1d..d406a71 100644
--- a/src/pkg/runtime/thread_linux.c
+++ b/src/pkg/runtime/thread_linux.c
@@ -221,3 +221,31 @@
 	}
 	runtime·panicstring(runtime·sigtab[g->sig].name);
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	Rlimit rl;
+	extern byte text[], end[];
+	uintptr used;
+	
+	if(runtime·getrlimit(RLIMIT_AS, &rl) != 0)
+		return 0;
+	if(rl.rlim_cur >= 0x7fffffff)
+		return 0;
+
+	// Estimate our VM footprint excluding the heap.
+	// Not an exact science: use size of binary plus
+	// some room for thread stacks.
+	used = end - text + (64<<20);
+	if(used >= rl.rlim_cur)
+		return 0;
+
+	// If there's not at least 16 MB left, we're probably
+	// not going to be able to do much.  Treat as no limit.
+	rl.rlim_cur -= used;
+	if(rl.rlim_cur < (16<<20))
+		return 0;
+
+	return rl.rlim_cur - used;
+}
diff --git a/src/pkg/runtime/thread_netbsd.c b/src/pkg/runtime/thread_netbsd.c
index cba7ade..7d14e5c 100644
--- a/src/pkg/runtime/thread_netbsd.c
+++ b/src/pkg/runtime/thread_netbsd.c
@@ -201,3 +201,9 @@
 	}
 	runtime·panicstring(runtime·sigtab[g->sig].name);
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	return 0;
+}
diff --git a/src/pkg/runtime/thread_openbsd.c b/src/pkg/runtime/thread_openbsd.c
index efe03e3..704d95a 100644
--- a/src/pkg/runtime/thread_openbsd.c
+++ b/src/pkg/runtime/thread_openbsd.c
@@ -201,3 +201,9 @@
 	}
 	runtime·panicstring(runtime·sigtab[g->sig].name);
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	return 0;
+}
diff --git a/src/pkg/runtime/thread_plan9.c b/src/pkg/runtime/thread_plan9.c
index 1180fc8..7d5c38f 100644
--- a/src/pkg/runtime/thread_plan9.c
+++ b/src/pkg/runtime/thread_plan9.c
@@ -235,3 +235,9 @@
 {
 	return runtime·pwrite(fd, buf, nbytes, -1LL);
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	return 0;
+}
diff --git a/src/pkg/runtime/thread_windows.c b/src/pkg/runtime/thread_windows.c
index fb3f39d..8feac97 100644
--- a/src/pkg/runtime/thread_windows.c
+++ b/src/pkg/runtime/thread_windows.c
@@ -425,3 +425,9 @@
 {
 	runtime·throw("too many writes on closed pipe");
 }
+
+uintptr
+runtime·memlimit(void)
+{
+	return 0;
+}