runtime: instrument malloc + garbage collector.
add simple garbage collection benchmark.

R=iant
CC=golang-dev
https://golang.org/cl/204053
diff --git a/src/pkg/runtime/darwin/386/sys.s b/src/pkg/runtime/darwin/386/sys.s
index 326cc23..79628a4 100644
--- a/src/pkg/runtime/darwin/386/sys.s
+++ b/src/pkg/runtime/darwin/386/sys.s
@@ -42,6 +42,22 @@
 	CALL	notok(SB)
 	RET
 
+// void gettime(int64 *sec, int32 *usec)
+TEXT gettime(SB), 7, $32	// 32-byte frame holds the call's arguments and scratch space
+	LEAL	12(SP), AX	// must be non-nil, unused
+	MOVL	AX, 4(SP)	// first argument: address of the scratch space at 12(SP)
+	MOVL	$0, 8(SP)	// time zone pointer
+	MOVL	$116, AX	// system call number (gettimeofday)
+	INT	$0x80
+
+	MOVL	sec+0(FP), DI
+	MOVL	AX, (DI)	// low word of *sec is the value the call left in AX
+	MOVL	$0, 4(DI)	// zero extend 32 -> 64
+
+	MOVL	usec+4(FP), DI
+	MOVL	DX, (DI)	// *usec is the value the call left in DX
+	RET
+
 TEXT sigaction(SB),7,$0
 	MOVL	$46, AX
 	INT	$0x80
diff --git a/src/pkg/runtime/darwin/amd64/sys.s b/src/pkg/runtime/darwin/amd64/sys.s
index 223790a..50b50d5 100644
--- a/src/pkg/runtime/darwin/amd64/sys.s
+++ b/src/pkg/runtime/darwin/amd64/sys.s
@@ -37,6 +37,18 @@
 	CALL	notok(SB)
 	RET
 
+// void gettime(int64 *sec, int32 *usec)
+TEXT gettime(SB), 7, $32
+	MOVQ	SP, DI	// must be non-nil, unused
+	MOVQ	$0, SI	// second argument is nil
+	MOVQ	$(0x2000000+116), AX	// call 116 (gettimeofday); 0x2000000 is presumably the BSD syscall class - confirm
+	SYSCALL
+	MOVQ	sec+0(FP), DI
+	MOVQ	AX, (DI)	// *sec is the 64-bit value the call left in AX
+	MOVQ	usec+8(FP), DI
+	MOVL	DX, (DI)	// *usec is the low 32 bits the call left in DX
+	RET
+
 TEXT	sigaction(SB),7,$0
 	MOVL	8(SP), DI		// arg 1 sig
 	MOVQ	16(SP), SI		// arg 2 act
@@ -226,4 +238,3 @@
 	MOVL	$(0x1000000+34), AX	// semaphore_signal_all_trap
 	SYSCALL
 	RET
-
diff --git a/src/pkg/runtime/extern.go b/src/pkg/runtime/extern.go
index a397c3b..0834f78 100644
--- a/src/pkg/runtime/extern.go
+++ b/src/pkg/runtime/extern.go
@@ -73,13 +73,22 @@
 
 type MemStatsType struct {
 	Alloc      uint64
+	TotalAlloc uint64 // bytes allocated, accumulated at every allocation
 	Sys        uint64
 	Stacks     uint64
 	InusePages uint64
 	NextGC     uint64
 	Lookups    uint64
 	Mallocs    uint64
+	PauseNs    uint64 // nanoseconds measured inside gc, summed over all collections
+	NumGC      uint32 // number of times gc has recorded a pause
 	EnableGC   bool
+	DebugGC    bool // when set, gc prints each measured pause
+	BySize     [67]struct { // one entry per allocator size class; length must match the runtime's by_size array
+		Size    uint32 // copied from the runtime's class_to_size table
+		Mallocs uint64 // allocations served from this size class
+		Frees   uint64 // frees returned to this size class
+	}
 }
 
 // MemStats holds statistics about the memory system.
diff --git a/src/pkg/runtime/freebsd/386/sys.s b/src/pkg/runtime/freebsd/386/sys.s
index d0afeae..a0860db 100644
--- a/src/pkg/runtime/freebsd/386/sys.s
+++ b/src/pkg/runtime/freebsd/386/sys.s
@@ -73,6 +73,23 @@
 	CALL	notok(SB)
 	RET
 
+TEXT	gettime(SB), 7, $32	// void gettime(int64 *sec, int32 *usec)
+	MOVL	$116, AX	// system call number (gettimeofday)
+	LEAL	12(SP), BX
+	MOVL	BX, 4(SP)	// first argument: result buffer at 12(SP)
+	MOVL	$0, 8(SP)	// second argument: nil
+	INT	$0x80
+
+	MOVL	12(SP), BX	// sec
+	MOVL	sec+0(FP), DI
+	MOVL	BX, (DI)
+	MOVL	$0, 4(DI)	// zero extend 32 -> 64 bits
+
+	MOVL	16(SP), BX	// usec
+	MOVL	usec+4(FP), DI
+	MOVL	BX, (DI)
+	RET
+
 TEXT sigaction(SB),7,$-4
 	MOVL	$416, AX
 	INT	$0x80
diff --git a/src/pkg/runtime/freebsd/amd64/sys.s b/src/pkg/runtime/freebsd/amd64/sys.s
index 53773b9..02c3e91 100644
--- a/src/pkg/runtime/freebsd/amd64/sys.s
+++ b/src/pkg/runtime/freebsd/amd64/sys.s
@@ -58,6 +58,21 @@
 	CALL	notok(SB)
 	RET
 
+// void gettime(int64 *sec, int32 *usec)
+TEXT gettime(SB), 7, $32
+	MOVL	$116, AX	// system call number (gettimeofday)
+	LEAQ	8(SP), DI	// first argument: result buffer at 8(SP)
+	MOVQ	$0, SI	// second argument: nil time zone pointer; must not be left as caller garbage
+	SYSCALL
+
+	MOVQ	8(SP), BX	// sec
+	MOVQ	sec+0(FP), DI
+	MOVQ	BX, (DI)
+	MOVL	16(SP), BX	// usec
+	MOVQ	usec+8(FP), DI
+	MOVL	BX, (DI)
+	RET
+
 TEXT	sigaction(SB),7,$-8
 	MOVL	8(SP), DI		// arg 1 sig
 	MOVQ	16(SP), SI		// arg 2 act
diff --git a/src/pkg/runtime/linux/386/sys.s b/src/pkg/runtime/linux/386/sys.s
index 7f644cb..ed7c155 100644
--- a/src/pkg/runtime/linux/386/sys.s
+++ b/src/pkg/runtime/linux/386/sys.s
@@ -30,6 +30,23 @@
 	INT	$0x80
 	RET
 
+TEXT	gettime(SB), 7, $32	// void gettime(int64 *sec, int32 *usec)
+	MOVL	$78, AX			// syscall - gettimeofday
+	LEAL	8(SP), BX	// first argument: result buffer at 8(SP)
+	MOVL	$0, CX	// second argument: nil
+	MOVL	$0, DX
+	INT	$0x80
+
+	MOVL	8(SP), BX	// sec
+	MOVL	sec+0(FP), DI
+	MOVL	BX, (DI)
+	MOVL	$0, 4(DI)	// zero extend 32 -> 64 bits
+
+	MOVL	12(SP), BX	// usec
+	MOVL	usec+4(FP), DI
+	MOVL	BX, (DI)
+	RET
+
 TEXT rt_sigaction(SB),7,$0
 	MOVL	$174, AX		// syscall - rt_sigaction
 	MOVL	4(SP), BX
diff --git a/src/pkg/runtime/linux/amd64/sys.s b/src/pkg/runtime/linux/amd64/sys.s
index 8e0905e..18bf5b5 100644
--- a/src/pkg/runtime/linux/amd64/sys.s
+++ b/src/pkg/runtime/linux/amd64/sys.s
@@ -44,6 +44,21 @@
 	SYSCALL
 	RET
 
+TEXT gettime(SB), 7, $32	// void gettime(int64 *sec, int32 *usec)
+	LEAQ	8(SP), DI	// first argument: result buffer at 8(SP)
+	MOVQ	$0, SI	// second argument: nil
+	MOVQ	$0xffffffffff600000, AX	// fixed address called instead of SYSCALL; presumably the kernel's vsyscall gettimeofday entry - confirm
+	CALL	AX
+
+	MOVQ	8(SP), BX	// sec
+	MOVQ	sec+0(FP), DI
+	MOVQ	BX, (DI)
+
+	MOVL	16(SP), BX	// usec
+	MOVQ	usec+8(FP), DI
+	MOVL	BX, (DI)
+	RET
+
 TEXT	rt_sigaction(SB),7,$0-32
 	MOVL	8(SP), DI
 	MOVQ	16(SP), SI
diff --git a/src/pkg/runtime/malloc.cgo b/src/pkg/runtime/malloc.cgo
index d7e3e41..286aa2b 100644
--- a/src/pkg/runtime/malloc.cgo
+++ b/src/pkg/runtime/malloc.cgo
@@ -46,6 +46,8 @@
 		if(v == nil)
 			throw("out of memory");
 		mstats.alloc += size;
+		mstats.total_alloc += size;
+		mstats.by_size[sizeclass].nmalloc++;
 	} else {
 		// TODO(rsc): Report tracebacks for very large allocations.
 
@@ -57,6 +59,7 @@
 		if(s == nil)
 			throw("out of memory");
 		mstats.alloc += npages<<PageShift;
+		mstats.total_alloc += npages<<PageShift;
 		v = (void*)(s->start << PageShift);
 	}
 
@@ -127,6 +130,7 @@
 	size = class_to_size[sizeclass];
 	runtime_memclr(v, size);
 	mstats.alloc -= size;
+	mstats.by_size[sizeclass].nfree++;
 	MCache_Free(c, v, sizeclass, size);
 
 out:
diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h
index 133ed02..05f500a 100644
--- a/src/pkg/runtime/malloc.h
+++ b/src/pkg/runtime/malloc.h
@@ -156,17 +156,26 @@
 
 
 // Statistics.
-// Shared with Go: if you edit this structure, also edit ../malloc/malloc.go.
+// Shared with Go: if you edit this structure, also edit extern.go.
 struct MStats
 {
 	uint64	alloc;
+	uint64	total_alloc;	// bytes allocated, accumulated at every allocation
 	uint64	sys;
 	uint64	stacks;
 	uint64	inuse_pages;	// protected by mheap.Lock
 	uint64	next_gc;	// protected by mheap.Lock
 	uint64	nlookup;	// unprotected (approximate)
 	uint64	nmalloc;	// unprotected (approximate)
+	uint64	pause_ns;	// nanoseconds measured inside gc, summed over all collections
+	uint32	numgc;	// number of times gc has recorded a pause
 	bool	enablegc;
+	bool	debuggc;	// when set, gc prints each measured pause
+	struct {
+		uint32 size;	// copied from class_to_size
+		uint64 nmalloc;	// allocations served from this size class
+		uint64 nfree;	// frees returned to this size class
+	} by_size[NumSizeClasses];
 };
 
 #define mstats ·MemStats	/* name shared with Go */
diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c
index d8a943e..83d2173 100644
--- a/src/pkg/runtime/mgc0.c
+++ b/src/pkg/runtime/mgc0.c
@@ -240,6 +240,7 @@
 void
 gc(int32 force)
 {
+	int64 t0, t1;
 	byte *p;
 	void **fp;
 
@@ -268,6 +269,7 @@
 
 //printf("gc...\n");
 	semacquire(&gcsema);
+	t0 = nanotime();
 	m->gcing = 1;
 	stoptheworld();
 	if(mheap.Lock.key != 0)
@@ -289,6 +291,11 @@
 	pfinq = finq;
 	m->locks--;
 
+	t1 = nanotime();
+	mstats.numgc++;
+	mstats.pause_ns += t1 - t0;
+	if(mstats.debuggc)
+		printf("pause %D\n", t1-t0);
 	semrelease(&gcsema);
 	starttheworld();
 }
diff --git a/src/pkg/runtime/msize.c b/src/pkg/runtime/msize.c
index 25e2263..aebc154 100644
--- a/src/pkg/runtime/msize.c
+++ b/src/pkg/runtime/msize.c
@@ -134,6 +134,10 @@
 		}
 	}
 
+	// Copy out for statistics table.
+	for(i=0; i<nelem(class_to_size); i++)
+		mstats.by_size[i].size = class_to_size[i];
+
 	// Initialize the class_to_transfercount table.
 	for(sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
 		n = 64*1024 / class_to_size[sizeclass];
diff --git a/src/pkg/runtime/runtime.c b/src/pkg/runtime/runtime.c
index 2d840aa..3a94c8b 100644
--- a/src/pkg/runtime/runtime.c
+++ b/src/pkg/runtime/runtime.c
@@ -461,3 +461,14 @@
 	USED(v);
 }
 
+// nanotime returns the time filled in by gettime as a count of nanoseconds.
+int64
+nanotime(void)
+{
+	int32 micros;
+	int64 secs;
+	micros = 0;	// zeroed first so the result is defined even if gettime stores nothing
+	secs = 0;
+	gettime(&secs, &micros);
+	return (secs*1000000 + (int64)micros) * 1000;
+}
diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h
index 2d6d42e..a526c04 100644
--- a/src/pkg/runtime/runtime.h
+++ b/src/pkg/runtime/runtime.h
@@ -393,6 +393,8 @@
 void	·newproc(int32, byte*, byte*);
 void	siginit(void);
 bool	sigsend(int32 sig);
+void	gettime(int64*, int32*);
+int64	nanotime(void);
 
 #pragma	varargck	argpos	printf	1