runtime: faster allocator, garbage collector

The GC is still single-threaded.
Multi-threaded collection will be added in a separate CL.

Garbage collection pauses are typically
about half as long as they were before this CL.

R=brainman, iant, r
CC=golang-dev
https://golang.org/cl/3975046
diff --git a/src/pkg/runtime/mcentral.c b/src/pkg/runtime/mcentral.c
index f1ad119..29b03b5 100644
--- a/src/pkg/runtime/mcentral.c
+++ b/src/pkg/runtime/mcentral.c
@@ -113,8 +113,7 @@
 MCentral_Free(MCentral *c, void *v)
 {
 	MSpan *s;
-	PageID page;
-	MLink *p, *next;
+	MLink *p;
 	int32 size;
 
 	// Find span for v.
@@ -138,16 +137,8 @@
 	if(--s->ref == 0) {
 		size = runtime·class_to_size[c->sizeclass];
 		runtime·MSpanList_Remove(s);
-		// The second word of each freed block indicates
-		// whether it needs to be zeroed.  The first word
-		// is the link pointer and must always be cleared.
-		for(p=s->freelist; p; p=next) {
-			next = p->next;
-			if(size > sizeof(uintptr) && ((uintptr*)p)[1] != 0)
-				runtime·memclr((byte*)p, size);
-			else
-				p->next = nil;
-		}
+		runtime·unmarkspan((byte*)(s->start<<PageShift), s->npages<<PageShift);
+		*(uintptr*)(s->start<<PageShift) = 1;  // needs zeroing
 		s->freelist = nil;
 		c->nfree -= (s->npages << PageShift) / size;
 		runtime·unlock(c);
@@ -157,7 +148,7 @@
 }
 
 void
-runtime·MGetSizeClassInfo(int32 sizeclass, int32 *sizep, int32 *npagesp, int32 *nobj)
+runtime·MGetSizeClassInfo(int32 sizeclass, uintptr *sizep, int32 *npagesp, int32 *nobj)
 {
 	int32 size;
 	int32 npages;
@@ -166,7 +157,7 @@
 	size = runtime·class_to_size[sizeclass];
 	*npagesp = npages;
 	*sizep = size;
-	*nobj = (npages << PageShift) / (size + RefcountOverhead);
+	*nobj = (npages << PageShift) / size;
 }
 
 // Fetch a new span from the heap and
@@ -174,7 +165,8 @@
 static bool
 MCentral_Grow(MCentral *c)
 {
-	int32 i, n, npages, size;
+	int32 i, n, npages;
+	uintptr size;
 	MLink **tailp, *v;
 	byte *p;
 	MSpan *s;
@@ -191,7 +183,7 @@
 	// Carve span into sequence of blocks.
 	tailp = &s->freelist;
 	p = (byte*)(s->start << PageShift);
-	s->gcref = (uint32*)(p + size*n);
+	s->limit = p + size*n;
 	for(i=0; i<n; i++) {
 		v = (MLink*)p;
 		*tailp = v;
@@ -199,6 +191,7 @@
 		p += size;
 	}
 	*tailp = nil;
+	runtime·markspan((byte*)(s->start<<PageShift), size, n, size*n < (s->npages<<PageShift));
 
 	runtime·lock(c);
 	c->nfree += n;