runtime: drop chan circular linked list in favor of circular buffer

The list elements are already being allocated out of a
single memory buffer.  We can drop the Link* pointer
following and the memory it requires, replacing it with
index operations.

The change also keeps a channel from containing a pointer
back into its own allocation block, which would create a
cycle.  Blocks involved in cycles are not guaranteed to be
finalized properly, and channels depend on finalizers to
free OS-level locks on some systems.  The self-reference
was keeping channels from being garbage collected.

runtime-gdb.py will need to be updated in order to dump
the content of buffered channels with the new data structure.

Fixes #1676.

R=ken2, r
CC=golang-dev
https://golang.org/cl/4411045
diff --git a/src/cmd/ld/dwarf.c b/src/cmd/ld/dwarf.c
index d0b6407..fa55fcb 100644
--- a/src/cmd/ld/dwarf.c
+++ b/src/cmd/ld/dwarf.c
@@ -1377,7 +1377,7 @@
 synthesizechantypes(DWDie *die)
 {
 	DWDie *sudog, *waitq, *link, *hchan,
-		*dws, *dww, *dwl, *dwh, *elemtype;
+		*dws, *dww, *dwh, *elemtype;
 	DWAttr *a;
 	int elemsize, linksize, sudogsize;
 
@@ -1416,21 +1416,10 @@
 		newattr(dww, DW_AT_byte_size, DW_CLS_CONSTANT,
 			getattr(waitq, DW_AT_byte_size)->value, NULL);
 
-		// link<T>
-		dwl = newdie(&dwtypes, DW_ABRV_STRUCTTYPE,
-			mkinternaltypename("link", getattr(elemtype, DW_AT_name)->data, NULL));
-		copychildren(dwl, link);
-		substitutetype(dwl, "link", defptrto(dwl));
-		substitutetype(dwl, "elem", elemtype);
-		newattr(dwl, DW_AT_byte_size, DW_CLS_CONSTANT,
-			linksize + (elemsize > 8 ? elemsize - 8 : 0), NULL);
-
 		// hchan<T>
 		dwh = newdie(&dwtypes, DW_ABRV_STRUCTTYPE,
 			mkinternaltypename("hchan", getattr(elemtype, DW_AT_name)->data, NULL));
 		copychildren(dwh, hchan);
-		substitutetype(dwh, "senddataq", defptrto(dwl));
-		substitutetype(dwh, "recvdataq", defptrto(dwl));
 		substitutetype(dwh, "recvq", dww);
 		substitutetype(dwh, "sendq", dww);
 		substitutetype(dwh, "free", dws);
diff --git a/src/pkg/runtime/chan.c b/src/pkg/runtime/chan.c
index abb1b69..8c45b07 100644
--- a/src/pkg/runtime/chan.c
+++ b/src/pkg/runtime/chan.c
@@ -39,14 +39,18 @@
 	bool	closed;
 	uint8	elemalign;
 	Alg*	elemalg;		// interface for element type
-	Link*	senddataq;		// pointer for sender
-	Link*	recvdataq;		// pointer for receiver
+	uint32	sendx;	// send index
+	uint32	recvx;	// receive index
 	WaitQ	recvq;			// list of recv waiters
 	WaitQ	sendq;			// list of send waiters
 	SudoG*	free;			// freelist
 	Lock;
 };
 
+// Buffer follows Hchan immediately in memory.
+// chanbuf(c, i) is pointer to the i'th slot in the buffer.
+#define chanbuf(c, i) ((byte*)((c)+1)+(uintptr)(c)->elemsize*(i))
+
 struct	Link
 {
 	Link*	link;			// asynch queue circular linked list
@@ -97,8 +101,7 @@
 runtime·makechan_c(Type *elem, int64 hint)
 {
 	Hchan *c;
-	int32 i, m, n;
-	Link *d, *b, *e;
+	int32 n;
 	byte *by;
 
 	if(hint < 0 || (int32)hint != hint || hint > ((uintptr)-1) / elem->size)
@@ -109,16 +112,13 @@
 		runtime·throw("runtime.makechan: unsupported elem type");
 	}
 
-	// calculate rounded sizes of Hchan and Link
+	// calculate rounded size of Hchan
 	n = sizeof(*c);
 	while(n & MAXALIGN)
 		n++;
-	m = sizeof(*d) + elem->size - sizeof(d->elem);
-	while(m & MAXALIGN)
-		m++;
 
 	// allocate memory in one call
-	by = runtime·mal(n + hint*m);
+	by = runtime·mal(n + hint*elem->size);
 
 	c = (Hchan*)by;
 	by += n;
@@ -127,26 +127,7 @@
 	c->elemsize = elem->size;
 	c->elemalg = &runtime·algarray[elem->alg];
 	c->elemalign = elem->align;
-
-	if(hint > 0) {
-
-		// make a circular q
-		b = nil;
-		e = nil;
-		for(i=0; i<hint; i++) {
-			d = (Link*)by;
-			by += m;
-			if(e == nil)
-				e = d;
-			d->link = b;
-			b = d;
-		}
-		e->link = b;
-		c->recvdataq = b;
-		c->senddataq = b;
-		c->qcount = 0;
-		c->dataqsiz = hint;
-	}
+	c->dataqsiz = hint;
 
 	if(debug)
 		runtime·printf("makechan: chan=%p; elemsize=%D; elemalg=%d; elemalign=%d; dataqsiz=%d\n",
@@ -268,8 +249,9 @@
 		goto asynch;
 	}
 	if(ep != nil)
-		c->elemalg->copy(c->elemsize, c->senddataq->elem, ep);
-	c->senddataq = c->senddataq->link;
+		c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), ep);
+	if(++c->sendx == c->dataqsiz)
+		c->sendx = 0;
 	c->qcount++;
 
 	sg = dequeue(&c->recvq, c);
@@ -380,9 +362,10 @@
 		goto asynch;
 	}
 	if(ep != nil)
-		c->elemalg->copy(c->elemsize, ep, c->recvdataq->elem);
-	c->elemalg->copy(c->elemsize, c->recvdataq->elem, nil);
-	c->recvdataq = c->recvdataq->link;
+		c->elemalg->copy(c->elemsize, ep, chanbuf(c, c->recvx));
+	c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil);
+	if(++c->recvx == c->dataqsiz)
+		c->recvx = 0;
 	c->qcount--;
 	sg = dequeue(&c->sendq, c);
 	if(sg != nil) {
@@ -940,9 +923,10 @@
 	if(cas->u.recv.receivedp != nil)
 		*cas->u.recv.receivedp = true;
 	if(cas->u.recv.elemp != nil)
-		c->elemalg->copy(c->elemsize, cas->u.recv.elemp, c->recvdataq->elem);
-	c->elemalg->copy(c->elemsize, c->recvdataq->elem, nil);
-	c->recvdataq = c->recvdataq->link;
+		c->elemalg->copy(c->elemsize, cas->u.recv.elemp, chanbuf(c, c->recvx));
+	c->elemalg->copy(c->elemsize, chanbuf(c, c->recvx), nil);
+	if(++c->recvx == c->dataqsiz)
+		c->recvx = 0;
 	c->qcount--;
 	sg = dequeue(&c->sendq, c);
 	if(sg != nil) {
@@ -955,8 +939,9 @@
 asyncsend:
 	// can send to buffer
 	if(cas->u.elem != nil)
-		c->elemalg->copy(c->elemsize, c->senddataq->elem, cas->u.elem);
-	c->senddataq = c->senddataq->link;
+		c->elemalg->copy(c->elemsize, chanbuf(c, c->sendx), cas->u.elem);
+	if(++c->sendx == c->dataqsiz)
+		c->sendx = 0;
 	c->qcount++;
 	sg = dequeue(&c->recvq, c);
 	if(sg != nil) {
diff --git a/test/gc2.go b/test/gc2.go
new file mode 100644
index 0000000..c5c6cbe
--- /dev/null
+++ b/test/gc2.go
@@ -0,0 +1,41 @@
+// $G $D/$F.go && $L $F.$A && ./$A.out
+
+// Copyright 2011 The Go Authors.  All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Check that buffered channels are garbage collected properly.
+// An interesting case because they have finalizers and used to
+// have self loops that kept them from being collected.
+// (Cyclic data with finalizers is never finalized, nor collected.)
+
+package main
+
+import (
+	"fmt"
+	"os"
+	"runtime"
+)
+
+func main() {
+	const N = 10000
+	st := runtime.MemStats
+	for i := 0; i < N; i++ {
+		c := make(chan int, 10)
+		_ = c
+		if i%100 == 0 {
+			for j := 0; j < 4; j++ {
+				runtime.GC()
+				runtime.Gosched()
+				runtime.GC()
+				runtime.Gosched()
+			}
+		}
+	}
+	
+	obj := runtime.MemStats.HeapObjects - st.HeapObjects
+	if obj > N/5 {
+		fmt.Println("too many objects left:", obj)
+		os.Exit(1)
+	}
+}