http2: replace fixedBuffer with dataBuffer

fixedBuffer was a bad idea for two reasons:

1. It was fixed at a constant 64KB (the current default flow-control
   window) which wastes memory on the server when clients upload many
   small request bodies.

2. A follow-up CL will allow configuring the server's connection and
   stream receive windows. We want to allow individual streams to use
   varying amounts of the available connection window. This is not
   possible when each stream uses a fixedBuffer.

dataBuffer grows and shrinks based on current usage. The worst-case
fragmentation of dataBuffer is 32KB wasted memory per stream, but I
expect that worst-case will be rare. In particular, if the declared
size of a stream's request body is under 1KB, then the server will not
allocate more than 1KB to process that stream's request body.

Updates golang/go#16512
Fixes golang/go#18509

Change-Id: Ibcb18007037e82518a65848ef3baf4937955ac9d
Reviewed-on: https://go-review.googlesource.com/37400
Run-TryBot: Tom Bergan <tombergan@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
diff --git a/http2/databuffer.go b/http2/databuffer.go
new file mode 100644
index 0000000..a3067f8
--- /dev/null
+++ b/http2/databuffer.go
@@ -0,0 +1,146 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http2
+
+import (
+	"errors"
+	"fmt"
+	"sync"
+)
+
+// Buffer chunks are allocated from a pool to reduce pressure on GC.
+// The maximum wasted space per dataBuffer is 2x the largest size class,
+// which happens when the dataBuffer has multiple chunks and there is
+// one unread byte in both the first and last chunks. We use a few size
+// classes to minimize overheads for servers that typically receive very
+// small request bodies.
+//
+// TODO: Benchmark to determine if the pools are necessary. The GC may have
+// improved enough that we can instead allocate chunks like this:
+// make([]byte, max(16<<10, expectedBytesRemaining))
+var (
+	dataChunkSizeClasses = []int{
+		1 << 10,
+		2 << 10,
+		4 << 10,
+		8 << 10,
+		16 << 10,
+	}
+	dataChunkPools = [...]sync.Pool{
+		{New: func() interface{} { return make([]byte, 1<<10) }},
+		{New: func() interface{} { return make([]byte, 2<<10) }},
+		{New: func() interface{} { return make([]byte, 4<<10) }},
+		{New: func() interface{} { return make([]byte, 8<<10) }},
+		{New: func() interface{} { return make([]byte, 16<<10) }},
+	}
+)
+
+func getDataBufferChunk(size int64) []byte {
+	i := 0
+	for ; i < len(dataChunkSizeClasses)-1; i++ {
+		if size <= int64(dataChunkSizeClasses[i]) {
+			break
+		}
+	}
+	return dataChunkPools[i].Get().([]byte)
+}
+
+func putDataBufferChunk(p []byte) {
+	for i, n := range dataChunkSizeClasses {
+		if len(p) == n {
+			dataChunkPools[i].Put(p)
+			return
+		}
+	}
+	panic(fmt.Sprintf("unexpected buffer len=%v", len(p)))
+}
+
+// dataBuffer is an io.ReadWriter backed by a list of data chunks.
+// Each dataBuffer is used to read DATA frames on a single stream.
+// The buffer is divided into chunks so the server can limit the
+// total memory used by a single connection without limiting the
+// request body size on any single stream.
+type dataBuffer struct {
+	chunks   [][]byte
+	r        int   // next byte to read is chunks[0][r]
+	w        int   // next byte to write is chunks[len(chunks)-1][w]
+	size     int   // total buffered bytes
+	expected int64 // we expect at least this many bytes in future Write calls (ignored if <= 0)
+}
+
+var errReadEmpty = errors.New("read from empty dataBuffer")
+
+// Read copies bytes from the buffer into p.
+// It is an error to read when no data is available.
+func (b *dataBuffer) Read(p []byte) (int, error) {
+	if b.size == 0 {
+		return 0, errReadEmpty
+	}
+	var ntotal int
+	for len(p) > 0 && b.size > 0 {
+		readFrom := b.bytesFromFirstChunk()
+		n := copy(p, readFrom)
+		p = p[n:]
+		ntotal += n
+		b.r += n
+		b.size -= n
+		// If the first chunk has been consumed, advance to the next chunk.
+		if b.r == len(b.chunks[0]) {
+			putDataBufferChunk(b.chunks[0])
+			end := len(b.chunks) - 1
+			copy(b.chunks[:end], b.chunks[1:])
+			b.chunks[end] = nil
+			b.chunks = b.chunks[:end]
+			b.r = 0
+		}
+	}
+	return ntotal, nil
+}
+
+func (b *dataBuffer) bytesFromFirstChunk() []byte {
+	if len(b.chunks) == 1 {
+		return b.chunks[0][b.r:b.w]
+	}
+	return b.chunks[0][b.r:]
+}
+
+// Len returns the number of bytes of the unread portion of the buffer.
+func (b *dataBuffer) Len() int {
+	return b.size
+}
+
+// Write appends p to the buffer.
+func (b *dataBuffer) Write(p []byte) (int, error) {
+	ntotal := len(p)
+	for len(p) > 0 {
+		// If the last chunk is empty, allocate a new chunk. Try to allocate
+		// enough to fully copy p plus any additional bytes we expect to
+		// receive. However, this may allocate less than len(p).
+		want := int64(len(p))
+		if b.expected > want {
+			want = b.expected
+		}
+		chunk := b.lastChunkOrAlloc(want)
+		n := copy(chunk[b.w:], p)
+		p = p[n:]
+		b.w += n
+		b.size += n
+		b.expected -= int64(n)
+	}
+	return ntotal, nil
+}
+
+func (b *dataBuffer) lastChunkOrAlloc(want int64) []byte {
+	if len(b.chunks) != 0 {
+		last := b.chunks[len(b.chunks)-1]
+		if b.w < len(last) {
+			return last
+		}
+	}
+	chunk := getDataBufferChunk(want)
+	b.chunks = append(b.chunks, chunk)
+	b.w = 0
+	return chunk
+}
diff --git a/http2/databuffer_test.go b/http2/databuffer_test.go
new file mode 100644
index 0000000..ca227b5
--- /dev/null
+++ b/http2/databuffer_test.go
@@ -0,0 +1,155 @@
+// Copyright 2017 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http2
+
+import (
+	"bytes"
+	"fmt"
+	"reflect"
+	"testing"
+)
+
+func fmtDataChunk(chunk []byte) string {
+	out := ""
+	var last byte
+	var count int
+	for _, c := range chunk {
+		if c != last {
+			if count > 0 {
+				out += fmt.Sprintf(" x %d ", count)
+				count = 0
+			}
+			out += string([]byte{c})
+			last = c
+		}
+		count++
+	}
+	if count > 0 {
+		out += fmt.Sprintf(" x %d", count)
+	}
+	return out
+}
+
+func fmtDataChunks(chunks [][]byte) string {
+	var out string
+	for _, chunk := range chunks {
+		out += fmt.Sprintf("{%q}", fmtDataChunk(chunk))
+	}
+	return out
+}
+
+func testDataBuffer(t *testing.T, wantBytes []byte, setup func(t *testing.T) *dataBuffer) {
+	// Run setup, then read the remaining bytes from the dataBuffer and check
+	// that they match wantBytes. We use different read sizes to check corner
+	// cases in Read.
+	for _, readSize := range []int{1, 2, 1 * 1024, 32 * 1024} {
+		t.Run(fmt.Sprintf("ReadSize=%d", readSize), func(t *testing.T) {
+			b := setup(t)
+			buf := make([]byte, readSize)
+			var gotRead bytes.Buffer
+			for {
+				n, err := b.Read(buf)
+				gotRead.Write(buf[:n])
+				if err == errReadEmpty {
+					break
+				}
+				if err != nil {
+					t.Fatalf("error after %v bytes: %v", gotRead.Len(), err)
+				}
+			}
+			if got, want := gotRead.Bytes(), wantBytes; !bytes.Equal(got, want) {
+				t.Errorf("FinalRead=%q, want %q", fmtDataChunk(got), fmtDataChunk(want))
+			}
+		})
+	}
+}
+
+func TestDataBufferAllocation(t *testing.T) {
+	writes := [][]byte{
+		bytes.Repeat([]byte("a"), 1*1024-1),
+		[]byte{'a'},
+		bytes.Repeat([]byte("b"), 4*1024-1),
+		[]byte{'b'},
+		bytes.Repeat([]byte("c"), 8*1024-1),
+		[]byte{'c'},
+		bytes.Repeat([]byte("d"), 16*1024-1),
+		[]byte{'d'},
+		bytes.Repeat([]byte("e"), 32*1024),
+	}
+	var wantRead bytes.Buffer
+	for _, p := range writes {
+		wantRead.Write(p)
+	}
+
+	testDataBuffer(t, wantRead.Bytes(), func(t *testing.T) *dataBuffer {
+		b := &dataBuffer{}
+		for _, p := range writes {
+			if n, err := b.Write(p); n != len(p) || err != nil {
+				t.Fatalf("Write(%q x %d)=%v,%v want %v,nil", p[:1], len(p), n, err, len(p))
+			}
+		}
+		want := [][]byte{
+			bytes.Repeat([]byte("a"), 1*1024),
+			bytes.Repeat([]byte("b"), 4*1024),
+			bytes.Repeat([]byte("c"), 8*1024),
+			bytes.Repeat([]byte("d"), 16*1024),
+			bytes.Repeat([]byte("e"), 16*1024),
+			bytes.Repeat([]byte("e"), 16*1024),
+		}
+		if !reflect.DeepEqual(b.chunks, want) {
+			t.Errorf("dataBuffer.chunks\ngot:  %s\nwant: %s", fmtDataChunks(b.chunks), fmtDataChunks(want))
+		}
+		return b
+	})
+}
+
+func TestDataBufferAllocationWithExpected(t *testing.T) {
+	writes := [][]byte{
+		bytes.Repeat([]byte("a"), 1*1024), // allocates 16KB
+		bytes.Repeat([]byte("b"), 14*1024),
+		bytes.Repeat([]byte("c"), 15*1024), // allocates 16KB more
+		bytes.Repeat([]byte("d"), 2*1024),
+		bytes.Repeat([]byte("e"), 1*1024), // overflows 32KB expectation, allocates just 1KB
+	}
+	var wantRead bytes.Buffer
+	for _, p := range writes {
+		wantRead.Write(p)
+	}
+
+	testDataBuffer(t, wantRead.Bytes(), func(t *testing.T) *dataBuffer {
+		b := &dataBuffer{expected: 32 * 1024}
+		for _, p := range writes {
+			if n, err := b.Write(p); n != len(p) || err != nil {
+				t.Fatalf("Write(%q x %d)=%v,%v want %v,nil", p[:1], len(p), n, err, len(p))
+			}
+		}
+		want := [][]byte{
+			append(bytes.Repeat([]byte("a"), 1*1024), append(bytes.Repeat([]byte("b"), 14*1024), bytes.Repeat([]byte("c"), 1*1024)...)...),
+			append(bytes.Repeat([]byte("c"), 14*1024), bytes.Repeat([]byte("d"), 2*1024)...),
+			bytes.Repeat([]byte("e"), 1*1024),
+		}
+		if !reflect.DeepEqual(b.chunks, want) {
+			t.Errorf("dataBuffer.chunks\ngot:  %s\nwant: %s", fmtDataChunks(b.chunks), fmtDataChunks(want))
+		}
+		return b
+	})
+}
+
+func TestDataBufferWriteAfterPartialRead(t *testing.T) {
+	testDataBuffer(t, []byte("cdxyz"), func(t *testing.T) *dataBuffer {
+		b := &dataBuffer{}
+		if n, err := b.Write([]byte("abcd")); n != 4 || err != nil {
+			t.Fatalf("Write(\"abcd\")=%v,%v want 4,nil", n, err)
+		}
+		p := make([]byte, 2)
+		if n, err := b.Read(p); n != 2 || err != nil || !bytes.Equal(p, []byte("ab")) {
+			t.Fatalf("Read()=%q,%v,%v want \"ab\",2,nil", p, n, err)
+		}
+		if n, err := b.Write([]byte("xyz")); n != 3 || err != nil {
+			t.Fatalf("Write(\"xyz\")=%v,%v want 3,nil", n, err)
+		}
+		return b
+	})
+}
diff --git a/http2/fixed_buffer.go b/http2/fixed_buffer.go
deleted file mode 100644
index 47da0f0..0000000
--- a/http2/fixed_buffer.go
+++ /dev/null
@@ -1,60 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package http2
-
-import (
-	"errors"
-)
-
-// fixedBuffer is an io.ReadWriter backed by a fixed size buffer.
-// It never allocates, but moves old data as new data is written.
-type fixedBuffer struct {
-	buf  []byte
-	r, w int
-}
-
-var (
-	errReadEmpty = errors.New("read from empty fixedBuffer")
-	errWriteFull = errors.New("write on full fixedBuffer")
-)
-
-// Read copies bytes from the buffer into p.
-// It is an error to read when no data is available.
-func (b *fixedBuffer) Read(p []byte) (n int, err error) {
-	if b.r == b.w {
-		return 0, errReadEmpty
-	}
-	n = copy(p, b.buf[b.r:b.w])
-	b.r += n
-	if b.r == b.w {
-		b.r = 0
-		b.w = 0
-	}
-	return n, nil
-}
-
-// Len returns the number of bytes of the unread portion of the buffer.
-func (b *fixedBuffer) Len() int {
-	return b.w - b.r
-}
-
-// Write copies bytes from p into the buffer.
-// It is an error to write more data than the buffer can hold.
-func (b *fixedBuffer) Write(p []byte) (n int, err error) {
-	// Slide existing data to beginning.
-	if b.r > 0 && len(p) > len(b.buf)-b.w {
-		copy(b.buf, b.buf[b.r:b.w])
-		b.w -= b.r
-		b.r = 0
-	}
-
-	// Write new data.
-	n = copy(b.buf[b.w:], p)
-	b.w += n
-	if n < len(p) {
-		err = errWriteFull
-	}
-	return n, err
-}
diff --git a/http2/fixed_buffer_test.go b/http2/fixed_buffer_test.go
deleted file mode 100644
index f5432f8..0000000
--- a/http2/fixed_buffer_test.go
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright 2014 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package http2
-
-import (
-	"reflect"
-	"testing"
-)
-
-var bufferReadTests = []struct {
-	buf      fixedBuffer
-	read, wn int
-	werr     error
-	wp       []byte
-	wbuf     fixedBuffer
-}{
-	{
-		fixedBuffer{[]byte{'a', 0}, 0, 1},
-		5, 1, nil, []byte{'a'},
-		fixedBuffer{[]byte{'a', 0}, 0, 0},
-	},
-	{
-		fixedBuffer{[]byte{0, 'a'}, 1, 2},
-		5, 1, nil, []byte{'a'},
-		fixedBuffer{[]byte{0, 'a'}, 0, 0},
-	},
-	{
-		fixedBuffer{[]byte{'a', 'b'}, 0, 2},
-		1, 1, nil, []byte{'a'},
-		fixedBuffer{[]byte{'a', 'b'}, 1, 2},
-	},
-	{
-		fixedBuffer{[]byte{}, 0, 0},
-		5, 0, errReadEmpty, []byte{},
-		fixedBuffer{[]byte{}, 0, 0},
-	},
-}
-
-func TestBufferRead(t *testing.T) {
-	for i, tt := range bufferReadTests {
-		read := make([]byte, tt.read)
-		n, err := tt.buf.Read(read)
-		if n != tt.wn {
-			t.Errorf("#%d: wn = %d want %d", i, n, tt.wn)
-			continue
-		}
-		if err != tt.werr {
-			t.Errorf("#%d: werr = %v want %v", i, err, tt.werr)
-			continue
-		}
-		read = read[:n]
-		if !reflect.DeepEqual(read, tt.wp) {
-			t.Errorf("#%d: read = %+v want %+v", i, read, tt.wp)
-		}
-		if !reflect.DeepEqual(tt.buf, tt.wbuf) {
-			t.Errorf("#%d: buf = %+v want %+v", i, tt.buf, tt.wbuf)
-		}
-	}
-}
-
-var bufferWriteTests = []struct {
-	buf       fixedBuffer
-	write, wn int
-	werr      error
-	wbuf      fixedBuffer
-}{
-	{
-		buf: fixedBuffer{
-			buf: []byte{},
-		},
-		wbuf: fixedBuffer{
-			buf: []byte{},
-		},
-	},
-	{
-		buf: fixedBuffer{
-			buf: []byte{1, 'a'},
-		},
-		write: 1,
-		wn:    1,
-		wbuf: fixedBuffer{
-			buf: []byte{0, 'a'},
-			w:   1,
-		},
-	},
-	{
-		buf: fixedBuffer{
-			buf: []byte{'a', 1},
-			r:   1,
-			w:   1,
-		},
-		write: 2,
-		wn:    2,
-		wbuf: fixedBuffer{
-			buf: []byte{0, 0},
-			w:   2,
-		},
-	},
-	{
-		buf: fixedBuffer{
-			buf: []byte{},
-		},
-		write: 5,
-		werr:  errWriteFull,
-		wbuf: fixedBuffer{
-			buf: []byte{},
-		},
-	},
-}
-
-func TestBufferWrite(t *testing.T) {
-	for i, tt := range bufferWriteTests {
-		n, err := tt.buf.Write(make([]byte, tt.write))
-		if n != tt.wn {
-			t.Errorf("#%d: wrote %d bytes; want %d", i, n, tt.wn)
-			continue
-		}
-		if err != tt.werr {
-			t.Errorf("#%d: error = %v; want %v", i, err, tt.werr)
-			continue
-		}
-		if !reflect.DeepEqual(tt.buf, tt.wbuf) {
-			t.Errorf("#%d: buf = %+v; want %+v", i, tt.buf, tt.wbuf)
-		}
-	}
-}
diff --git a/http2/server.go b/http2/server.go
index 3c641a8..7e523f8 100644
--- a/http2/server.go
+++ b/http2/server.go
@@ -463,10 +463,9 @@
 	numTrailerValues int64
 	weight           uint8
 	state            streamState
-	resetQueued      bool   // RST_STREAM queued for write; set by sc.resetStream
-	gotTrailerHeader bool   // HEADER frame for trailers was seen
-	wroteHeaders     bool   // whether we wrote headers (not status 100)
-	reqBuf           []byte // if non-nil, body pipe buffer to return later at EOF
+	resetQueued      bool // RST_STREAM queued for write; set by sc.resetStream
+	gotTrailerHeader bool // HEADER frame for trailers was seen
+	wroteHeaders     bool // whether we wrote headers (not status 100)
 
 	trailer    http.Header // accumulated trailers
 	reqTrailer http.Header // handler's Request.Trailer
@@ -1785,16 +1784,14 @@
 		return nil, nil, err
 	}
 	if bodyOpen {
-		st.reqBuf = getRequestBodyBuf()
-		req.Body.(*requestBody).pipe = &pipe{
-			b: &fixedBuffer{buf: st.reqBuf},
-		}
-
 		if vv, ok := rp.header["Content-Length"]; ok {
 			req.ContentLength, _ = strconv.ParseInt(vv[0], 10, 64)
 		} else {
 			req.ContentLength = -1
 		}
+		req.Body.(*requestBody).pipe = &pipe{
+			b: &dataBuffer{expected: req.ContentLength},
+		}
 	}
 	return rw, req, nil
 }
@@ -1890,24 +1887,6 @@
 	return rw, req, nil
 }
 
-var reqBodyCache = make(chan []byte, 8)
-
-func getRequestBodyBuf() []byte {
-	select {
-	case b := <-reqBodyCache:
-		return b
-	default:
-		return make([]byte, initialWindowSize)
-	}
-}
-
-func putRequestBodyBuf(b []byte) {
-	select {
-	case reqBodyCache <- b:
-	default:
-	}
-}
-
 // Run on its own goroutine.
 func (sc *serverConn) runHandler(rw *responseWriter, req *http.Request, handler func(http.ResponseWriter, *http.Request)) {
 	didPanic := true
@@ -2003,12 +1982,6 @@
 		case <-sc.doneServing:
 		}
 	}
-	if err == io.EOF {
-		if buf := st.reqBuf; buf != nil {
-			st.reqBuf = nil // shouldn't matter; field unused by other
-			putRequestBodyBuf(buf)
-		}
-	}
 }
 
 func (sc *serverConn) noteBodyRead(st *stream, n int) {
diff --git a/http2/transport.go b/http2/transport.go
index fef8396..84d042d 100644
--- a/http2/transport.go
+++ b/http2/transport.go
@@ -1528,8 +1528,7 @@
 		return res, nil
 	}
 
-	buf := new(bytes.Buffer) // TODO(bradfitz): recycle this garbage
-	cs.bufPipe = pipe{b: buf}
+	cs.bufPipe = pipe{b: &dataBuffer{expected: res.ContentLength}}
 	cs.bytesRemain = res.ContentLength
 	res.Body = transportResponseBody{cs}
 	go cs.awaitRequestCancel(cs.req)