http2: reduce allocations of (*clientConnReadLoop).handleResponse

Reduce allocations by carving capacity-1 slices for header values
out of a single pre-allocated slice, similar to how it is done in
textproto.(*Reader).ReadMIMEHeader.

  name                                   old time/op    new time/op    delta
  ClientResponseHeaders/___0_Headers-16    82.3µs ± 7%    76.4µs ± 4%   -7.18%  (p=0.000 n=10+10)
  ClientResponseHeaders/__10_Headers-16     101µs ± 2%      99µs ± 3%   -2.00%  (p=0.016 n=8+10)
  ClientResponseHeaders/_100_Headers-16     213µs ± 2%     202µs ± 4%   -4.96%  (p=0.000 n=9+9)
  ClientResponseHeaders/1000_Headers-16    2.28ms ± 1%    2.15ms ± 2%   -5.58%  (p=0.000 n=8+10)

  name                                   old alloc/op   new alloc/op   delta
  ClientResponseHeaders/___0_Headers-16    4.60kB ± 0%    4.60kB ± 0%     ~     (p=0.201 n=10+10)
  ClientResponseHeaders/__10_Headers-16    9.01kB ± 0%    8.66kB ± 0%   -3.96%  (p=0.000 n=10+10)
  ClientResponseHeaders/_100_Headers-16    54.4kB ± 0%    48.4kB ± 0%  -11.01%  (p=0.000 n=10+10)
  ClientResponseHeaders/1000_Headers-16     702kB ± 0%     595kB ± 0%  -15.28%  (p=0.000 n=10+9)

  name                                   old allocs/op  new allocs/op  delta
  ClientResponseHeaders/___0_Headers-16      57.0 ± 0%      56.0 ± 0%   -1.75%  (p=0.000 n=10+10)
  ClientResponseHeaders/__10_Headers-16       135 ± 0%       123 ± 0%   -8.89%  (p=0.000 n=10+10)
  ClientResponseHeaders/_100_Headers-16       786 ± 0%       679 ± 0%  -13.61%  (p=0.000 n=10+10)
  ClientResponseHeaders/1000_Headers-16     8.14k ± 0%     7.11k ± 0%  -12.65%  (p=0.000 n=10+10)

Fixes golang/go#37853

Change-Id: I0bc6d879293a202a2742a06aca0b6dacfae7fc5f
Reviewed-on: https://go-review.googlesource.com/c/net/+/223783
Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Emmanuel Odeke <emm.odeke@gmail.com>
diff --git a/http2/transport.go b/http2/transport.go
index 81778be..e4fb025 100644
--- a/http2/transport.go
+++ b/http2/transport.go
@@ -1892,7 +1892,9 @@
 		return nil, errors.New("malformed response from server: malformed non-numeric status pseudo header")
 	}
 
-	header := make(http.Header)
+	regularFields := f.RegularFields()
+	strs := make([]string, len(regularFields))
+	header := make(http.Header, len(regularFields))
 	res := &http.Response{
 		Proto:      "HTTP/2.0",
 		ProtoMajor: 2,
@@ -1900,7 +1902,7 @@
 		StatusCode: statusCode,
 		Status:     status + " " + http.StatusText(statusCode),
 	}
-	for _, hf := range f.RegularFields() {
+	for _, hf := range regularFields {
 		key := http.CanonicalHeaderKey(hf.Name)
 		if key == "Trailer" {
 			t := res.Trailer
@@ -1912,7 +1914,18 @@
 				t[http.CanonicalHeaderKey(v)] = nil
 			})
 		} else {
-			header[key] = append(header[key], hf.Value)
+			vv := header[key]
+			if vv == nil && len(strs) > 0 {
+				// More than likely this will be a single-element key.
+				// Most headers aren't multi-valued.
+				// Cap the one-element slice at capacity 1, so any future append
+				// reallocates instead of overwriting the other strings.
+				vv, strs = strs[:1:1], strs[1:]
+				vv[0] = hf.Value
+				header[key] = vv
+			} else {
+				header[key] = append(vv, hf.Value)
+			}
 		}
 	}
 
diff --git a/http2/transport_test.go b/http2/transport_test.go
index 64fe1ad..1424f81 100644
--- a/http2/transport_test.go
+++ b/http2/transport_test.go
@@ -3927,11 +3927,15 @@
 	ct.run()
 }
 
-func benchSimpleRoundTrip(b *testing.B, nHeaders int) {
+func benchSimpleRoundTrip(b *testing.B, nReqHeaders, nResHeader int) {
 	defer disableGoroutineTracking()()
 	b.ReportAllocs()
 	st := newServerTester(b,
 		func(w http.ResponseWriter, r *http.Request) {
+			for i := 0; i < nResHeader; i++ {
+				name := fmt.Sprint("A-", i)
+				w.Header().Set(name, "*")
+			}
 		},
 		optOnlyServer,
 		optQuiet,
@@ -3946,7 +3950,7 @@
 		b.Fatal(err)
 	}
 
-	for i := 0; i < nHeaders; i++ {
+	for i := 0; i < nReqHeaders; i++ {
 		name := fmt.Sprint("A-", i)
 		req.Header.Set(name, "*")
 	}
@@ -4037,10 +4041,17 @@
 }
 
 func BenchmarkClientRequestHeaders(b *testing.B) {
-	b.Run("   0 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 0) })
-	b.Run("  10 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 10) })
-	b.Run(" 100 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 100) })
-	b.Run("1000 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 1000) })
+	b.Run("   0 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 0, 0) })
+	b.Run("  10 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 10, 0) })
+	b.Run(" 100 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 100, 0) })
+	b.Run("1000 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 1000, 0) })
+}
+
+func BenchmarkClientResponseHeaders(b *testing.B) {
+	b.Run("   0 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 0, 0) })
+	b.Run("  10 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 0, 10) })
+	b.Run(" 100 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 0, 100) })
+	b.Run("1000 Headers", func(b *testing.B) { benchSimpleRoundTrip(b, 0, 1000) })
 }
 
 func activeStreams(cc *ClientConn) int {