http2: number of allocs reduced from 3852 to 31

buildRootHuffmanNode() was allocating a new node for every leaf of the huffman decoding tree.
This PR allocates an array of nodes for all the leaf nodes, and reuses them.
As buildRootHuffmanNode is only called via sync.Once, it's not significant performance wise, but still seems an unnecessary number of allocations.

benchmark           old ns/op     new ns/op     delta
BenchmarkRoot-4     162847        17911         -89.00%

benchmark           old allocs     new allocs     delta
BenchmarkRoot-4     3852           31             -99.20%

benchmark           old bytes     new bytes     delta
BenchmarkRoot-4     92112         35056         -61.94%

Change-Id: I7c1eae13fb6130090610eec1eb0347d5d1fef20c
GitHub-Last-Rev: f40a4e853001be76a8713cc6429ac203313313cb
GitHub-Pull-Request: golang/net#112
Reviewed-on: https://go-review.googlesource.com/c/net/+/346649
Reviewed-by: Damien Neil <dneil@google.com>
Trust: Damien Neil <dneil@google.com>
Trust: Alexander Rakoczy <alex@golang.org>
Run-TryBot: Damien Neil <dneil@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
diff --git a/http2/hpack/huffman.go b/http2/hpack/huffman.go
index a1ab2f0..fe0b84c 100644
--- a/http2/hpack/huffman.go
+++ b/http2/hpack/huffman.go
@@ -140,25 +140,29 @@
 		panic("unexpected size")
 	}
 	lazyRootHuffmanNode = newInternalNode()
-	for i, code := range huffmanCodes {
-		addDecoderNode(byte(i), code, huffmanCodeLen[i])
-	}
-}
+	// allocate a leaf node for each of the 256 symbols
+	leaves := new([256]node)
 
-func addDecoderNode(sym byte, code uint32, codeLen uint8) {
-	cur := lazyRootHuffmanNode
-	for codeLen > 8 {
-		codeLen -= 8
-		i := uint8(code >> codeLen)
-		if cur.children[i] == nil {
-			cur.children[i] = newInternalNode()
+	for sym, code := range huffmanCodes {
+		codeLen := huffmanCodeLen[sym]
+
+		cur := lazyRootHuffmanNode
+		for codeLen > 8 {
+			codeLen -= 8
+			i := uint8(code >> codeLen)
+			if cur.children[i] == nil {
+				cur.children[i] = newInternalNode()
+			}
+			cur = cur.children[i]
 		}
-		cur = cur.children[i]
-	}
-	shift := 8 - codeLen
-	start, end := int(uint8(code<<shift)), int(1<<shift)
-	for i := start; i < start+end; i++ {
-		cur.children[i] = &node{sym: sym, codeLen: codeLen}
+		shift := 8 - codeLen
+		start, end := int(uint8(code<<shift)), int(1<<shift)
+
+		leaves[sym].sym = byte(sym)
+		leaves[sym].codeLen = codeLen
+		for i := start; i < start+end; i++ {
+			cur.children[i] = &leaves[sym]
+		}
 	}
 }