http2: optimization of AppendHuffmanString
This uses a uint64 bit buffer so can append 4 bytes at a time, instead
of using append every byte.
GOAMD64=v1
name old time/op new time/op delta
AppendHuffmanString-4 316ns ± 3% 169ns ± 1% -46.60% (p=0.000 n=10+8)
GOAMD64=v3
name old time/op new time/op delta
AppendHuffmanString-4 263ns ± 4% 114ns ± 1% -56.68% (p=0.000 n=10+8)
Change-Id: Ib48a0e0711589dd1b311c377f61e30d7c18c2953
GitHub-Last-Rev: 0a3eb27feefe920ffd511f0c4f93dfad1cbe69ce
GitHub-Pull-Request: golang/net#131
Reviewed-on: https://go-review.googlesource.com/c/net/+/403434
Reviewed-by: Damien Neil <dneil@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
diff --git a/http2/hpack/hpack_test.go b/http2/hpack/hpack_test.go
index a361a2a..84f8b83 100644
--- a/http2/hpack/hpack_test.go
+++ b/http2/hpack/hpack_test.go
@@ -509,6 +509,25 @@
}
}
+func BenchmarkAppendHuffmanString(b *testing.B) {
+ b.StopTimer()
+ expected, err := hex.DecodeString(strings.Replace("94e7 821d d7f2 e6c7 b335 dfdf cd5b 3960 d5af 2708 7f36 72c1 ab27 0fb5 291f 9587 3160 65c0 03ed 4ee5 b106 3d50 07",
+ " ", "", -1))
+ if err != nil {
+ b.Fatal(err)
+ }
+ buf := make([]byte, 0, len(expected))
+ b.ReportAllocs()
+ b.StartTimer()
+
+ for i := 0; i < b.N; i++ {
+ enc := AppendHuffmanString(buf, "foo=ASDJKHQKBZXOQWEOPIUAXQWEOIU; max-age=3600; version=1")
+ if string(enc) != string(expected) {
+ b.Fatalf("bogus output %q", enc)
+ }
+ }
+}
+
func TestHuffmanMaxStrLen(t *testing.T) {
const msg = "Some string"
huff := AppendHuffmanString(nil, msg)
diff --git a/http2/hpack/huffman.go b/http2/hpack/huffman.go
index fe0b84c..20d083a 100644
--- a/http2/hpack/huffman.go
+++ b/http2/hpack/huffman.go
@@ -169,25 +169,50 @@
// AppendHuffmanString appends s, as encoded in Huffman codes, to dst
// and returns the extended buffer.
func AppendHuffmanString(dst []byte, s string) []byte {
- rembits := uint8(8)
-
+ // This relies on the maximum huffman code length being 30 (See tables.go huffmanCodeLen array)
+ // So if a uint64 buffer has less than 32 valid bits can always accommodate another huffmanCode.
+ var (
+ x uint64 // buffer
+ n uint // number valid of bits present in x
+ )
for i := 0; i < len(s); i++ {
- if rembits == 8 {
- dst = append(dst, 0)
+ c := s[i]
+ n += uint(huffmanCodeLen[c])
+ x <<= huffmanCodeLen[c] % 64
+ x |= uint64(huffmanCodes[c])
+ if n >= 32 {
+ n %= 32 // Normally would be -= 32 but %= 32 informs compiler 0 <= n <= 31 for upcoming shift
+ y := uint32(x >> n) // Compiler doesn't combine memory writes if y isn't uint32
+ dst = append(dst, byte(y>>24), byte(y>>16), byte(y>>8), byte(y))
}
- dst, rembits = appendByteToHuffmanCode(dst, rembits, s[i])
}
-
- if rembits < 8 {
- // special EOS symbol
- code := uint32(0x3fffffff)
- nbits := uint8(30)
-
- t := uint8(code >> (nbits - rembits))
- dst[len(dst)-1] |= t
+ // Add padding bits if necessary
+ if over := n % 8; over > 0 {
+ const (
+ eosCode = 0x3fffffff
+ eosNBits = 30
+ eosPadByte = eosCode >> (eosNBits - 8)
+ )
+ pad := 8 - over
+ x = (x << pad) | (eosPadByte >> over)
+ n += pad // 8 now divides into n exactly
}
-
- return dst
+ // n in (0, 8, 16, 24, 32)
+ switch n / 8 {
+ case 0:
+ return dst
+ case 1:
+ return append(dst, byte(x))
+ case 2:
+ y := uint16(x)
+ return append(dst, byte(y>>8), byte(y))
+ case 3:
+ y := uint16(x >> 8)
+ return append(dst, byte(y>>8), byte(y), byte(x))
+ }
+ // case 4:
+ y := uint32(x)
+ return append(dst, byte(y>>24), byte(y>>16), byte(y>>8), byte(y))
}
// HuffmanEncodeLength returns the number of bytes required to encode
@@ -199,35 +224,3 @@
}
return (n + 7) / 8
}
-
-// appendByteToHuffmanCode appends Huffman code for c to dst and
-// returns the extended buffer and the remaining bits in the last
-// element. The appending is not byte aligned and the remaining bits
-// in the last element of dst is given in rembits.
-func appendByteToHuffmanCode(dst []byte, rembits uint8, c byte) ([]byte, uint8) {
- code := huffmanCodes[c]
- nbits := huffmanCodeLen[c]
-
- for {
- if rembits > nbits {
- t := uint8(code << (rembits - nbits))
- dst[len(dst)-1] |= t
- rembits -= nbits
- break
- }
-
- t := uint8(code >> (nbits - rembits))
- dst[len(dst)-1] |= t
-
- nbits -= rembits
- rembits = 8
-
- if nbits == 0 {
- break
- }
-
- dst = append(dst, 0)
- }
-
- return dst, rembits
-}