blob: a8d64601925377cc8db8bf70ad110fbae293b0c0 [file] [log] [blame]
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package flate implements the DEFLATE compressed data format, described in
// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file
// formats.
package flate
import (
"bufio"
"io"
"strconv"
)
const (
maxCodeLen = 16 // max length of Huffman code
maxHist = 32768 // max history required
// The next three numbers come from the RFC, section 3.2.7.
maxLit = 286
maxDist = 32
numCodes = 19 // number of codes in Huffman meta-code
)
// A CorruptInputError reports the presence of corrupt input at a given offset.
type CorruptInputError int64
func (e CorruptInputError) Error() string {
return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10)
}
// An InternalError reports an error in the flate code itself.
type InternalError string
func (e InternalError) Error() string { return "flate: internal error: " + string(e) }
// A ReadError reports an error encountered while reading input.
type ReadError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Read
}
func (e *ReadError) Error() string {
return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// A WriteError reports an error encountered while writing output.
type WriteError struct {
Offset int64 // byte offset where error occurred
Err error // error returned by underlying Write
}
func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// Note that much of the implemenation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
min int // the minimum code length
chunks [huffmanNumChunks]uint32 // chunks as described above
links [][]uint32 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
func (h *huffmanDecoder) init(bits []int) bool {
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range bits {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n]++
}
if max == 0 {
return false
}
h.min = min
var linkBits uint
var numLinks int
if max > huffmanChunkBits {
linkBits = uint(max) - huffmanChunkBits
numLinks = 1 << linkBits
h.linkMask = uint32(numLinks - 1)
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
if i == huffmanChunkBits+1 {
// create link tables
link := code >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
h.links[off] = make([]uint32, 1<<linkBits)
}
}
n := count[i]
nextcode[i] = code
code += n
code <<= 1
}
for i, n := range bits {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint32(i<<huffmanValueShift | n)
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
h.chunks[off] = chunk
}
} else {
linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
reverse >>= huffmanChunkBits
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
linktab[off] = chunk
}
}
}
return true
}
// The actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
type Reader interface {
io.Reader
ReadByte() (c byte, err error)
}
// Decompress state.
type decompressor struct {
// Input source.
r Reader
roffset int64
woffset int64
// Input bits, in top of b.
b uint32
nb uint
// Huffman decoders for literal/length, distance.
h1, h2 huffmanDecoder
// Length arrays used to define Huffman codes.
bits *[maxLit + maxDist]int
codebits *[numCodes]int
// Output history, buffer.
hist *[maxHist]byte
hp int // current output position in buffer
hw int // have written hist[0:hw] already
hfull bool // buffer has filled at least once
// Temporary buffer (avoids repeated allocation).
buf [4]byte
// Next step in the decompression,
// and decompression state.
step func(*decompressor)
final bool
err error
toRead []byte
hl, hd *huffmanDecoder
copyLen int
copyDist int
}
func (f *decompressor) nextBlock() {
if f.final {
if f.hw != f.hp {
f.flush((*decompressor).nextBlock)
return
}
f.err = io.EOF
return
}
for f.nb < 1+2 {
if f.err = f.moreBits(); f.err != nil {
return
}
}
f.final = f.b&1 == 1
f.b >>= 1
typ := f.b & 3
f.b >>= 2
f.nb -= 1 + 2
switch typ {
case 0:
f.dataBlock()
case 1:
// compressed, fixed Huffman tables
f.hl = &fixedHuffmanDecoder
f.hd = nil
f.huffmanBlock()
case 2:
// compressed, dynamic Huffman tables
if f.err = f.readHuffman(); f.err != nil {
break
}
f.hl = &f.h1
f.hd = &f.h2
f.huffmanBlock()
default:
// 3 is reserved.
f.err = CorruptInputError(f.roffset)
}
}
func (f *decompressor) Read(b []byte) (int, error) {
for {
if len(f.toRead) > 0 {
n := copy(b, f.toRead)
f.toRead = f.toRead[n:]
return n, nil
}
if f.err != nil {
return 0, f.err
}
f.step(f)
}
panic("unreachable")
}
func (f *decompressor) Close() error {
if f.err == io.EOF {
return nil
}
return f.err
}
// RFC 1951 section 3.2.7.
// Compression with dynamic Huffman codes
var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
func (f *decompressor) readHuffman() error {
// HLIT[5], HDIST[5], HCLEN[4].
for f.nb < 5+5+4 {
if err := f.moreBits(); err != nil {
return err
}
}
nlit := int(f.b&0x1F) + 257
if nlit > maxLit {
return CorruptInputError(f.roffset)
}
f.b >>= 5
ndist := int(f.b&0x1F) + 1
// maxDist is 32, so ndist is always valid.
f.b >>= 5
nclen := int(f.b&0xF) + 4
// numCodes is 19, so nclen is always valid.
f.b >>= 4
f.nb -= 5 + 5 + 4
// (HCLEN+4)*3 bits: code lengths in the magic codeOrder order.
for i := 0; i < nclen; i++ {
for f.nb < 3 {
if err := f.moreBits(); err != nil {
return err
}
}
f.codebits[codeOrder[i]] = int(f.b & 0x7)
f.b >>= 3
f.nb -= 3
}
for i := nclen; i < len(codeOrder); i++ {
f.codebits[codeOrder[i]] = 0
}
if !f.h1.init(f.codebits[0:]) {
return CorruptInputError(f.roffset)
}
// HLIT + 257 code lengths, HDIST + 1 code lengths,
// using the code length Huffman code.
for i, n := 0, nlit+ndist; i < n; {
x, err := f.huffSym(&f.h1)
if err != nil {
return err
}
if x < 16 {
// Actual length.
f.bits[i] = x
i++
continue
}
// Repeat previous length or zero.
var rep int
var nb uint
var b int
switch x {
default:
return InternalError("unexpected length code")
case 16:
rep = 3
nb = 2
if i == 0 {
return CorruptInputError(f.roffset)
}
b = f.bits[i-1]
case 17:
rep = 3
nb = 3
b = 0
case 18:
rep = 11
nb = 7
b = 0
}
for f.nb < nb {
if err := f.moreBits(); err != nil {
return err
}
}
rep += int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
if i+rep > n {
return CorruptInputError(f.roffset)
}
for j := 0; j < rep; j++ {
f.bits[i] = b
i++
}
}
if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) {
return CorruptInputError(f.roffset)
}
return nil
}
// Decode a single Huffman block from f.
// hl and hd are the Huffman states for the lit/length values
// and the distance values, respectively. If hd == nil, using the
// fixed distance encoding associated with fixed Huffman blocks.
func (f *decompressor) huffmanBlock() {
for {
v, err := f.huffSym(f.hl)
if err != nil {
f.err = err
return
}
var n uint // number of bits extra
var length int
switch {
case v < 256:
f.hist[f.hp] = byte(v)
f.hp++
if f.hp == len(f.hist) {
// After the flush, continue this loop.
f.flush((*decompressor).huffmanBlock)
return
}
continue
case v == 256:
// Done with huffman block; read next block.
f.step = (*decompressor).nextBlock
return
// otherwise, reference to older data
case v < 265:
length = v - (257 - 3)
n = 0
case v < 269:
length = v*2 - (265*2 - 11)
n = 1
case v < 273:
length = v*4 - (269*4 - 19)
n = 2
case v < 277:
length = v*8 - (273*8 - 35)
n = 3
case v < 281:
length = v*16 - (277*16 - 67)
n = 4
case v < 285:
length = v*32 - (281*32 - 131)
n = 5
default:
length = 258
n = 0
}
if n > 0 {
for f.nb < n {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
length += int(f.b & uint32(1<<n-1))
f.b >>= n
f.nb -= n
}
var dist int
if f.hd == nil {
for f.nb < 5 {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
dist = int(reverseByte[(f.b&0x1F)<<3])
f.b >>= 5
f.nb -= 5
} else {
if dist, err = f.huffSym(f.hd); err != nil {
f.err = err
return
}
}
switch {
case dist < 4:
dist++
case dist >= 30:
f.err = CorruptInputError(f.roffset)
return
default:
nb := uint(dist-2) >> 1
// have 1 bit in bottom of dist, need nb more.
extra := (dist & 1) << nb
for f.nb < nb {
if err = f.moreBits(); err != nil {
f.err = err
return
}
}
extra |= int(f.b & uint32(1<<nb-1))
f.b >>= nb
f.nb -= nb
dist = 1<<(nb+1) + 1 + extra
}
// Copy history[-dist:-dist+length] into output.
if dist > len(f.hist) {
f.err = InternalError("bad history distance")
return
}
// No check on length; encoding can be prescient.
if !f.hfull && dist > f.hp {
f.err = CorruptInputError(f.roffset)
return
}
f.copyLen, f.copyDist = length, dist
if f.copyHist() {
return
}
}
panic("unreached")
}
// copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself.
// It reports whether the f.hist buffer is full.
func (f *decompressor) copyHist() bool {
p := f.hp - f.copyDist
if p < 0 {
p += len(f.hist)
}
for f.copyLen > 0 {
n := f.copyLen
if x := len(f.hist) - f.hp; n > x {
n = x
}
if x := len(f.hist) - p; n > x {
n = x
}
forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n])
p += n
f.hp += n
f.copyLen -= n
if f.hp == len(f.hist) {
// After flush continue copying out of history.
f.flush((*decompressor).copyHuff)
return true
}
if p == len(f.hist) {
p = 0
}
}
return false
}
func (f *decompressor) copyHuff() {
if f.copyHist() {
return
}
f.huffmanBlock()
}
// Copy a single uncompressed data block from input to output.
func (f *decompressor) dataBlock() {
// Uncompressed.
// Discard current half-byte.
f.nb = 0
f.b = 0
// Length then ones-complement of length.
nr, err := io.ReadFull(f.r, f.buf[0:4])
f.roffset += int64(nr)
if err != nil {
f.err = &ReadError{f.roffset, err}
return
}
n := int(f.buf[0]) | int(f.buf[1])<<8
nn := int(f.buf[2]) | int(f.buf[3])<<8
if uint16(nn) != uint16(^n) {
f.err = CorruptInputError(f.roffset)
return
}
if n == 0 {
// 0-length block means sync
f.flush((*decompressor).nextBlock)
return
}
f.copyLen = n
f.copyData()
}
// copyData copies f.copyLen bytes from the underlying reader into f.hist.
// It pauses for reads when f.hist is full.
func (f *decompressor) copyData() {
n := f.copyLen
for n > 0 {
m := len(f.hist) - f.hp
if m > n {
m = n
}
m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m])
f.roffset += int64(m)
if err != nil {
f.err = &ReadError{f.roffset, err}
return
}
n -= m
f.hp += m
if f.hp == len(f.hist) {
f.copyLen = n
f.flush((*decompressor).copyData)
return
}
}
f.step = (*decompressor).nextBlock
}
func (f *decompressor) setDict(dict []byte) {
if len(dict) > len(f.hist) {
// Will only remember the tail.
dict = dict[len(dict)-len(f.hist):]
}
f.hp = copy(f.hist[:], dict)
if f.hp == len(f.hist) {
f.hp = 0
f.hfull = true
}
f.hw = f.hp
}
func (f *decompressor) moreBits() error {
c, err := f.r.ReadByte()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
f.roffset++
f.b |= uint32(c) << f.nb
f.nb += 8
return nil
}
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
n := uint(h.min)
for {
for f.nb < n {
if err := f.moreBits(); err != nil {
return 0, err
}
}
chunk := h.chunks[f.b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= f.nb {
f.b >>= n
f.nb -= n
return int(chunk >> huffmanValueShift), nil
}
}
return 0, CorruptInputError(f.roffset)
}
// Flush any buffered output to the underlying writer.
func (f *decompressor) flush(step func(*decompressor)) {
f.toRead = f.hist[f.hw:f.hp]
f.woffset += int64(f.hp - f.hw)
f.hw = f.hp
if f.hp == len(f.hist) {
f.hp = 0
f.hw = 0
f.hfull = true
}
f.step = step
}
func makeReader(r io.Reader) Reader {
if rr, ok := r.(Reader); ok {
return rr
}
return bufio.NewReader(r)
}
// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r. It is the caller's
// responsibility to call Close on the ReadCloser when
// finished reading.
func NewReader(r io.Reader) io.ReadCloser {
var f decompressor
f.bits = new([maxLit + maxDist]int)
f.codebits = new([numCodes]int)
f.r = makeReader(r)
f.hist = new([maxHist]byte)
f.step = (*decompressor).nextBlock
return &f
}
// NewReaderDict is like NewReader but initializes the reader
// with a preset dictionary. The returned Reader behaves as if
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
var f decompressor
f.r = makeReader(r)
f.hist = new([maxHist]byte)
f.bits = new([maxLit + maxDist]int)
f.codebits = new([numCodes]int)
f.step = (*decompressor).nextBlock
f.setDict(dict)
return &f
}