| // Copyright 2009 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package flate implements the DEFLATE compressed data format, described in |
| // RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file |
| // formats. |
| package flate |
| |
| import ( |
| "bufio" |
| "io" |
| "strconv" |
| ) |
| |
// Size limits of the DEFLATE format used to dimension the decoder's
// tables and buffers.
const (
	// maxCodeLen sizes the per-length bookkeeping arrays used while
	// building a Huffman decoder.
	maxCodeLen = 1 << 4

	// maxHist is the size in bytes of the output history window.
	maxHist = 1 << 15

	// The remaining limits come from RFC 1951, section 3.2.7.

	// maxLit is the largest number of literal/length codes a block may declare.
	maxLit = 286
	// maxDist is the largest number of distance codes a block may declare.
	maxDist = 32
	// numCodes is the number of codes in the Huffman meta-code
	// (the code used to transmit code lengths).
	numCodes = 19
)
| |
// A CorruptInputError reports the presence of corrupt input at a given offset.
// Its value is that input offset.
type CorruptInputError int64

// Error implements the error interface; the message includes the offset
// in decimal.
func (e CorruptInputError) Error() string {
	offset := strconv.FormatInt(int64(e), 10)
	return "flate: corrupt input before offset " + offset
}
| |
// An InternalError reports an error in the flate code itself.
// Its value is the description of what went wrong.
type InternalError string

// Error implements the error interface.
func (e InternalError) Error() string {
	const prefix = "flate: internal error: "
	return prefix + string(e)
}
| |
// A ReadError reports an error encountered while reading input.
type ReadError struct {
	Offset int64 // byte offset where error occurred
	Err    error // error returned by underlying Read
}

// Error implements the error interface. The message carries both the
// offset and the text of the underlying error, so Err must be non-nil.
func (e *ReadError) Error() string {
	offset := strconv.FormatInt(e.Offset, 10)
	return "flate: read error at offset " + offset + ": " + e.Err.Error()
}
| |
// A WriteError reports an error encountered while writing output.
type WriteError struct {
	Offset int64 // byte offset where error occurred
	Err    error // error returned by underlying Write
}

// Error implements the error interface. The message carries both the
// offset and the text of the underlying error, so Err must be non-nil.
func (e *WriteError) Error() string {
	offset := strconv.FormatInt(e.Offset, 10)
	return "flate: write error at offset " + offset + ": " + e.Err.Error()
}
| |
// Note that much of the implementation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.
| |
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
| |
| // Note that you can do a lookup in the table even without all bits |
| // filled. Since the extra bits are zero, and the DEFLATE Huffman codes |
| // have the property that shorter codes come before longer ones, the |
| // bit length estimate in the result is a lower bound on the actual |
| // number of bits. |
| |
// Layout of one decoding-table entry ("chunk"):
//
//	chunk & huffmanCountMask   is the number of bits in the code
//	chunk >> huffmanValueShift is the value, including table link
const (
	// huffmanChunkBits is the bit width of the primary lookup table index.
	huffmanChunkBits = 9
	// huffmanNumChunks is the number of entries in the primary lookup table.
	huffmanNumChunks = 1 << huffmanChunkBits
	// huffmanValueShift is how far a chunk is shifted right to obtain its value.
	huffmanValueShift = 4
	// huffmanCountMask selects the bit-count part of a chunk (the low
	// huffmanValueShift bits).
	huffmanCountMask = 1<<huffmanValueShift - 1
)

// huffmanDecoder holds the lookup tables for one Huffman code.
// The field order is part of the type's contract for positional
// composite literals and must not change.
type huffmanDecoder struct {
	// min is the minimum code length; huffSym reads at least this many
	// bits before its first table lookup.
	min int
	// chunks is the primary table, indexed by the low huffmanChunkBits
	// bits of the input.
	chunks [huffmanNumChunks]uint32
	// links holds the overflow tables for codes longer than
	// huffmanChunkBits; a primary chunk's value selects one of them.
	links [][]uint32
	// linkMask masks the input bits above huffmanChunkBits down to the
	// width of a link table index.
	linkMask uint32
}
| |
| // Initialize Huffman decoding tables from array of code lengths. |
| func (h *huffmanDecoder) init(bits []int) bool { |
| // Count number of codes of each length, |
| // compute min and max length. |
| var count [maxCodeLen]int |
| var min, max int |
| for _, n := range bits { |
| if n == 0 { |
| continue |
| } |
| if min == 0 || n < min { |
| min = n |
| } |
| if n > max { |
| max = n |
| } |
| count[n]++ |
| } |
| if max == 0 { |
| return false |
| } |
| |
| h.min = min |
| var linkBits uint |
| var numLinks int |
| if max > huffmanChunkBits { |
| linkBits = uint(max) - huffmanChunkBits |
| numLinks = 1 << linkBits |
| h.linkMask = uint32(numLinks - 1) |
| } |
| code := 0 |
| var nextcode [maxCodeLen]int |
| for i := min; i <= max; i++ { |
| if i == huffmanChunkBits+1 { |
| // create link tables |
| link := code >> 1 |
| h.links = make([][]uint32, huffmanNumChunks-link) |
| for j := uint(link); j < huffmanNumChunks; j++ { |
| reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8 |
| reverse >>= uint(16 - huffmanChunkBits) |
| off := j - uint(link) |
| h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i)) |
| h.links[off] = make([]uint32, 1<<linkBits) |
| } |
| } |
| n := count[i] |
| nextcode[i] = code |
| code += n |
| code <<= 1 |
| } |
| |
| for i, n := range bits { |
| if n == 0 { |
| continue |
| } |
| code := nextcode[n] |
| nextcode[n]++ |
| chunk := uint32(i<<huffmanValueShift | n) |
| reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8 |
| reverse >>= uint(16 - n) |
| if n <= huffmanChunkBits { |
| for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) { |
| h.chunks[off] = chunk |
| } |
| } else { |
| linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift] |
| reverse >>= huffmanChunkBits |
| for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) { |
| linktab[off] = chunk |
| } |
| } |
| } |
| return true |
| } |
| |
// Reader is the read interface actually needed by NewReader: an io.Reader
// that can also deliver single bytes.
// If the io.Reader passed to NewReader does not also provide ReadByte,
// NewReader introduces its own buffering.
type Reader interface {
	io.Reader
	io.ByteReader
}
| |
| // Decompress state. |
| type decompressor struct { |
| // Input source. |
| r Reader |
| roffset int64 |
| woffset int64 |
| |
| // Input bits, in top of b. |
| b uint32 |
| nb uint |
| |
| // Huffman decoders for literal/length, distance. |
| h1, h2 huffmanDecoder |
| |
| // Length arrays used to define Huffman codes. |
| bits *[maxLit + maxDist]int |
| codebits *[numCodes]int |
| |
| // Output history, buffer. |
| hist *[maxHist]byte |
| hp int // current output position in buffer |
| hw int // have written hist[0:hw] already |
| hfull bool // buffer has filled at least once |
| |
| // Temporary buffer (avoids repeated allocation). |
| buf [4]byte |
| |
| // Next step in the decompression, |
| // and decompression state. |
| step func(*decompressor) |
| final bool |
| err error |
| toRead []byte |
| hl, hd *huffmanDecoder |
| copyLen int |
| copyDist int |
| } |
| |
| func (f *decompressor) nextBlock() { |
| if f.final { |
| if f.hw != f.hp { |
| f.flush((*decompressor).nextBlock) |
| return |
| } |
| f.err = io.EOF |
| return |
| } |
| for f.nb < 1+2 { |
| if f.err = f.moreBits(); f.err != nil { |
| return |
| } |
| } |
| f.final = f.b&1 == 1 |
| f.b >>= 1 |
| typ := f.b & 3 |
| f.b >>= 2 |
| f.nb -= 1 + 2 |
| switch typ { |
| case 0: |
| f.dataBlock() |
| case 1: |
| // compressed, fixed Huffman tables |
| f.hl = &fixedHuffmanDecoder |
| f.hd = nil |
| f.huffmanBlock() |
| case 2: |
| // compressed, dynamic Huffman tables |
| if f.err = f.readHuffman(); f.err != nil { |
| break |
| } |
| f.hl = &f.h1 |
| f.hd = &f.h2 |
| f.huffmanBlock() |
| default: |
| // 3 is reserved. |
| f.err = CorruptInputError(f.roffset) |
| } |
| } |
| |
| func (f *decompressor) Read(b []byte) (int, error) { |
| for { |
| if len(f.toRead) > 0 { |
| n := copy(b, f.toRead) |
| f.toRead = f.toRead[n:] |
| return n, nil |
| } |
| if f.err != nil { |
| return 0, f.err |
| } |
| f.step(f) |
| } |
| panic("unreachable") |
| } |
| |
| func (f *decompressor) Close() error { |
| if f.err == io.EOF { |
| return nil |
| } |
| return f.err |
| } |
| |
| // RFC 1951 section 3.2.7. |
| // Compression with dynamic Huffman codes |
| |
| var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} |
| |
| func (f *decompressor) readHuffman() error { |
| // HLIT[5], HDIST[5], HCLEN[4]. |
| for f.nb < 5+5+4 { |
| if err := f.moreBits(); err != nil { |
| return err |
| } |
| } |
| nlit := int(f.b&0x1F) + 257 |
| if nlit > maxLit { |
| return CorruptInputError(f.roffset) |
| } |
| f.b >>= 5 |
| ndist := int(f.b&0x1F) + 1 |
| // maxDist is 32, so ndist is always valid. |
| f.b >>= 5 |
| nclen := int(f.b&0xF) + 4 |
| // numCodes is 19, so nclen is always valid. |
| f.b >>= 4 |
| f.nb -= 5 + 5 + 4 |
| |
| // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. |
| for i := 0; i < nclen; i++ { |
| for f.nb < 3 { |
| if err := f.moreBits(); err != nil { |
| return err |
| } |
| } |
| f.codebits[codeOrder[i]] = int(f.b & 0x7) |
| f.b >>= 3 |
| f.nb -= 3 |
| } |
| for i := nclen; i < len(codeOrder); i++ { |
| f.codebits[codeOrder[i]] = 0 |
| } |
| if !f.h1.init(f.codebits[0:]) { |
| return CorruptInputError(f.roffset) |
| } |
| |
| // HLIT + 257 code lengths, HDIST + 1 code lengths, |
| // using the code length Huffman code. |
| for i, n := 0, nlit+ndist; i < n; { |
| x, err := f.huffSym(&f.h1) |
| if err != nil { |
| return err |
| } |
| if x < 16 { |
| // Actual length. |
| f.bits[i] = x |
| i++ |
| continue |
| } |
| // Repeat previous length or zero. |
| var rep int |
| var nb uint |
| var b int |
| switch x { |
| default: |
| return InternalError("unexpected length code") |
| case 16: |
| rep = 3 |
| nb = 2 |
| if i == 0 { |
| return CorruptInputError(f.roffset) |
| } |
| b = f.bits[i-1] |
| case 17: |
| rep = 3 |
| nb = 3 |
| b = 0 |
| case 18: |
| rep = 11 |
| nb = 7 |
| b = 0 |
| } |
| for f.nb < nb { |
| if err := f.moreBits(); err != nil { |
| return err |
| } |
| } |
| rep += int(f.b & uint32(1<<nb-1)) |
| f.b >>= nb |
| f.nb -= nb |
| if i+rep > n { |
| return CorruptInputError(f.roffset) |
| } |
| for j := 0; j < rep; j++ { |
| f.bits[i] = b |
| i++ |
| } |
| } |
| |
| if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { |
| return CorruptInputError(f.roffset) |
| } |
| |
| return nil |
| } |
| |
| // Decode a single Huffman block from f. |
| // hl and hd are the Huffman states for the lit/length values |
| // and the distance values, respectively. If hd == nil, using the |
| // fixed distance encoding associated with fixed Huffman blocks. |
| func (f *decompressor) huffmanBlock() { |
| for { |
| v, err := f.huffSym(f.hl) |
| if err != nil { |
| f.err = err |
| return |
| } |
| var n uint // number of bits extra |
| var length int |
| switch { |
| case v < 256: |
| f.hist[f.hp] = byte(v) |
| f.hp++ |
| if f.hp == len(f.hist) { |
| // After the flush, continue this loop. |
| f.flush((*decompressor).huffmanBlock) |
| return |
| } |
| continue |
| case v == 256: |
| // Done with huffman block; read next block. |
| f.step = (*decompressor).nextBlock |
| return |
| // otherwise, reference to older data |
| case v < 265: |
| length = v - (257 - 3) |
| n = 0 |
| case v < 269: |
| length = v*2 - (265*2 - 11) |
| n = 1 |
| case v < 273: |
| length = v*4 - (269*4 - 19) |
| n = 2 |
| case v < 277: |
| length = v*8 - (273*8 - 35) |
| n = 3 |
| case v < 281: |
| length = v*16 - (277*16 - 67) |
| n = 4 |
| case v < 285: |
| length = v*32 - (281*32 - 131) |
| n = 5 |
| default: |
| length = 258 |
| n = 0 |
| } |
| if n > 0 { |
| for f.nb < n { |
| if err = f.moreBits(); err != nil { |
| f.err = err |
| return |
| } |
| } |
| length += int(f.b & uint32(1<<n-1)) |
| f.b >>= n |
| f.nb -= n |
| } |
| |
| var dist int |
| if f.hd == nil { |
| for f.nb < 5 { |
| if err = f.moreBits(); err != nil { |
| f.err = err |
| return |
| } |
| } |
| dist = int(reverseByte[(f.b&0x1F)<<3]) |
| f.b >>= 5 |
| f.nb -= 5 |
| } else { |
| if dist, err = f.huffSym(f.hd); err != nil { |
| f.err = err |
| return |
| } |
| } |
| |
| switch { |
| case dist < 4: |
| dist++ |
| case dist >= 30: |
| f.err = CorruptInputError(f.roffset) |
| return |
| default: |
| nb := uint(dist-2) >> 1 |
| // have 1 bit in bottom of dist, need nb more. |
| extra := (dist & 1) << nb |
| for f.nb < nb { |
| if err = f.moreBits(); err != nil { |
| f.err = err |
| return |
| } |
| } |
| extra |= int(f.b & uint32(1<<nb-1)) |
| f.b >>= nb |
| f.nb -= nb |
| dist = 1<<(nb+1) + 1 + extra |
| } |
| |
| // Copy history[-dist:-dist+length] into output. |
| if dist > len(f.hist) { |
| f.err = InternalError("bad history distance") |
| return |
| } |
| |
| // No check on length; encoding can be prescient. |
| if !f.hfull && dist > f.hp { |
| f.err = CorruptInputError(f.roffset) |
| return |
| } |
| |
| f.copyLen, f.copyDist = length, dist |
| if f.copyHist() { |
| return |
| } |
| } |
| panic("unreached") |
| } |
| |
| // copyHist copies f.copyLen bytes from f.hist (f.copyDist bytes ago) to itself. |
| // It reports whether the f.hist buffer is full. |
| func (f *decompressor) copyHist() bool { |
| p := f.hp - f.copyDist |
| if p < 0 { |
| p += len(f.hist) |
| } |
| for f.copyLen > 0 { |
| n := f.copyLen |
| if x := len(f.hist) - f.hp; n > x { |
| n = x |
| } |
| if x := len(f.hist) - p; n > x { |
| n = x |
| } |
| forwardCopy(f.hist[f.hp:f.hp+n], f.hist[p:p+n]) |
| p += n |
| f.hp += n |
| f.copyLen -= n |
| if f.hp == len(f.hist) { |
| // After flush continue copying out of history. |
| f.flush((*decompressor).copyHuff) |
| return true |
| } |
| if p == len(f.hist) { |
| p = 0 |
| } |
| } |
| return false |
| } |
| |
| func (f *decompressor) copyHuff() { |
| if f.copyHist() { |
| return |
| } |
| f.huffmanBlock() |
| } |
| |
| // Copy a single uncompressed data block from input to output. |
| func (f *decompressor) dataBlock() { |
| // Uncompressed. |
| // Discard current half-byte. |
| f.nb = 0 |
| f.b = 0 |
| |
| // Length then ones-complement of length. |
| nr, err := io.ReadFull(f.r, f.buf[0:4]) |
| f.roffset += int64(nr) |
| if err != nil { |
| f.err = &ReadError{f.roffset, err} |
| return |
| } |
| n := int(f.buf[0]) | int(f.buf[1])<<8 |
| nn := int(f.buf[2]) | int(f.buf[3])<<8 |
| if uint16(nn) != uint16(^n) { |
| f.err = CorruptInputError(f.roffset) |
| return |
| } |
| |
| if n == 0 { |
| // 0-length block means sync |
| f.flush((*decompressor).nextBlock) |
| return |
| } |
| |
| f.copyLen = n |
| f.copyData() |
| } |
| |
| // copyData copies f.copyLen bytes from the underlying reader into f.hist. |
| // It pauses for reads when f.hist is full. |
| func (f *decompressor) copyData() { |
| n := f.copyLen |
| for n > 0 { |
| m := len(f.hist) - f.hp |
| if m > n { |
| m = n |
| } |
| m, err := io.ReadFull(f.r, f.hist[f.hp:f.hp+m]) |
| f.roffset += int64(m) |
| if err != nil { |
| f.err = &ReadError{f.roffset, err} |
| return |
| } |
| n -= m |
| f.hp += m |
| if f.hp == len(f.hist) { |
| f.copyLen = n |
| f.flush((*decompressor).copyData) |
| return |
| } |
| } |
| f.step = (*decompressor).nextBlock |
| } |
| |
| func (f *decompressor) setDict(dict []byte) { |
| if len(dict) > len(f.hist) { |
| // Will only remember the tail. |
| dict = dict[len(dict)-len(f.hist):] |
| } |
| |
| f.hp = copy(f.hist[:], dict) |
| if f.hp == len(f.hist) { |
| f.hp = 0 |
| f.hfull = true |
| } |
| f.hw = f.hp |
| } |
| |
| func (f *decompressor) moreBits() error { |
| c, err := f.r.ReadByte() |
| if err != nil { |
| if err == io.EOF { |
| err = io.ErrUnexpectedEOF |
| } |
| return err |
| } |
| f.roffset++ |
| f.b |= uint32(c) << f.nb |
| f.nb += 8 |
| return nil |
| } |
| |
| // Read the next Huffman-encoded symbol from f according to h. |
| func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { |
| n := uint(h.min) |
| for { |
| for f.nb < n { |
| if err := f.moreBits(); err != nil { |
| return 0, err |
| } |
| } |
| chunk := h.chunks[f.b&(huffmanNumChunks-1)] |
| n = uint(chunk & huffmanCountMask) |
| if n > huffmanChunkBits { |
| chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask] |
| n = uint(chunk & huffmanCountMask) |
| } |
| if n <= f.nb { |
| f.b >>= n |
| f.nb -= n |
| return int(chunk >> huffmanValueShift), nil |
| } |
| } |
| return 0, CorruptInputError(f.roffset) |
| } |
| |
| // Flush any buffered output to the underlying writer. |
| func (f *decompressor) flush(step func(*decompressor)) { |
| f.toRead = f.hist[f.hw:f.hp] |
| f.woffset += int64(f.hp - f.hw) |
| f.hw = f.hp |
| if f.hp == len(f.hist) { |
| f.hp = 0 |
| f.hw = 0 |
| f.hfull = true |
| } |
| f.step = step |
| } |
| |
| func makeReader(r io.Reader) Reader { |
| if rr, ok := r.(Reader); ok { |
| return rr |
| } |
| return bufio.NewReader(r) |
| } |
| |
| // NewReader returns a new ReadCloser that can be used |
| // to read the uncompressed version of r. It is the caller's |
| // responsibility to call Close on the ReadCloser when |
| // finished reading. |
| func NewReader(r io.Reader) io.ReadCloser { |
| var f decompressor |
| f.bits = new([maxLit + maxDist]int) |
| f.codebits = new([numCodes]int) |
| f.r = makeReader(r) |
| f.hist = new([maxHist]byte) |
| f.step = (*decompressor).nextBlock |
| return &f |
| } |
| |
| // NewReaderDict is like NewReader but initializes the reader |
| // with a preset dictionary. The returned Reader behaves as if |
| // the uncompressed data stream started with the given dictionary, |
| // which has already been read. NewReaderDict is typically used |
| // to read data compressed by NewWriterDict. |
| func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { |
| var f decompressor |
| f.r = makeReader(r) |
| f.hist = new([maxHist]byte) |
| f.bits = new([maxLit + maxDist]int) |
| f.codebits = new([numCodes]int) |
| f.step = (*decompressor).nextBlock |
| f.setDict(dict) |
| return &f |
| } |