|  | // Copyright 2009 The Go Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style | 
|  | // license that can be found in the LICENSE file. | 
|  |  | 
|  | // Package gzip implements reading and writing of gzip format compressed files, | 
|  | // as specified in RFC 1952. | 
|  | package gzip | 
|  |  | 
|  | import ( | 
|  | "bufio" | 
|  | "compress/flate" | 
|  | "encoding/binary" | 
|  | "errors" | 
|  | "hash/crc32" | 
|  | "io" | 
|  | "time" | 
|  | ) | 
|  |  | 
|  | const ( | 
|  | gzipID1     = 0x1f | 
|  | gzipID2     = 0x8b | 
|  | gzipDeflate = 8 | 
|  | flagText    = 1 << 0 | 
|  | flagHdrCrc  = 1 << 1 | 
|  | flagExtra   = 1 << 2 | 
|  | flagName    = 1 << 3 | 
|  | flagComment = 1 << 4 | 
|  | ) | 
|  |  | 
|  | var ( | 
|  | // ErrChecksum is returned when reading GZIP data that has an invalid checksum. | 
|  | ErrChecksum = errors.New("gzip: invalid checksum") | 
|  | // ErrHeader is returned when reading GZIP data that has an invalid header. | 
|  | ErrHeader = errors.New("gzip: invalid header") | 
|  | ) | 
|  |  | 
|  | var le = binary.LittleEndian | 
|  |  | 
|  | // noEOF converts io.EOF to io.ErrUnexpectedEOF. | 
|  | func noEOF(err error) error { | 
|  | if err == io.EOF { | 
|  | return io.ErrUnexpectedEOF | 
|  | } | 
|  | return err | 
|  | } | 
|  |  | 
|  | // The gzip file stores a header giving metadata about the compressed file. | 
|  | // That header is exposed as the fields of the Writer and Reader structs. | 
|  | // | 
|  | // Strings must be UTF-8 encoded and may only contain Unicode code points | 
|  | // U+0001 through U+00FF, due to limitations of the GZIP file format. | 
|  | type Header struct { | 
|  | Comment string    // comment | 
|  | Extra   []byte    // "extra data" | 
|  | ModTime time.Time // modification time | 
|  | Name    string    // file name | 
|  | OS      byte      // operating system type | 
|  | } | 
|  |  | 
|  | // A Reader is an io.Reader that can be read to retrieve | 
|  | // uncompressed data from a gzip-format compressed file. | 
|  | // | 
|  | // In general, a gzip file can be a concatenation of gzip files, | 
|  | // each with its own header. Reads from the Reader | 
|  | // return the concatenation of the uncompressed data of each. | 
|  | // Only the first header is recorded in the Reader fields. | 
|  | // | 
|  | // Gzip files store a length and checksum of the uncompressed data. | 
|  | // The Reader will return an ErrChecksum when Read | 
|  | // reaches the end of the uncompressed data if it does not | 
|  | // have the expected length or checksum. Clients should treat data | 
|  | // returned by Read as tentative until they receive the io.EOF | 
|  | // marking the end of the data. | 
|  | type Reader struct { | 
|  | Header       // valid after NewReader or Reader.Reset | 
|  | r            flate.Reader | 
|  | decompressor io.ReadCloser | 
|  | digest       uint32 // CRC-32, IEEE polynomial (section 8) | 
|  | size         uint32 // Uncompressed size (section 2.3.1) | 
|  | buf          [512]byte | 
|  | err          error | 
|  | multistream  bool | 
|  | } | 
|  |  | 
|  | // NewReader creates a new Reader reading the given reader. | 
|  | // If r does not also implement io.ByteReader, | 
|  | // the decompressor may read more data than necessary from r. | 
|  | // | 
|  | // It is the caller's responsibility to call Close on the Reader when done. | 
|  | // | 
|  | // The Reader.Header fields will be valid in the Reader returned. | 
|  | func NewReader(r io.Reader) (*Reader, error) { | 
|  | z := new(Reader) | 
|  | if err := z.Reset(r); err != nil { | 
|  | return nil, err | 
|  | } | 
|  | return z, nil | 
|  | } | 
|  |  | 
|  | // Reset discards the Reader z's state and makes it equivalent to the | 
|  | // result of its original state from NewReader, but reading from r instead. | 
|  | // This permits reusing a Reader rather than allocating a new one. | 
|  | func (z *Reader) Reset(r io.Reader) error { | 
|  | *z = Reader{ | 
|  | decompressor: z.decompressor, | 
|  | multistream:  true, | 
|  | } | 
|  | if rr, ok := r.(flate.Reader); ok { | 
|  | z.r = rr | 
|  | } else { | 
|  | z.r = bufio.NewReader(r) | 
|  | } | 
|  | z.Header, z.err = z.readHeader() | 
|  | return z.err | 
|  | } | 
|  |  | 
|  | // Multistream controls whether the reader supports multistream files. | 
|  | // | 
|  | // If enabled (the default), the Reader expects the input to be a sequence | 
|  | // of individually gzipped data streams, each with its own header and | 
|  | // trailer, ending at EOF. The effect is that the concatenation of a sequence | 
|  | // of gzipped files is treated as equivalent to the gzip of the concatenation | 
|  | // of the sequence. This is standard behavior for gzip readers. | 
|  | // | 
|  | // Calling Multistream(false) disables this behavior; disabling the behavior | 
|  | // can be useful when reading file formats that distinguish individual gzip | 
|  | // data streams or mix gzip data streams with other data streams. | 
|  | // In this mode, when the Reader reaches the end of the data stream, | 
|  | // Read returns io.EOF. The underlying reader must implement io.ByteReader | 
|  | // in order to be left positioned just after the gzip stream. | 
|  | // To start the next stream, call z.Reset(r) followed by z.Multistream(false). | 
|  | // If there is no next stream, z.Reset(r) will return io.EOF. | 
|  | func (z *Reader) Multistream(ok bool) { | 
|  | z.multistream = ok | 
|  | } | 
|  |  | 
|  | // readString reads a NUL-terminated string from z.r. | 
|  | // It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and | 
|  | // will output a string encoded using UTF-8. | 
|  | // This method always updates z.digest with the data read. | 
|  | func (z *Reader) readString() (string, error) { | 
|  | var err error | 
|  | needConv := false | 
|  | for i := 0; ; i++ { | 
|  | if i >= len(z.buf) { | 
|  | return "", ErrHeader | 
|  | } | 
|  | z.buf[i], err = z.r.ReadByte() | 
|  | if err != nil { | 
|  | return "", err | 
|  | } | 
|  | if z.buf[i] > 0x7f { | 
|  | needConv = true | 
|  | } | 
|  | if z.buf[i] == 0 { | 
|  | // Digest covers the NUL terminator. | 
|  | z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1]) | 
|  |  | 
|  | // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1). | 
|  | if needConv { | 
|  | s := make([]rune, 0, i) | 
|  | for _, v := range z.buf[:i] { | 
|  | s = append(s, rune(v)) | 
|  | } | 
|  | return string(s), nil | 
|  | } | 
|  | return string(z.buf[:i]), nil | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // readHeader reads the GZIP header according to section 2.3.1. | 
|  | // This method does not set z.err. | 
|  | func (z *Reader) readHeader() (hdr Header, err error) { | 
|  | if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil { | 
|  | // RFC 1952, section 2.2, says the following: | 
|  | //	A gzip file consists of a series of "members" (compressed data sets). | 
|  | // | 
|  | // Other than this, the specification does not clarify whether a | 
|  | // "series" is defined as "one or more" or "zero or more". To err on the | 
|  | // side of caution, Go interprets this to mean "zero or more". | 
|  | // Thus, it is okay to return io.EOF here. | 
|  | return hdr, err | 
|  | } | 
|  | if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { | 
|  | return hdr, ErrHeader | 
|  | } | 
|  | flg := z.buf[3] | 
|  | if t := int64(le.Uint32(z.buf[4:8])); t > 0 { | 
|  | // Section 2.3.1, the zero value for MTIME means that the | 
|  | // modified time is not set. | 
|  | hdr.ModTime = time.Unix(t, 0) | 
|  | } | 
|  | // z.buf[8] is XFL and is currently ignored. | 
|  | hdr.OS = z.buf[9] | 
|  | z.digest = crc32.ChecksumIEEE(z.buf[:10]) | 
|  |  | 
|  | if flg&flagExtra != 0 { | 
|  | if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { | 
|  | return hdr, noEOF(err) | 
|  | } | 
|  | z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2]) | 
|  | data := make([]byte, le.Uint16(z.buf[:2])) | 
|  | if _, err = io.ReadFull(z.r, data); err != nil { | 
|  | return hdr, noEOF(err) | 
|  | } | 
|  | z.digest = crc32.Update(z.digest, crc32.IEEETable, data) | 
|  | hdr.Extra = data | 
|  | } | 
|  |  | 
|  | var s string | 
|  | if flg&flagName != 0 { | 
|  | if s, err = z.readString(); err != nil { | 
|  | return hdr, err | 
|  | } | 
|  | hdr.Name = s | 
|  | } | 
|  |  | 
|  | if flg&flagComment != 0 { | 
|  | if s, err = z.readString(); err != nil { | 
|  | return hdr, err | 
|  | } | 
|  | hdr.Comment = s | 
|  | } | 
|  |  | 
|  | if flg&flagHdrCrc != 0 { | 
|  | if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { | 
|  | return hdr, noEOF(err) | 
|  | } | 
|  | digest := le.Uint16(z.buf[:2]) | 
|  | if digest != uint16(z.digest) { | 
|  | return hdr, ErrHeader | 
|  | } | 
|  | } | 
|  |  | 
|  | z.digest = 0 | 
|  | if z.decompressor == nil { | 
|  | z.decompressor = flate.NewReader(z.r) | 
|  | } else { | 
|  | z.decompressor.(flate.Resetter).Reset(z.r, nil) | 
|  | } | 
|  | return hdr, nil | 
|  | } | 
|  |  | 
|  | // Read implements io.Reader, reading uncompressed bytes from its underlying Reader. | 
|  | func (z *Reader) Read(p []byte) (n int, err error) { | 
|  | if z.err != nil { | 
|  | return 0, z.err | 
|  | } | 
|  |  | 
|  | n, z.err = z.decompressor.Read(p) | 
|  | z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) | 
|  | z.size += uint32(n) | 
|  | if z.err != io.EOF { | 
|  | // In the normal case we return here. | 
|  | return n, z.err | 
|  | } | 
|  |  | 
|  | // Finished file; check checksum and size. | 
|  | if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { | 
|  | z.err = noEOF(err) | 
|  | return n, z.err | 
|  | } | 
|  | digest := le.Uint32(z.buf[:4]) | 
|  | size := le.Uint32(z.buf[4:8]) | 
|  | if digest != z.digest || size != z.size { | 
|  | z.err = ErrChecksum | 
|  | return n, z.err | 
|  | } | 
|  | z.digest, z.size = 0, 0 | 
|  |  | 
|  | // File is ok; check if there is another. | 
|  | if !z.multistream { | 
|  | return n, io.EOF | 
|  | } | 
|  | z.err = nil // Remove io.EOF | 
|  |  | 
|  | if _, z.err = z.readHeader(); z.err != nil { | 
|  | return n, z.err | 
|  | } | 
|  |  | 
|  | // Read from next file, if necessary. | 
|  | if n > 0 { | 
|  | return n, nil | 
|  | } | 
|  | return z.Read(p) | 
|  | } | 
|  |  | 
|  | // Close closes the Reader. It does not close the underlying io.Reader. | 
|  | // In order for the GZIP checksum to be verified, the reader must be | 
|  | // fully consumed until the io.EOF. | 
|  | func (z *Reader) Close() error { return z.decompressor.Close() } |