src/compress/lzw/writer.go - go - Git at Google

 // Copyright 2011 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package lzw

 import (
 	"bufio"
 	"errors"
 	"fmt"
 	"io"
 )

 // A writer is a buffered, flushable writer.
 type writer interface {
 	io.ByteWriter
 	Flush() error
 }

 const (
 	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
 	// type conversions when shifting bits.
 	maxCode     = 1<<12 - 1
 	invalidCode = 1<<32 - 1
 	// There are 1<<12 possible codes, which is an upper bound on the number of
 	// valid hash table entries at any given point in time. tableSize is 4x that.
 	tableSize = 4 * 1 << 12
 	tableMask = tableSize - 1
 	// A hash table entry is a uint32. Zero is an invalid entry since the
 	// lower 12 bits of a valid entry must be a non-literal code.
 	invalidEntry = 0
 )

 // Writer is an LZW compressor. It writes the compressed form of the data
 // to an underlying writer (see NewWriter).
 type Writer struct {
 	// w is the writer that compressed bytes are written to.
 	w writer
 	// order, write, bits, nBits and width are the state for
 	// converting a code stream into a byte stream.
 	order Order
 	write func(*Writer, uint32) error
 	bits  uint32
 	nBits uint
 	width uint
 	// litWidth is the width in bits of literal codes.
 	litWidth uint
 	// hi is the code implied by the next code emission.
 	// overflow is the code at which hi overflows the code width.
 	hi, overflow uint32
 	// savedCode is the accumulated code at the end of the most recent Write
 	// call. It is equal to invalidCode if there was no such call.
 	savedCode uint32
 	// err is the first error encountered during writing. Closing the writer
 	// will make any future Write calls return errClosed
 	err error
 	// table is the hash table from 20-bit keys to 12-bit values. Each table
 	// entry contains key<<12|val and collisions resolve by linear probing.
 	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
 	// The values are a 12-bit code.
 	table [tableSize]uint32
 }

 // writeLSB writes the code c for "Least Significant Bits first" data.
 func (w *Writer) writeLSB(c uint32) error {
 	w.bits |= c << w.nBits
 	w.nBits += w.width
 	for w.nBits >= 8 {
 		if err := w.w.WriteByte(uint8(w.bits)); err != nil {
 			return err
 		}
 		w.bits >>= 8
 		w.nBits -= 8
 	}
 	return nil
 }

 // writeMSB writes the code c for "Most Significant Bits first" data.
 func (w *Writer) writeMSB(c uint32) error {
 	w.bits |= c << (32 - w.width - w.nBits)
 	w.nBits += w.width
 	for w.nBits >= 8 {
 		if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil {
 			return err
 		}
 		w.bits <<= 8
 		w.nBits -= 8
 	}
 	return nil
 }

 // errOutOfCodes is an internal error that means that the writer has run out
 // of unused codes and a clear code needs to be sent next.
 var errOutOfCodes = errors.New("lzw: out of codes")

 // incHi increments e.hi and checks for both overflow and running out of
 // unused codes. In the latter case, incHi sends a clear code, resets the
 // writer state and returns errOutOfCodes.
 func (w *Writer) incHi() error {
 	w.hi++
 	if w.hi == w.overflow {
 		w.width++
 		w.overflow <<= 1
 	}
 	if w.hi == maxCode {
 		clear := uint32(1) << w.litWidth
 		if err := w.write(w, clear); err != nil {
 			return err
 		}
 		w.width = w.litWidth + 1
 		w.hi = clear + 1
 		w.overflow = clear << 1
 		for i := range w.table {
 			w.table[i] = invalidEntry
 		}
 		return errOutOfCodes
 	}
 	return nil
 }

 // Write writes a compressed representation of p to w's underlying writer.
 func (w *Writer) Write(p []byte) (n int, err error) {
 	if w.err != nil {
 		return 0, w.err
 	}
 	if len(p) == 0 {
 		return 0, nil
 	}
 	if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff {
 		for _, x := range p {
 			if x > maxLit {
 				w.err = errors.New("lzw: input byte too large for the litWidth")
 				return 0, w.err
 			}
 		}
 	}
 	n = len(p)
 	code := w.savedCode
 	if code == invalidCode {
 		// This is the first write; send a clear code.
 		// https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
 		// "Variable-Length-Code LZW Compression" says that "Encoders should
 		// output a Clear code as the first code of each image data stream".
 		//
 		// LZW compression isn't only used by GIF, but it's cheap to follow
 		// that directive unconditionally.
 		clear := uint32(1) << w.litWidth
 		if err := w.write(w, clear); err != nil {
 			return 0, err
 		}
 		// After the starting clear code, the next code sent (for non-empty
 		// input) is always a literal code.
 		code, p = uint32(p[0]), p[1:]
 	}
 loop:
 	for _, x := range p {
 		literal := uint32(x)
 		key := code<<8 | literal
 		// If there is a hash table hit for this key then we continue the loop
 		// and do not emit a code yet.
 		hash := (key>>12 ^ key) & tableMask
 		for h, t := hash, w.table[hash]; t != invalidEntry; {
 			if key == t>>12 {
 				code = t & maxCode
 				continue loop
 			}
 			h = (h + 1) & tableMask
 			t = w.table[h]
 		}
 		// Otherwise, write the current code, and literal becomes the start of
 		// the next emitted code.
 		if w.err = w.write(w, code); w.err != nil {
 			return 0, w.err
 		}
 		code = literal
 		// Increment e.hi, the next implied code. If we run out of codes, reset
 		// the writer state (including clearing the hash table) and continue.
 		if err1 := w.incHi(); err1 != nil {
 			if err1 == errOutOfCodes {
 				continue
 			}
 			w.err = err1
 			return 0, w.err
 		}
 		// Otherwise, insert key -> e.hi into the map that e.table represents.
 		for {
 			if w.table[hash] == invalidEntry {
 				w.table[hash] = (key << 12) | w.hi
 				break
 			}
 			hash = (hash + 1) & tableMask
 		}
 	}
 	w.savedCode = code
 	return n, nil
 }

 // Close closes the Writer, flushing any pending output. It does not close
 // w's underlying writer.
 func (w *Writer) Close() error {
 	if w.err != nil {
 		if w.err == errClosed {
 			return nil
 		}
 		return w.err
 	}
 	// Make any future calls to Write return errClosed.
 	w.err = errClosed
 	// Write the savedCode if valid.
 	if w.savedCode != invalidCode {
 		if err := w.write(w, w.savedCode); err != nil {
 			return err
 		}
 		if err := w.incHi(); err != nil && err != errOutOfCodes {
 			return err
 		}
 	} else {
 		// Write the starting clear code, as w.Write did not.
 		clear := uint32(1) << w.litWidth
 		if err := w.write(w, clear); err != nil {
 			return err
 		}
 	}
 	// Write the eof code.
 	eof := uint32(1)<<w.litWidth + 1
 	if err := w.write(w, eof); err != nil {
 		return err
 	}
 	// Write the final bits.
 	if w.nBits > 0 {
 		if w.order == MSB {
 			w.bits >>= 24
 		}
 		if err := w.w.WriteByte(uint8(w.bits)); err != nil {
 			return err
 		}
 	}
 	return w.w.Flush()
 }

 // Reset clears the Writer's state and allows it to be reused again
 // as a new Writer.
 func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) {
 	*w = Writer{}
 	w.init(dst, order, litWidth)
 }

 // NewWriter creates a new io.WriteCloser.
 // Writes to the returned io.WriteCloser are compressed and written to w.
 // It is the caller's responsibility to call Close on the WriteCloser when
 // finished writing.
 // The number of bits to use for literal codes, litWidth, must be in the
 // range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
 //
 // It is guaranteed that the underlying type of the returned io.WriteCloser
 // is a *Writer.
 func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
 	return newWriter(w, order, litWidth)
 }

 func newWriter(dst io.Writer, order Order, litWidth int) *Writer {
 	w := new(Writer)
 	w.init(dst, order, litWidth)
 	return w
 }

 func (w *Writer) init(dst io.Writer, order Order, litWidth int) {
 	switch order {
 	case LSB:
 		w.write = (*Writer).writeLSB
 	case MSB:
 		w.write = (*Writer).writeMSB
 	default:
 		w.err = errors.New("lzw: unknown order")
 		return
 	}
 	if litWidth < 2 || 8 < litWidth {
 		w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
 		return
 	}
 	bw, ok := dst.(writer)
 	if !ok && dst != nil {
 		bw = bufio.NewWriter(dst)
 	}
 	w.w = bw
 	lw := uint(litWidth)
 	w.order = order
 	w.width = 1 + lw
 	w.litWidth = lw
 	w.hi = 1<<lw + 1
 	w.overflow = 1 << (lw + 1)
 	w.savedCode = invalidCode
 }
	// Copyright 2011 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package lzw

	import (
	"bufio"
	"errors"
	"fmt"
	"io"
	)

	// A writer is a buffered, flushable writer.
	type writer interface {
	io.ByteWriter
	Flush() error
	}

	const (
	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
	// type conversions when shifting bits.
	maxCode = 1<<12 - 1
	invalidCode = 1<<32 - 1
	// There are 1<<12 possible codes, which is an upper bound on the number of
	// valid hash table entries at any given point in time. tableSize is 4x that.
	tableSize = 4 * 1 << 12
	tableMask = tableSize - 1
	// A hash table entry is a uint32. Zero is an invalid entry since the
	// lower 12 bits of a valid entry must be a non-literal code.
	invalidEntry = 0
	)

	// Writer is an LZW compressor. It writes the compressed form of the data
	// to an underlying writer (see NewWriter).
	type Writer struct {
	// w is the writer that compressed bytes are written to.
	w writer
	// order, write, bits, nBits and width are the state for
	// converting a code stream into a byte stream.
	order Order
	write func(*Writer, uint32) error
	bits uint32
	nBits uint
	width uint
	// litWidth is the width in bits of literal codes.
	litWidth uint
	// hi is the code implied by the next code emission.
	// overflow is the code at which hi overflows the code width.
	hi, overflow uint32
	// savedCode is the accumulated code at the end of the most recent Write
	// call. It is equal to invalidCode if there was no such call.
	savedCode uint32
	// err is the first error encountered during writing. Closing the writer
	// will make any future Write calls return errClosed
	err error
	// table is the hash table from 20-bit keys to 12-bit values. Each table
	// entry contains key<<12\|val and collisions resolve by linear probing.
	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
	// The values are a 12-bit code.
	table [tableSize]uint32
	}

	// writeLSB writes the code c for "Least Significant Bits first" data.
	func (w *Writer) writeLSB(c uint32) error {
	w.bits \|= c << w.nBits
	w.nBits += w.width
	for w.nBits >= 8 {
	if err := w.w.WriteByte(uint8(w.bits)); err != nil {
	return err
	}
	w.bits >>= 8
	w.nBits -= 8
	}
	return nil
	}

	// writeMSB writes the code c for "Most Significant Bits first" data.
	func (w *Writer) writeMSB(c uint32) error {
	w.bits \|= c << (32 - w.width - w.nBits)
	w.nBits += w.width
	for w.nBits >= 8 {
	if err := w.w.WriteByte(uint8(w.bits >> 24)); err != nil {
	return err
	}
	w.bits <<= 8
	w.nBits -= 8
	}
	return nil
	}

	// errOutOfCodes is an internal error that means that the writer has run out
	// of unused codes and a clear code needs to be sent next.
	var errOutOfCodes = errors.New("lzw: out of codes")

	// incHi increments e.hi and checks for both overflow and running out of
	// unused codes. In the latter case, incHi sends a clear code, resets the
	// writer state and returns errOutOfCodes.
	func (w *Writer) incHi() error {
	w.hi++
	if w.hi == w.overflow {
	w.width++
	w.overflow <<= 1
	}
	if w.hi == maxCode {
	clear := uint32(1) << w.litWidth
	if err := w.write(w, clear); err != nil {
	return err
	}
	w.width = w.litWidth + 1
	w.hi = clear + 1
	w.overflow = clear << 1
	for i := range w.table {
	w.table[i] = invalidEntry
	}
	return errOutOfCodes
	}
	return nil
	}

	// Write writes a compressed representation of p to w's underlying writer.
	func (w *Writer) Write(p []byte) (n int, err error) {
	if w.err != nil {
	return 0, w.err
	}
	if len(p) == 0 {
	return 0, nil
	}
	if maxLit := uint8(1<<w.litWidth - 1); maxLit != 0xff {
	for _, x := range p {
	if x > maxLit {
	w.err = errors.New("lzw: input byte too large for the litWidth")
	return 0, w.err
	}
	}
	}
	n = len(p)
	code := w.savedCode
	if code == invalidCode {
	// This is the first write; send a clear code.
	// https://www.w3.org/Graphics/GIF/spec-gif89a.txt Appendix F
	// "Variable-Length-Code LZW Compression" says that "Encoders should
	// output a Clear code as the first code of each image data stream".
	//
	// LZW compression isn't only used by GIF, but it's cheap to follow
	// that directive unconditionally.
	clear := uint32(1) << w.litWidth
	if err := w.write(w, clear); err != nil {
	return 0, err
	}
	// After the starting clear code, the next code sent (for non-empty
	// input) is always a literal code.
	code, p = uint32(p[0]), p[1:]
	}
	loop:
	for _, x := range p {
	literal := uint32(x)
	key := code<<8 \| literal
	// If there is a hash table hit for this key then we continue the loop
	// and do not emit a code yet.
	hash := (key>>12 ^ key) & tableMask
	for h, t := hash, w.table[hash]; t != invalidEntry; {
	if key == t>>12 {
	code = t & maxCode
	continue loop
	}
	h = (h + 1) & tableMask
	t = w.table[h]
	}
	// Otherwise, write the current code, and literal becomes the start of
	// the next emitted code.
	if w.err = w.write(w, code); w.err != nil {
	return 0, w.err
	}
	code = literal
	// Increment e.hi, the next implied code. If we run out of codes, reset
	// the writer state (including clearing the hash table) and continue.
	if err1 := w.incHi(); err1 != nil {
	if err1 == errOutOfCodes {
	continue
	}
	w.err = err1
	return 0, w.err
	}
	// Otherwise, insert key -> e.hi into the map that e.table represents.
	for {
	if w.table[hash] == invalidEntry {
	w.table[hash] = (key << 12) \| w.hi
	break
	}
	hash = (hash + 1) & tableMask
	}
	}
	w.savedCode = code
	return n, nil
	}

	// Close closes the Writer, flushing any pending output. It does not close
	// w's underlying writer.
	func (w *Writer) Close() error {
	if w.err != nil {
	if w.err == errClosed {
	return nil
	}
	return w.err
	}
	// Make any future calls to Write return errClosed.
	w.err = errClosed
	// Write the savedCode if valid.
	if w.savedCode != invalidCode {
	if err := w.write(w, w.savedCode); err != nil {
	return err
	}
	if err := w.incHi(); err != nil && err != errOutOfCodes {
	return err
	}
	} else {
	// Write the starting clear code, as w.Write did not.
	clear := uint32(1) << w.litWidth
	if err := w.write(w, clear); err != nil {
	return err
	}
	}
	// Write the eof code.
	eof := uint32(1)<<w.litWidth + 1
	if err := w.write(w, eof); err != nil {
	return err
	}
	// Write the final bits.
	if w.nBits > 0 {
	if w.order == MSB {
	w.bits >>= 24
	}
	if err := w.w.WriteByte(uint8(w.bits)); err != nil {
	return err
	}
	}
	return w.w.Flush()
	}

	// Reset clears the Writer's state and allows it to be reused again
	// as a new Writer.
	func (w *Writer) Reset(dst io.Writer, order Order, litWidth int) {
	*w = Writer{}
	w.init(dst, order, litWidth)
	}

	// NewWriter creates a new io.WriteCloser.
	// Writes to the returned io.WriteCloser are compressed and written to w.
	// It is the caller's responsibility to call Close on the WriteCloser when
	// finished writing.
	// The number of bits to use for literal codes, litWidth, must be in the
	// range [2,8] and is typically 8. Input bytes must be less than 1<<litWidth.
	//
	// It is guaranteed that the underlying type of the returned io.WriteCloser
	// is a *Writer.
	func NewWriter(w io.Writer, order Order, litWidth int) io.WriteCloser {
	return newWriter(w, order, litWidth)
	}

	func newWriter(dst io.Writer, order Order, litWidth int) *Writer {
	w := new(Writer)
	w.init(dst, order, litWidth)
	return w
	}

	func (w *Writer) init(dst io.Writer, order Order, litWidth int) {
	switch order {
	case LSB:
	w.write = (*Writer).writeLSB
	case MSB:
	w.write = (*Writer).writeMSB
	default:
	w.err = errors.New("lzw: unknown order")
	return
	}
	if litWidth < 2 \|\| 8 < litWidth {
	w.err = fmt.Errorf("lzw: litWidth %d out of range", litWidth)
	return
	}
	bw, ok := dst.(writer)
	if !ok && dst != nil {
	bw = bufio.NewWriter(dst)
	}
	w.w = bw
	lw := uint(litWidth)
	w.order = order
	w.width = 1 + lw
	w.litWidth = lw
	w.hi = 1<<lw + 1
	w.overflow = 1 << (lw + 1)
	w.savedCode = invalidCode
	}