src/compress/flate/dict_decoder.go - go - Git at Google

 // Copyright 2016 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package flate

 // dictDecoder implements the LZ77 sliding dictionary as used in decompression.
 // LZ77 decompresses data through sequences of two forms of commands:
 //
 //	* Literal insertions: Runs of one or more symbols are inserted into the data
 //	stream as is. This is accomplished through the writeByte method for a
 //	single symbol, or combinations of writeSlice/writeMark for multiple symbols.
 //	Any valid stream must start with a literal insertion if no preset dictionary
 //	is used.
 //
 //	* Backward copies: Runs of one or more symbols are copied from previously
 //	emitted data. Backward copies come as the tuple (dist, length) where dist
 //	determines how far back in the stream to copy from and length determines how
 //	many bytes to copy. Note that it is valid for the length to be greater than
 //	the distance. Since LZ77 uses forward copies, that situation is used to
 //	perform a form of run-length encoding on repeated runs of symbols.
 //	The writeCopy and tryWriteCopy are used to implement this command.
 //
 // For performance reasons, this implementation performs little to no sanity
 // checks about the arguments. As such, the invariants documented for each
 // method call must be respected.
 type dictDecoder struct {
 	hist []byte // Sliding window history

 	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
 	wrPos int  // Current output position in buffer
 	rdPos int  // Have emitted hist[:rdPos] already
 	full  bool // Has a full window length been written yet?
 }

 // init initializes dictDecoder to have a sliding window dictionary of the given
 // size. If a preset dict is provided, it will initialize the dictionary with
 // the contents of dict.
 func (dd *dictDecoder) init(size int, dict []byte) {
 	*dd = dictDecoder{hist: dd.hist}

 	if cap(dd.hist) < size {
 		dd.hist = make([]byte, size)
 	}
 	dd.hist = dd.hist[:size]

 	if len(dict) > len(dd.hist) {
 		dict = dict[len(dict)-len(dd.hist):]
 	}
 	dd.wrPos = copy(dd.hist, dict)
 	if dd.wrPos == len(dd.hist) {
 		dd.wrPos = 0
 		dd.full = true
 	}
 	dd.rdPos = dd.wrPos
 }

 // histSize reports the total amount of historical data in the dictionary.
 func (dd *dictDecoder) histSize() int {
 	if dd.full {
 		return len(dd.hist)
 	}
 	return dd.wrPos
 }

 // availRead reports the number of bytes that can be flushed by readFlush.
 func (dd *dictDecoder) availRead() int {
 	return dd.wrPos - dd.rdPos
 }

 // availWrite reports the available amount of output buffer space.
 func (dd *dictDecoder) availWrite() int {
 	return len(dd.hist) - dd.wrPos
 }

 // writeSlice returns a slice of the available buffer to write data to.
 //
 // This invariant will be kept: len(s) <= availWrite()
 func (dd *dictDecoder) writeSlice() []byte {
 	return dd.hist[dd.wrPos:]
 }

 // writeMark advances the writer pointer by cnt.
 //
 // This invariant must be kept: 0 <= cnt <= availWrite()
 func (dd *dictDecoder) writeMark(cnt int) {
 	dd.wrPos += cnt
 }

 // writeByte writes a single byte to the dictionary.
 //
 // This invariant must be kept: 0 < availWrite()
 func (dd *dictDecoder) writeByte(c byte) {
 	dd.hist[dd.wrPos] = c
 	dd.wrPos++
 }

 // writeCopy copies a string at a given (dist, length) to the output.
 // This returns the number of bytes copied and may be less than the requested
 // length if the available space in the output buffer is too small.
 //
 // This invariant must be kept: 0 < dist <= histSize()
 func (dd *dictDecoder) writeCopy(dist, length int) int {
 	dstBase := dd.wrPos
 	dstPos := dstBase
 	srcPos := dstPos - dist
 	endPos := dstPos + length
 	if endPos > len(dd.hist) {
 		endPos = len(dd.hist)
 	}

 	// Copy non-overlapping section after destination position.
 	//
 	// This section is non-overlapping in that the copy length for this section
 	// is always less than or equal to the backwards distance. This can occur
 	// if a distance refers to data that wraps-around in the buffer.
 	// Thus, a backwards copy is performed here; that is, the exact bytes in
 	// the source prior to the copy is placed in the destination.
 	if srcPos < 0 {
 		srcPos += len(dd.hist)
 		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
 		srcPos = 0
 	}

 	// Copy possibly overlapping section before destination position.
 	//
 	// This section can overlap if the copy length for this section is larger
 	// than the backwards distance. This is allowed by LZ77 so that repeated
 	// strings can be succinctly represented using (dist, length) pairs.
 	// Thus, a forwards copy is performed here; that is, the bytes copied is
 	// possibly dependent on the resulting bytes in the destination as the copy
 	// progresses along. This is functionally equivalent to the following:
 	//
 	//	for i := 0; i < endPos-dstPos; i++ {
 	//		dd.hist[dstPos+i] = dd.hist[srcPos+i]
 	//	}
 	//	dstPos = endPos
 	//
 	for dstPos < endPos {
 		dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
 	}

 	dd.wrPos = dstPos
 	return dstPos - dstBase
 }

 // tryWriteCopy tries to copy a string at a given (distance, length) to the
 // output. This specialized version is optimized for short distances.
 //
 // This method is designed to be inlined for performance reasons.
 //
 // This invariant must be kept: 0 < dist <= histSize()
 func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
 	dstPos := dd.wrPos
 	endPos := dstPos + length
 	if dstPos < dist || endPos > len(dd.hist) {
 		return 0
 	}
 	dstBase := dstPos
 	srcPos := dstPos - dist

 	// Copy possibly overlapping section before destination position.
 loop:
 	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
 	if dstPos < endPos {
 		goto loop // Avoid for-loop so that this function can be inlined
 	}

 	dd.wrPos = dstPos
 	return dstPos - dstBase
 }

 // readFlush returns a slice of the historical buffer that is ready to be
 // emitted to the user. The data returned by readFlush must be fully consumed
 // before calling any other dictDecoder methods.
 func (dd *dictDecoder) readFlush() []byte {
 	toRead := dd.hist[dd.rdPos:dd.wrPos]
 	dd.rdPos = dd.wrPos
 	if dd.wrPos == len(dd.hist) {
 		dd.wrPos, dd.rdPos = 0, 0
 		dd.full = true
 	}
 	return toRead
 }
	// Copyright 2016 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package flate

	// dictDecoder implements the LZ77 sliding dictionary as used in decompression.
	// LZ77 decompresses data through sequences of two forms of commands:
	//
	// * Literal insertions: Runs of one or more symbols are inserted into the data
	// stream as is. This is accomplished through the writeByte method for a
	// single symbol, or combinations of writeSlice/writeMark for multiple symbols.
	// Any valid stream must start with a literal insertion if no preset dictionary
	// is used.
	//
	// * Backward copies: Runs of one or more symbols are copied from previously
	// emitted data. Backward copies come as the tuple (dist, length) where dist
	// determines how far back in the stream to copy from and length determines how
	// many bytes to copy. Note that it is valid for the length to be greater than
	// the distance. Since LZ77 uses forward copies, that situation is used to
	// perform a form of run-length encoding on repeated runs of symbols.
	// The writeCopy and tryWriteCopy are used to implement this command.
	//
	// For performance reasons, this implementation performs little to no sanity
	// checks about the arguments. As such, the invariants documented for each
	// method call must be respected.
	type dictDecoder struct {
	hist []byte // Sliding window history

	// Invariant: 0 <= rdPos <= wrPos <= len(hist)
	wrPos int // Current output position in buffer
	rdPos int // Have emitted hist[:rdPos] already
	full bool // Has a full window length been written yet?
	}

	// init initializes dictDecoder to have a sliding window dictionary of the given
	// size. If a preset dict is provided, it will initialize the dictionary with
	// the contents of dict.
	func (dd *dictDecoder) init(size int, dict []byte) {
	*dd = dictDecoder{hist: dd.hist}

	if cap(dd.hist) < size {
	dd.hist = make([]byte, size)
	}
	dd.hist = dd.hist[:size]

	if len(dict) > len(dd.hist) {
	dict = dict[len(dict)-len(dd.hist):]
	}
	dd.wrPos = copy(dd.hist, dict)
	if dd.wrPos == len(dd.hist) {
	dd.wrPos = 0
	dd.full = true
	}
	dd.rdPos = dd.wrPos
	}

	// histSize reports the total amount of historical data in the dictionary.
	func (dd *dictDecoder) histSize() int {
	if dd.full {
	return len(dd.hist)
	}
	return dd.wrPos
	}

	// availRead reports the number of bytes that can be flushed by readFlush.
	func (dd *dictDecoder) availRead() int {
	return dd.wrPos - dd.rdPos
	}

	// availWrite reports the available amount of output buffer space.
	func (dd *dictDecoder) availWrite() int {
	return len(dd.hist) - dd.wrPos
	}

	// writeSlice returns a slice of the available buffer to write data to.
	//
	// This invariant will be kept: len(s) <= availWrite()
	func (dd *dictDecoder) writeSlice() []byte {
	return dd.hist[dd.wrPos:]
	}

	// writeMark advances the writer pointer by cnt.
	//
	// This invariant must be kept: 0 <= cnt <= availWrite()
	func (dd *dictDecoder) writeMark(cnt int) {
	dd.wrPos += cnt
	}

	// writeByte writes a single byte to the dictionary.
	//
	// This invariant must be kept: 0 < availWrite()
	func (dd *dictDecoder) writeByte(c byte) {
	dd.hist[dd.wrPos] = c
	dd.wrPos++
	}

	// writeCopy copies a string at a given (dist, length) to the output.
	// This returns the number of bytes copied and may be less than the requested
	// length if the available space in the output buffer is too small.
	//
	// This invariant must be kept: 0 < dist <= histSize()
	func (dd *dictDecoder) writeCopy(dist, length int) int {
	dstBase := dd.wrPos
	dstPos := dstBase
	srcPos := dstPos - dist
	endPos := dstPos + length
	if endPos > len(dd.hist) {
	endPos = len(dd.hist)
	}

	// Copy non-overlapping section after destination position.
	//
	// This section is non-overlapping in that the copy length for this section
	// is always less than or equal to the backwards distance. This can occur
	// if a distance refers to data that wraps-around in the buffer.
	// Thus, a backwards copy is performed here; that is, the exact bytes in
	// the source prior to the copy is placed in the destination.
	if srcPos < 0 {
	srcPos += len(dd.hist)
	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:])
	srcPos = 0
	}

	// Copy possibly overlapping section before destination position.
	//
	// This section can overlap if the copy length for this section is larger
	// than the backwards distance. This is allowed by LZ77 so that repeated
	// strings can be succinctly represented using (dist, length) pairs.
	// Thus, a forwards copy is performed here; that is, the bytes copied is
	// possibly dependent on the resulting bytes in the destination as the copy
	// progresses along. This is functionally equivalent to the following:
	//
	// for i := 0; i < endPos-dstPos; i++ {
	// dd.hist[dstPos+i] = dd.hist[srcPos+i]
	// }
	// dstPos = endPos
	//
	for dstPos < endPos {
	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
	}

	dd.wrPos = dstPos
	return dstPos - dstBase
	}

	// tryWriteCopy tries to copy a string at a given (distance, length) to the
	// output. This specialized version is optimized for short distances.
	//
	// This method is designed to be inlined for performance reasons.
	//
	// This invariant must be kept: 0 < dist <= histSize()
	func (dd *dictDecoder) tryWriteCopy(dist, length int) int {
	dstPos := dd.wrPos
	endPos := dstPos + length
	if dstPos < dist \|\| endPos > len(dd.hist) {
	return 0
	}
	dstBase := dstPos
	srcPos := dstPos - dist

	// Copy possibly overlapping section before destination position.
	loop:
	dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos])
	if dstPos < endPos {
	goto loop // Avoid for-loop so that this function can be inlined
	}

	dd.wrPos = dstPos
	return dstPos - dstBase
	}

	// readFlush returns a slice of the historical buffer that is ready to be
	// emitted to the user. The data returned by readFlush must be fully consumed
	// before calling any other dictDecoder methods.
	func (dd *dictDecoder) readFlush() []byte {
	toRead := dd.hist[dd.rdPos:dd.wrPos]
	dd.rdPos = dd.wrPos
	if dd.wrPos == len(dd.hist) {
	dd.wrPos, dd.rdPos = 0, 0
	dd.full = true
	}
	return toRead
	}