blob: d29d1abdd790a5520356c15629c40ce828c92e5a [file] [edit]
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package flate
import (
"math/bits"
)
const (
// tableBits is the number of bits used in the hash table.
tableBits = 15
// tableSize is the size of the hash table.
tableSize = 1 << tableBits
// hashLongBytes is the number of bytes used for long table hashes.
hashLongBytes = 7
// baseMatchOffset is the smallest match offset.
baseMatchOffset = 1
// baseMatchLength is the smallest match length per RFC section 3.2.5.
baseMatchLength = 3
// maxMatchOffset is the largest match offset.
maxMatchOffset = 1 << 15
// allocHistory is the size to preallocate for history.
allocHistory = maxStoreBlockSize * 5
// bufferReset is the buffer offset at which the history is reset.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1
)
// fastEncL1 to fastEncL6 provides specialized encoders for levels 1-6
// that each provide a different speed/size/memory strategies.
//
// Level 1: Single small table, 5 byte hashes, sparse indexing.
// Level 2: Single big table, 5 byte hashes, indexing ~ every 2 bytes.
// Level 3: Single medium table, 5 byte hashes, 2 candidates per table entry.
// Level 4: Two tables, 4/7 byte hashes, 1 candidate per table entry.
// Level 5: Two tables, 4/7 byte hashes, 2 candidates per 7-byte table entry.
// Level 6: Two tables, 4/7 byte hashes, full indexing, checks for repeats.
//
// Skipping on contiguous non-matches also decreases as levels go up.
// fastEnc is the interface implemented by the level 1-6 fast encoders.
type fastEnc interface {
// encode src into dst.
encode(dst *tokens, src []byte)
// reset the encoder so matches are not made with previous data.
reset()
}
// newFastEnc returns a fastEnc encoder for the given compression level (1-6).
func newFastEnc(level int) fastEnc {
switch level {
case 1:
return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}}
case 2:
return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}}
case 3:
return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}}
case 4:
return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}}
case 5:
return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}}
case 6:
return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}}
default:
panic("invalid level specified")
}
}
// fastGen maintains the table for matches,
// and the previous byte block for level 1 and up.
// This is the generic implementation.
type fastGen struct {
hist []byte
cur int32
}
// addBlock appends src to the history and returns the offset where src starts in e.hist.
func (e *fastGen) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
e.hist = make([]byte, 0, allocHistory)
} else {
if cap(e.hist) < maxMatchOffset*2 {
panic("unexpected buffer size")
}
// Move down
offset := int32(len(e.hist)) - maxMatchOffset
copy(e.hist[0:maxMatchOffset], e.hist[offset:offset+maxMatchOffset])
e.cur += offset
e.hist = e.hist[:maxMatchOffset]
}
}
s := int32(len(e.hist))
e.hist = append(e.hist, src...)
return s
}
// matchLenLimited returns the match length between offsets s and t in src.
// The maximum length returned is maxMatchLength - 4.
// It is assumed that s > t, that t >= 0 and s < len(src).
func (e *fastGen) matchLenLimited(s, t int, src []byte) int32 {
a := src[s:min(s+maxMatchLength-4, len(src))]
b := src[t:]
return int32(matchLen(a, b))
}
// matchLenLong returns the match length between offsets s and t in src.
// It is assumed that s > t, that t >= 0 and s < len(src).
func (e *fastGen) matchLenLong(s, t int, src []byte) int32 {
return int32(matchLen(src[s:], src[t:]))
}
// reset resets the encoding table to prepare for a new compression stream.
func (e *fastGen) reset() {
if cap(e.hist) < allocHistory {
e.hist = make([]byte, 0, allocHistory)
}
// We offset current position so everything will be out of reach.
// If we are above the buffer reset it will be cleared anyway since len(hist) == 0.
if e.cur <= bufferReset {
e.cur += maxMatchOffset + int32(len(e.hist))
}
e.hist = e.hist[:0]
}
func (f *fastGen) getFastGen() *fastGen { return f }
// tableEntry stores the offset of a hash match in the input history.
type tableEntry struct {
offset int32
}
// tableEntryPrev stores the current and previous offsets for a hash entry.
type tableEntryPrev struct {
cur tableEntry
prev tableEntry
}
const (
prime3bytes = 506832829
prime4bytes = 2654435761
prime5bytes = 889523592379
prime6bytes = 227718039650203
prime7bytes = 58295818150454627
prime8bytes = 0xcf1bbcdcb7a56463
)
// hashLen returns a hash of the first n bytes of u, using b output bits.
// It expects 3 <= n <= 8; other values are treated as n == 4.
// The bit length b must be <= 32.
// b and n should be constants in speed-critical use.
func hashLen(u uint64, b, n uint8) uint32 {
switch n {
case 3:
return (uint32(u<<8) * prime3bytes) >> (32 - b)
case 5:
return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - b))
case 6:
return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - b))
case 7:
return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - b))
case 8:
return uint32((u * prime8bytes) >> (64 - b))
default:
return (uint32(u) * prime4bytes) >> (32 - b)
}
}
// matchLen returns the maximum common prefix length of a and b.
// a must be the shortest of the two.
func matchLen(a, b []byte) (n int) {
left := len(a)
for left >= 8 {
diff := loadLE64(a, n) ^ loadLE64(b, n)
if diff != 0 {
return n + bits.TrailingZeros64(diff)>>3
}
n += 8
left -= 8
}
a = a[n:]
b = b[n:]
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
break
}
n++
}
return n
}