| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package maphash provides hash functions on byte sequences. |
| // These hash functions are intended to be used to implement hash tables or |
| // other data structures that need to map arbitrary strings or byte |
| // sequences to a uniform distribution on unsigned 64-bit integers. |
| // Each different instance of a hash table or data structure should use its own Seed. |
| // |
| // The hash functions are not cryptographically secure. |
| // (See crypto/sha256 and crypto/sha512 for cryptographic use.) |
| // |
| package maphash |
| |
| import "unsafe" |
| |
| // A Seed is a random value that selects the specific hash function |
| // computed by a Hash. If two Hashes use the same Seeds, they |
| // will compute the same hash values for any given input. |
| // If two Hashes use different Seeds, they are very likely to compute |
| // distinct hash values for any given input. |
| // |
| // A Seed must be initialized by calling MakeSeed. |
| // The zero seed is uninitialized and not valid for use with Hash's SetSeed method. |
| // |
| // Each Seed value is local to a single process and cannot be serialized |
| // or otherwise recreated in a different process. |
| type Seed struct { |
| s uint64 |
| } |
| |
| // A Hash computes a seeded hash of a byte sequence. |
| // |
| // The zero Hash is a valid Hash ready to use. |
| // A zero Hash chooses a random seed for itself during |
| // the first call to a Reset, Write, Seed, Sum64, or Seed method. |
| // For control over the seed, use SetSeed. |
| // |
| // The computed hash values depend only on the initial seed and |
| // the sequence of bytes provided to the Hash object, not on the way |
| // in which the bytes are provided. For example, the three sequences |
| // |
| // h.Write([]byte{'f','o','o'}) |
| // h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o') |
| // h.WriteString("foo") |
| // |
| // all have the same effect. |
| // |
| // Hashes are intended to be collision-resistant, even for situations |
| // where an adversary controls the byte sequences being hashed. |
| // |
| // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is. |
| // If multiple goroutines must compute the same seeded hash, |
| // each can declare its own Hash and call SetSeed with a common Seed. |
| type Hash struct { |
| _ [0]func() // not comparable |
| seed Seed // initial seed used for this hash |
| state Seed // current hash of all flushed bytes |
| buf [64]byte // unflushed byte buffer |
| n int // number of unflushed bytes |
| } |
| |
| // initSeed seeds the hash if necessary. |
| // initSeed is called lazily before any operation that actually uses h.seed/h.state. |
| // Note that this does not include Write/WriteByte/WriteString in the case |
| // where they only add to h.buf. (If they write too much, they call h.flush, |
| // which does call h.initSeed.) |
| func (h *Hash) initSeed() { |
| if h.seed.s == 0 { |
| h.setSeed(MakeSeed()) |
| } |
| } |
| |
| // WriteByte adds b to the sequence of bytes hashed by h. |
| // It never fails; the error result is for implementing io.ByteWriter. |
| func (h *Hash) WriteByte(b byte) error { |
| if h.n == len(h.buf) { |
| h.flush() |
| } |
| h.buf[h.n] = b |
| h.n++ |
| return nil |
| } |
| |
| // Write adds b to the sequence of bytes hashed by h. |
| // It always writes all of b and never fails; the count and error result are for implementing io.Writer. |
| func (h *Hash) Write(b []byte) (int, error) { |
| size := len(b) |
| for h.n+len(b) > len(h.buf) { |
| k := copy(h.buf[h.n:], b) |
| h.n = len(h.buf) |
| b = b[k:] |
| h.flush() |
| } |
| h.n += copy(h.buf[h.n:], b) |
| return size, nil |
| } |
| |
| // WriteString adds the bytes of s to the sequence of bytes hashed by h. |
| // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter. |
| func (h *Hash) WriteString(s string) (int, error) { |
| size := len(s) |
| for h.n+len(s) > len(h.buf) { |
| k := copy(h.buf[h.n:], s) |
| h.n = len(h.buf) |
| s = s[k:] |
| h.flush() |
| } |
| h.n += copy(h.buf[h.n:], s) |
| return size, nil |
| } |
| |
| // Seed returns h's seed value. |
| func (h *Hash) Seed() Seed { |
| h.initSeed() |
| return h.seed |
| } |
| |
| // SetSeed sets h to use seed, which must have been returned by MakeSeed |
| // or by another Hash's Seed method. |
| // Two Hash objects with the same seed behave identically. |
| // Two Hash objects with different seeds will very likely behave differently. |
| // Any bytes added to h before this call will be discarded. |
| func (h *Hash) SetSeed(seed Seed) { |
| h.setSeed(seed) |
| h.n = 0 |
| } |
| |
| // setSeed sets seed without discarding accumulated data. |
| func (h *Hash) setSeed(seed Seed) { |
| if seed.s == 0 { |
| panic("maphash: use of uninitialized Seed") |
| } |
| h.seed = seed |
| h.state = seed |
| } |
| |
| // Reset discards all bytes added to h. |
| // (The seed remains the same.) |
| func (h *Hash) Reset() { |
| h.initSeed() |
| h.state = h.seed |
| h.n = 0 |
| } |
| |
| // precondition: buffer is full. |
| func (h *Hash) flush() { |
| if h.n != len(h.buf) { |
| panic("maphash: flush of partially full buffer") |
| } |
| h.initSeed() |
| h.state.s = rthash(h.buf[:], h.state.s) |
| h.n = 0 |
| } |
| |
| // Sum64 returns h's current 64-bit value, which depends on |
| // h's seed and the sequence of bytes added to h since the |
| // last call to Reset or SetSeed. |
| // |
| // All bits of the Sum64 result are close to uniformly and |
| // independently distributed, so it can be safely reduced |
| // by using bit masking, shifting, or modular arithmetic. |
| func (h *Hash) Sum64() uint64 { |
| h.initSeed() |
| return rthash(h.buf[:h.n], h.state.s) |
| } |
| |
| // MakeSeed returns a new random seed. |
| func MakeSeed() Seed { |
| var s1, s2 uint64 |
| for { |
| s1 = uint64(runtime_fastrand()) |
| s2 = uint64(runtime_fastrand()) |
| // We use seed 0 to indicate an uninitialized seed/hash, |
| // so keep trying until we get a non-zero seed. |
| if s1|s2 != 0 { |
| break |
| } |
| } |
| return Seed{s: s1<<32 + s2} |
| } |
| |
| //go:linkname runtime_fastrand runtime.fastrand |
| func runtime_fastrand() uint32 |
| |
| func rthash(b []byte, seed uint64) uint64 { |
| if len(b) == 0 { |
| return seed |
| } |
| // The runtime hasher only works on uintptr. For 64-bit |
| // architectures, we use the hasher directly. Otherwise, |
| // we use two parallel hashers on the lower and upper 32 bits. |
| if unsafe.Sizeof(uintptr(0)) == 8 { |
| return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))) |
| } |
| lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))) |
| hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b))) |
| return uint64(hi)<<32 | uint64(lo) |
| } |
| |
| //go:linkname runtime_memhash runtime.memhash |
| //go:noescape |
| func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr |
| |
| // Sum appends the hash's current 64-bit value to b. |
| // It exists for implementing hash.Hash. |
| // For direct calls, it is more efficient to use Sum64. |
| func (h *Hash) Sum(b []byte) []byte { |
| x := h.Sum64() |
| return append(b, |
| byte(x>>0), |
| byte(x>>8), |
| byte(x>>16), |
| byte(x>>24), |
| byte(x>>32), |
| byte(x>>40), |
| byte(x>>48), |
| byte(x>>56)) |
| } |
| |
| // Size returns h's hash value size, 8 bytes. |
| func (h *Hash) Size() int { return 8 } |
| |
| // BlockSize returns h's block size. |
| func (h *Hash) BlockSize() int { return len(h.buf) } |