| // Copyright 2019 The Go Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style |
| // license that can be found in the LICENSE file. |
| |
| // Package bytes/hash provides hash functions on byte sequences. These |
| // hash functions are intended to be used to implement hash tables or |
| // other data structures that need to map arbitrary strings or byte |
| // sequences to a uniform distribution of integers. The hash functions |
| // are collision-resistant but are not cryptographically secure (use |
| // one of the hash functions in crypto/* if you need that). |
| // |
| // The produced hashes depend only on the sequence of bytes provided |
| // to the Hash object, not on the way in which they are provided. For |
| // example, the calls |
| // h.AddString("foo") |
| // h.AddBytes([]byte{'f','o','o'}) |
| // h.AddByte('f'); h.AddByte('o'); h.AddByte('o') |
| // will all have the same effect. |
| // |
| // Two Hash instances in the same process using the same seed |
| // behave identically. |
| // |
| // Two Hash instances with the same seed in different processes are |
| // not guaranteed to behave identically, even if the processes share |
| // the same binary. |
| // |
| // Hashes are intended to be collision-resistant, even for situations |
| // where an adversary controls the byte sequences being hashed. |
| // All bits of the Hash result are close to uniformly and |
| // independently distributed, so can be safely restricted to a range |
| // using bit masking, shifting, or modular arithmetic. |
| package hash |
| |
| import ( |
| "unsafe" |
| ) |
| |
| // A Seed controls the behavior of a Hash. Two Hash objects with the |
| // same seed in the same process will behave identically. Two Hash |
| // objects with different seeds will very likely behave differently. |
| type Seed struct { |
| s uint64 |
| } |
| |
| // A Hash object is used to compute the hash of a byte sequence. |
| type Hash struct { |
| seed Seed // initial seed used for this hash |
| state Seed // current hash of all flushed bytes |
| buf [64]byte // unflushed byte buffer |
| n int // number of unflushed bytes |
| } |
| |
| // AddByte adds b to the sequence of bytes hashed by h. |
| func (h *Hash) AddByte(b byte) { |
| if h.n == len(h.buf) { |
| h.flush() |
| } |
| h.buf[h.n] = b |
| h.n++ |
| } |
| |
| // AddBytes adds b to the sequence of bytes hashed by h. |
| func (h *Hash) AddBytes(b []byte) { |
| for h.n+len(b) > len(h.buf) { |
| k := copy(h.buf[h.n:], b) |
| h.n = len(h.buf) |
| b = b[k:] |
| h.flush() |
| } |
| h.n += copy(h.buf[h.n:], b) |
| } |
| |
| // AddString adds the bytes of s to the sequence of bytes hashed by h. |
| func (h *Hash) AddString(s string) { |
| for h.n+len(s) > len(h.buf) { |
| k := copy(h.buf[h.n:], s) |
| h.n = len(h.buf) |
| s = s[k:] |
| h.flush() |
| } |
| h.n += copy(h.buf[h.n:], s) |
| } |
| |
| // Seed returns the seed value specified in the most recent call to |
| // SetSeed, or the initial seed if SetSeed was never called. |
| func (h *Hash) Seed() Seed { |
| return h.seed |
| } |
| |
| // SetSeed sets the seed used by h. Two Hash objects with the same |
| // seed in the same process will behave identically. Two Hash objects |
| // with different seeds will very likely behave differently. Any |
| // bytes added to h previous to this call will be discarded. |
| func (h *Hash) SetSeed(seed Seed) { |
| h.seed = seed |
| h.state = seed |
| h.n = 0 |
| } |
| |
| // Reset discards all bytes added to h. |
| // (The seed remains the same.) |
| func (h *Hash) Reset() { |
| h.state = h.seed |
| h.n = 0 |
| } |
| |
| // precondition: buffer is full. |
| func (h *Hash) flush() { |
| if h.n != len(h.buf) { |
| panic("flush of partially full buffer") |
| } |
| h.state.s = rthash(h.buf[:], h.state.s) |
| h.n = 0 |
| } |
| |
| // Hash returns a value which depends on h's seed and the sequence of |
| // bytes added to h (since the last call to Reset or SetSeed). |
| func (h *Hash) Hash() uint64 { |
| return rthash(h.buf[:h.n], h.state.s) |
| } |
| |
| // MakeSeed returns a Seed initialized using the bits in s. |
| // Two seeds generated with the same s are guaranteed to be equal. |
| // Two seeds generated with different s are very likely to be different. |
| // TODO: disallow this? See Alan's comment in the issue. |
| func MakeSeed(s uint64) Seed { |
| return Seed{s: s} |
| } |
| |
| // New returns a new Hash object. Different hash objects allocated by |
| // this function will very likely have different seeds. |
| func New() *Hash { |
| seed := Seed{s: uint64(runtime_fastrand())} |
| return &Hash{ |
| seed: seed, |
| state: seed, |
| } |
| } |
| |
| //go:linkname runtime_fastrand runtime.fastrand |
| func runtime_fastrand() uint32 |
| |
| func rthash(b []byte, seed uint64) uint64 { |
| if len(b) == 0 { |
| return seed |
| } |
| // The runtime hasher only works on uintptr. For 64-bit |
| // architectures, we use the hasher directly. Otherwise, |
| // we use two parallel hashers on the lower and upper 32 bits. |
| if unsafe.Sizeof(uintptr(0)) == 8 { |
| return uint64(runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b)))) |
| } |
| lo := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed), uintptr(len(b))) |
| hi := runtime_memhash(unsafe.Pointer(&b[0]), uintptr(seed>>32), uintptr(len(b))) |
| // TODO: mix lo/hi? Get 64 bits some other way? |
| return uint64(hi)<<32 | uint64(lo) |
| } |
| |
| //go:linkname runtime_memhash runtime.memhash |
| func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr |
| |
| // Wrapper functions so that a bytes/hash.Hash implements |
| // the hash.Hash and hash.Hash64 interfaces. |
| |
| func (h *Hash) Write(b []byte) (int, error) { |
| h.AddBytes(b) |
| return len(b), nil |
| } |
| func (h *Hash) Sum(b []byte) []byte { |
| x := h.Hash() |
| return append(b, |
| byte(x>>0), |
| byte(x>>8), |
| byte(x>>16), |
| byte(x>>24), |
| byte(x>>32), |
| byte(x>>40), |
| byte(x>>48), |
| byte(x>>56)) |
| } |
| func (h *Hash) Sum64() uint64 { |
| return h.Hash() |
| } |
| func (h *Hash) Size() int { return 8 } |
| func (h *Hash) BlockSize() int { return len(h.buf) } |