src/hash/maphash/maphash.go - go - Git at Google

 // Copyright 2019 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 // Package maphash provides hash functions on byte sequences.
 // These hash functions are intended to be used to implement hash tables or
 // other data structures that need to map arbitrary strings or byte
 // sequences to a uniform distribution on unsigned 64-bit integers.
 // Each different instance of a hash table or data structure should use its own Seed.
 //
 // The hash functions are not cryptographically secure.
 // (See crypto/sha256 and crypto/sha512 for cryptographic use.)
 //
 package maphash

 import (
 	"internal/unsafeheader"
 	"unsafe"
 )

 // A Seed is a random value that selects the specific hash function
 // computed by a Hash. If two Hashes use the same Seeds, they
 // will compute the same hash values for any given input.
 // If two Hashes use different Seeds, they are very likely to compute
 // distinct hash values for any given input.
 //
 // A Seed must be initialized by calling MakeSeed.
 // The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
 //
 // Each Seed value is local to a single process and cannot be serialized
 // or otherwise recreated in a different process.
 type Seed struct {
 	s uint64
 }

 // A Hash computes a seeded hash of a byte sequence.
 //
 // The zero Hash is a valid Hash ready to use.
 // A zero Hash chooses a random seed for itself during
 // the first call to a Reset, Write, Seed, or Sum64 method.
 // For control over the seed, use SetSeed.
 //
 // The computed hash values depend only on the initial seed and
 // the sequence of bytes provided to the Hash object, not on the way
 // in which the bytes are provided. For example, the three sequences
 //
 //     h.Write([]byte{'f','o','o'})
 //     h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
 //     h.WriteString("foo")
 //
 // all have the same effect.
 //
 // Hashes are intended to be collision-resistant, even for situations
 // where an adversary controls the byte sequences being hashed.
 //
 // A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
 // If multiple goroutines must compute the same seeded hash,
 // each can declare its own Hash and call SetSeed with a common Seed.
 type Hash struct {
 	_     [0]func()     // not comparable
 	seed  Seed          // initial seed used for this hash
 	state Seed          // current hash of all flushed bytes
 	buf   [bufSize]byte // unflushed byte buffer
 	n     int           // number of unflushed bytes
 }

 // bufSize is the size of the Hash write buffer.
 // The buffer ensures that writes depend only on the sequence of bytes,
 // not the sequence of WriteByte/Write/WriteString calls,
 // by always calling rthash with a full buffer (except for the tail).
 const bufSize = 128

 // initSeed seeds the hash if necessary.
 // initSeed is called lazily before any operation that actually uses h.seed/h.state.
 // Note that this does not include Write/WriteByte/WriteString in the case
 // where they only add to h.buf. (If they write too much, they call h.flush,
 // which does call h.initSeed.)
 func (h *Hash) initSeed() {
 	if h.seed.s == 0 {
 		seed := MakeSeed()
 		h.seed = seed
 		h.state = seed
 	}
 }

 // WriteByte adds b to the sequence of bytes hashed by h.
 // It never fails; the error result is for implementing io.ByteWriter.
 func (h *Hash) WriteByte(b byte) error {
 	if h.n == len(h.buf) {
 		h.flush()
 	}
 	h.buf[h.n] = b
 	h.n++
 	return nil
 }

 // Write adds b to the sequence of bytes hashed by h.
 // It always writes all of b and never fails; the count and error result are for implementing io.Writer.
 func (h *Hash) Write(b []byte) (int, error) {
 	size := len(b)
 	// Deal with bytes left over in h.buf.
 	// h.n <= bufSize is always true.
 	// Checking it is ~free and it lets the compiler eliminate a bounds check.
 	if h.n > 0 && h.n <= bufSize {
 		k := copy(h.buf[h.n:], b)
 		h.n += k
 		if h.n < bufSize {
 			// Copied the entirety of b to h.buf.
 			return size, nil
 		}
 		b = b[k:]
 		h.flush()
 		// No need to set h.n = 0 here; it happens just before exit.
 	}
 	// Process as many full buffers as possible, without copying, and calling initSeed only once.
 	if len(b) > bufSize {
 		h.initSeed()
 		for len(b) > bufSize {
 			h.state.s = rthash(&b[0], bufSize, h.state.s)
 			b = b[bufSize:]
 		}
 	}
 	// Copy the tail.
 	copy(h.buf[:], b)
 	h.n = len(b)
 	return size, nil
 }

 // WriteString adds the bytes of s to the sequence of bytes hashed by h.
 // It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
 func (h *Hash) WriteString(s string) (int, error) {
 	// WriteString mirrors Write. See Write for comments.
 	size := len(s)
 	if h.n > 0 && h.n <= bufSize {
 		k := copy(h.buf[h.n:], s)
 		h.n += k
 		if h.n < bufSize {
 			return size, nil
 		}
 		s = s[k:]
 		h.flush()
 	}
 	if len(s) > bufSize {
 		h.initSeed()
 		for len(s) > bufSize {
 			ptr := (*byte)((*unsafeheader.String)(unsafe.Pointer(&s)).Data)
 			h.state.s = rthash(ptr, bufSize, h.state.s)
 			s = s[bufSize:]
 		}
 	}
 	copy(h.buf[:], s)
 	h.n = len(s)
 	return size, nil
 }

 // Seed returns h's seed value.
 func (h *Hash) Seed() Seed {
 	h.initSeed()
 	return h.seed
 }

 // SetSeed sets h to use seed, which must have been returned by MakeSeed
 // or by another Hash's Seed method.
 // Two Hash objects with the same seed behave identically.
 // Two Hash objects with different seeds will very likely behave differently.
 // Any bytes added to h before this call will be discarded.
 func (h *Hash) SetSeed(seed Seed) {
 	if seed.s == 0 {
 		panic("maphash: use of uninitialized Seed")
 	}
 	h.seed = seed
 	h.state = seed
 	h.n = 0
 }

 // Reset discards all bytes added to h.
 // (The seed remains the same.)
 func (h *Hash) Reset() {
 	h.initSeed()
 	h.state = h.seed
 	h.n = 0
 }

 // precondition: buffer is full.
 func (h *Hash) flush() {
 	if h.n != len(h.buf) {
 		panic("maphash: flush of partially full buffer")
 	}
 	h.initSeed()
 	h.state.s = rthash(&h.buf[0], h.n, h.state.s)
 	h.n = 0
 }

 // Sum64 returns h's current 64-bit value, which depends on
 // h's seed and the sequence of bytes added to h since the
 // last call to Reset or SetSeed.
 //
 // All bits of the Sum64 result are close to uniformly and
 // independently distributed, so it can be safely reduced
 // by using bit masking, shifting, or modular arithmetic.
 func (h *Hash) Sum64() uint64 {
 	h.initSeed()
 	return rthash(&h.buf[0], h.n, h.state.s)
 }

 // MakeSeed returns a new random seed.
 func MakeSeed() Seed {
 	var s1, s2 uint64
 	for {
 		s1 = uint64(runtime_fastrand())
 		s2 = uint64(runtime_fastrand())
 		// We use seed 0 to indicate an uninitialized seed/hash,
 		// so keep trying until we get a non-zero seed.
 		if s1|s2 != 0 {
 			break
 		}
 	}
 	return Seed{s: s1<<32 + s2}
 }

 //go:linkname runtime_fastrand runtime.fastrand
 func runtime_fastrand() uint32

 func rthash(ptr *byte, len int, seed uint64) uint64 {
 	if len == 0 {
 		return seed
 	}
 	// The runtime hasher only works on uintptr. For 64-bit
 	// architectures, we use the hasher directly. Otherwise,
 	// we use two parallel hashers on the lower and upper 32 bits.
 	if unsafe.Sizeof(uintptr(0)) == 8 {
 		return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)))
 	}
 	lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))
 	hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len))
 	return uint64(hi)<<32 | uint64(lo)
 }

 //go:linkname runtime_memhash runtime.memhash
 //go:noescape
 func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr

 // Sum appends the hash's current 64-bit value to b.
 // It exists for implementing hash.Hash.
 // For direct calls, it is more efficient to use Sum64.
 func (h *Hash) Sum(b []byte) []byte {
 	x := h.Sum64()
 	return append(b,
 		byte(x>>0),
 		byte(x>>8),
 		byte(x>>16),
 		byte(x>>24),
 		byte(x>>32),
 		byte(x>>40),
 		byte(x>>48),
 		byte(x>>56))
 }

 // Size returns h's hash value size, 8 bytes.
 func (h *Hash) Size() int { return 8 }

 // BlockSize returns h's block size.
 func (h *Hash) BlockSize() int { return len(h.buf) }
	// Copyright 2019 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	// Package maphash provides hash functions on byte sequences.
	// These hash functions are intended to be used to implement hash tables or
	// other data structures that need to map arbitrary strings or byte
	// sequences to a uniform distribution on unsigned 64-bit integers.
	// Each different instance of a hash table or data structure should use its own Seed.
	//
	// The hash functions are not cryptographically secure.
	// (See crypto/sha256 and crypto/sha512 for cryptographic use.)
	//
	package maphash

	import (
	"internal/unsafeheader"
	"unsafe"
	)

	// A Seed is a random value that selects the specific hash function
	// computed by a Hash. If two Hashes use the same Seeds, they
	// will compute the same hash values for any given input.
	// If two Hashes use different Seeds, they are very likely to compute
	// distinct hash values for any given input.
	//
	// A Seed must be initialized by calling MakeSeed.
	// The zero seed is uninitialized and not valid for use with Hash's SetSeed method.
	//
	// Each Seed value is local to a single process and cannot be serialized
	// or otherwise recreated in a different process.
	type Seed struct {
	s uint64
	}

	// A Hash computes a seeded hash of a byte sequence.
	//
	// The zero Hash is a valid Hash ready to use.
	// A zero Hash chooses a random seed for itself during
	// the first call to a Reset, Write, Seed, or Sum64 method.
	// For control over the seed, use SetSeed.
	//
	// The computed hash values depend only on the initial seed and
	// the sequence of bytes provided to the Hash object, not on the way
	// in which the bytes are provided. For example, the three sequences
	//
	// h.Write([]byte{'f','o','o'})
	// h.WriteByte('f'); h.WriteByte('o'); h.WriteByte('o')
	// h.WriteString("foo")
	//
	// all have the same effect.
	//
	// Hashes are intended to be collision-resistant, even for situations
	// where an adversary controls the byte sequences being hashed.
	//
	// A Hash is not safe for concurrent use by multiple goroutines, but a Seed is.
	// If multiple goroutines must compute the same seeded hash,
	// each can declare its own Hash and call SetSeed with a common Seed.
	type Hash struct {
	_ [0]func() // not comparable
	seed Seed // initial seed used for this hash
	state Seed // current hash of all flushed bytes
	buf [bufSize]byte // unflushed byte buffer
	n int // number of unflushed bytes
	}

	// bufSize is the size of the Hash write buffer.
	// The buffer ensures that writes depend only on the sequence of bytes,
	// not the sequence of WriteByte/Write/WriteString calls,
	// by always calling rthash with a full buffer (except for the tail).
	const bufSize = 128

	// initSeed seeds the hash if necessary.
	// initSeed is called lazily before any operation that actually uses h.seed/h.state.
	// Note that this does not include Write/WriteByte/WriteString in the case
	// where they only add to h.buf. (If they write too much, they call h.flush,
	// which does call h.initSeed.)
	func (h *Hash) initSeed() {
	if h.seed.s == 0 {
	seed := MakeSeed()
	h.seed = seed
	h.state = seed
	}
	}

	// WriteByte adds b to the sequence of bytes hashed by h.
	// It never fails; the error result is for implementing io.ByteWriter.
	func (h *Hash) WriteByte(b byte) error {
	if h.n == len(h.buf) {
	h.flush()
	}
	h.buf[h.n] = b
	h.n++
	return nil
	}

	// Write adds b to the sequence of bytes hashed by h.
	// It always writes all of b and never fails; the count and error result are for implementing io.Writer.
	func (h *Hash) Write(b []byte) (int, error) {
	size := len(b)
	// Deal with bytes left over in h.buf.
	// h.n <= bufSize is always true.
	// Checking it is ~free and it lets the compiler eliminate a bounds check.
	if h.n > 0 && h.n <= bufSize {
	k := copy(h.buf[h.n:], b)
	h.n += k
	if h.n < bufSize {
	// Copied the entirety of b to h.buf.
	return size, nil
	}
	b = b[k:]
	h.flush()
	// No need to set h.n = 0 here; it happens just before exit.
	}
	// Process as many full buffers as possible, without copying, and calling initSeed only once.
	if len(b) > bufSize {
	h.initSeed()
	for len(b) > bufSize {
	h.state.s = rthash(&b[0], bufSize, h.state.s)
	b = b[bufSize:]
	}
	}
	// Copy the tail.
	copy(h.buf[:], b)
	h.n = len(b)
	return size, nil
	}

	// WriteString adds the bytes of s to the sequence of bytes hashed by h.
	// It always writes all of s and never fails; the count and error result are for implementing io.StringWriter.
	func (h *Hash) WriteString(s string) (int, error) {
	// WriteString mirrors Write. See Write for comments.
	size := len(s)
	if h.n > 0 && h.n <= bufSize {
	k := copy(h.buf[h.n:], s)
	h.n += k
	if h.n < bufSize {
	return size, nil
	}
	s = s[k:]
	h.flush()
	}
	if len(s) > bufSize {
	h.initSeed()
	for len(s) > bufSize {
	ptr := (byte)((unsafeheader.String)(unsafe.Pointer(&s)).Data)
	h.state.s = rthash(ptr, bufSize, h.state.s)
	s = s[bufSize:]
	}
	}
	copy(h.buf[:], s)
	h.n = len(s)
	return size, nil
	}

	// Seed returns h's seed value.
	func (h *Hash) Seed() Seed {
	h.initSeed()
	return h.seed
	}

	// SetSeed sets h to use seed, which must have been returned by MakeSeed
	// or by another Hash's Seed method.
	// Two Hash objects with the same seed behave identically.
	// Two Hash objects with different seeds will very likely behave differently.
	// Any bytes added to h before this call will be discarded.
	func (h *Hash) SetSeed(seed Seed) {
	if seed.s == 0 {
	panic("maphash: use of uninitialized Seed")
	}
	h.seed = seed
	h.state = seed
	h.n = 0
	}

	// Reset discards all bytes added to h.
	// (The seed remains the same.)
	func (h *Hash) Reset() {
	h.initSeed()
	h.state = h.seed
	h.n = 0
	}

	// precondition: buffer is full.
	func (h *Hash) flush() {
	if h.n != len(h.buf) {
	panic("maphash: flush of partially full buffer")
	}
	h.initSeed()
	h.state.s = rthash(&h.buf[0], h.n, h.state.s)
	h.n = 0
	}

	// Sum64 returns h's current 64-bit value, which depends on
	// h's seed and the sequence of bytes added to h since the
	// last call to Reset or SetSeed.
	//
	// All bits of the Sum64 result are close to uniformly and
	// independently distributed, so it can be safely reduced
	// by using bit masking, shifting, or modular arithmetic.
	func (h *Hash) Sum64() uint64 {
	h.initSeed()
	return rthash(&h.buf[0], h.n, h.state.s)
	}

	// MakeSeed returns a new random seed.
	func MakeSeed() Seed {
	var s1, s2 uint64
	for {
	s1 = uint64(runtime_fastrand())
	s2 = uint64(runtime_fastrand())
	// We use seed 0 to indicate an uninitialized seed/hash,
	// so keep trying until we get a non-zero seed.
	if s1\|s2 != 0 {
	break
	}
	}
	return Seed{s: s1<<32 + s2}
	}

	//go:linkname runtime_fastrand runtime.fastrand
	func runtime_fastrand() uint32

	func rthash(ptr *byte, len int, seed uint64) uint64 {
	if len == 0 {
	return seed
	}
	// The runtime hasher only works on uintptr. For 64-bit
	// architectures, we use the hasher directly. Otherwise,
	// we use two parallel hashers on the lower and upper 32 bits.
	if unsafe.Sizeof(uintptr(0)) == 8 {
	return uint64(runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len)))
	}
	lo := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed), uintptr(len))
	hi := runtime_memhash(unsafe.Pointer(ptr), uintptr(seed>>32), uintptr(len))
	return uint64(hi)<<32 \| uint64(lo)
	}

	//go:linkname runtime_memhash runtime.memhash
	//go:noescape
	func runtime_memhash(p unsafe.Pointer, seed, s uintptr) uintptr

	// Sum appends the hash's current 64-bit value to b.
	// It exists for implementing hash.Hash.
	// For direct calls, it is more efficient to use Sum64.
	func (h *Hash) Sum(b []byte) []byte {
	x := h.Sum64()
	return append(b,
	byte(x>>0),
	byte(x>>8),
	byte(x>>16),
	byte(x>>24),
	byte(x>>32),
	byte(x>>40),
	byte(x>>48),
	byte(x>>56))
	}

	// Size returns h's hash value size, 8 bytes.
	func (h *Hash) Size() int { return 8 }

	// BlockSize returns h's block size.
	func (h *Hash) BlockSize() int { return len(h.buf) }