src/hash/maphash/hasher.go - go - Git at Google

 // Copyright 2025 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package maphash

 // A Hasher defines the interface between a hash-based container and its elements.
 // It provides a hash function and an equivalence relation over values
 // of type T, enabling those values to be inserted in hash tables
 // and similar data structures.
 //
 // Of course, comparable types can already be used as keys of Go's
 // built-in map type, but a Hasher enables non-comparable types to be
 // used as keys of a suitable hash table too.
 // Hashers may be useful even for comparable types, to define an
 // equivalence relation that differs from the usual one (==), such as a
 // field-based comparison for a pointer-to-struct type, or a
 // case-insensitive comparison for strings, as in this example:
 //
 //	// CaseInsensitive is a Hasher[string] whose
 //	// equivalence relation ignores letter case.
 //	type CaseInsensitive struct{}
 //
 //	func (CaseInsensitive) Hash(h *Hash, s string) {
 //		h.WriteString(strings.ToLower(s))
 //	}
 //
 //	func (CaseInsensitive) Equal(x, y string) bool {
 //		// (We avoid strings.EqualFold as it is not
 //		// consistent with ToLower for all values.)
 //		return strings.ToLower(x) == strings.ToLower(y)
 //	}
 //
 // A Hasher also permits values to be used with other hash-based data
 // structures such as a Bloom filter.
 // The [ComparableHasher] type makes it convenient to enable comparable
 // types to be used in such data structures under their usual (==)
 // equivalence relation.
 //
 // # Hash invariants
 //
 // If two values are equal as defined by Equal(x, y), then they must
 // have the same hash as defined by the effects of Hash(h, x) on h.
 //
 // Hashers must be logically stateless: the behavior of the Hash and
 // Equal methods depends only on the arguments.
 //
 // # Writing a good function
 //
 // When defining a hash function and equivalence relation for a data
 // type, it may help to first define a canonical encoding for values
 // of that type as a sequence of elements, each being a number,
 // string, boolean, or pointer.
 // An encoding is canonical if two values that are logically equal
 // have the same encoding, even if they are represented differently.
 // For example, a canonical case-insensitive encoding of a string is
 // [strings.ToLower].
 //
 // Once you have defined the encoding, the Hasher's Hash method should
 // encode a value into the [Hash] using a sequence of calls to
 // [Hash.Write] for byte slices, [Hash.WriteString] for strings,
 // [Hash.WriteByte] for bytes, and [WriteComparable] for elements of
 // other types. The Hasher's Equal method should compute the
 // encodings of two values, then compare their corresponding
 // elements, returning false at the first mismatch.
 //
 // A Hash method may discard information so long as it remains
 // consistent with the Equal method as defined above.
 // For example, valid implementations of CaseInsensitive.Hash might inspect
 // only the first letter of the string, or even use a constant value.
 // However, the lossier the hash function, the more frequent
 // the hash collisions and the slower the hash table.
 //
 // Some data types, such as sets, are inherently unordered: the set
 // {a, b, c} is equal to the set {c, b, a}.
 // In some cases it is possible to define a canonical encoding for a
 // set by sorting the elements into some order.
 // In other cases this may inefficient, since it may require allocating
 // memory, or infeasible, as when there is no convenient order.
 // Another way to hash an unordered set is to compute the hash
 // for each element separately, then combine all the element hashes
 // using a commutative (order-independent) operator such as + or ^.
 //
 // The Hash method below, for a hypothetical Set type, illustrates
 // this approach:
 //
 //	type Set[T comparable] struct{ ... }
 //
 //	type setHasher[T comparable] struct{}
 //
 //	func (setHasher[T]) Hash(hash *maphash.Hash, set *Set[T]) {
 //		var accum uint64
 //		for elem := range set.Elements() {
 //			// Initialize a hasher for the element,
 //			// using same seed as the outer hash.
 //			var sub maphash.Hash
 //			sub.SetSeed(hash.Seed())
 //
 //			// Hash the element.
 //			maphash.WriteComparable(&sub, elem)
 //
 //			// Mix the element's hash into the set's hash.
 //			accum ^= sub.Sum64()
 //		}
 //		maphash.WriteComparable(hash, accum)
 //	}
 //
 // In many languages, a data type's hash operation simply returns an
 // integer value.
 // However, that makes it possible for an adversary to systematically
 // construct a large number of values that all have the same hash,
 // degrading the asymptotic performance of hash tables in a
 // denial-of-service attack known as "hash flooding".
 // By contrast, computing hashes as a sequence of values emitted into
 // a [Hash] with an unpredictable [Seed] that varies from one hash
 // table to another mitigates this attack.
 //
 // In effect, the Seed chooses one of 2⁶⁴ different hash functions.
 // The code example above calls SetSeed on the element's sub-Hasher
 // so that it uses the same hash function as for the Set itself, and
 // not a random one.
 type Hasher[T any] interface {
 	Hash(*Hash, T)
 	Equal(x, y T) bool
 }

 // ComparableHasher is an implementation of [Hasher] whose
 // Equal(x, y) method is consistent with x == y.
 //
 // ComparableHasher is defined only for comparable types.
 // The type system will not prevent you from instantiating a type
 // such as ComparableHasher[any]; nonetheless you must not pass
 // non-comparable argument values to its Hash or Equal methods.
 type ComparableHasher[T comparable] struct {
 	_ [0]func(T) // disallow comparison, and conversion between ComparableHasher[X] and ComparableHasher[Y]
 }

 func (ComparableHasher[T]) Hash(h *Hash, v T) { WriteComparable(h, v) }
 func (ComparableHasher[T]) Equal(x, y T) bool { return x == y }
	// Copyright 2025 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package maphash

	// A Hasher defines the interface between a hash-based container and its elements.
	// It provides a hash function and an equivalence relation over values
	// of type T, enabling those values to be inserted in hash tables
	// and similar data structures.
	//
	// Of course, comparable types can already be used as keys of Go's
	// built-in map type, but a Hasher enables non-comparable types to be
	// used as keys of a suitable hash table too.
	// Hashers may be useful even for comparable types, to define an
	// equivalence relation that differs from the usual one (==), such as a
	// field-based comparison for a pointer-to-struct type, or a
	// case-insensitive comparison for strings, as in this example:
	//
	// // CaseInsensitive is a Hasher[string] whose
	// // equivalence relation ignores letter case.
	// type CaseInsensitive struct{}
	//
	// func (CaseInsensitive) Hash(h *Hash, s string) {
	// h.WriteString(strings.ToLower(s))
	// }
	//
	// func (CaseInsensitive) Equal(x, y string) bool {
	// // (We avoid strings.EqualFold as it is not
	// // consistent with ToLower for all values.)
	// return strings.ToLower(x) == strings.ToLower(y)
	// }
	//
	// A Hasher also permits values to be used with other hash-based data
	// structures such as a Bloom filter.
	// The [ComparableHasher] type makes it convenient to enable comparable
	// types to be used in such data structures under their usual (==)
	// equivalence relation.
	//
	// # Hash invariants
	//
	// If two values are equal as defined by Equal(x, y), then they must
	// have the same hash as defined by the effects of Hash(h, x) on h.
	//
	// Hashers must be logically stateless: the behavior of the Hash and
	// Equal methods depends only on the arguments.
	//
	// # Writing a good function
	//
	// When defining a hash function and equivalence relation for a data
	// type, it may help to first define a canonical encoding for values
	// of that type as a sequence of elements, each being a number,
	// string, boolean, or pointer.
	// An encoding is canonical if two values that are logically equal
	// have the same encoding, even if they are represented differently.
	// For example, a canonical case-insensitive encoding of a string is
	// [strings.ToLower].
	//
	// Once you have defined the encoding, the Hasher's Hash method should
	// encode a value into the [Hash] using a sequence of calls to
	// [Hash.Write] for byte slices, [Hash.WriteString] for strings,
	// [Hash.WriteByte] for bytes, and [WriteComparable] for elements of
	// other types. The Hasher's Equal method should compute the
	// encodings of two values, then compare their corresponding
	// elements, returning false at the first mismatch.
	//
	// A Hash method may discard information so long as it remains
	// consistent with the Equal method as defined above.
	// For example, valid implementations of CaseInsensitive.Hash might inspect
	// only the first letter of the string, or even use a constant value.
	// However, the lossier the hash function, the more frequent
	// the hash collisions and the slower the hash table.
	//
	// Some data types, such as sets, are inherently unordered: the set
	// {a, b, c} is equal to the set {c, b, a}.
	// In some cases it is possible to define a canonical encoding for a
	// set by sorting the elements into some order.
	// In other cases this may inefficient, since it may require allocating
	// memory, or infeasible, as when there is no convenient order.
	// Another way to hash an unordered set is to compute the hash
	// for each element separately, then combine all the element hashes
	// using a commutative (order-independent) operator such as + or ^.
	//
	// The Hash method below, for a hypothetical Set type, illustrates
	// this approach:
	//
	// type Set[T comparable] struct{ ... }
	//
	// type setHasher[T comparable] struct{}
	//
	// func (setHasher[T]) Hash(hash maphash.Hash, set Set[T]) {
	// var accum uint64
	// for elem := range set.Elements() {
	// // Initialize a hasher for the element,
	// // using same seed as the outer hash.
	// var sub maphash.Hash
	// sub.SetSeed(hash.Seed())
	//
	// // Hash the element.
	// maphash.WriteComparable(&sub, elem)
	//
	// // Mix the element's hash into the set's hash.
	// accum ^= sub.Sum64()
	// }
	// maphash.WriteComparable(hash, accum)
	// }
	//
	// In many languages, a data type's hash operation simply returns an
	// integer value.
	// However, that makes it possible for an adversary to systematically
	// construct a large number of values that all have the same hash,
	// degrading the asymptotic performance of hash tables in a
	// denial-of-service attack known as "hash flooding".
	// By contrast, computing hashes as a sequence of values emitted into
	// a [Hash] with an unpredictable [Seed] that varies from one hash
	// table to another mitigates this attack.
	//
	// In effect, the Seed chooses one of 2⁶⁴ different hash functions.
	// The code example above calls SetSeed on the element's sub-Hasher
	// so that it uses the same hash function as for the Set itself, and
	// not a random one.
	type Hasher[T any] interface {
	Hash(*Hash, T)
	Equal(x, y T) bool
	}

	// ComparableHasher is an implementation of [Hasher] whose
	// Equal(x, y) method is consistent with x == y.
	//
	// ComparableHasher is defined only for comparable types.
	// The type system will not prevent you from instantiating a type
	// such as ComparableHasher[any]; nonetheless you must not pass
	// non-comparable argument values to its Hash or Equal methods.
	type ComparableHasher[T comparable] struct {
	_ [0]func(T) // disallow comparison, and conversion between ComparableHasher[X] and ComparableHasher[Y]
	}

	func (ComparableHasher[T]) Hash(h *Hash, v T) { WriteComparable(h, v) }
	func (ComparableHasher[T]) Equal(x, y T) bool { return x == y }