src/hash/maphash/example_bloom_test.go - go - Git at Google

 // Copyright 2026 The Go Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.

 package maphash_test

 // This file demonstrates a Bloom filter.

 import (
 	"fmt"
 	"hash/maphash"
 	"math"
 	"math/bits"
 )

 // BloomFilter is a Bloom filter, a probabilistic space-efficient
 // representation of a set of values of type V based on hashing.
 //
 // It provides two operations: Insert inserts an element and Contains
 // queries whether a value is a member of the set.
 //
 // However, unlike a typical set, the size is independent of the
 // number of elements. The catch: Contains is permitted to report
 // true even for some elements that are not present. The trade-off
 // between space and accuracy is determined by parameters at
 // construction.
 //
 // Use NewBloomFilter or NewComparableBloomFilter to create one.
 type BloomFilter[V any] struct {
 	// hasher writes a value of type V into a maphash.Hash.
 	hasher maphash.Hasher[V]
 	seeds  []maphash.Seed // one seed per hash function; each value sets one bit per seed
 	bytes  []byte         // bit vector of len(bytes)*8 bits
 }

 // NewComparableBloomFilter returns a new BloomFilter for elements of
 // the comparable type V, hashed using maphash.ComparableHasher[V],
 // that is, according to their natural comparison.
 //
 // The parameters n and fpProb are passed unchanged to NewBloomFilter:
 // n is the number of elements the filter should hold and fpProb is
 // the desired probability of a false positive.
 func NewComparableBloomFilter[V comparable](n int, fpProb float64) *BloomFilter[V] {
 	return NewBloomFilter(maphash.ComparableHasher[V]{}, n, fpProb)
 }

 // NewBloomFilter returns an empty BloomFilter sized to hold n
 // elements while keeping the probability of a false positive at
 // about fpProb, provided that hasher disperses values well.
 //
 // Every hash function used by the filter gets its own seed from
 // maphash.MakeSeed.
 func NewBloomFilter[V any](hasher maphash.Hasher[V], n int, fpProb float64) *BloomFilter[V] {
 	size, k := calibrate(n, fpProb)

 	filter := &BloomFilter[V]{
 		hasher: hasher,
 		seeds:  make([]maphash.Seed, k),
 		bytes:  make([]byte, size),
 	}
 	for i := range filter.seeds {
 		filter.seeds[i] = maphash.MakeSeed()
 	}
 	return filter
 }

 // Contains reports whether v may be a member of the set.
 //
 // A false result is definitive: at least one of the bits that
 // inserting v would set is clear. A true result means only that all
 // of those bits are set, which may also be the case for a value
 // that was never inserted.
 func (f *BloomFilter[V]) Contains(v V) bool {
 	for i := range f.seeds {
 		idx, mask := f.locate(f.seeds[i], v)
 		if f.bytes[idx]&mask == 0 {
 			return false
 		}
 	}
 	return true
 }

 // Insert adds v to the set by setting, for each of the filter's
 // hash functions, the bit that the function selects for v.
 func (f *BloomFilter[V]) Insert(v V) {
 	for i := range f.seeds {
 		idx, mask := f.locate(f.seeds[i], v)
 		f.bytes[idx] = f.bytes[idx] | mask
 	}
 }

 // locate returns the position of the bit that the hash function
 // identified by seed assigns to v: the index of a byte within
 // f.bytes, and a mask with exactly one bit set within that byte.
 func (f *BloomFilter[V]) locate(seed maphash.Seed, v V) (uint64, byte) {
 	// Optimization note: the dynamic call to hasher.Hash causes h
 	// to escape. Using a sync.Pool can help mitigate the
 	// allocation cost.
 	//
 	// Alternatively, you could copy and specialize the filter logic
 	// for a specific implementation of maphash.Hasher, allowing
 	// the compiler's escape analysis to determine that the
 	// hasher.Hash call does not in fact cause h to escape.
 	var h maphash.Hash
 	h.SetSeed(seed)
 	f.hasher.Hash(&h, v)
 	sum := h.Sum64()

 	// The low three bits of the hash pick the bit within the byte;
 	// reduce picks the byte.
 	mask := byte(1) << (sum & 7)
 	return reduce(sum, uint64(len(f.bytes))), mask
 }

 // reduce maps hash to a value in the interval [0, n) by taking the
 // high-order half of a product instead of a remainder.
 // See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
 func reduce(hash, n uint64) uint64 {
 	const maxUint32 = 1<<32 - 1
 	if n <= maxUint32 {
 		// Both factors fit in 32 bits, so their product fits in
 		// 64 bits and its upper half is the result.
 		top := hash >> 32
 		return (top * n) >> 32
 	}
 	// Otherwise use the upper 64 bits of the full 128-bit product.
 	hi, _ := bits.Mul64(hash, n)
 	return hi
 }

 // calibrate returns the size in bytes of the bit vector and the
 // number of hash functions (seeds) for a Bloom filter intended to
 // hold n elements with false-positive probability fpProb.
 //
 // If n is zero, or if fpProb is 1 or more, any filter satisfies the
 // request and calibrate returns the smallest one: one byte and one
 // seed. It panics if n is negative, or if n is positive and fpProb
 // is not greater than zero (which includes NaN), since no filter
 // can satisfy such a request.
 func calibrate(n int, fpProb float64) (bytes, seeds int) {
 	// Following https://en.wikipedia.org/wiki/Bloom_filter:
 	// - k is the number of hash functions,
 	// - m is the size of the bit field;
 	// - n is the number of elements to be inserted.

 	if n < 0 {
 		panic("BloomFilter: negative number of elements")
 	}
 	if n == 0 {
 		return 1, 1
 	}
 	if !(fpProb > 0) { // written this way so that NaN is rejected too
 		panic("BloomFilter: false-positive probability must be greater than zero")
 	}
 	if fpProb >= 1 {
 		// The logarithm below would be non-negative, producing an
 		// empty (or negative-sized) bit vector that cannot be indexed.
 		return 1, 1
 	}

 	// From here on logFpProb < 0, so both m and k are positive.
 	logFpProb := math.Log(fpProb)
 	m := -(float64(n) * logFpProb) / (math.Ln2 * math.Ln2)

 	// Round up to a byte.
 	// TODO(adonovan): opt: round up to the next allocation size
 	// class (see bytes.growSlice) and then compute the possibly
 	// smaller number of needed seeds based on this higher number.
 	bytes = int(m) / 8
 	if float64(bytes*8) < m {
 		bytes++
 	}

 	k := -logFpProb / math.Ln2
 	seeds = max(int(math.Round(k)), 1)
 	return bytes, seeds
 }

 func Example_bloomFilter() {
 	// Build a Bloom filter tuned for 2 elements and a
 	// one-in-a-billion false-positive rate.
 	//
 	// (Such a low rate is expensive: 88 bits and 30 hash
 	// functions. Rates of 1-5% are more typical; at 5%,
 	// 16 bits and 4 hash functions suffice.)
 	filter := NewComparableBloomFilter[string](2, 1e-9)

 	// Add two elements.
 	for _, fruit := range []string{"apple", "banana"} {
 		filter.Insert(fruit)
 	}

 	// Query membership.
 	//
 	// "cherry" was never inserted, but because Contains is
 	// probabilistic, roughly one run in a billion will spuriously
 	// print Contains("cherry") = true.
 	queries := []string{"apple", "banana", "cherry"}
 	for _, q := range queries {
 		fmt.Printf("Contains(%q) = %v\n", q, filter.Contains(q))
 	}

 	// Output:
 	//
 	// Contains("apple") = true
 	// Contains("banana") = true
 	// Contains("cherry") = false
 }
	// Copyright 2026 The Go Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style
	// license that can be found in the LICENSE file.

	package maphash_test

	// This file demonstrates a Bloom filter.

	import (
	"fmt"
	"hash/maphash"
	"math"
	"math/bits"
	)

	// BloomFilter is a Bloom filter, a probabilistic space-efficient
	// representation of a set of values of type V based on hashing.
	//
	// It provides two operations: Insert inserts an element and Contains
	// queries whether a value is a member of the set.
	//
	// However, unlike a typical set, the size is independent of the
	// number of elements. The catch: Contains is permitted to report
	// true even for some elements that are not present. The trade-off
	// between space and accuracy is determined by parameters at
	// construction.
	//
	// Use NewBloomFilter or NewComparableBloomFilter to create one.
	type BloomFilter[V any] struct {
	// hasher writes a value of type V into a maphash.Hash.
	hasher maphash.Hasher[V]
	seeds []maphash.Seed // one seed per hash function; each value sets one bit per seed
	bytes []byte // bit vector of len(bytes)*8 bits
	}

	// NewComparableBloomFilter returns a new BloomFilter for elements of
	// the comparable type V, hashed using maphash.ComparableHasher[V],
	// that is, according to their natural comparison.
	//
	// The parameters n and fpProb are passed unchanged to NewBloomFilter:
	// n is the number of elements the filter should hold and fpProb is
	// the desired probability of a false positive.
	func NewComparableBloomFilter[V comparable](n int, fpProb float64) *BloomFilter[V] {
		return NewBloomFilter(maphash.ComparableHasher[V]{}, n, fpProb)
	}

	// NewBloomFilter returns an empty BloomFilter sized to hold n
	// elements while keeping the probability of a false positive at
	// about fpProb, provided that hasher disperses values well.
	//
	// Every hash function used by the filter gets its own seed from
	// maphash.MakeSeed.
	func NewBloomFilter[V any](hasher maphash.Hasher[V], n int, fpProb float64) *BloomFilter[V] {
		size, k := calibrate(n, fpProb)

		filter := &BloomFilter[V]{
			hasher: hasher,
			seeds:  make([]maphash.Seed, k),
			bytes:  make([]byte, size),
		}
		for i := range filter.seeds {
			filter.seeds[i] = maphash.MakeSeed()
		}
		return filter
	}

	// Contains reports whether v may be a member of the set.
	//
	// A false result is definitive: at least one of the bits that
	// inserting v would set is clear. A true result means only that all
	// of those bits are set, which may also be the case for a value
	// that was never inserted.
	func (f *BloomFilter[V]) Contains(v V) bool {
		for i := range f.seeds {
			idx, mask := f.locate(f.seeds[i], v)
			if f.bytes[idx]&mask == 0 {
				return false
			}
		}
		return true
	}

	func (f *BloomFilter[V]) Insert(v V) {
	for _, seed := range f.seeds {
	index, bit := f.locate(seed, v)
	f.bytes[index] \|= bit
	}
	}

	// locate returns the position of the bit that the hash function
	// identified by seed assigns to v: the index of a byte within
	// f.bytes, and a mask with exactly one bit set within that byte.
	func (f *BloomFilter[V]) locate(seed maphash.Seed, v V) (uint64, byte) {
		// Optimization note: the dynamic call to hasher.Hash causes h
		// to escape. Using a sync.Pool can help mitigate the
		// allocation cost.
		//
		// Alternatively, you could copy and specialize the filter logic
		// for a specific implementation of maphash.Hasher, allowing
		// the compiler's escape analysis to determine that the
		// hasher.Hash call does not in fact cause h to escape.
		var h maphash.Hash
		h.SetSeed(seed)
		f.hasher.Hash(&h, v)
		sum := h.Sum64()

		// The low three bits of the hash pick the bit within the byte;
		// reduce picks the byte.
		mask := byte(1) << (sum & 7)
		return reduce(sum, uint64(len(f.bytes))), mask
	}

	// reduce maps hash to a value in the interval [0, n) by taking the
	// high-order half of a product instead of a remainder.
	// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
	func reduce(hash, n uint64) uint64 {
		const maxUint32 = 1<<32 - 1
		if n <= maxUint32 {
			// Both factors fit in 32 bits, so their product fits in
			// 64 bits and its upper half is the result.
			top := hash >> 32
			return (top * n) >> 32
		}
		// Otherwise use the upper 64 bits of the full 128-bit product.
		hi, _ := bits.Mul64(hash, n)
		return hi
	}

	// calibrate returns the size in bytes of the bit vector and the
	// number of hash functions (seeds) for a Bloom filter intended to
	// hold n elements with false-positive probability fpProb.
	//
	// If n is zero, or if fpProb is 1 or more, any filter satisfies the
	// request and calibrate returns the smallest one: one byte and one
	// seed. It panics if n is negative, or if n is positive and fpProb
	// is not greater than zero (which includes NaN), since no filter
	// can satisfy such a request.
	func calibrate(n int, fpProb float64) (bytes, seeds int) {
		// Following https://en.wikipedia.org/wiki/Bloom_filter:
		// - k is the number of hash functions,
		// - m is the size of the bit field;
		// - n is the number of elements to be inserted.

		if n < 0 {
			panic("BloomFilter: negative number of elements")
		}
		if n == 0 {
			return 1, 1
		}
		if !(fpProb > 0) { // written this way so that NaN is rejected too
			panic("BloomFilter: false-positive probability must be greater than zero")
		}
		if fpProb >= 1 {
			// The logarithm below would be non-negative, producing an
			// empty (or negative-sized) bit vector that cannot be indexed.
			return 1, 1
		}

		// From here on logFpProb < 0, so both m and k are positive.
		logFpProb := math.Log(fpProb)
		m := -(float64(n) * logFpProb) / (math.Ln2 * math.Ln2)

		// Round up to a byte.
		// TODO(adonovan): opt: round up to the next allocation size
		// class (see bytes.growSlice) and then compute the possibly
		// smaller number of needed seeds based on this higher number.
		bytes = int(m) / 8
		if float64(bytes*8) < m {
			bytes++
		}

		k := -logFpProb / math.Ln2
		seeds = max(int(math.Round(k)), 1)
		return bytes, seeds
	}

	func Example_bloomFilter() {
		// Build a Bloom filter tuned for 2 elements and a
		// one-in-a-billion false-positive rate.
		//
		// (Such a low rate is expensive: 88 bits and 30 hash
		// functions. Rates of 1-5% are more typical; at 5%,
		// 16 bits and 4 hash functions suffice.)
		filter := NewComparableBloomFilter[string](2, 1e-9)

		// Add two elements.
		for _, fruit := range []string{"apple", "banana"} {
			filter.Insert(fruit)
		}

		// Query membership.
		//
		// "cherry" was never inserted, but because Contains is
		// probabilistic, roughly one run in a billion will spuriously
		// print Contains("cherry") = true.
		queries := []string{"apple", "banana", "cherry"}
		for _, q := range queries {
			fmt.Printf("Contains(%q) = %v\n", q, filter.Contains(q))
		}

		// Output:
		//
		// Contains("apple") = true
		// Contains("banana") = true
		// Contains("cherry") = false
	}