hash/maphash: add BloomFilter example

This CL adds an example of using the maphash package to construct a Bloom filter, a compact probabilistic set of values whose hash function and equivalence relation are specified by a Hasher. Change-Id: I1545d75e23c1456178098d09487ba9fed03e5a27 Reviewed-on: https://go-review.googlesource.com/c/go/+/740440 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Robert Griesemer <gri@google.com>
author: Alan Donovan <adonovan@google.com> 2026-01-29 18:14:06 -0500
committer: Alan Donovan <adonovan@google.com> 2026-04-07 15:17:06 -0700
commit: 230d1407b04da5d65505415b1a7b4f6d70c75086 (patch)
tree: 34f6d97f886f75338b82a9ca21f8bd12073916b6
parent: e3b3216242691ab0e02164458bda98346ce36adf (diff)
download: go-230d1407b04da5d65505415b1a7b4f6d70c75086.tar.xz
1 files changed, 157 insertions, 0 deletions
diff --git a/src/hash/maphash/example_bloom_test.go b/src/hash/maphash/example_bloom_test.go
new file mode 100644
index 0000000000..2a2a27e1e4
--- /dev/null
+++ b/src/hash/maphash/example_bloom_test.go
@@ -0,0 +1,157 @@
+// Copyright 2026 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package maphash_test
+
+// This file demonstrates a Bloom filter.
+
+import (
+	"fmt"
+	"hash/maphash"
+	"math"
+	"math/bits"
+)
+
+// BloomFilter is a Bloom filter, a probabilistic space-efficient
+// representation of a set of values of type V based on hashing.
+//
+// It provides two operations: Insert inserts an element and Contains
+// queries whether a value is a member of the set.
+//
+// However, unlike a typical set, the size is independent of the
+// number of elements. The catch: Contains is permitted to report
+// true even for some elements that are not present. The trade-off
+// between space and accuracy is determined by parameters at
+// construction.
+type BloomFilter[V any] struct {
+	hasher maphash.Hasher[V]
+	seeds  []maphash.Seed // each seed determines a hash function
+	bytes  []byte         // bit vector
+}
+
+// NewBloomFilterComparable returns a new BloomFilter for the
+// specified elements using their natural comparison.
+func NewComparableBloomFilter[V comparable](n int, fpProb float64) *BloomFilter[V] {
+	return NewBloomFilter(maphash.ComparableHasher[V]{}, n, fpProb)
+}
+
+// NewBloomFilter constructs a new BloomFilter capable of holding n
+// elements with the specified probability of false positive results,
+// assuming a well dispersed hash function.
+func NewBloomFilter[V any](hasher maphash.Hasher[V], n int, fpProb float64) *BloomFilter[V] {
+	nbytes, nseeds := calibrate(n, fpProb)
+
+	seeds := make([]maphash.Seed, nseeds)
+	for i := range nseeds {
+		seeds[i] = maphash.MakeSeed()
+	}
+
+	return &BloomFilter[V]{
+		hasher: hasher,
+		bytes:  make([]byte, nbytes),
+		seeds:  seeds,
+	}
+}
+
+func (f *BloomFilter[V]) Contains(v V) bool {
+	for _, seed := range f.seeds {
+		index, bit := f.locate(seed, v)
+		if f.bytes[index]&bit == 0 {
+			return false
+		}
+	}
+	return true
+}
+
+func (f *BloomFilter[V]) Insert(v V) {
+	for _, seed := range f.seeds {
+		index, bit := f.locate(seed, v)
+		f.bytes[index] |= bit
+	}
+}
+
+func (f *BloomFilter[V]) locate(seed maphash.Seed, v V) (uint64, byte) {
+	// Optimization note: the dynamic call to hasher.Hash causes h
+	// to escape. You can use a sync.Pool can help mitigate the
+	// allocation cost.
+	//
+	// Alternatively, you could copy and specialize the filter logic
+	// for a specific implementation of maphash.Hasher, allowing
+	// the compiler's escape analysis to determine that the
+	// hasher.Hash call does not in fact cause h to escape.
+	var h maphash.Hash
+	h.SetSeed(seed)
+	f.hasher.Hash(&h, v)
+	hash := h.Sum64()
+
+	index := reduce(hash, uint64(len(f.bytes)))
+	mask := byte(1 << (hash % 8))
+	return index, mask
+}
+
+// reduce maps hash to a value in the interval [0, n).
+// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction
+func reduce(hash, n uint64) uint64 {
+	if n > 1<<32-1 {
+		hi, _ := bits.Mul64(hash, n)
+		return hi
+	}
+	return hash >> 32 * n >> 32
+}
+
+func calibrate(n int, fpProb float64) (bytes, seeds int) {
+	// Following https://en.wikipedia.org/wiki/Bloom_filter:
+	// - k is the number of hash functions,
+	// - m is the size of the bit field;
+	// - n is the number of set bits.
+
+	if n == 0 {
+		return 1, 1
+	}
+
+	logFpProb := math.Log(fpProb)
+	m := -(float64(n) * logFpProb) / (math.Ln2 * math.Ln2)
+
+	// Round up to a byte.
+	// TODO(adonovan): opt: round up to the next allocation size
+	// class (see bytes.growSlice) and then compute the possibly
+	// smaller number of needed seeds based on this higher number.
+	bytes = int(m) / 8
+	if float64(bytes*8) < m {
+		bytes++
+	}
+
+	k := -logFpProb / math.Ln2
+	seeds = max(int(math.Round(k)), 1)
+	return bytes, seeds
+}
+
+func Example_bloomFilter() {
+	// Create a Bloom filter optimized for 2 elements with
+	// a one-in-a-billion false-positive rate.
+	//
+	// (This low rate demands a lot of space: 88 bits and
+	// 30 hash functions. More typical rates are 1-5%;
+	// at 5%, we need only 16 bits and 4 hash functions.)
+	f := NewComparableBloomFilter[string](2, 1e-9)
+
+	// Insert two elements.
+	f.Insert("apple")
+	f.Insert("banana")
+
+	// Check whether elements are present.
+	//
+	// "cherry" was not inserted, but Contains is probabilistic, so
+	// this test will spuriously report Contains("cherry") = true
+	// about once every billion runs.
+	for _, fruit := range []string{"apple", "banana", "cherry"} {
+		fmt.Printf("Contains(%q) = %v\n", fruit, f.Contains(fruit))
+	}
+
+	// Output:
+	//
+	// Contains("apple") = true
+	// Contains("banana") = true
+	// Contains("cherry") = false
+}
author	Alan Donovan <adonovan@google.com>	2026-01-29 18:14:06 -0500
committer	Alan Donovan <adonovan@google.com>	2026-04-07 15:17:06 -0700
commit	230d1407b04da5d65505415b1a7b4f6d70c75086 (patch)
tree	34f6d97f886f75338b82a9ca21f8bd12073916b6
parent	e3b3216242691ab0e02164458bda98346ce36adf (diff)
download	go-230d1407b04da5d65505415b1a7b4f6d70c75086.tar.xz