diff options
| author | Alan Donovan <adonovan@google.com> | 2025-03-12 17:12:41 -0400 |
|---|---|---|
| committer | Alan Donovan <adonovan@google.com> | 2026-04-03 08:21:15 -0700 |
| commit | 330aec810997f89262fa04939a00425194e94216 (patch) | |
| tree | 4e7e2e736050b06fadf72865dd2b02cd32801edf | |
| parent | 40ec033c33802cf6e1236ea8030d882338a457d5 (diff) | |
| download | go-330aec810997f89262fa04939a00425194e94216.tar.xz | |
hash/maphash: add Hasher interface and ComparableHasher impl
Hasher is the standard interface for expressing custom hash
functions and equivalence relations for arbitrary data types.
This allows them to be used in hash-based collection types
such as a hash table (CL 612217) or a Bloom filter (CL 740440).
The ComparableHasher type is an implementation of the Hasher
interface for comparable values that is consistent with their
usual equivalence relation (==).
Fixes #70471
Change-Id: Iaa42eb7017d9c4dab487ad6c842c68a81fa7d28a
Reviewed-on: https://go-review.googlesource.com/c/go/+/657296
Reviewed-by: Ian Lance Taylor <iant@golang.org>
Reviewed-by: Hongxiang Jiang <hxjiang@golang.org>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Auto-Submit: Alan Donovan <adonovan@google.com>
TryBot-Bypass: Alan Donovan <adonovan@google.com>
| -rw-r--r-- | api/next/70471.txt | 6 | ||||
| -rw-r--r-- | doc/next/6-stdlib/99-minor/hash/maphash/70471.md | 4 | ||||
| -rw-r--r-- | src/hash/maphash/hasher.go | 139 | ||||
| -rw-r--r-- | src/hash/maphash/maphash.go | 10 |
4 files changed, 156 insertions, 3 deletions
diff --git a/api/next/70471.txt b/api/next/70471.txt new file mode 100644 index 0000000000..0da3230db6 --- /dev/null +++ b/api/next/70471.txt @@ -0,0 +1,6 @@ +pkg hash/maphash, method (ComparableHasher[$0]) Equal($0, $0) bool #70471 +pkg hash/maphash, method (ComparableHasher[$0]) Hash(*Hash, $0) #70471 +pkg hash/maphash, type ComparableHasher[$0 comparable] struct #70471 +pkg hash/maphash, type Hasher[$0 interface{}] interface { Equal, Hash } #70471 +pkg hash/maphash, type Hasher[$0 interface{}] interface, Equal($0, $0) bool #70471 +pkg hash/maphash, type Hasher[$0 interface{}] interface, Hash(*Hash, $0) #70471 diff --git a/doc/next/6-stdlib/99-minor/hash/maphash/70471.md b/doc/next/6-stdlib/99-minor/hash/maphash/70471.md new file mode 100644 index 0000000000..0cccc19be6 --- /dev/null +++ b/doc/next/6-stdlib/99-minor/hash/maphash/70471.md @@ -0,0 +1,4 @@ +The [Hasher] interface type defines the contract between values of a +particular type and future hash-based data structures such as hash +tables and Bloom filters; see [#70471](/issue/70471). + diff --git a/src/hash/maphash/hasher.go b/src/hash/maphash/hasher.go new file mode 100644 index 0000000000..f2ac0a1478 --- /dev/null +++ b/src/hash/maphash/hasher.go @@ -0,0 +1,139 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package maphash + +// A Hasher defines the interface between a hash-based container and its elements. +// It provides a hash function and an equivalence relation over values +// of type T, enabling those values to be inserted in hash tables +// and similar data structures. +// +// Of course, comparable types can already be used as keys of Go's +// built-in map type, but a Hasher enables non-comparable types to be +// used as keys of a suitable hash table too. +// Hashers may be useful even for comparable types, to define an +// equivalence relation that differs from the usual one (==), such as a +// field-based comparison for a pointer-to-struct type, or a +// case-insensitive comparison for strings, as in this example: +// +// // CaseInsensitive is a Hasher[string] whose +// // equivalence relation ignores letter case. +// type CaseInsensitive struct{} +// +// func (CaseInsensitive) Hash(h *Hash, s string) { +// h.WriteString(strings.ToLower(s)) +// } +// +// func (CaseInsensitive) Equal(x, y string) bool { +// // (We avoid strings.EqualFold as it is not +// // consistent with ToLower for all values.) +// return strings.ToLower(x) == strings.ToLower(y) +// } +// +// A Hasher also permits values to be used with other hash-based data +// structures such as a Bloom filter. +// The [ComparableHasher] type makes it convenient to enable comparable +// types to be used in such data structures under their usual (==) +// equivalence relation. +// +// # Hash invariants +// +// If two values are equal as defined by Equal(x, y), then they must +// have the same hash as defined by the effects of Hash(h, x) on h. +// +// Hashers must be logically stateless: the behavior of the Hash and +// Equal methods depends only on the arguments. +// +// # Writing a good function +// +// When defining a hash function and equivalence relation for a data +// type, it may help to first define a canonical encoding for values +// of that type as a sequence of elements, each being a number, +// string, boolean, or pointer. +// An encoding is canonical if two values that are logically equal +// have the same encoding, even if they are represented differently. +// For example, a canonical case-insensitive encoding of a string is +// [strings.ToLower]. +// +// Once you have defined the encoding, the Hasher's Hash method should +// encode a value into the [Hash] using a sequence of calls to +// [Hash.Write] for byte slices, [Hash.WriteString] for strings, +// [Hash.WriteByte] for bytes, and [WriteComparable] for elements of +// other types. The Hasher's Equal method should compute the +// encodings of two values, then compare their corresponding +// elements, returning false at the first mismatch. +// +// A Hash method may discard information so long as it remains +// consistent with the Equal method as defined above. +// For example, valid implementations of CaseInsensitive.Hash might inspect +// only the first letter of the string, or even use a constant value. +// However, the lossier the hash function, the more frequent +// the hash collisions and the slower the hash table. +// +// Some data types, such as sets, are inherently unordered: the set +// {a, b, c} is equal to the set {c, b, a}. +// In some cases it is possible to define a canonical encoding for a +// set by sorting the elements into some order. +// In other cases this may inefficient, since it may require allocating +// memory, or infeasible, as when there is no convenient order. +// Another way to hash an unordered set is to compute the hash +// for each element separately, then combine all the element hashes +// using a commutative (order-independent) operator such as + or ^. +// +// The Hash method below, for a hypothetical Set type, illustrates +// this approach: +// +// type Set[T comparable] struct{ ... } +// +// type setHasher[T comparable] struct{} +// +// func (setHasher[T]) Hash(hash *maphash.Hash, set *Set[T]) { +// var accum uint64 +// for elem := range set.Elements() { +// // Initialize a hasher for the element, +// // using same seed as the outer hash. +// var sub maphash.Hash +// sub.SetSeed(hash.Seed()) +// +// // Hash the element. +// maphash.WriteComparable(&sub, elem) +// +// // Mix the element's hash into the set's hash. +// accum ^= sub.Sum64() +// } +// maphash.WriteComparable(hash, accum) +// } +// +// In many languages, a data type's hash operation simply returns an +// integer value. +// However, that makes it possible for an adversary to systematically +// construct a large number of values that all have the same hash, +// degrading the asymptotic performance of hash tables in a +// denial-of-service attack known as "hash flooding". +// By contrast, computing hashes as a sequence of values emitted into +// a [Hash] with an unpredictable [Seed] that varies from one hash +// table to another mitigates this attack. +// +// In effect, the Seed chooses one of 2⁶⁴ different hash functions. +// The code example above calls SetSeed on the element's sub-Hasher +// so that it uses the same hash function as for the Set itself, and +// not a random one. +type Hasher[T any] interface { + Hash(*Hash, T) + Equal(x, y T) bool +} + +// ComparableHasher is an implementation of [Hasher] whose +// Equal(x, y) method is consistent with x == y. +// +// ComparableHasher is defined only for comparable types. +// The type system will not prevent you from instantiating a type +// such as ComparableHasher[any]; nonetheless you must not pass +// non-comparable argument values to its Hash or Equal methods. +type ComparableHasher[T comparable] struct { + _ [0]func(T) // disallow comparison, and conversion between ComparableHasher[X] and ComparableHasher[Y] +} + +func (ComparableHasher[T]) Hash(h *Hash, v T) { WriteComparable(h, v) } +func (ComparableHasher[T]) Equal(x, y T) bool { return x == y } diff --git a/src/hash/maphash/maphash.go b/src/hash/maphash/maphash.go index 2629e33453..5cf3f49a51 100644 --- a/src/hash/maphash/maphash.go +++ b/src/hash/maphash/maphash.go @@ -3,9 +3,13 @@ // license that can be found in the LICENSE file. // Package maphash provides hash functions on byte sequences and comparable values. -// These hash functions are intended to be used to implement hash tables or -// other data structures that need to map arbitrary strings or byte -// sequences to a uniform distribution on unsigned 64-bit integers. +// It also defines [Hasher], the interface between a hash function and a hash table. +// +// These hash functions are intended to be used to implement hash +// tables, Bloom filters, and other data structures that need to map +// arbitrary strings or byte sequences to a uniform distribution on +// unsigned 64-bit integers. +// // Each different instance of a hash table or data structure should use its own [Seed]. // // The hash functions are not cryptographically secure. |
