From a2cc178edf7908a2686089bdc35d1e6d134c7dd1 Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Wed, 12 Mar 2025 18:13:04 -0400 Subject: go/types: Hasher, a hash function for Types This CL defines a hash function for Types using the maphash.Hasher API proposed in #70471 (CL 657296) that is consistent with the equivalence relation of types.Identical. It also defines a variant, HasherIgnoreTypes, that is consistent with types.IdenticalIgnoreTags. (The actual hash functions are identical: both ignore tags.) The logic of the hash function was derived from golang.org/x/tools/go/types/typeutil.Hash. + test, doc, relnote Fixes #69420 Updates #70471 Change-Id: I1947cda6aec229d56eeda13decfb05eb266770f9 Reviewed-on: https://go-review.googlesource.com/c/go/+/657297 Reviewed-by: Robert Griesemer LUCI-TryBot-Result: Go LUCI Reviewed-by: Jonathan Amsterdam Auto-Submit: Alan Donovan --- api/next/69420.txt | 6 + doc/next/6-stdlib/99-minor/go/types/69420.md | 4 + src/go/build/deps_test.go | 9 +- src/go/types/hash.go | 299 +++++++++++++++++++++++++++ src/go/types/hash_test.go | 264 +++++++++++++++++++++++ 5 files changed, 579 insertions(+), 3 deletions(-) create mode 100644 api/next/69420.txt create mode 100644 doc/next/6-stdlib/99-minor/go/types/69420.md create mode 100644 src/go/types/hash.go create mode 100644 src/go/types/hash_test.go diff --git a/api/next/69420.txt b/api/next/69420.txt new file mode 100644 index 0000000000..020948aa72 --- /dev/null +++ b/api/next/69420.txt @@ -0,0 +1,6 @@ +pkg go/types, method (Hasher) Equal(Type, Type) bool #69420 +pkg go/types, method (Hasher) Hash(*maphash.Hash, Type) #69420 +pkg go/types, method (HasherIgnoreTags) Equal(Type, Type) bool #69420 +pkg go/types, method (HasherIgnoreTags) Hash(*maphash.Hash, Type) #69420 +pkg go/types, type Hasher struct #69420 +pkg go/types, type HasherIgnoreTags struct #69420 diff --git a/doc/next/6-stdlib/99-minor/go/types/69420.md b/doc/next/6-stdlib/99-minor/go/types/69420.md new file mode 100644 index 0000000000..8e2f35ceeb --- /dev/null +++ b/doc/next/6-stdlib/99-minor/go/types/69420.md @@ -0,0 +1,4 @@ +The [Hasher] type is an implementation of `maphash.Hasher` for [Type]s +that respects the [Identical] equivalence relation, allowing `Types` +to be used in hash tables and similar data structures (see `container/hash`). +[HasherIgnoreTags] is the analogous hasher for [IdenticalIgnoreTags]. diff --git a/src/go/build/deps_test.go b/src/go/build/deps_test.go index 7b3a8933c7..2f424e7e3c 100644 --- a/src/go/build/deps_test.go +++ b/src/go/build/deps_test.go @@ -363,9 +363,6 @@ var depsRules = ` FMT, internal/goexperiment < internal/buildcfg; - container/heap, go/constant, go/parser, internal/buildcfg, internal/goversion, internal/types/errors - < go/types; - # The vast majority of standard library packages should not be resorting to regexp. # go/types is a good chokepoint. It shouldn't use regexp, nor should anything # that is low-enough level to be used by go/types. @@ -615,6 +612,12 @@ var depsRules = ` # crypto-aware packages + FMT, hash/maphash + < container/hash; + + hash/maphash, container/heap, go/constant, go/parser, internal/buildcfg, internal/goversion, internal/types/errors + < go/types; + DEBUG, go/build, go/types, text/scanner, crypto/sha256 < internal/pkgbits, internal/exportdata < go/internal/gcimporter, go/internal/gccgoimporter, go/internal/srcimporter diff --git a/src/go/types/hash.go b/src/go/types/hash.go new file mode 100644 index 0000000000..916cd0610a --- /dev/null +++ b/src/go/types/hash.go @@ -0,0 +1,299 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types + +// This file defines a hash function for Types. + +import ( + "fmt" + "hash/maphash" +) + +type ( + // Hasher and HasherIgnoreTags define hash functions and + // equivalence relations for [Types] that are consistent with + // [Identical] and [IdenticalIgnoreTags], respectively. + // They use the same hash function, which ignores tags; + // only the Equal methods vary. + // + // Hashers are stateless. + Hasher struct{} + HasherIgnoreTags struct{} +) + +var ( + _ maphash.Hasher[Type] = Hasher{} + _ maphash.Hasher[Type] = HasherIgnoreTags{} +) + +func (Hasher) Hash(h *maphash.Hash, t Type) { hasher{inGenericSig: false}.hash(h, t) } +func (HasherIgnoreTags) Hash(h *maphash.Hash, t Type) { hasher{inGenericSig: false}.hash(h, t) } +func (Hasher) Equal(x, y Type) bool { return Identical(x, y) } +func (HasherIgnoreTags) Equal(x, y Type) bool { return IdenticalIgnoreTags(x, y) } + +// hasher holds the state of a single hash traversal, namely, +// whether we are inside the signature of a generic function. +// This is used to optimize [hasher.hashTypeParam]. +type hasher struct{ inGenericSig bool } + +func (hr hasher) hash(h *maphash.Hash, t Type) { + // See [Identical] for rationale. + switch t := t.(type) { + case *Alias: + hr.hash(h, Unalias(t)) + + case *Array: + h.WriteByte('A') + maphash.WriteComparable(h, t.Len()) + hr.hash(h, t.Elem()) + + case *Basic: + h.WriteByte('B') + h.WriteByte(byte(t.Kind())) + + case *Chan: + h.WriteByte('C') + h.WriteByte(byte(t.Dir())) + hr.hash(h, t.Elem()) + + case *Interface: + h.WriteByte('I') + h.WriteByte(byte(t.NumMethods())) + + // Interfaces are identical if they have the same set of methods, with + // identical names and types, and they have the same set of type + // restrictions. See [Identical] for more details. + + // Hash the methods. + // + // Because [Identical] treats Methods as an unordered set, + // we must either: + // (a) sort the methods into some canonical order; or + // (b) hash them each in parallel, combine them with a + // commutative operation such as + or ^, and then + // write this value into the primary hasher. + // Since (a) requires allocation, we choose (b). + var hash uint64 + for m := range t.Methods() { + var subh maphash.Hash + subh.SetSeed(h.Seed()) + // Ignore m.Pkg(). + // Use shallow hash on method signature to + // avoid anonymous interface cycles. + subh.WriteString(m.Name()) + hr.shallowHash(&subh, m.Type()) + hash ^= subh.Sum64() + } + maphash.WriteComparable(h, hash) + + // Hash type restrictions. + // TODO(adonovan): call (fork of) InterfaceTermSet from + // golang.org/x/tools/internal/typeparams/normalize.go. + // hr.hashTermSet(h, terms) + + case *Map: + h.WriteByte('M') + hr.hash(h, t.Key()) + hr.hash(h, t.Elem()) + + case *Named: + h.WriteByte('N') + hr.hashTypeName(h, t.Obj()) + for targ := range t.TypeArgs().Types() { + hr.hash(h, targ) + } + + case *Pointer: + h.WriteByte('P') + hr.hash(h, t.Elem()) + + case *Signature: + h.WriteByte('F') + maphash.WriteComparable(h, t.Variadic()) + tparams := t.TypeParams() + if n := tparams.Len(); n > 0 { + hr.inGenericSig = true // affects constraints, params, and results + + maphash.WriteComparable(h, n) + for tparam := range tparams.TypeParams() { + hr.hash(h, tparam.Constraint()) + } + } + hr.hashTuple(h, t.Params()) + hr.hashTuple(h, t.Results()) + + case *Slice: + h.WriteByte('S') + hr.hash(h, t.Elem()) + + case *Struct: + h.WriteByte('R') // mnemonic: a struct is a record type + n := t.NumFields() + h.WriteByte(byte(n)) + for i := range n { + f := t.Field(i) + maphash.WriteComparable(h, f.Anonymous()) + // Ignore t.Tag(i), so that a single hash function + // can be used with both [Identical] and [IdenticalIgnoreTags]. + h.WriteString(f.Name()) // (ignore f.Pkg) + hr.hash(h, f.Type()) + } + + case *Tuple: + hr.hashTuple(h, t) + + case *TypeParam: + hr.hashTypeParam(h, t) + + case *Union: + h.WriteByte('U') + // TODO(adonovan): opt: call (fork of) UnionTermSet from + // golang.org/x/tools/internal/typeparams/normalize.go. + // hr.hashTermSet(h, terms) + + default: + panic(fmt.Sprintf("%T: %v", t, t)) + } +} + +func (hr hasher) hashTuple(h *maphash.Hash, t *Tuple) { + h.WriteByte('T') + h.WriteByte(byte(t.Len())) + for v := range t.Variables() { + hr.hash(h, v.Type()) + } +} + +// func (hr hasher) hashTermSet(h *maphash.Hash, terms []*Term) { +// h.WriteByte(byte(len(terms))) +// for _, term := range terms { +// // term order is not significant. +// h.WriteByte(byte(btoi(term.Tilde()))) +// hr.hash(h, term.Type()) +// } +// } + +// hashTypeParam encodes a type parameter into hasher h. +func (hr hasher) hashTypeParam(h *maphash.Hash, t *TypeParam) { + h.WriteByte('P') + // Within the signature of a generic function, TypeParams are + // identical if they have the same index and constraint, so we + // hash them based on index. + // + // When we are outside a generic function, free TypeParams are + // identical iff they are the same object, so we can use a + // more discriminating hash consistent with object identity. + // This optimization saves [Map] about 4% when hashing all the + // Info.Types in the forward closure of net/http. + if !hr.inGenericSig { + // Optimization: outside a generic function signature, + // use a more discrimating hash consistent with object identity. + hr.hashTypeName(h, t.Obj()) + } else { + h.WriteByte(byte(t.Index())) + } +} + +// hashTypeName hashes the pointer of tname. +func (hasher) hashTypeName(h *maphash.Hash, tname *TypeName) { + h.WriteByte('N') + // Since Identical uses == to compare TypeNames, + // the hash function uses maphash.Comparable. + maphash.WriteComparable(h, tname) +} + +// shallowHash computes a hash of t without looking at any of its +// element Types, to avoid potential anonymous cycles in the types of +// interface methods. +// +// When an unnamed non-empty interface type appears anywhere among the +// arguments or results of an interface method, there is a potential +// for endless recursion. Consider: +// +// type X interface { m() []*interface { X } } +// +// The problem is that the Methods of the interface in m's result type +// include m itself; there is no mention of the named type X that +// might help us break the cycle. +// (See comment in [Identical], case *Interface, for more.) +func (hr hasher) shallowHash(h *maphash.Hash, t Type) { + // t is the type of an interface method (Signature), + // its params or results (Tuples), or their immediate + // elements (mostly Slice, Pointer, Basic, Named), + // so there's no need to optimize anything else. + switch t := t.(type) { + case *Alias: + hr.shallowHash(h, Unalias(t)) + + case *Array: + h.WriteByte('A') + maphash.WriteComparable(h, t.Len()) + // ignore t.Elem() + + case *Basic: + h.WriteByte('B') + h.WriteByte(byte(t.Kind())) + + case *Chan: + h.WriteByte('C') + // ignore Dir(), Elem() + + case *Interface: + h.WriteByte('I') + // no recursion here + + case *Map: + h.WriteByte('M') + // ignore Key(), Elem() + + case *Named: + hr.hashTypeName(h, t.Obj()) + + case *Pointer: + h.WriteByte('P') + // ignore t.Elem() + + case *Signature: + h.WriteByte(byte(btoi(t.Variadic()))) + // The Signature/Tuple recursion is always + // finite and invariably shallow. + hr.shallowHash(h, t.Params()) + hr.shallowHash(h, t.Results()) + + case *Slice: + h.WriteByte('S') + // ignore t.Elem() + + case *Struct: + h.WriteByte('R') // mnemonic: a struct is a record type + h.WriteByte(byte(t.NumFields())) + // ignore t.Fields() + + case *Tuple: + h.WriteByte('T') + h.WriteByte(byte(t.Len())) + for v := range t.Variables() { + hr.shallowHash(h, v.Type()) + } + + case *TypeParam: + hr.hashTypeParam(h, t) + + case *Union: + h.WriteByte('U') + // ignore term set + + default: + panic(fmt.Sprintf("shallowHash: %T: %v", t, t)) + } +} + +func btoi(b bool) int { + if b { + return 1 + } else { + return 0 + } +} diff --git a/src/go/types/hash_test.go b/src/go/types/hash_test.go new file mode 100644 index 0000000000..98b1ae23df --- /dev/null +++ b/src/go/types/hash_test.go @@ -0,0 +1,264 @@ +// Copyright 2026 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package types_test + +// This file defines a test of using [types.Hasher]. + +import ( + "go/ast" + "go/parser" + "go/token" + "go/types" + "hash/maphash" + "testing" +) + +func TestHasher(t *testing.T) { + const src = ` +package p + +// Basic defined types. +type T1 int +type T2 int + +// Identical methods. +func (T1) M(int) {} +func (T2) M(int) {} + +// A constraint interface. +type C interface { + ~int | string +} + +type I interface { +} + +// A generic type. +type G[P C] int + +// Generic functions with identical signature. +func Fa1[P C](p P) {} +func Fa2[Q C](q Q) {} + +// Fb1 and Fb2 are identical and should be mapped to the same entry, even if we +// map their arguments first. +func Fb1[P any](x *P) { + var y *P // Map this first. + _ = y +} +func Fb2[Q any](x *Q) { +} + +// G1 and G2 are mutally recursive, and have identical methods. +type G1[P any] struct{ + Field *G2[P] +} +func (G1[P]) M(G1[P], G2[P]) {} +type G2[Q any] struct{ + Field *G1[Q] +} +func (G2[P]) M(G1[P], G2[P]) {} + +// Method type expressions on different generic types are different. +var ME1 = G1[int].M +var ME2 = G2[int].M + +// ME1Type should have identical type as ME1. +var ME1Type func(G1[int], G1[int], G2[int]) + +// Examples from issue #51314 +type Constraint[T any] any +func Foo[T Constraint[T]]() {} +func Fn[T1 ~*T2, T2 ~*T1](t1 T1, t2 T2) {} + +// Bar and Baz are identical to Foo. +func Bar[P Constraint[P]]() {} +func Baz[Q any]() {} // The underlying type of Constraint[P] is any. +// But Quux is not. +func Quux[Q interface{ quux() }]() {} + +type Issue56048_I interface{ m() interface { Issue56048_I } } +var Issue56048 = Issue56048_I.m + +type Issue56048_Ib interface{ m() chan []*interface { Issue56048_Ib } } +var Issue56048b = Issue56048_Ib.m + +// Non-generic alias +type NonAlias int +type Alias1 = NonAlias +type Alias2 = NonAlias + +type Tagged1 struct { F int "tag1" } +type Tagged2 struct { F int "tag2" } +` + + fset := token.NewFileSet() + file, err := parser.ParseFile(fset, "p.go", src, 0) + if err != nil { + t.Fatal(err) + } + + var conf types.Config + pkg, err := conf.Check("", fset, []*ast.File{file}, nil) + if err != nil { + t.Fatal(err) + } + + instantiate := func(origin types.Type, targs ...types.Type) types.Type { + inst, err := types.Instantiate(nil, origin, targs, true) + if err != nil { + t.Fatal(err) + } + return inst + } + + scope := pkg.Scope() + var ( + tInt = types.Typ[types.Int] + tString = types.Typ[types.String] + + T1 = scope.Lookup("T1").Type().(*types.Named) + T2 = scope.Lookup("T2").Type().(*types.Named) + T1M = T1.Method(0).Type() + T2M = T2.Method(0).Type() + G = scope.Lookup("G").Type() + GInt1 = instantiate(G, tInt) + GInt2 = instantiate(G, tInt) + GStr = instantiate(G, tString) + C = scope.Lookup("C").Type() + CI = C.Underlying().(*types.Interface) + I = scope.Lookup("I").Type() + II = I.Underlying().(*types.Interface) + U = CI.EmbeddedType(0).(*types.Union) + Fa1 = scope.Lookup("Fa1").Type().(*types.Signature) + Fa2 = scope.Lookup("Fa2").Type().(*types.Signature) + Fa1P = Fa1.TypeParams().At(0) + Fa2Q = Fa2.TypeParams().At(0) + Fb1 = scope.Lookup("Fb1").Type().(*types.Signature) + Fb1x = Fb1.Params().At(0).Type() + Fb1y = scope.Lookup("Fb1").(*types.Func).Scope().Lookup("y").Type() + Fb2 = scope.Lookup("Fb2").Type().(*types.Signature) + Fb2x = Fb2.Params().At(0).Type() + G1 = scope.Lookup("G1").Type().(*types.Named) + G1M = G1.Method(0).Type() + G1IntM1 = instantiate(G1, tInt).(*types.Named).Method(0).Type() + G1IntM2 = instantiate(G1, tInt).(*types.Named).Method(0).Type() + G1StrM = instantiate(G1, tString).(*types.Named).Method(0).Type() + G2 = scope.Lookup("G2").Type() + G2IntM = instantiate(G2, tInt).(*types.Named).Method(0).Type() + ME1 = scope.Lookup("ME1").Type() + ME1Type = scope.Lookup("ME1Type").Type() + ME2 = scope.Lookup("ME2").Type() + + Constraint = scope.Lookup("Constraint").Type() + Foo = scope.Lookup("Foo").Type() + Fn = scope.Lookup("Fn").Type() + Bar = scope.Lookup("Bar").Type() + Baz = scope.Lookup("Baz").Type() + Quux = scope.Lookup("Quux").Type() + Issue56048 = scope.Lookup("Issue56048").Type() + Issue56048b = scope.Lookup("Issue56048b").Type() + + NonAlias = scope.Lookup("NonAlias").Type() + Alias1 = scope.Lookup("Alias1").Type() + Alias2 = scope.Lookup("Alias2").Type() + + Tagged1 = scope.Lookup("Tagged1").Type().Underlying().(*types.Struct) + Tagged2 = scope.Lookup("Tagged2").Type().Underlying().(*types.Struct) + ) + + // eqclasses groups the above types into types.Identical equivalence classes. + eqclasses := [][]types.Type{ + {T1}, + {T2}, + {G}, + {C}, + {CI}, + {U}, + {I}, + {II}, // should not be identical to CI + {T1M, T2M}, + {GInt1, GInt2}, + {GStr}, + {Fa1, Fa2}, + {Fa1P}, + {Fa2Q}, + {Fb1y, Fb1x}, + {Fb2x}, + {Fb1, Fb2}, + {G1}, + {G1M}, + {G2}, + {G1IntM1, G1IntM2, G2IntM}, + {G1StrM}, + {ME1, ME1Type}, + {ME2}, + {Constraint}, + {Foo, Bar}, + {Baz}, + {Fn}, + {Quux}, + {Issue56048}, + {Issue56048b}, + {NonAlias, Alias1, Alias2}, + } + + run := func(t *testing.T, hasher maphash.Hasher[types.Type], eq func(x, y types.Type) bool, classes [][]types.Type) { + seed := maphash.MakeSeed() + + hash := func(t types.Type) uint64 { + var h maphash.Hash + h.SetSeed(seed) + hasher.Hash(&h, t) + return h.Sum64() + } + + for xi, class := range classes { + tx := class[0] // arbitrary representative of class + + for yi := range classes { + if xi == yi { + // Within a class, each element is equivalent to first + // and has the same hash. + for i, ty := range class { + hx, hy := hash(tx), hash(ty) + if !eq(tx, ty) || hx != hy { + t.Fatalf("class[%d][%d] (%v, hash %x) is not equivalent to class[%d][%d] (%v, hash %x)", + xi, 0, tx, hx, + yi, i, ty, hy) + } + } + } else { + // Across classes, no element is equivalent to first. + // (We can't say for sure that the hashes are unequal.) + for k, typ := range classes[yi] { + if eq(tx, typ) { + t.Fatalf("class[%d][%d] (%v) is equivalent to class[%d][%d] (%v)", + xi, 0, tx, + yi, k, typ) + } + } + } + } + } + } + + // Hasher considers the two Tagged{1,2} types distinct. + t.Run("Hasher", func(t *testing.T) { + run(t, types.Hasher{}, types.Identical, append( + eqclasses, + []types.Type{Tagged1}, + []types.Type{Tagged2}, + )) + }) + + // HasherIgnoreTags considers the two Tagged{1,2} types equal. + t.Run("HasherIgnoreTags", func(t *testing.T) { + run(t, types.HasherIgnoreTags{}, types.IdenticalIgnoreTags, append( + eqclasses, + []types.Type{Tagged1, Tagged2}, + )) + }) +} -- cgit v1.3