diff options
| author | Austin Clements <austin@google.com> | 2026-01-13 09:34:53 -0500 |
|---|---|---|
| committer | Cherry Mui <cherryyz@google.com> | 2026-01-13 12:17:09 -0800 |
| commit | 9ef1692c93bf96328bcaf7a5c8a46094748da7f3 (patch) | |
| tree | 96266bb6505523fc2a42b89ce1bfb59dfe4b61b2 /src/simd/archsimd/_gen | |
| parent | e2fef50def98b87107ab963f657d43d7869b8463 (diff) | |
| download | go-9ef1692c93bf96328bcaf7a5c8a46094748da7f3.tar.xz | |
simd/archsimd/_gen/simdgen: feature implications
This simplifies our handling of XED features, adds a table of which
features imply which other features, and adds this information to the
documentation of the CPU features APIs.
As part of this we fix an issue around the "AVXAES" feature. AVXAES is
defined as the combination of the AVX and AES CPUID flags. Several
other features also work like this, but have hand-written logic in
internal/cpu to compute logical feature flags from the underlying
CPUID bits. For these, we expose a single feature check function from
the SIMD API.
AVXAES currently doesn't work like this: it requires the user to check
both features. However, this forces the SIMD API to expose an "AES"
feature check, which really has nothing to do with SIMD. To make this
consistent, we introduce an AVXAES feature check function and use it
in feature requirement docs. Unlike the other combo features, this is
implemented in the simd package, but the difference is invisible to
the user.
Change-Id: I2985ebd361f0ecd45fd428903efe4c981a5ec65d
Reviewed-on: https://go-review.googlesource.com/c/go/+/736100
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/736200
Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/simd/archsimd/_gen')
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/gen_simdTypes.go | 97 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/xed.go | 121 |
2 files changed, 169 insertions, 49 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go index dd3a75eb44..4f1c70e211 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go @@ -189,6 +189,7 @@ type X86Features struct {} var X86 X86Features {{range .}} +{{$f := .}} {{- if eq .Feature "AVX512"}} // {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features. // @@ -199,11 +200,19 @@ var X86 X86Features {{- else -}} // {{.Feature}} returns whether the CPU supports the {{.Feature}} feature. {{- end}} +{{- if ne .ImpliesAll ""}} +// +// If it returns true, then the CPU also supports {{.ImpliesAll}}. +{{- end}} // // {{.Feature}} is defined on all GOARCHes, but will only return true on // GOARCH {{.GoArch}}. -func (X86Features) {{.Feature}}() bool { - return cpu.X86.Has{{.Feature}} +func ({{.FeatureVar}}Features) {{.Feature}}() bool { +{{- if .Virtual}} + return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}} +{{- else}} + return cpu.{{.FeatureVar}}.Has{{.Feature}} +{{- end}} } {{end}} ` @@ -591,6 +600,65 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { return buffer } +type goarchFeatures struct { + // featureVar is the name of the exported feature-check variable for this + // architecture. + featureVar string + + // features records per-feature information. + features map[string]featureInfo +} + +type featureInfo struct { + // Implies is a list of other CPU features that are required for this + // feature. These are allowed to chain. + // + // For example, if the Frob feature lists "Baz", then if X.Frob() returns + // true, it must also be true that the CPU has feature Baz. + Implies []string + + // Virtual means this feature is not represented directly in internal/cpu, + // but is instead the logical AND of the features in Implies. 
+ Virtual bool +} + +// goarchFeatureInfo maps from GOARCH to CPU feature to additional information +// about that feature. Not all features need to be in this map. +var goarchFeatureInfo = make(map[string]goarchFeatures) + +func registerFeatureInfo(goArch string, features goarchFeatures) { + goarchFeatureInfo[goArch] = features +} + +func featureImplies(goarch string, base string) string { + // Compute the transitive closure of base. + var list []string + var visit func(f string) + visit = func(f string) { + list = append(list, f) + for _, dep := range goarchFeatureInfo[goarch].features[f].Implies { + visit(dep) + } + } + visit(base) + // Drop base + list = list[1:] + // Put in "nice" order + slices.Reverse(list) + // Combine into a comment-ready form + switch len(list) { + case 0: + return "" + case 1: + return list[0] + case 2: + return list[0] + " and " + list[1] + default: + list[len(list)-1] = "and " + list[len(list)-1] + return strings.Join(list, ", ") + } +} + func writeSIMDFeatures(ops []Operation) *bytes.Buffer { // Gather all features type featureKey struct { @@ -606,13 +674,36 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer { featureSet[featureKey{op.GoArch, feature}] = struct{}{} } } - features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { + featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { return c } return compareNatural(a.Feature, b.Feature) }) + // TODO: internal/cpu doesn't enforce these at all. You can even do + // GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off + // AVX2. We need to push these dependencies into it somehow. 
+ type feature struct { + featureKey + FeatureVar string + Virtual bool + Implies []string + ImpliesAll string + } + var features []feature + for _, k := range featureKeys { + featureVar := goarchFeatureInfo[k.GoArch].featureVar + fi := goarchFeatureInfo[k.GoArch].features[k.Feature] + features = append(features, feature{ + featureKey: k, + FeatureVar: featureVar, + Virtual: fi.Virtual, + Implies: fi.Implies, + ImpliesAll: featureImplies(k.GoArch, k.Feature), + }) + } + // If we ever have the same feature name on more than one GOARCH, we'll have // to be more careful about this. t := templateOf(simdFeaturesTemplate, "features") diff --git a/src/simd/archsimd/_gen/simdgen/xed.go b/src/simd/archsimd/_gen/simdgen/xed.go index 4ba6738e7e..49bedc9b28 100644 --- a/src/simd/archsimd/_gen/simdgen/xed.go +++ b/src/simd/archsimd/_gen/simdgen/xed.go @@ -5,7 +5,6 @@ package main import ( - "cmp" "fmt" "log" "maps" @@ -210,16 +209,9 @@ func loadXED(xedPath string) []*unify.Value { } log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst) } else { - keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int { - return cmp.Or(cmp.Compare(a.Extension, b.Extension), - cmp.Compare(a.ISASet, b.ISASet)) - }) + keys := slices.Sorted(maps.Keys(unknownFeatures)) for _, key := range keys { - if key.ISASet == "" || key.ISASet == key.Extension { - log.Printf("unhandled Extension %s", key.Extension) - } else { - log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet) - } + log.Printf("unhandled ISASet %s", key) log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key]))) } } @@ -763,16 +755,24 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant inst // decodeCPUFeature returns the CPU feature name required by inst. These match // the names of the "Has*" feature checks in the simd package. 
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { - key := cpuFeatureKey{ - Extension: inst.Extension, - ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""), + isaSet := inst.ISASet + if isaSet == "" { + // Older instructions don't have an ISA set. Use their "extension" + // instead. + isaSet = inst.Extension } - feat, ok := cpuFeatureMap[key] + // We require AVX512VL to use AVX512 at all, so strip off the vector length + // suffixes. + if strings.HasPrefix(isaSet, "AVX512") { + isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "") + } + + feat, ok := cpuFeatureMap[isaSet] if !ok { - imap := unknownFeatures[key] + imap := unknownFeatures[isaSet] if imap == nil { imap = make(map[string]struct{}) - unknownFeatures[key] = imap + unknownFeatures[isaSet] = imap } imap[inst.Opcode()] = struct{}{} return "", false @@ -783,45 +783,74 @@ func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { return feat, true } -var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$") +var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$") -type cpuFeatureKey struct { - Extension, ISASet string -} - -// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name -// that can be used in the SIMD API. -var cpuFeatureMap = map[cpuFeatureKey]string{ - {"SHA", "SHA"}: "SHA", - - {"AVX", ""}: "AVX", - {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", - {"AVX2", ""}: "AVX2", - {"AVXAES", ""}: "AVX, AES", +// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature +// name to expose in the SIMD feature check API. +// +// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags. +var cpuFeatureMap = map[string]string{ + "AVX": "AVX", + "AVX_VNNI": "AVXVNNI", + "AVX2": "AVX2", + "AVXAES": "AVXAES", + "SHA": "SHA", // AVX-512 foundational features. We combine all of these into one "AVX512" feature. 
- {"AVX512EVEX", "AVX512F"}: "AVX512", - {"AVX512EVEX", "AVX512CD"}: "AVX512", - {"AVX512EVEX", "AVX512BW"}: "AVX512", - {"AVX512EVEX", "AVX512DQ"}: "AVX512", - // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by - // the vector length suffix. + "AVX512F": "AVX512", + "AVX512BW": "AVX512", + "AVX512CD": "AVX512", + "AVX512DQ": "AVX512", + // AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is + // required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway. // AVX-512 extension features - {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG", - {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI", - {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2", - {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", - {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", - {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", - {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES", - {"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ", + "AVX512_BITALG": "AVX512BITALG", + "AVX512_GFNI": "AVX512GFNI", + "AVX512_VBMI": "AVX512VBMI", + "AVX512_VBMI2": "AVX512VBMI2", + "AVX512_VNNI": "AVX512VNNI", + "AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ", + "AVX512_VAES": "AVX512VAES", + "AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ", // AVX 10.2 (not yet supported) - {"AVX512EVEX", "AVX10_2_RC"}: "ignore", + "AVX10_2_RC": "ignore", +} + +func init() { + // TODO: In general, Intel doesn't make any guarantees about what flags are + // set, so this means our feature checks need to ensure these, just to be + // sure. + var features = map[string]featureInfo{ + "AVX2": {Implies: []string{"AVX"}}, + "AVX512": {Implies: []string{"AVX2"}}, + + "AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}}, + + // AVX-512 subfeatures. 
+ "AVX512BITALG": {Implies: []string{"AVX512"}}, + "AVX512GFNI": {Implies: []string{"AVX512"}}, + "AVX512VBMI": {Implies: []string{"AVX512"}}, + "AVX512VBMI2": {Implies: []string{"AVX512"}}, + "AVX512VNNI": {Implies: []string{"AVX512"}}, + "AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}}, + "AVX512VAES": {Implies: []string{"AVX512"}}, + + // AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA + // instructions to VEX encoding, limited to 256 bit vectors. They're + // intended for lower end CPUs that want to support VNNI/IFMA without + // supporting AVX-512. As such, they're built on AVX2's VEX encoding. + "AVXVNNI": {Implies: []string{"AVX2"}}, + "AVXIFMA": {Implies: []string{"AVX2"}}, + } + registerFeatureInfo("amd64", goarchFeatures{ + featureVar: "X86", + features: features, + }) } -var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{} +var unknownFeatures = map[string]map[string]struct{}{} // hasOptionalMask returns whether there is an optional mask operand in ops. func hasOptionalMask(ops []operand) bool { |
