aboutsummaryrefslogtreecommitdiff
path: root/src/simd/archsimd/_gen
diff options
context:
space:
mode:
author Austin Clements <austin@google.com> 2026-01-13 09:34:53 -0500
committer Cherry Mui <cherryyz@google.com> 2026-01-13 12:17:09 -0800
commit 9ef1692c93bf96328bcaf7a5c8a46094748da7f3 (patch)
tree 96266bb6505523fc2a42b89ce1bfb59dfe4b61b2 /src/simd/archsimd/_gen
parent e2fef50def98b87107ab963f657d43d7869b8463 (diff)
download go-9ef1692c93bf96328bcaf7a5c8a46094748da7f3.tar.xz
simd/archsimd/_gen/simdgen: feature implications
This simplifies our handling of XED features, adds a table of which features imply which other features, and adds this information to the documentation of the CPU features APIs. As part of this we fix an issue around the "AVXAES" feature. AVXAES is defined as the combination of the AVX and AES CPUID flags. Several other features also work like this, but have hand-written logic in internal/cpu to compute logical feature flags from the underlying CPUID bits. For these, we expose a single feature check function from the SIMD API. AVXAES currently doesn't work like this: it requires the user to check both features. However, this forces the SIMD API to expose an "AES" feature check, which really has nothing to do with SIMD. To make this consistent, we introduce an AVXAES feature check function and use it in feature requirement docs. Unlike the other combo features, this is implemented in the simd package, but the difference is invisible to the user. Change-Id: I2985ebd361f0ecd45fd428903efe4c981a5ec65d Reviewed-on: https://go-review.googlesource.com/c/go/+/736100 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Reviewed-by: Junyang Shao <shaojunyang@google.com> Reviewed-on: https://go-review.googlesource.com/c/go/+/736200 Reviewed-by: Austin Clements <austin@google.com>
Diffstat (limited to 'src/simd/archsimd/_gen')
-rw-r--r-- src/simd/archsimd/_gen/simdgen/gen_simdTypes.go 97
-rw-r--r-- src/simd/archsimd/_gen/simdgen/xed.go 121
2 files changed, 169 insertions, 49 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
index dd3a75eb44..4f1c70e211 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
@@ -189,6 +189,7 @@ type X86Features struct {}
var X86 X86Features
{{range .}}
+{{$f := .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
@@ -199,11 +200,19 @@ var X86 X86Features
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
+{{- if ne .ImpliesAll ""}}
+//
+// If it returns true, then the CPU also supports {{.ImpliesAll}}.
+{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
-func (X86Features) {{.Feature}}() bool {
- return cpu.X86.Has{{.Feature}}
+func ({{.FeatureVar}}Features) {{.Feature}}() bool {
+{{- if .Virtual}}
+ return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
+{{- else}}
+ return cpu.{{.FeatureVar}}.Has{{.Feature}}
+{{- end}}
}
{{end}}
`
@@ -591,6 +600,65 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
return buffer
}
+type goarchFeatures struct {
+ // featureVar is the name of the exported feature-check variable for this
+ // architecture.
+ featureVar string
+
+ // features records per-feature information.
+ features map[string]featureInfo
+}
+
+type featureInfo struct {
+ // Implies is a list of other CPU features that are required for this
+ // feature. These are allowed to chain.
+ //
+ // For example, if the Frob feature lists "Baz", then if X.Frob() returns
+ // true, it must also be true that the CPU has feature Baz.
+ Implies []string
+
+ // Virtual means this feature is not represented directly in internal/cpu,
+ // but is instead the logical AND of the features in Implies.
+ Virtual bool
+}
+
+// goarchFeatureInfo maps from GOARCH to CPU feature to additional information
+// about that feature. Not all features need to be in this map.
+var goarchFeatureInfo = make(map[string]goarchFeatures)
+
+func registerFeatureInfo(goArch string, features goarchFeatures) {
+ goarchFeatureInfo[goArch] = features
+}
+
+func featureImplies(goarch string, base string) string {
+ // Compute the transitive closure of base.
+ var list []string
+ var visit func(f string)
+ visit = func(f string) {
+ list = append(list, f)
+ for _, dep := range goarchFeatureInfo[goarch].features[f].Implies {
+ visit(dep)
+ }
+ }
+ visit(base)
+ // Drop base
+ list = list[1:]
+ // Put in "nice" order
+ slices.Reverse(list)
+ // Combine into a comment-ready form
+ switch len(list) {
+ case 0:
+ return ""
+ case 1:
+ return list[0]
+ case 2:
+ return list[0] + " and " + list[1]
+ default:
+ list[len(list)-1] = "and " + list[len(list)-1]
+ return strings.Join(list, ", ")
+ }
+}
+
func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
// Gather all features
type featureKey struct {
@@ -606,13 +674,36 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
featureSet[featureKey{op.GoArch, feature}] = struct{}{}
}
}
- features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
+ featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
return c
}
return compareNatural(a.Feature, b.Feature)
})
+ // TODO: internal/cpu doesn't enforce these at all. You can even do
+ // GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off
+ // AVX2. We need to push these dependencies into it somehow.
+ type feature struct {
+ featureKey
+ FeatureVar string
+ Virtual bool
+ Implies []string
+ ImpliesAll string
+ }
+ var features []feature
+ for _, k := range featureKeys {
+ featureVar := goarchFeatureInfo[k.GoArch].featureVar
+ fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
+ features = append(features, feature{
+ featureKey: k,
+ FeatureVar: featureVar,
+ Virtual: fi.Virtual,
+ Implies: fi.Implies,
+ ImpliesAll: featureImplies(k.GoArch, k.Feature),
+ })
+ }
+
// If we ever have the same feature name on more than one GOARCH, we'll have
// to be more careful about this.
t := templateOf(simdFeaturesTemplate, "features")
diff --git a/src/simd/archsimd/_gen/simdgen/xed.go b/src/simd/archsimd/_gen/simdgen/xed.go
index 4ba6738e7e..49bedc9b28 100644
--- a/src/simd/archsimd/_gen/simdgen/xed.go
+++ b/src/simd/archsimd/_gen/simdgen/xed.go
@@ -5,7 +5,6 @@
package main
import (
- "cmp"
"fmt"
"log"
"maps"
@@ -210,16 +209,9 @@ func loadXED(xedPath string) []*unify.Value {
}
log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
} else {
- keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int {
- return cmp.Or(cmp.Compare(a.Extension, b.Extension),
- cmp.Compare(a.ISASet, b.ISASet))
- })
+ keys := slices.Sorted(maps.Keys(unknownFeatures))
for _, key := range keys {
- if key.ISASet == "" || key.ISASet == key.Extension {
- log.Printf("unhandled Extension %s", key.Extension)
- } else {
- log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet)
- }
+ log.Printf("unhandled ISASet %s", key)
log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
}
}
@@ -763,16 +755,24 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant inst
// decodeCPUFeature returns the CPU feature name required by inst. These match
// the names of the "Has*" feature checks in the simd package.
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
- key := cpuFeatureKey{
- Extension: inst.Extension,
- ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""),
+ isaSet := inst.ISASet
+ if isaSet == "" {
+ // Older instructions don't have an ISA set. Use their "extension"
+ // instead.
+ isaSet = inst.Extension
}
- feat, ok := cpuFeatureMap[key]
+ // We require AVX512VL to use AVX512 at all, so strip off the vector length
+ // suffixes.
+ if strings.HasPrefix(isaSet, "AVX512") {
+ isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
+ }
+
+ feat, ok := cpuFeatureMap[isaSet]
if !ok {
- imap := unknownFeatures[key]
+ imap := unknownFeatures[isaSet]
if imap == nil {
imap = make(map[string]struct{})
- unknownFeatures[key] = imap
+ unknownFeatures[isaSet] = imap
}
imap[inst.Opcode()] = struct{}{}
return "", false
@@ -783,45 +783,74 @@ func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
return feat, true
}
-var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$")
+var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
-type cpuFeatureKey struct {
- Extension, ISASet string
-}
-
-// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name
-// that can be used in the SIMD API.
-var cpuFeatureMap = map[cpuFeatureKey]string{
- {"SHA", "SHA"}: "SHA",
-
- {"AVX", ""}: "AVX",
- {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
- {"AVX2", ""}: "AVX2",
- {"AVXAES", ""}: "AVX, AES",
+// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
+// name to expose in the SIMD feature check API.
+//
+// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
+var cpuFeatureMap = map[string]string{
+ "AVX": "AVX",
+ "AVX_VNNI": "AVXVNNI",
+ "AVX2": "AVX2",
+ "AVXAES": "AVXAES",
+ "SHA": "SHA",
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
- {"AVX512EVEX", "AVX512F"}: "AVX512",
- {"AVX512EVEX", "AVX512CD"}: "AVX512",
- {"AVX512EVEX", "AVX512BW"}: "AVX512",
- {"AVX512EVEX", "AVX512DQ"}: "AVX512",
- // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by
- // the vector length suffix.
+ "AVX512F": "AVX512",
+ "AVX512BW": "AVX512",
+ "AVX512CD": "AVX512",
+ "AVX512DQ": "AVX512",
+ // AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
+ // required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.
// AVX-512 extension features
- {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
- {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
- {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
- {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
- {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
- {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
- {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
- {"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ",
+ "AVX512_BITALG": "AVX512BITALG",
+ "AVX512_GFNI": "AVX512GFNI",
+ "AVX512_VBMI": "AVX512VBMI",
+ "AVX512_VBMI2": "AVX512VBMI2",
+ "AVX512_VNNI": "AVX512VNNI",
+ "AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ",
+ "AVX512_VAES": "AVX512VAES",
+ "AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",
// AVX 10.2 (not yet supported)
- {"AVX512EVEX", "AVX10_2_RC"}: "ignore",
+ "AVX10_2_RC": "ignore",
+}
+
+func init() {
+ // TODO: In general, Intel doesn't make any guarantees about what flags are
+ // set, so this means our feature checks need to ensure these, just to be
+ // sure.
+ var features = map[string]featureInfo{
+ "AVX2": {Implies: []string{"AVX"}},
+ "AVX512": {Implies: []string{"AVX2"}},
+
+ "AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
+
+ // AVX-512 subfeatures.
+ "AVX512BITALG": {Implies: []string{"AVX512"}},
+ "AVX512GFNI": {Implies: []string{"AVX512"}},
+ "AVX512VBMI": {Implies: []string{"AVX512"}},
+ "AVX512VBMI2": {Implies: []string{"AVX512"}},
+ "AVX512VNNI": {Implies: []string{"AVX512"}},
+ "AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}},
+ "AVX512VAES": {Implies: []string{"AVX512"}},
+
+ // AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
+ // instructions to VEX encoding, limited to 256 bit vectors. They're
+ // intended for lower end CPUs that want to support VNNI/IFMA without
+ // supporting AVX-512. As such, they're built on AVX2's VEX encoding.
+ "AVXVNNI": {Implies: []string{"AVX2"}},
+ "AVXIFMA": {Implies: []string{"AVX2"}},
+ }
+ registerFeatureInfo("amd64", goarchFeatures{
+ featureVar: "X86",
+ features: features,
+ })
}
-var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{}
+var unknownFeatures = map[string]map[string]struct{}{}
// hasOptionalMask returns whether there is an optional mask operand in ops.
func hasOptionalMask(ops []operand) bool {