about summary refs log tree commit diff
path: root/src/simd/archsimd/_gen/simdgen
diff options
context:
space:
mode:
Diffstat (limited to 'src/simd/archsimd/_gen/simdgen')
-rw-r--r-- src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go 2
-rw-r--r-- src/simd/archsimd/_gen/simdgen/gen_simdTypes.go 97
-rw-r--r-- src/simd/archsimd/_gen/simdgen/gen_simdssa.go 2
-rw-r--r-- src/simd/archsimd/_gen/simdgen/xed.go 125
4 files changed, 174 insertions, 52 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go b/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go
index 3d99dd2a81..94b122ac39 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go
@@ -181,7 +181,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
}
hasMerging = gOp.hasMaskedMerging(maskType, shapeOut)
if hasMerging && !resultInArg0 {
- // We have to copy the slice here becasue the sort will be visible from other
+ // We have to copy the slice here because the sort will be visible from other
// aliases when no reslicing is happening.
newIn := make([]Operand, len(op.In), len(op.In)+1)
copy(newIn, op.In)
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
index dd3a75eb44..4f1c70e211 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go
@@ -189,6 +189,7 @@ type X86Features struct {}
var X86 X86Features
{{range .}}
+{{$f := .}}
{{- if eq .Feature "AVX512"}}
// {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features.
//
@@ -199,11 +200,19 @@ var X86 X86Features
{{- else -}}
// {{.Feature}} returns whether the CPU supports the {{.Feature}} feature.
{{- end}}
+{{- if ne .ImpliesAll ""}}
+//
+// If it returns true, then the CPU also supports {{.ImpliesAll}}.
+{{- end}}
//
// {{.Feature}} is defined on all GOARCHes, but will only return true on
// GOARCH {{.GoArch}}.
-func (X86Features) {{.Feature}}() bool {
- return cpu.X86.Has{{.Feature}}
+func ({{.FeatureVar}}Features) {{.Feature}}() bool {
+{{- if .Virtual}}
+ return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}}
+{{- else}}
+ return cpu.{{.FeatureVar}}.Has{{.Feature}}
+{{- end}}
}
{{end}}
`
@@ -591,6 +600,65 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer {
return buffer
}
+type goarchFeatures struct {
+ // featureVar is the name of the exported feature-check variable for this
+ // architecture.
+ featureVar string
+
+ // features records per-feature information.
+ features map[string]featureInfo
+}
+
+type featureInfo struct {
+ // Implies is a list of other CPU features that are required for this
+ // feature. These are allowed to chain.
+ //
+ // For example, if the Frob feature lists "Baz", then if X.Frob() returns
+ // true, it must also be true that the CPU has feature Baz.
+ Implies []string
+
+ // Virtual means this feature is not represented directly in internal/cpu,
+ // but is instead the logical AND of the features in Implies.
+ Virtual bool
+}
+
+// goarchFeatureInfo maps from GOARCH to CPU feature to additional information
+// about that feature. Not all features need to be in this map.
+var goarchFeatureInfo = make(map[string]goarchFeatures)
+
+func registerFeatureInfo(goArch string, features goarchFeatures) {
+ goarchFeatureInfo[goArch] = features
+}
+
+func featureImplies(goarch string, base string) string {
+ // Compute the transitive closure of base.
+ var list []string
+ var visit func(f string)
+ visit = func(f string) {
+ list = append(list, f)
+ for _, dep := range goarchFeatureInfo[goarch].features[f].Implies {
+ visit(dep)
+ }
+ }
+ visit(base)
+ // Drop base
+ list = list[1:]
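+ // Put in "nice" order: reversing the visit order means a dependency chain is listed most-basic feature first (e.g. AVX, AVX2, AVX512).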
+ slices.Reverse(list)
+ // Combine into a comment-ready form
+ switch len(list) {
+ case 0:
+ return ""
+ case 1:
+ return list[0]
+ case 2:
+ return list[0] + " and " + list[1]
+ default:
+ list[len(list)-1] = "and " + list[len(list)-1]
+ return strings.Join(list, ", ")
+ }
+}
+
func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
// Gather all features
type featureKey struct {
@@ -606,13 +674,36 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer {
featureSet[featureKey{op.GoArch, feature}] = struct{}{}
}
}
- features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
+ featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int {
if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 {
return c
}
return compareNatural(a.Feature, b.Feature)
})
+ // TODO: internal/cpu doesn't enforce these at all. You can even do
+ // GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off
+ // AVX2. We need to push these dependencies into it somehow.
+ type feature struct {
+ featureKey
+ FeatureVar string
+ Virtual bool
+ Implies []string
+ ImpliesAll string
+ }
+ var features []feature
+ for _, k := range featureKeys {
+ featureVar := goarchFeatureInfo[k.GoArch].featureVar
+ fi := goarchFeatureInfo[k.GoArch].features[k.Feature]
+ features = append(features, feature{
+ featureKey: k,
+ FeatureVar: featureVar,
+ Virtual: fi.Virtual,
+ Implies: fi.Implies,
+ ImpliesAll: featureImplies(k.GoArch, k.Feature),
+ })
+ }
+
// If we ever have the same feature name on more than one GOARCH, we'll have
// to be more careful about this.
t := templateOf(simdFeaturesTemplate, "features")
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go
index 876ffabe3d..96d096688f 100644
--- a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go
+++ b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go
@@ -133,7 +133,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer {
if mem == NoMem && op.hasMaskedMerging(maskType, shapeOut) {
regShapeMerging := regShape
if shapeOut != OneVregOutAtIn {
- // We have to copy the slice here becasue the sort will be visible from other
+ // We have to copy the slice here because the sort will be visible from other
// aliases when no reslicing is happening.
newIn := make([]Operand, len(op.In), len(op.In)+1)
copy(newIn, op.In)
diff --git a/src/simd/archsimd/_gen/simdgen/xed.go b/src/simd/archsimd/_gen/simdgen/xed.go
index 4ba6738e7e..5d6fac64d0 100644
--- a/src/simd/archsimd/_gen/simdgen/xed.go
+++ b/src/simd/archsimd/_gen/simdgen/xed.go
@@ -5,7 +5,6 @@
package main
import (
- "cmp"
"fmt"
"log"
"maps"
@@ -78,7 +77,7 @@ func loadXED(xedPath string) []*unify.Value {
switch {
case inst.RealOpcode == "N":
return // Skip unstable instructions
- case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA")):
+ case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || inst.Extension == "FMA"):
// We're only interested in AVX, SHA, and FMA instructions.
return
}
@@ -210,16 +209,9 @@ func loadXED(xedPath string) []*unify.Value {
}
log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst)
} else {
- keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int {
- return cmp.Or(cmp.Compare(a.Extension, b.Extension),
- cmp.Compare(a.ISASet, b.ISASet))
- })
+ keys := slices.Sorted(maps.Keys(unknownFeatures))
for _, key := range keys {
- if key.ISASet == "" || key.ISASet == key.Extension {
- log.Printf("unhandled Extension %s", key.Extension)
- } else {
- log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet)
- }
+ log.Printf("unhandled ISASet %s", key)
log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key])))
}
}
@@ -763,16 +755,24 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant inst
// decodeCPUFeature returns the CPU feature name required by inst. These match
// the names of the "Has*" feature checks in the simd package.
func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
- key := cpuFeatureKey{
- Extension: inst.Extension,
- ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""),
+ isaSet := inst.ISASet
+ if isaSet == "" {
+ // Older instructions don't have an ISA set. Use their "extension"
+ // instead.
+ isaSet = inst.Extension
}
- feat, ok := cpuFeatureMap[key]
+ // We require AVX512VL to use AVX512 at all, so strip off the vector length
+ // suffixes.
+ if strings.HasPrefix(isaSet, "AVX512") {
+ isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "")
+ }
+
+ feat, ok := cpuFeatureMap[isaSet]
if !ok {
- imap := unknownFeatures[key]
+ imap := unknownFeatures[isaSet]
if imap == nil {
imap = make(map[string]struct{})
- unknownFeatures[key] = imap
+ unknownFeatures[isaSet] = imap
}
imap[inst.Opcode()] = struct{}{}
return "", false
@@ -783,45 +783,76 @@ func decodeCPUFeature(inst *xeddata.Inst) (string, bool) {
return feat, true
}
-var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$")
+var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$")
-type cpuFeatureKey struct {
- Extension, ISASet string
-}
-
-// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name
-// that can be used in the SIMD API.
-var cpuFeatureMap = map[cpuFeatureKey]string{
- {"SHA", "SHA"}: "SHA",
-
- {"AVX", ""}: "AVX",
- {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI",
- {"AVX2", ""}: "AVX2",
- {"AVXAES", ""}: "AVX, AES",
+// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature
+// name to expose in the SIMD feature check API.
+//
+// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags.
+var cpuFeatureMap = map[string]string{
+ "AVX": "AVX",
+ "AVX_VNNI": "AVXVNNI",
+ "AVX2": "AVX2",
+ "AVXAES": "AVXAES",
+ "SHA": "SHA",
+ "FMA": "FMA",
// AVX-512 foundational features. We combine all of these into one "AVX512" feature.
- {"AVX512EVEX", "AVX512F"}: "AVX512",
- {"AVX512EVEX", "AVX512CD"}: "AVX512",
- {"AVX512EVEX", "AVX512BW"}: "AVX512",
- {"AVX512EVEX", "AVX512DQ"}: "AVX512",
- // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by
- // the vector length suffix.
+ "AVX512F": "AVX512",
+ "AVX512BW": "AVX512",
+ "AVX512CD": "AVX512",
+ "AVX512DQ": "AVX512",
+ // AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is
+ // required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway.
// AVX-512 extension features
- {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG",
- {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI",
- {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2",
- {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI",
- {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI",
- {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ",
- {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES",
- {"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ",
+ "AVX512_BITALG": "AVX512BITALG",
+ "AVX512_GFNI": "AVX512GFNI",
+ "AVX512_VBMI": "AVX512VBMI",
+ "AVX512_VBMI2": "AVX512VBMI2",
+ "AVX512_VNNI": "AVX512VNNI",
+ "AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ",
+ "AVX512_VAES": "AVX512VAES",
+ "AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ",
// AVX 10.2 (not yet supported)
- {"AVX512EVEX", "AVX10_2_RC"}: "ignore",
+ "AVX10_2_RC": "ignore",
+}
+
+func init() {
+ // TODO: In general, Intel doesn't make any guarantees about what flags are
+ // set, so our feature checks need to verify the implied features listed
+ // below themselves, just to be sure.
+ var features = map[string]featureInfo{
+ "AVX2": {Implies: []string{"AVX"}},
+ "AVX512": {Implies: []string{"AVX2"}},
+
+ "AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}},
+ "FMA": {Implies: []string{"AVX"}},
+
+ // AVX-512 subfeatures.
+ "AVX512BITALG": {Implies: []string{"AVX512"}},
+ "AVX512GFNI": {Implies: []string{"AVX512"}},
+ "AVX512VBMI": {Implies: []string{"AVX512"}},
+ "AVX512VBMI2": {Implies: []string{"AVX512"}},
+ "AVX512VNNI": {Implies: []string{"AVX512"}},
+ "AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}},
+ "AVX512VAES": {Implies: []string{"AVX512"}},
+
+ // AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA
+ // instructions to VEX encoding, limited to 256 bit vectors. They're
+ // intended for lower end CPUs that want to support VNNI/IFMA without
+ // supporting AVX-512. As such, they're built on AVX2's VEX encoding.
+ "AVXVNNI": {Implies: []string{"AVX2"}},
+ "AVXIFMA": {Implies: []string{"AVX2"}},
+ }
+ registerFeatureInfo("amd64", goarchFeatures{
+ featureVar: "X86",
+ features: features,
+ })
}
-var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{}
+var unknownFeatures = map[string]map[string]struct{}{}
// hasOptionalMask returns whether there is an optional mask operand in ops.
func hasOptionalMask(ops []operand) bool {