diff options
Diffstat (limited to 'src/simd/archsimd')
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go | 2 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/gen_simdTypes.go | 97 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/gen_simdssa.go | 2 | ||||
| -rw-r--r-- | src/simd/archsimd/_gen/simdgen/xed.go | 125 | ||||
| -rw-r--r-- | src/simd/archsimd/cpu.go | 48 | ||||
| -rw-r--r-- | src/simd/archsimd/ops_amd64.go | 36 |
6 files changed, 232 insertions, 78 deletions
diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go b/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go index 3d99dd2a81..94b122ac39 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdMachineOps.go @@ -181,7 +181,7 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer { } hasMerging = gOp.hasMaskedMerging(maskType, shapeOut) if hasMerging && !resultInArg0 { - // We have to copy the slice here becasue the sort will be visible from other + // We have to copy the slice here because the sort will be visible from other // aliases when no reslicing is happening. newIn := make([]Operand, len(op.In), len(op.In)+1) copy(newIn, op.In) diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go index dd3a75eb44..4f1c70e211 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdTypes.go @@ -189,6 +189,7 @@ type X86Features struct {} var X86 X86Features {{range .}} +{{$f := .}} {{- if eq .Feature "AVX512"}} // {{.Feature}} returns whether the CPU supports the AVX512F+CD+BW+DQ+VL features. // @@ -199,11 +200,19 @@ var X86 X86Features {{- else -}} // {{.Feature}} returns whether the CPU supports the {{.Feature}} feature. {{- end}} +{{- if ne .ImpliesAll ""}} +// +// If it returns true, then the CPU also supports {{.ImpliesAll}}. +{{- end}} // // {{.Feature}} is defined on all GOARCHes, but will only return true on // GOARCH {{.GoArch}}. -func (X86Features) {{.Feature}}() bool { - return cpu.X86.Has{{.Feature}} +func ({{.FeatureVar}}Features) {{.Feature}}() bool { +{{- if .Virtual}} + return {{range $i, $dep := .Implies}}{{if $i}} && {{end}}cpu.{{$f.FeatureVar}}.Has{{$dep}}{{end}} +{{- else}} + return cpu.{{.FeatureVar}}.Has{{.Feature}} +{{- end}} } {{end}} ` @@ -591,6 +600,65 @@ func writeSIMDTypes(typeMap simdTypeMap) *bytes.Buffer { return buffer } +type goarchFeatures struct { + // featureVar is the name of the exported feature-check variable for this + // architecture. + featureVar string + + // features records per-feature information. + features map[string]featureInfo +} + +type featureInfo struct { + // Implies is a list of other CPU features that are required for this + // feature. These are allowed to chain. + // + // For example, if the Frob feature lists "Baz", then if X.Frob() returns + // true, it must also be true that the CPU has feature Baz. + Implies []string + + // Virtual means this feature is not represented directly in internal/cpu, + // but is instead the logical AND of the features in Implies. + Virtual bool +} + +// goarchFeatureInfo maps from GOARCH to CPU feature to additional information +// about that feature. Not all features need to be in this map. +var goarchFeatureInfo = make(map[string]goarchFeatures) + +func registerFeatureInfo(goArch string, features goarchFeatures) { + goarchFeatureInfo[goArch] = features +} + +func featureImplies(goarch string, base string) string { + // Compute the transitive closure of base. + var list []string + var visit func(f string) + visit = func(f string) { + list = append(list, f) + for _, dep := range goarchFeatureInfo[goarch].features[f].Implies { + visit(dep) + } + } + visit(base) + // Drop base + list = list[1:] + // Put in "nice" order + slices.Reverse(list) + // Combine into a comment-ready form + switch len(list) { + case 0: + return "" + case 1: + return list[0] + case 2: + return list[0] + " and " + list[1] + default: + list[len(list)-1] = "and " + list[len(list)-1] + return strings.Join(list, ", ") + } +} + func writeSIMDFeatures(ops []Operation) *bytes.Buffer { // Gather all features type featureKey struct { @@ -606,13 +674,36 @@ func writeSIMDFeatures(ops []Operation) *bytes.Buffer { featureSet[featureKey{op.GoArch, feature}] = struct{}{} } } - features := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { + featureKeys := slices.SortedFunc(maps.Keys(featureSet), func(a, b featureKey) int { if c := cmp.Compare(a.GoArch, b.GoArch); c != 0 { return c } return compareNatural(a.Feature, b.Feature) }) + // TODO: internal/cpu doesn't enforce these at all. You can even do + // GODEBUG=cpu.avx=off and it will happily turn off AVX without turning off + // AVX2. We need to push these dependencies into it somehow. + type feature struct { + featureKey + FeatureVar string + Virtual bool + Implies []string + ImpliesAll string + } + var features []feature + for _, k := range featureKeys { + featureVar := goarchFeatureInfo[k.GoArch].featureVar + fi := goarchFeatureInfo[k.GoArch].features[k.Feature] + features = append(features, feature{ + featureKey: k, + FeatureVar: featureVar, + Virtual: fi.Virtual, + Implies: fi.Implies, + ImpliesAll: featureImplies(k.GoArch, k.Feature), + }) + } + // If we ever have the same feature name on more than one GOARCH, we'll have // to be more careful about this. t := templateOf(simdFeaturesTemplate, "features") diff --git a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go index 876ffabe3d..96d096688f 100644 --- a/src/simd/archsimd/_gen/simdgen/gen_simdssa.go +++ b/src/simd/archsimd/_gen/simdgen/gen_simdssa.go @@ -133,7 +133,7 @@ func writeSIMDSSA(ops []Operation) *bytes.Buffer { if mem == NoMem && op.hasMaskedMerging(maskType, shapeOut) { regShapeMerging := regShape if shapeOut != OneVregOutAtIn { - // We have to copy the slice here becasue the sort will be visible from other + // We have to copy the slice here because the sort will be visible from other // aliases when no reslicing is happening. newIn := make([]Operand, len(op.In), len(op.In)+1) copy(newIn, op.In) diff --git a/src/simd/archsimd/_gen/simdgen/xed.go b/src/simd/archsimd/_gen/simdgen/xed.go index 4ba6738e7e..5d6fac64d0 100644 --- a/src/simd/archsimd/_gen/simdgen/xed.go +++ b/src/simd/archsimd/_gen/simdgen/xed.go @@ -5,7 +5,6 @@ package main import ( - "cmp" "fmt" "log" "maps" @@ -78,7 +77,7 @@ func loadXED(xedPath string) []*unify.Value { switch { case inst.RealOpcode == "N": return // Skip unstable instructions - case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA")): + case !(strings.HasPrefix(inst.Extension, "AVX") || strings.HasPrefix(inst.Extension, "SHA") || inst.Extension == "FMA"): // We're only interested in AVX and SHA instructions. return } @@ -210,16 +209,9 @@ func loadXED(xedPath string) []*unify.Value { } log.Printf("%d unhandled CPU features for %d instructions (use -v for details)", len(unknownFeatures), nInst) } else { - keys := slices.SortedFunc(maps.Keys(unknownFeatures), func(a, b cpuFeatureKey) int { - return cmp.Or(cmp.Compare(a.Extension, b.Extension), - cmp.Compare(a.ISASet, b.ISASet)) - }) + keys := slices.Sorted(maps.Keys(unknownFeatures)) for _, key := range keys { - if key.ISASet == "" || key.ISASet == key.Extension { - log.Printf("unhandled Extension %s", key.Extension) - } else { - log.Printf("unhandled Extension %s and ISASet %s", key.Extension, key.ISASet) - } + log.Printf("unhandled ISASet %s", key) log.Printf(" opcodes: %s", slices.Sorted(maps.Keys(unknownFeatures[key]))) } } @@ -763,16 +755,24 @@ func instToUVal1(inst *xeddata.Inst, ops []operand, feature string, variant inst // decodeCPUFeature returns the CPU feature name required by inst. These match // the names of the "Has*" feature checks in the simd package. func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { - key := cpuFeatureKey{ - Extension: inst.Extension, - ISASet: isaSetStrip.ReplaceAllLiteralString(inst.ISASet, ""), + isaSet := inst.ISASet + if isaSet == "" { + // Older instructions don't have an ISA set. Use their "extension" + // instead. + isaSet = inst.Extension } - feat, ok := cpuFeatureMap[key] + // We require AVX512VL to use AVX512 at all, so strip off the vector length + // suffixes. + if strings.HasPrefix(isaSet, "AVX512") { + isaSet = isaSetVL.ReplaceAllLiteralString(isaSet, "") + } + + feat, ok := cpuFeatureMap[isaSet] if !ok { - imap := unknownFeatures[key] + imap := unknownFeatures[isaSet] if imap == nil { imap = make(map[string]struct{}) - unknownFeatures[key] = imap + unknownFeatures[isaSet] = imap } imap[inst.Opcode()] = struct{}{} return "", false @@ -783,45 +783,76 @@ func decodeCPUFeature(inst *xeddata.Inst) (string, bool) { return feat, true } -var isaSetStrip = regexp.MustCompile("_(128N?|256N?|512)$") +var isaSetVL = regexp.MustCompile("_(128N?|256N?|512)$") -type cpuFeatureKey struct { - Extension, ISASet string -} - -// cpuFeatureMap maps from XED's "EXTENSION" and "ISA_SET" to a CPU feature name -// that can be used in the SIMD API. -var cpuFeatureMap = map[cpuFeatureKey]string{ - {"SHA", "SHA"}: "SHA", - - {"AVX", ""}: "AVX", - {"AVX_VNNI", "AVX_VNNI"}: "AVXVNNI", - {"AVX2", ""}: "AVX2", - {"AVXAES", ""}: "AVX, AES", +// cpuFeatureMap maps from XED's "ISA_SET" (or "EXTENSION") to a CPU feature +// name to expose in the SIMD feature check API. +// +// See XED's datafiles/*/cpuid.xed.txt for how ISA set names map to CPUID flags. +var cpuFeatureMap = map[string]string{ + "AVX": "AVX", + "AVX_VNNI": "AVXVNNI", + "AVX2": "AVX2", + "AVXAES": "AVXAES", + "SHA": "SHA", + "FMA": "FMA", // AVX-512 foundational features. We combine all of these into one "AVX512" feature. - {"AVX512EVEX", "AVX512F"}: "AVX512", - {"AVX512EVEX", "AVX512CD"}: "AVX512", - {"AVX512EVEX", "AVX512BW"}: "AVX512", - {"AVX512EVEX", "AVX512DQ"}: "AVX512", - // AVX512VL doesn't appear explicitly in the ISASet. I guess it's implied by - // the vector length suffix. + "AVX512F": "AVX512", + "AVX512BW": "AVX512", + "AVX512CD": "AVX512", + "AVX512DQ": "AVX512", + // AVX512VL doesn't appear as its own ISASet; instead, the CPUID flag is + // required by the *_128 and *_256 ISASets. We fold it into "AVX512" anyway. // AVX-512 extension features - {"AVX512EVEX", "AVX512_BITALG"}: "AVX512BITALG", - {"AVX512EVEX", "AVX512_GFNI"}: "AVX512GFNI", - {"AVX512EVEX", "AVX512_VBMI2"}: "AVX512VBMI2", - {"AVX512EVEX", "AVX512_VBMI"}: "AVX512VBMI", - {"AVX512EVEX", "AVX512_VNNI"}: "AVX512VNNI", - {"AVX512EVEX", "AVX512_VPOPCNTDQ"}: "AVX512VPOPCNTDQ", - {"AVX512EVEX", "AVX512_VAES"}: "AVX512VAES", - {"AVX512EVEX", "AVX512_VPCLMULQDQ"}: "AVX512VPCLMULQDQ", + "AVX512_BITALG": "AVX512BITALG", + "AVX512_GFNI": "AVX512GFNI", + "AVX512_VBMI": "AVX512VBMI", + "AVX512_VBMI2": "AVX512VBMI2", + "AVX512_VNNI": "AVX512VNNI", + "AVX512_VPOPCNTDQ": "AVX512VPOPCNTDQ", + "AVX512_VAES": "AVX512VAES", + "AVX512_VPCLMULQDQ": "AVX512VPCLMULQDQ", // AVX 10.2 (not yet supported) - {"AVX512EVEX", "AVX10_2_RC"}: "ignore", + "AVX10_2_RC": "ignore", +} + +func init() { + // TODO: In general, Intel doesn't make any guarantees about what flags are + // set, so this means our feature checks need to ensure these, just to be + // sure. + var features = map[string]featureInfo{ + "AVX2": {Implies: []string{"AVX"}}, + "AVX512": {Implies: []string{"AVX2"}}, + + "AVXAES": {Virtual: true, Implies: []string{"AVX", "AES"}}, + "FMA": {Implies: []string{"AVX"}}, + + // AVX-512 subfeatures. + "AVX512BITALG": {Implies: []string{"AVX512"}}, + "AVX512GFNI": {Implies: []string{"AVX512"}}, + "AVX512VBMI": {Implies: []string{"AVX512"}}, + "AVX512VBMI2": {Implies: []string{"AVX512"}}, + "AVX512VNNI": {Implies: []string{"AVX512"}}, + "AVX512VPOPCNTDQ": {Implies: []string{"AVX512"}}, + "AVX512VAES": {Implies: []string{"AVX512"}}, + + // AVX-VNNI and AVX-IFMA are "backports" of the AVX512-VNNI/IFMA + // instructions to VEX encoding, limited to 256 bit vectors. They're + // intended for lower end CPUs that want to support VNNI/IFMA without + // supporting AVX-512. As such, they're built on AVX2's VEX encoding. + "AVXVNNI": {Implies: []string{"AVX2"}}, + "AVXIFMA": {Implies: []string{"AVX2"}}, + } + registerFeatureInfo("amd64", goarchFeatures{ + featureVar: "X86", + features: features, + }) } -var unknownFeatures = map[cpuFeatureKey]map[string]struct{}{} +var unknownFeatures = map[string]map[string]struct{}{} // hasOptionalMask returns whether there is an optional mask operand in ops. func hasOptionalMask(ops []operand) bool { diff --git a/src/simd/archsimd/cpu.go b/src/simd/archsimd/cpu.go index d0c0ff5426..8069ee7f26 100644 --- a/src/simd/archsimd/cpu.go +++ b/src/simd/archsimd/cpu.go @@ -10,14 +10,6 @@ type X86Features struct{} var X86 X86Features -// AES returns whether the CPU supports the AES feature. -// -// AES is defined on all GOARCHes, but will only return true on -// GOARCH amd64. -func (X86Features) AES() bool { - return cpu.X86.HasAES -} - // AVX returns whether the CPU supports the AVX feature. // // AVX is defined on all GOARCHes, but will only return true on @@ -28,6 +20,8 @@ func (X86Features) AVX() bool { // AVX2 returns whether the CPU supports the AVX2 feature. // +// If it returns true, then the CPU also supports AVX. +// // AVX2 is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX2() bool { @@ -41,6 +35,8 @@ func (X86Features) AVX2() bool { // Nearly every CPU that has shipped with any support for AVX-512 has // supported all five of these features. // +// If it returns true, then the CPU also supports AVX and AVX2. +// // AVX512 is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512() bool { @@ -49,6 +45,8 @@ func (X86Features) AVX512() bool { // AVX512BITALG returns whether the CPU supports the AVX512BITALG feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512BITALG is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512BITALG() bool { @@ -57,6 +55,8 @@ func (X86Features) AVX512BITALG() bool { // AVX512GFNI returns whether the CPU supports the AVX512GFNI feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512GFNI is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512GFNI() bool { @@ -65,6 +65,8 @@ func (X86Features) AVX512GFNI() bool { // AVX512VAES returns whether the CPU supports the AVX512VAES feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512VAES is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512VAES() bool { @@ -73,6 +75,8 @@ func (X86Features) AVX512VAES() bool { // AVX512VBMI returns whether the CPU supports the AVX512VBMI feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512VBMI is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512VBMI() bool { @@ -81,6 +85,8 @@ func (X86Features) AVX512VBMI() bool { // AVX512VBMI2 returns whether the CPU supports the AVX512VBMI2 feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512VBMI2 is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512VBMI2() bool { @@ -89,6 +95,8 @@ func (X86Features) AVX512VBMI2() bool { // AVX512VNNI returns whether the CPU supports the AVX512VNNI feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512VNNI is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512VNNI() bool { @@ -105,20 +113,44 @@ func (X86Features) AVX512VPCLMULQDQ() bool { // AVX512VPOPCNTDQ returns whether the CPU supports the AVX512VPOPCNTDQ feature. // +// If it returns true, then the CPU also supports AVX, AVX2, and AVX512. +// // AVX512VPOPCNTDQ is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVX512VPOPCNTDQ() bool { return cpu.X86.HasAVX512VPOPCNTDQ } +// AVXAES returns whether the CPU supports the AVXAES feature. +// +// If it returns true, then the CPU also supports AES and AVX. +// +// AVXAES is defined on all GOARCHes, but will only return true on +// GOARCH amd64. +func (X86Features) AVXAES() bool { + return cpu.X86.HasAVX && cpu.X86.HasAES +} + // AVXVNNI returns whether the CPU supports the AVXVNNI feature. // +// If it returns true, then the CPU also supports AVX and AVX2. +// // AVXVNNI is defined on all GOARCHes, but will only return true on // GOARCH amd64. func (X86Features) AVXVNNI() bool { return cpu.X86.HasAVXVNNI } +// FMA returns whether the CPU supports the FMA feature. +// +// If it returns true, then the CPU also supports AVX. +// +// FMA is defined on all GOARCHes, but will only return true on +// GOARCH amd64. +func (X86Features) FMA() bool { + return cpu.X86.HasFMA +} + // SHA returns whether the CPU supports the SHA feature. // // SHA is defined on all GOARCHes, but will only return true on diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go index bb162c4ff9..ec50cc72c5 100644 --- a/src/simd/archsimd/ops_amd64.go +++ b/src/simd/archsimd/ops_amd64.go @@ -11,7 +11,7 @@ package archsimd // y is the chunk of dw array in use. // result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) // -// Asm: VAESDECLAST, CPU Feature: AVX, AES +// Asm: VAESDECLAST, CPU Feature: AVXAES func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16 // AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -37,7 +37,7 @@ func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64 // y is the chunk of dw array in use. // result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) // -// Asm: VAESDEC, CPU Feature: AVX, AES +// Asm: VAESDEC, CPU Feature: AVXAES func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16 // AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -63,7 +63,7 @@ func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64 // y is the chunk of w array in use. // result = AddRoundKey((ShiftRows(SubBytes(x))), y) // -// Asm: VAESENCLAST, CPU Feature: AVX, AES +// Asm: VAESENCLAST, CPU Feature: AVXAES func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16 // AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -89,7 +89,7 @@ func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64 // y is the chunk of w array in use. // result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) // -// Asm: VAESENC, CPU Feature: AVX, AES +// Asm: VAESENC, CPU Feature: AVXAES func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16 // AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. @@ -114,7 +114,7 @@ func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64 // x is the chunk of w array in use. // result = InvMixColumns(x) // -// Asm: VAESIMC, CPU Feature: AVX, AES +// Asm: VAESIMC, CPU Feature: AVXAES func (x Uint32x4) AESInvMixColumns() Uint32x4 /* AESRoundKeyGenAssist */ @@ -129,7 +129,7 @@ func (x Uint32x4) AESInvMixColumns() Uint32x4 // // rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table. // -// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES +// Asm: VAESKEYGENASSIST, CPU Feature: AVXAES func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4 /* Abs */ @@ -4088,12 +4088,12 @@ func (x Uint64x8) Mul(y Uint64x8) Uint64x8 // MulAdd performs a fused (x * y) + z. // -// Asm: VFMADD213PS, CPU Feature: AVX512 +// Asm: VFMADD213PS, CPU Feature: FMA func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4 // MulAdd performs a fused (x * y) + z. // -// Asm: VFMADD213PS, CPU Feature: AVX512 +// Asm: VFMADD213PS, CPU Feature: FMA func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8 // MulAdd performs a fused (x * y) + z. @@ -4103,12 +4103,12 @@ func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16 // MulAdd performs a fused (x * y) + z. // -// Asm: VFMADD213PD, CPU Feature: AVX512 +// Asm: VFMADD213PD, CPU Feature: FMA func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2 // MulAdd performs a fused (x * y) + z. // -// Asm: VFMADD213PD, CPU Feature: AVX512 +// Asm: VFMADD213PD, CPU Feature: FMA func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4 // MulAdd performs a fused (x * y) + z. @@ -4120,12 +4120,12 @@ func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8 // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 +// Asm: VFMADDSUB213PS, CPU Feature: FMA func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4 // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // -// Asm: VFMADDSUB213PS, CPU Feature: AVX512 +// Asm: VFMADDSUB213PS, CPU Feature: FMA func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8 // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. @@ -4135,12 +4135,12 @@ func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16 // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 +// Asm: VFMADDSUB213PD, CPU Feature: FMA func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2 // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. // -// Asm: VFMADDSUB213PD, CPU Feature: AVX512 +// Asm: VFMADDSUB213PD, CPU Feature: FMA func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4 // MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. @@ -4210,12 +4210,12 @@ func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 +// Asm: VFMSUBADD213PS, CPU Feature: FMA func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4 // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // -// Asm: VFMSUBADD213PS, CPU Feature: AVX512 +// Asm: VFMSUBADD213PS, CPU Feature: FMA func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8 // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. @@ -4225,12 +4225,12 @@ func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16 // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 +// Asm: VFMSUBADD213PD, CPU Feature: FMA func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2 // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. // -// Asm: VFMSUBADD213PD, CPU Feature: AVX512 +// Asm: VFMSUBADD213PD, CPU Feature: FMA func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4 // MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. |
