aboutsummaryrefslogtreecommitdiff
path: root/src/simd
diff options
context:
space:
mode:
authorJunyang Shao <shaojunyang@google.com>2025-09-12 18:45:39 +0000
committerJunyang Shao <shaojunyang@google.com>2025-09-18 11:06:42 -0700
commit443b7aeddb82d90345b8e7c8a4ef7c145dac7ce4 (patch)
tree8c7f84f776a079a0745a22d7cd0bca1ef056c9a8 /src/simd
parentbdd30e25caa0b69e335ba1f1f48566924850fa4b (diff)
downloadgo-443b7aeddb82d90345b8e7c8a4ef7c145dac7ce4.tar.xz
[dev.simd] cmd/compile, simd/_gen: make rewrite rules consistent on CPU Features
The previous CL left a bug in the xed parser so that the generator can generate rules rewriting an AVX instruction to AVX512 instruction. This CL fixes that. Change-Id: I0df7e7dc6c936ce7add24a757ce7f44a15917fef Reviewed-on: https://go-review.googlesource.com/c/go/+/703399 Reviewed-by: David Chase <drchase@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/simd')
-rw-r--r--src/simd/_gen/simdgen/gen_utility.go16
-rw-r--r--src/simd/_gen/simdgen/xed.go33
2 files changed, 37 insertions, 12 deletions
diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/_gen/simdgen/gen_utility.go
index 3fb1edfab4..78a214783b 100644
--- a/src/simd/_gen/simdgen/gen_utility.go
+++ b/src/simd/_gen/simdgen/gen_utility.go
@@ -632,7 +632,21 @@ func dedupGodef(ops []Operation) ([]Operation, error) {
if isAVX512(i) && !isAVX512(j) {
return 1
}
- return strings.Compare(i.CPUFeature, j.CPUFeature)
+ if i.CPUFeature != j.CPUFeature {
+ return strings.Compare(i.CPUFeature, j.CPUFeature)
+ }
+ // Weirdly Intel sometimes has duplicated definitions for the same instruction,
+ // this confuses the XED mem-op merge logic: [MemFeature] will only be attached to an instruction
+ // for only once, which means that for essentially duplicated instructions only one will have the
+ // proper [MemFeature] set. We have to make this sort deterministic for [MemFeature].
+ if i.MemFeatures != nil && j.MemFeatures == nil {
+ return -1
+ }
+ if i.MemFeatures == nil && j.MemFeatures != nil {
+ return 1
+ }
+ // Their order does not matter anymore, at least for now.
+ return 0
})
}
deduped = append(deduped, dup[0])
diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/_gen/simdgen/xed.go
index 411c8bcf5c..e521f0c8d4 100644
--- a/src/simd/_gen/simdgen/xed.go
+++ b/src/simd/_gen/simdgen/xed.go
@@ -9,6 +9,7 @@ import (
"fmt"
"log"
"maps"
+ "reflect"
"regexp"
"slices"
"strconv"
@@ -137,14 +138,24 @@ func loadXED(xedPath string) []*unify.Value {
}
if len(o.ops) == len(m.ops) {
for j := range o.ops {
- v1, ok3 := o.ops[j].(operandVReg)
- v2, ok4 := m.ops[j].(operandVReg)
- if !ok3 || !ok4 {
- continue
- }
- if v1.vecShape != v2.vecShape {
- // A mismatch, skip this memOp
- continue outer
+ if reflect.TypeOf(o.ops[j]) == reflect.TypeOf(m.ops[j]) {
+ v1, ok3 := o.ops[j].(operandVReg)
+ v2, _ := m.ops[j].(operandVReg)
+ if !ok3 {
+ continue
+ }
+ if v1.vecShape != v2.vecShape {
+ // A mismatch, skip this memOp
+ continue outer
+ }
+ } else {
+ _, ok3 := o.ops[j].(operandVReg)
+ _, ok4 := m.ops[j].(operandMem)
+ // The only difference must be the vreg and mem, no other cases.
+ if !ok3 || !ok4 {
+ // A mismatch, skip this memOp
+ continue outer
+ }
}
}
// Found a match, break early
@@ -155,10 +166,10 @@ func loadXED(xedPath string) []*unify.Value {
// Remove the match from memOps, it's now merged to this pure vreg operation
if matchIdx != -1 {
memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...)
+ // Merge is done by adding a new field
+ // Right now we only have vbcst
+ addFields["memFeatures"] = "vbcst"
}
- // Merge is done by adding a new field
- // Right now we only have vbcst
- addFields["memFeatures"] = "vbcst"
}
}
appendDefs(o.inst, o.ops, addFields)