From 443b7aeddb82d90345b8e7c8a4ef7c145dac7ce4 Mon Sep 17 00:00:00 2001 From: Junyang Shao Date: Fri, 12 Sep 2025 18:45:39 +0000 Subject: [dev.simd] cmd/compile, simd/_gen: make rewrite rules consistent on CPU Features The previous CL left a bug in the xed parser so that the generator can generate rules rewriting an AVX instruction to AVX512 instruction. This CL fixes that. Change-Id: I0df7e7dc6c936ce7add24a757ce7f44a15917fef Reviewed-on: https://go-review.googlesource.com/c/go/+/703399 Reviewed-by: David Chase LUCI-TryBot-Result: Go LUCI --- src/simd/_gen/simdgen/gen_utility.go | 16 +++++++++++++++- src/simd/_gen/simdgen/xed.go | 33 ++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'src/simd') diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/_gen/simdgen/gen_utility.go index 3fb1edfab4..78a214783b 100644 --- a/src/simd/_gen/simdgen/gen_utility.go +++ b/src/simd/_gen/simdgen/gen_utility.go @@ -632,7 +632,21 @@ func dedupGodef(ops []Operation) ([]Operation, error) { if isAVX512(i) && !isAVX512(j) { return 1 } - return strings.Compare(i.CPUFeature, j.CPUFeature) + if i.CPUFeature != j.CPUFeature { + return strings.Compare(i.CPUFeature, j.CPUFeature) + } + // Weirdly Intel sometimes has duplicated definitions for the same instruction, + // this confuses the XED mem-op merge logic: [MemFeature] will only be attached to an instruction + // for only once, which means that for essentially duplicated instructions only one will have the + // proper [MemFeature] set. We have to make this sort deterministic for [MemFeature]. + if i.MemFeatures != nil && j.MemFeatures == nil { + return -1 + } + if i.MemFeatures == nil && j.MemFeatures != nil { + return 1 + } + // Their order does not matter anymore, at least for now. + return 0 }) } deduped = append(deduped, dup[0]) diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/_gen/simdgen/xed.go index 411c8bcf5c..e521f0c8d4 100644 --- a/src/simd/_gen/simdgen/xed.go +++ b/src/simd/_gen/simdgen/xed.go @@ -9,6 +9,7 @@ import ( "fmt" "log" "maps" + "reflect" "regexp" "slices" "strconv" @@ -137,14 +138,24 @@ func loadXED(xedPath string) []*unify.Value { } if len(o.ops) == len(m.ops) { for j := range o.ops { - v1, ok3 := o.ops[j].(operandVReg) - v2, ok4 := m.ops[j].(operandVReg) - if !ok3 || !ok4 { - continue - } - if v1.vecShape != v2.vecShape { - // A mismatch, skip this memOp - continue outer + if reflect.TypeOf(o.ops[j]) == reflect.TypeOf(m.ops[j]) { + v1, ok3 := o.ops[j].(operandVReg) + v2, _ := m.ops[j].(operandVReg) + if !ok3 { + continue + } + if v1.vecShape != v2.vecShape { + // A mismatch, skip this memOp + continue outer + } + } else { + _, ok3 := o.ops[j].(operandVReg) + _, ok4 := m.ops[j].(operandMem) + // The only difference must be the vreg and mem, no other cases. + if !ok3 || !ok4 { + // A mismatch, skip this memOp + continue outer + } } } // Found a match, break early @@ -155,10 +166,10 @@ func loadXED(xedPath string) []*unify.Value { // Remove the match from memOps, it's now merged to this pure vreg operation if matchIdx != -1 { memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...) + // Merge is done by adding a new field + // Right now we only have vbcst + addFields["memFeatures"] = "vbcst" } - // Merge is done by adding a new field - // Right now we only have vbcst - addFields["memFeatures"] = "vbcst" } } appendDefs(o.inst, o.ops, addFields) -- cgit v1.3