diff options
Diffstat (limited to 'src/simd')
| -rw-r--r-- | src/simd/_gen/simdgen/gen_utility.go | 16 | ||||
| -rw-r--r-- | src/simd/_gen/simdgen/xed.go | 33 |
2 files changed, 37 insertions, 12 deletions
diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/_gen/simdgen/gen_utility.go index 3fb1edfab4..78a214783b 100644 --- a/src/simd/_gen/simdgen/gen_utility.go +++ b/src/simd/_gen/simdgen/gen_utility.go @@ -632,7 +632,21 @@ func dedupGodef(ops []Operation) ([]Operation, error) { if isAVX512(i) && !isAVX512(j) { return 1 } - return strings.Compare(i.CPUFeature, j.CPUFeature) + if i.CPUFeature != j.CPUFeature { + return strings.Compare(i.CPUFeature, j.CPUFeature) + } + // Weirdly Intel sometimes has duplicated definitions for the same instruction, + // this confuses the XED mem-op merge logic: [MemFeature] will only be attached to an instruction + // for only once, which means that for essentially duplicated instructions only one will have the + // proper [MemFeature] set. We have to make this sort deterministic for [MemFeature]. + if i.MemFeatures != nil && j.MemFeatures == nil { + return -1 + } + if i.MemFeatures == nil && j.MemFeatures != nil { + return 1 + } + // Their order does not matter anymore, at least for now. + return 0 }) } deduped = append(deduped, dup[0]) diff --git a/src/simd/_gen/simdgen/xed.go b/src/simd/_gen/simdgen/xed.go index 411c8bcf5c..e521f0c8d4 100644 --- a/src/simd/_gen/simdgen/xed.go +++ b/src/simd/_gen/simdgen/xed.go @@ -9,6 +9,7 @@ import ( "fmt" "log" "maps" + "reflect" "regexp" "slices" "strconv" @@ -137,14 +138,24 @@ func loadXED(xedPath string) []*unify.Value { } if len(o.ops) == len(m.ops) { for j := range o.ops { - v1, ok3 := o.ops[j].(operandVReg) - v2, ok4 := m.ops[j].(operandVReg) - if !ok3 || !ok4 { - continue - } - if v1.vecShape != v2.vecShape { - // A mismatch, skip this memOp - continue outer + if reflect.TypeOf(o.ops[j]) == reflect.TypeOf(m.ops[j]) { + v1, ok3 := o.ops[j].(operandVReg) + v2, _ := m.ops[j].(operandVReg) + if !ok3 { + continue + } + if v1.vecShape != v2.vecShape { + // A mismatch, skip this memOp + continue outer + } + } else { + _, ok3 := o.ops[j].(operandVReg) + _, ok4 := m.ops[j].(operandMem) + // The only difference must be the vreg and mem, no other cases. + if !ok3 || !ok4 { + // A mismatch, skip this memOp + continue outer + } } } // Found a match, break early @@ -155,10 +166,10 @@ func loadXED(xedPath string) []*unify.Value { // Remove the match from memOps, it's now merged to this pure vreg operation if matchIdx != -1 { memOps[opcode] = append(memOps[opcode][:matchIdx], memOps[opcode][matchIdx+1:]...) + // Merge is done by adding a new field + // Right now we only have vbcst + addFields["memFeatures"] = "vbcst" } - // Merge is done by adding a new field - // Right now we only have vbcst - addFields["memFeatures"] = "vbcst" } } appendDefs(o.inst, o.ops, addFields) |
