aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go72
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go324
-rw-r--r--src/cmd/compile/internal/ssa/rewriteAMD64.go736
-rw-r--r--src/simd/_gen/simdgen/gen_simdMachineOps.go4
-rw-r--r--src/simd/_gen/simdgen/gen_utility.go6
-rw-r--r--src/simd/_gen/simdgen/ops/Compares/categories.yaml12
-rw-r--r--src/simd/ops_amd64.go216
7 files changed, 642 insertions, 728 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index 82f5bfdb3f..0727f626fb 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -1231,18 +1231,18 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VSUBPSMasked512", argLength: 3, reg: w2kw, asm: "VSUBPS", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "SHA1RNDS4128", argLength: 2, reg: v21, asm: "SHA1RNDS4", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VAESKEYGENASSIST128", argLength: 1, reg: v11, asm: "VAESKEYGENASSIST", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
- {name: "VCMPPD128", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VCMPPD256", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VCMPPD512", argLength: 2, reg: w2k, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPDMasked128", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPDMasked256", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPDMasked512", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPSMasked128", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPSMasked256", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VCMPPSMasked512", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPD128", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VCMPPD256", argLength: 2, reg: v21, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VCMPPD512", argLength: 2, reg: w2k, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPDMasked128", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPDMasked256", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPDMasked512", argLength: 3, reg: w2kk, asm: "VCMPPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPS128", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VCMPPS256", argLength: 2, reg: v21, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VCMPPS512", argLength: 2, reg: w2k, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPSMasked128", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPSMasked256", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VCMPPSMasked512", argLength: 3, reg: w2kk, asm: "VCMPPS", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VEXTRACTF64X4256", argLength: 1, reg: w11, asm: "VEXTRACTF64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VEXTRACTF128128", argLength: 1, reg: v11, asm: "VEXTRACTF128", aux: "UInt8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VEXTRACTI64X4256", argLength: 1, reg: w11, asm: "VEXTRACTI64X4", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -1270,37 +1270,37 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPALIGNRMasked256", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPALIGNRMasked512", argLength: 3, reg: w2kw, asm: "VPALIGNR", aux: "UInt8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUBMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUQMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
- {name: "VPCMPWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "UInt8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPERM2F128256", argLength: 2, reg: v21, asm: "VPERM2F128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERM2I128256", argLength: 2, reg: v21, asm: "VPERM2I128", aux: "UInt8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPEXTRB128", argLength: 1, reg: wgp, asm: "VPEXTRB", aux: "UInt8", commutative: false, typ: "int8", resultInArg0: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 3e589f8738..a875ac69f1 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -38629,11 +38629,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPD128",
- auxType: auxUInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVCMPPD,
+ name: "VCMPPD128",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVCMPPD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38645,11 +38644,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPD256",
- auxType: auxUInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVCMPPD,
+ name: "VCMPPD256",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVCMPPD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38661,11 +38659,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPD512",
- auxType: auxUInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVCMPPD,
+ name: "VCMPPD512",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVCMPPD,
reg: regInfo{
inputs: []inputInfo{
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
@@ -38677,11 +38674,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPDMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVCMPPD,
+ name: "VCMPPDMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVCMPPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38694,11 +38690,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPDMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVCMPPD,
+ name: "VCMPPDMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVCMPPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38711,11 +38706,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPDMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVCMPPD,
+ name: "VCMPPDMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVCMPPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38728,11 +38722,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPS128",
- auxType: auxUInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVCMPPS,
+ name: "VCMPPS128",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVCMPPS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38744,11 +38737,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPS256",
- auxType: auxUInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVCMPPS,
+ name: "VCMPPS256",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVCMPPS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -38760,11 +38752,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPS512",
- auxType: auxUInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVCMPPS,
+ name: "VCMPPS512",
+ auxType: auxUInt8,
+ argLen: 2,
+ asm: x86.AVCMPPS,
reg: regInfo{
inputs: []inputInfo{
{0, 281474976645120}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
@@ -38776,11 +38767,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPSMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVCMPPS,
+ name: "VCMPPSMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVCMPPS,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38793,11 +38783,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPSMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVCMPPS,
+ name: "VCMPPSMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVCMPPS,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -38810,11 +38799,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VCMPPSMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVCMPPS,
+ name: "VCMPPSMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVCMPPS,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39237,11 +39225,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPBMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPB,
+ name: "VPCMPBMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPB,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39254,11 +39241,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPBMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPB,
+ name: "VPCMPBMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPB,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39271,11 +39257,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPBMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPB,
+ name: "VPCMPBMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPB,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39303,11 +39288,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPDMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPD,
+ name: "VPCMPDMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39320,11 +39304,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPDMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPD,
+ name: "VPCMPDMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39337,11 +39320,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPDMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPD,
+ name: "VPCMPDMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39369,11 +39351,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPQMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPQ,
+ name: "VPCMPQMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39386,11 +39367,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPQMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPQ,
+ name: "VPCMPQMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39403,11 +39383,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPQMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPQ,
+ name: "VPCMPQMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39435,11 +39414,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUBMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUB,
+ name: "VPCMPUBMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUB,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39452,11 +39430,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUBMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUB,
+ name: "VPCMPUBMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUB,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39469,11 +39446,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUBMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUB,
+ name: "VPCMPUBMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUB,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39501,11 +39477,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUDMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUD,
+ name: "VPCMPUDMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39518,11 +39493,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUDMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUD,
+ name: "VPCMPUDMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39535,11 +39509,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUDMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUD,
+ name: "VPCMPUDMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39567,11 +39540,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUQMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUQ,
+ name: "VPCMPUQMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39584,11 +39556,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUQMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUQ,
+ name: "VPCMPUQMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39601,11 +39572,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUQMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUQ,
+ name: "VPCMPUQMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39633,11 +39603,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUWMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUW,
+ name: "VPCMPUWMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39650,11 +39619,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUWMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUW,
+ name: "VPCMPUWMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39667,11 +39635,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUWMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUW,
+ name: "VPCMPUWMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39699,11 +39666,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPWMasked128",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPW,
+ name: "VPCMPWMasked128",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPW,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39716,11 +39682,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPWMasked256",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPW,
+ name: "VPCMPWMasked256",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPW,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -39733,11 +39698,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPWMasked512",
- auxType: auxUInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPW,
+ name: "VPCMPWMasked512",
+ auxType: auxUInt8,
+ argLen: 3,
+ asm: x86.AVPCMPW,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index a6fc4bfbaf..99956c56a0 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -28757,26 +28757,23 @@ func rewriteValueAMD64_OpAMD64VCMPPD512(v *Value) bool {
// result: (VCMPPD512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPD512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg3(x, ptr, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPD512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg3(x, ptr, mem)
+ return true
}
return false
}
@@ -28789,27 +28786,24 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked128(v *Value) bool {
// result: (VCMPPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload128 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload128 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPDMasked128load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -28822,27 +28816,24 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked256(v *Value) bool {
// result: (VCMPPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload256 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload256 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPDMasked256load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -28855,27 +28846,24 @@ func rewriteValueAMD64_OpAMD64VCMPPDMasked512(v *Value) bool {
// result: (VCMPPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPDMasked512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -28887,26 +28875,23 @@ func rewriteValueAMD64_OpAMD64VCMPPS512(v *Value) bool {
// result: (VCMPPS512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPS512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg3(x, ptr, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPS512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg3(x, ptr, mem)
+ return true
}
return false
}
@@ -28919,27 +28904,24 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked128(v *Value) bool {
// result: (VCMPPSMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload128 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPSMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload128 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPSMasked128load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -28952,27 +28934,24 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked256(v *Value) bool {
// result: (VCMPPSMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload256 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPSMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload256 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPSMasked256load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -28985,27 +28964,24 @@ func rewriteValueAMD64_OpAMD64VCMPPSMasked512(v *Value) bool {
// result: (VCMPPSMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VCMPPSMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VCMPPSMasked512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52600,27 +52576,24 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked128(v *Value) bool {
// result: (VPCMPDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload128 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload128 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPDMasked128load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52633,27 +52606,24 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked256(v *Value) bool {
// result: (VPCMPDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload256 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload256 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPDMasked256load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52666,27 +52636,24 @@ func rewriteValueAMD64_OpAMD64VPCMPDMasked512(v *Value) bool {
// result: (VPCMPDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPDMasked512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52841,27 +52808,24 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked128(v *Value) bool {
// result: (VPCMPQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload128 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload128 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPQMasked128load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52874,27 +52838,24 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked256(v *Value) bool {
// result: (VPCMPQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload256 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload256 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPQMasked256load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52907,27 +52868,24 @@ func rewriteValueAMD64_OpAMD64VPCMPQMasked512(v *Value) bool {
// result: (VPCMPQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPQMasked512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -52968,27 +52926,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked128(v *Value) bool {
// result: (VPCMPUDMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload128 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPUDMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload128 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPUDMasked128load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -53001,27 +52956,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked256(v *Value) bool {
// result: (VPCMPUDMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload256 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPUDMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload256 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPUDMasked256load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -53034,27 +52986,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUDMasked512(v *Value) bool {
// result: (VPCMPUDMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPUDMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPUDMasked512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -53095,27 +53044,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked128(v *Value) bool {
// result: (VPCMPUQMasked128load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload128 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPUQMasked128load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload128 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPUQMasked128load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -53128,27 +53074,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked256(v *Value) bool {
// result: (VPCMPUQMasked256load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload256 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPUQMasked256load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload256 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPUQMasked256load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
@@ -53161,27 +53104,24 @@ func rewriteValueAMD64_OpAMD64VPCMPUQMasked512(v *Value) bool {
// result: (VPCMPUQMasked512load {sym} [makeValAndOff(int32(int8(c)),off)] x ptr mask mem)
for {
c := auxIntToUint8(v.AuxInt)
- for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
- x := v_0
- l := v_1
- if l.Op != OpAMD64VMOVDQUload512 {
- continue
- }
- off := auxIntToInt32(l.AuxInt)
- sym := auxToSym(l.Aux)
- mem := l.Args[1]
- ptr := l.Args[0]
- mask := v_2
- if !(canMergeLoad(v, l) && clobber(l)) {
- continue
- }
- v.reset(OpAMD64VPCMPUQMasked512load)
- v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
- v.Aux = symToAux(sym)
- v.AddArg4(x, ptr, mask, mem)
- return true
+ x := v_0
+ l := v_1
+ if l.Op != OpAMD64VMOVDQUload512 {
+ break
}
- break
+ off := auxIntToInt32(l.AuxInt)
+ sym := auxToSym(l.Aux)
+ mem := l.Args[1]
+ ptr := l.Args[0]
+ mask := v_2
+ if !(canMergeLoad(v, l) && clobber(l)) {
+ break
+ }
+ v.reset(OpAMD64VPCMPUQMasked512load)
+ v.AuxInt = valAndOffToAuxInt(makeValAndOff(int32(int8(c)), off))
+ v.Aux = symToAux(sym)
+ v.AddArg4(x, ptr, mask, mem)
+ return true
}
return false
}
diff --git a/src/simd/_gen/simdgen/gen_simdMachineOps.go b/src/simd/_gen/simdgen/gen_simdMachineOps.go
index e8cf792d42..3d99dd2a81 100644
--- a/src/simd/_gen/simdgen/gen_simdMachineOps.go
+++ b/src/simd/_gen/simdgen/gen_simdMachineOps.go
@@ -98,6 +98,10 @@ func writeSIMDMachineOps(ops []Operation) *bytes.Buffer {
mOpOrder = append(mOpOrder, asm)
continue
}
+ if !op.Commutative && other.Commutative { // if there's a non-commutative version of the op, it wins.
+ best[asm] = op
+ continue
+ }
// see if "op" is better than "other"
if countOverrides(op.In)+countOverrides(op.Out) < countOverrides(other.In)+countOverrides(other.Out) {
best[asm] = op
diff --git a/src/simd/_gen/simdgen/gen_utility.go b/src/simd/_gen/simdgen/gen_utility.go
index 424ba41466..50dcdd2618 100644
--- a/src/simd/_gen/simdgen/gen_utility.go
+++ b/src/simd/_gen/simdgen/gen_utility.go
@@ -666,6 +666,12 @@ func dedupGodef(ops []Operation) ([]Operation, error) {
if i.MemFeatures == nil && j.MemFeatures != nil {
return 1
}
+ if i.Commutative != j.Commutative {
+ if j.Commutative {
+ return -1
+ }
+ return 1
+ }
// Their order does not matter anymore, at least for now.
return 0
})
diff --git a/src/simd/_gen/simdgen/ops/Compares/categories.yaml b/src/simd/_gen/simdgen/ops/Compares/categories.yaml
index aa07ade27e..4b639d7a34 100644
--- a/src/simd/_gen/simdgen/ops/Compares/categories.yaml
+++ b/src/simd/_gen/simdgen/ops/Compares/categories.yaml
@@ -10,17 +10,17 @@
constImm: 0
commutative: true
documentation: !string |-
- // NAME compares for equality.
+ // NAME returns x equals y, elementwise.
- go: Less
constImm: 1
commutative: false
documentation: !string |-
- // NAME compares for less than.
+ // NAME returns x less-than y, elementwise.
- go: LessEqual
constImm: 2
commutative: false
documentation: !string |-
- // NAME compares for less than or equal.
+ // NAME returns x less-than-or-equals y, elementwise.
- go: IsNan # For float only.
constImm: 3
commutative: true
@@ -30,14 +30,14 @@
constImm: 4
commutative: true
documentation: !string |-
- // NAME compares for inequality.
+ // NAME returns x not-equals y, elementwise.
- go: GreaterEqual
constImm: 13
commutative: false
documentation: !string |-
- // NAME compares for greater than or equal.
+ // NAME returns x greater-than-or-equals y, elementwise.
- go: Greater
constImm: 14
commutative: false
documentation: !string |-
- // NAME compares for greater than.
+ // NAME returns x greater-than y, elementwise.
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go
index 7280e873a0..82774e05ad 100644
--- a/src/simd/ops_amd64.go
+++ b/src/simd/ops_amd64.go
@@ -2022,152 +2022,152 @@ func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16
/* Equal */
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Int8x16) Equal(y Int8x16) Mask8x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Int8x32) Equal(y Int8x32) Mask8x32
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func (x Int8x64) Equal(y Int8x64) Mask8x64
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Int16x8) Equal(y Int16x8) Mask16x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Int16x16) Equal(y Int16x16) Mask16x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func (x Int16x32) Equal(y Int16x32) Mask16x32
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Int32x4) Equal(y Int32x4) Mask32x4
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Int32x8) Equal(y Int32x8) Mask32x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func (x Int32x16) Equal(y Int32x16) Mask32x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Int64x2) Equal(y Int64x2) Mask64x2
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Int64x4) Equal(y Int64x4) Mask64x4
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func (x Int64x8) Equal(y Int64x8) Mask64x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQB, CPU Feature: AVX
func (x Uint8x16) Equal(y Uint8x16) Mask8x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQB, CPU Feature: AVX2
func (x Uint8x32) Equal(y Uint8x32) Mask8x32
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQB, CPU Feature: AVX512
func (x Uint8x64) Equal(y Uint8x64) Mask8x64
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQW, CPU Feature: AVX
func (x Uint16x8) Equal(y Uint16x8) Mask16x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQW, CPU Feature: AVX2
func (x Uint16x16) Equal(y Uint16x16) Mask16x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQW, CPU Feature: AVX512
func (x Uint16x32) Equal(y Uint16x32) Mask16x32
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQD, CPU Feature: AVX
func (x Uint32x4) Equal(y Uint32x4) Mask32x4
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQD, CPU Feature: AVX2
func (x Uint32x8) Equal(y Uint32x8) Mask32x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQD, CPU Feature: AVX512
func (x Uint32x16) Equal(y Uint32x16) Mask32x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQQ, CPU Feature: AVX
func (x Uint64x2) Equal(y Uint64x2) Mask64x2
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQQ, CPU Feature: AVX2
func (x Uint64x4) Equal(y Uint64x4) Mask64x4
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VPCMPEQQ, CPU Feature: AVX512
func (x Uint64x8) Equal(y Uint64x8) Mask64x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Equal(y Float32x4) Mask32x4
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Equal(y Float32x8) Mask32x8
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Equal(y Float32x16) Mask32x16
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Equal(y Float64x2) Mask64x2
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Equal(y Float64x4) Mask64x4
-// Equal compares for equality.
+// Equal returns x equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Equal(y Float64x8) Mask64x8
@@ -3081,184 +3081,184 @@ func (x Uint64x8) GetLo() Uint64x4
/* Greater */
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTB, CPU Feature: AVX
func (x Int8x16) Greater(y Int8x16) Mask8x16
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTB, CPU Feature: AVX2
func (x Int8x32) Greater(y Int8x32) Mask8x32
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTB, CPU Feature: AVX512
func (x Int8x64) Greater(y Int8x64) Mask8x64
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTW, CPU Feature: AVX
func (x Int16x8) Greater(y Int16x8) Mask16x8
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTW, CPU Feature: AVX2
func (x Int16x16) Greater(y Int16x16) Mask16x16
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTW, CPU Feature: AVX512
func (x Int16x32) Greater(y Int16x32) Mask16x32
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTD, CPU Feature: AVX
func (x Int32x4) Greater(y Int32x4) Mask32x4
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTD, CPU Feature: AVX2
func (x Int32x8) Greater(y Int32x8) Mask32x8
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTD, CPU Feature: AVX512
func (x Int32x16) Greater(y Int32x16) Mask32x16
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTQ, CPU Feature: AVX
func (x Int64x2) Greater(y Int64x2) Mask64x2
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTQ, CPU Feature: AVX2
func (x Int64x4) Greater(y Int64x4) Mask64x4
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPGTQ, CPU Feature: AVX512
func (x Int64x8) Greater(y Int64x8) Mask64x8
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Greater(y Float32x4) Mask32x4
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Greater(y Float32x8) Mask32x8
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Greater(y Float32x16) Mask32x16
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Greater(y Float64x2) Mask64x2
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Greater(y Float64x4) Mask64x4
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Greater(y Float64x8) Mask64x8
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Greater(y Uint8x64) Mask8x64
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Greater(y Uint16x32) Mask16x32
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Greater(y Uint32x16) Mask32x16
-// Greater compares for greater than.
+// Greater returns x greater-than y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) Greater(y Uint64x8) Mask64x8
/* GreaterEqual */
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16
-// GreaterEqual compares for greater than or equal.
+// GreaterEqual returns x greater-than-or-equals y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8
@@ -3547,144 +3547,144 @@ func (x Uint64x8) LeadingZeros() Uint64x8
/* Less */
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) Less(y Float32x4) Mask32x4
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) Less(y Float32x8) Mask32x8
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) Less(y Float32x16) Mask32x16
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) Less(y Float64x2) Mask64x2
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) Less(y Float64x4) Mask64x4
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) Less(y Float64x8) Mask64x8
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) Less(y Int8x64) Mask8x64
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) Less(y Int16x32) Mask16x32
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) Less(y Int32x16) Mask32x16
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) Less(y Int64x8) Mask64x8
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) Less(y Uint8x64) Mask8x64
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) Less(y Uint16x32) Mask16x32
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) Less(y Uint32x16) Mask32x16
-// Less compares for less than.
+// Less returns x less-than y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) Less(y Uint64x8) Mask64x8
/* LessEqual */
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) LessEqual(y Float32x4) Mask32x4
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) LessEqual(y Float32x8) Mask32x8
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) LessEqual(y Float32x16) Mask32x16
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) LessEqual(y Float64x2) Mask64x2
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) LessEqual(y Float64x4) Mask64x4
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) LessEqual(y Float64x8) Mask64x8
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) LessEqual(y Int8x64) Mask8x64
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) LessEqual(y Int16x32) Mask16x32
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) LessEqual(y Int32x16) Mask32x16
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) LessEqual(y Int64x8) Mask64x8
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16
-// LessEqual compares for less than or equal.
+// LessEqual returns x less-than-or-equals y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8
@@ -4271,72 +4271,72 @@ func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8
/* NotEqual */
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x4) NotEqual(y Float32x4) Mask32x4
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX
func (x Float32x8) NotEqual(y Float32x8) Mask32x8
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPS, CPU Feature: AVX512
func (x Float32x16) NotEqual(y Float32x16) Mask32x16
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x2) NotEqual(y Float64x2) Mask64x2
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX
func (x Float64x4) NotEqual(y Float64x4) Mask64x4
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VCMPPD, CPU Feature: AVX512
func (x Float64x8) NotEqual(y Float64x8) Mask64x8
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPB, CPU Feature: AVX512
func (x Int8x64) NotEqual(y Int8x64) Mask8x64
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPW, CPU Feature: AVX512
func (x Int16x32) NotEqual(y Int16x32) Mask16x32
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPD, CPU Feature: AVX512
func (x Int32x16) NotEqual(y Int32x16) Mask32x16
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPQ, CPU Feature: AVX512
func (x Int64x8) NotEqual(y Int64x8) Mask64x8
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUB, CPU Feature: AVX512
func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUW, CPU Feature: AVX512
func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUD, CPU Feature: AVX512
func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16
-// NotEqual compares for inequality.
+// NotEqual returns x not-equals y, elementwise.
//
// Asm: VPCMPUQ, CPU Feature: AVX512
func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8