about | summary | refs | log | tree | commit | diff
path: root/src/cmd/compile
diff options
context:
space:
mode:
author Junyang Shao <shaojunyang@google.com> 2025-07-23 07:37:14 +0000
committer Junyang Shao <shaojunyang@google.com> 2025-07-31 15:44:39 -0700
commit 6f7a1164e797f694c535ebf5f2c9722845a732cd (patch)
tree 568f0485bd9727cef24c2b2735665a1a34df1cad /src/cmd/compile
parent 41054cdb1cd9f2a7400668d385ec1a030d90389c (diff)
download go-6f7a1164e797f694c535ebf5f2c9722845a732cd.tar.xz
[dev.simd] cmd/compile, simd: support store to bits for mask
This CL is partially generated by CL 689775.

Change-Id: I0c36fd2a44706c88db1a1d5ea4a6d0b9f891d85f
Reviewed-on: https://go-review.googlesource.com/c/go/+/689795
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/compile')
-rw-r--r-- src/cmd/compile/internal/amd64/simdssa.go 32
-rw-r--r-- src/cmd/compile/internal/amd64/ssa.go 4
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/AMD64.rules 16
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/AMD64Ops.go 4
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/genericOps.go 13
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdAMD64.rules 28
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go 44
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdgenericOps.go 34
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go 635
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteAMD64.go 316
-rw-r--r-- src/cmd/compile/internal/ssagen/intrinsics.go 17
-rw-r--r-- src/cmd/compile/internal/ssagen/simdintrinsics.go 36
12 files changed, 856 insertions, 323 deletions
diff --git a/src/cmd/compile/internal/amd64/simdssa.go b/src/cmd/compile/internal/amd64/simdssa.go
index 67179ef12d..f374cd25d0 100644
--- a/src/cmd/compile/internal/amd64/simdssa.go
+++ b/src/cmd/compile/internal/amd64/simdssa.go
@@ -24,8 +24,8 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPABSQ128,
ssa.OpAMD64VPABSQ256,
ssa.OpAMD64VPABSQ512,
- ssa.OpAMD64VRCP14PS128,
- ssa.OpAMD64VRCP14PS256,
+ ssa.OpAMD64VRCPPS128,
+ ssa.OpAMD64VRCPPS256,
ssa.OpAMD64VRCP14PS512,
ssa.OpAMD64VRCP14PD128,
ssa.OpAMD64VRCP14PD256,
@@ -335,6 +335,16 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
ssa.OpAMD64VPXORQ512:
p = simdV21(s, v)
+ case ssa.OpAMD64VPCMPEQB512,
+ ssa.OpAMD64VPCMPEQW512,
+ ssa.OpAMD64VPCMPEQD512,
+ ssa.OpAMD64VPCMPEQQ512,
+ ssa.OpAMD64VPCMPGTB512,
+ ssa.OpAMD64VPCMPGTW512,
+ ssa.OpAMD64VPCMPGTD512,
+ ssa.OpAMD64VPCMPGTQ512:
+ p = simdV2k(s, v)
+
case ssa.OpAMD64VADDPSMasked128,
ssa.OpAMD64VADDPSMasked256,
ssa.OpAMD64VADDPSMasked512,
@@ -733,30 +743,30 @@ func ssaGenSIMDValue(s *ssagen.State, v *ssa.Value) bool {
case ssa.OpAMD64VCMPPS512,
ssa.OpAMD64VCMPPD512,
- ssa.OpAMD64VPCMPB512,
- ssa.OpAMD64VPCMPW512,
- ssa.OpAMD64VPCMPD512,
- ssa.OpAMD64VPCMPQ512,
- ssa.OpAMD64VPCMPUB512,
- ssa.OpAMD64VPCMPUW512,
- ssa.OpAMD64VPCMPUD512,
- ssa.OpAMD64VPCMPUQ512,
ssa.OpAMD64VPCMPUB128,
ssa.OpAMD64VPCMPUB256,
+ ssa.OpAMD64VPCMPUB512,
ssa.OpAMD64VPCMPUW128,
ssa.OpAMD64VPCMPUW256,
+ ssa.OpAMD64VPCMPUW512,
ssa.OpAMD64VPCMPUD128,
ssa.OpAMD64VPCMPUD256,
+ ssa.OpAMD64VPCMPUD512,
ssa.OpAMD64VPCMPUQ128,
ssa.OpAMD64VPCMPUQ256,
+ ssa.OpAMD64VPCMPUQ512,
ssa.OpAMD64VPCMPB128,
ssa.OpAMD64VPCMPB256,
+ ssa.OpAMD64VPCMPB512,
ssa.OpAMD64VPCMPW128,
ssa.OpAMD64VPCMPW256,
+ ssa.OpAMD64VPCMPW512,
ssa.OpAMD64VPCMPD128,
ssa.OpAMD64VPCMPD256,
+ ssa.OpAMD64VPCMPD512,
ssa.OpAMD64VPCMPQ128,
- ssa.OpAMD64VPCMPQ256:
+ ssa.OpAMD64VPCMPQ256,
+ ssa.OpAMD64VPCMPQ512:
p = simdV2kImm8(s, v)
case ssa.OpAMD64VCMPPSMasked128,
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 0fafd69f54..7338c16cda 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -1468,10 +1468,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssagen.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = simdOrMaskReg(v)
- case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512:
+ case ssa.OpAMD64VMOVDQUstore128, ssa.OpAMD64VMOVDQUstore256, ssa.OpAMD64VMOVDQUstore512, ssa.OpAMD64KMOVQstore:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
- p.From.Reg = simdReg(v.Args[1])
+ p.From.Reg = simdOrMaskReg(v.Args[1])
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
ssagen.AddAux(&p.To, v)
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index bb7513795d..5a21c95df9 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1698,6 +1698,22 @@
(LoadMask64x4 <t> ptr mem) => (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
(LoadMask64x8 <t> ptr mem) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
+(StoreMask8x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
+(StoreMask8x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
+(StoreMask8x64 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
+
+(StoreMask16x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
+(StoreMask16x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
+(StoreMask16x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
+
+(StoreMask32x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
+(StoreMask32x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
+(StoreMask32x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
+
+(StoreMask64x2 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
+(StoreMask64x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
+(StoreMask64x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
+
(Load <t> ptr mem) && t.Size() == 16 => (VMOVDQUload128 ptr mem)
(Store {t} ptr val mem) && t.Size() == 16 => (VMOVDQUstore128 ptr val mem)
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
index ec335f67f8..cd4b5b2a06 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go
@@ -234,7 +234,8 @@ func init() {
wfpw = regInfo{inputs: []regMask{w, fp}, outputs: wonly}
wfpkw = regInfo{inputs: []regMask{w, fp, mask}, outputs: wonly}
- kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
+ kload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: maskonly}
+ kstore = regInfo{inputs: []regMask{gpspsb, mask, 0}}
prefreg = regInfo{inputs: []regMask{gpspsbg}}
)
@@ -1318,6 +1319,7 @@ func init() {
{name: "VZEROALL", argLength: 0, asm: "VZEROALL"},
{name: "KMOVQload", argLength: 2, reg: kload, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Read"},
+ {name: "KMOVQstore", argLength: 3, reg: kstore, asm: "KMOVQ", aux: "SymOff", faultOnNilArg0: true, symEffect: "Write"},
}
var AMD64blocks = []blockData{
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 6257396a6f..716fe9b881 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -678,6 +678,19 @@ var genericOps = []opData{
{name: "LoadMask64x2", argLength: 2}, // arg0 = ptr, arg1 = mem
{name: "LoadMask64x4", argLength: 2}, // arg0 = ptr, arg1 = mem
{name: "LoadMask64x8", argLength: 2}, // arg0 = ptr, arg1 = mem
+
+ {name: "StoreMask8x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask8x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask8x64", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask16x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask16x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask16x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask32x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask32x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask32x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask64x2", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask64x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "StoreMask64x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
}
// kind controls successors implicit exit
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index e5f17bdb1b..fb153acf66 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -152,8 +152,8 @@
(AndNotMaskedUint64x2 x y mask) => (VPANDNQMasked128 x y (VPMOVVec64x2ToM <types.TypeMask> mask))
(AndNotMaskedUint64x4 x y mask) => (VPANDNQMasked256 x y (VPMOVVec64x4ToM <types.TypeMask> mask))
(AndNotMaskedUint64x8 x y mask) => (VPANDNQMasked512 x y (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ApproximateReciprocalFloat32x4 ...) => (VRCP14PS128 ...)
-(ApproximateReciprocalFloat32x8 ...) => (VRCP14PS256 ...)
+(ApproximateReciprocalFloat32x4 ...) => (VRCPPS128 ...)
+(ApproximateReciprocalFloat32x8 ...) => (VRCPPS256 ...)
(ApproximateReciprocalFloat32x16 ...) => (VRCP14PS512 ...)
(ApproximateReciprocalFloat64x2 ...) => (VRCP14PD128 ...)
(ApproximateReciprocalFloat64x4 ...) => (VRCP14PD256 ...)
@@ -305,28 +305,28 @@
(EqualFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [0] x y))
(EqualInt8x16 ...) => (VPCMPEQB128 ...)
(EqualInt8x32 ...) => (VPCMPEQB256 ...)
-(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [0] x y))
+(EqualInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPEQB512 x y))
(EqualInt16x8 ...) => (VPCMPEQW128 ...)
(EqualInt16x16 ...) => (VPCMPEQW256 ...)
-(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [0] x y))
+(EqualInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))
(EqualInt32x4 ...) => (VPCMPEQD128 ...)
(EqualInt32x8 ...) => (VPCMPEQD256 ...)
-(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [0] x y))
+(EqualInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))
(EqualInt64x2 ...) => (VPCMPEQQ128 ...)
(EqualInt64x4 ...) => (VPCMPEQQ256 ...)
-(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
+(EqualInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
(EqualUint8x16 ...) => (VPCMPEQB128 ...)
(EqualUint8x32 ...) => (VPCMPEQB256 ...)
-(EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
+(EqualUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPEQB512 x y))
(EqualUint16x8 ...) => (VPCMPEQW128 ...)
(EqualUint16x16 ...) => (VPCMPEQW256 ...)
-(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
+(EqualUint16x32 x y) => (VPMOVMToVec16x32 (VPCMPEQW512 x y))
(EqualUint32x4 ...) => (VPCMPEQD128 ...)
(EqualUint32x8 ...) => (VPCMPEQD256 ...)
-(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPUD512 [0] x y))
+(EqualUint32x16 x y) => (VPMOVMToVec32x16 (VPCMPEQD512 x y))
(EqualUint64x2 ...) => (VPCMPEQQ128 ...)
(EqualUint64x4 ...) => (VPCMPEQQ256 ...)
-(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y))
+(EqualUint64x8 x y) => (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
(EqualMaskedFloat32x4 x y mask) => (VPMOVMToVec32x4 (VCMPPSMasked128 [0] x y (VPMOVVec32x4ToM <types.TypeMask> mask)))
(EqualMaskedFloat32x8 x y mask) => (VPMOVMToVec32x8 (VCMPPSMasked256 [0] x y (VPMOVVec32x8ToM <types.TypeMask> mask)))
(EqualMaskedFloat32x16 x y mask) => (VPMOVMToVec32x16 (VCMPPSMasked512 [0] x y (VPMOVVec32x16ToM <types.TypeMask> mask)))
@@ -453,16 +453,16 @@
(GreaterFloat64x8 x y) => (VPMOVMToVec64x8 (VCMPPD512 [14] x y))
(GreaterInt8x16 ...) => (VPCMPGTB128 ...)
(GreaterInt8x32 ...) => (VPCMPGTB256 ...)
-(GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPB512 [14] x y))
+(GreaterInt8x64 x y) => (VPMOVMToVec8x64 (VPCMPGTB512 x y))
(GreaterInt16x8 ...) => (VPCMPGTW128 ...)
(GreaterInt16x16 ...) => (VPCMPGTW256 ...)
-(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPW512 [14] x y))
+(GreaterInt16x32 x y) => (VPMOVMToVec16x32 (VPCMPGTW512 x y))
(GreaterInt32x4 ...) => (VPCMPGTD128 ...)
(GreaterInt32x8 ...) => (VPCMPGTD256 ...)
-(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPD512 [14] x y))
+(GreaterInt32x16 x y) => (VPMOVMToVec32x16 (VPCMPGTD512 x y))
(GreaterInt64x2 ...) => (VPCMPGTQ128 ...)
(GreaterInt64x4 ...) => (VPCMPGTQ256 ...)
-(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPQ512 [14] x y))
+(GreaterInt64x8 x y) => (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
(GreaterUint8x16 x y) => (VPMOVMToVec8x16 (VPCMPUB128 [14] x y))
(GreaterUint8x32 x y) => (VPMOVMToVec8x32 (VPCMPUB256 [14] x y))
(GreaterUint8x64 x y) => (VPMOVMToVec8x64 (VPCMPUB512 [14] x y))
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
index a7a3c9715c..5a51e4400a 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go
@@ -33,7 +33,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VADDPS128", argLength: 2, reg: v21, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VADDPSMasked128", argLength: 3, reg: w2kw, asm: "VADDPS", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VADDSUBPS128", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec128", resultInArg0: false},
- {name: "VRCP14PS128", argLength: 1, reg: w11, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
+ {name: "VRCPPS128", argLength: 1, reg: v11, asm: "VRCPPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRCP14PSMasked128", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRSQRTPS128", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VRSQRT14PSMasked128", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -63,7 +63,7 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VADDPS256", argLength: 2, reg: v21, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VADDPSMasked256", argLength: 3, reg: w2kw, asm: "VADDPS", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VADDSUBPS256", argLength: 2, reg: v21, asm: "VADDSUBPS", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VRCP14PS256", argLength: 1, reg: w11, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
+ {name: "VRCPPS256", argLength: 1, reg: v11, asm: "VRCPPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRCP14PSMasked256", argLength: 2, reg: wkw, asm: "VRCP14PS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRSQRTPS256", argLength: 1, reg: v11, asm: "VRSQRTPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VRSQRT14PSMasked256", argLength: 2, reg: wkw, asm: "VRSQRT14PS", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -224,6 +224,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDW512", argLength: 2, reg: w21, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDWMasked512", argLength: 3, reg: w2kw, asm: "VPADDW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSWMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSW", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPCMPEQW512", argLength: 2, reg: w2k, asm: "VPCMPEQW", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPGTW512", argLength: 2, reg: w2k, asm: "VPCMPGTW", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPMAXSW512", argLength: 2, reg: w21, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSWMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSW", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSW512", argLength: 2, reg: w21, asm: "VPMINSW", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -305,6 +307,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPANDND512", argLength: 2, reg: w21, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPANDNDMasked512", argLength: 3, reg: w2kw, asm: "VPANDND", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSDMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPCMPEQD512", argLength: 2, reg: w2k, asm: "VPCMPEQD", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPGTD512", argLength: 2, reg: w2k, asm: "VPCMPGTD", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPMAXSD512", argLength: 2, reg: w21, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSDMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSD", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSD512", argLength: 2, reg: w21, asm: "VPMINSD", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -526,6 +530,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPANDNQ512", argLength: 2, reg: w21, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPANDNQMasked512", argLength: 3, reg: w2kw, asm: "VPANDNQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSQMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSQ", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPCMPEQQ512", argLength: 2, reg: w2k, asm: "VPCMPEQQ", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPGTQ512", argLength: 2, reg: w2k, asm: "VPCMPGTQ", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPMAXSQ512", argLength: 2, reg: w21, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSQMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSQ", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSQ512", argLength: 2, reg: w21, asm: "VPMINSQ", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -611,6 +617,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPADDB512", argLength: 2, reg: w21, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPADDBMasked512", argLength: 3, reg: w2kw, asm: "VPADDB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPCOMPRESSBMasked512", argLength: 2, reg: wkw, asm: "VPCOMPRESSB", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPCMPEQB512", argLength: 2, reg: w2k, asm: "VPCMPEQB", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPGTB512", argLength: 2, reg: w2k, asm: "VPCMPGTB", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPMAXSB512", argLength: 2, reg: w21, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMAXSBMasked512", argLength: 3, reg: w2kw, asm: "VPMAXSB", commutative: true, typ: "Vec512", resultInArg0: false},
{name: "VPMINSB512", argLength: 2, reg: w21, asm: "VPMINSB", commutative: true, typ: "Vec512", resultInArg0: false},
@@ -692,10 +700,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUD128", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMINUDMasked128", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec128", resultInArg0: false},
{name: "VPMULUDQ128", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec128", resultInArg0: false},
- {name: "VPERMI2D128", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPERMI2PS128", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
- {name: "VPERMI2PSMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2D128", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPERMI2DMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec128", resultInArg0: true},
+ {name: "VPERMI2PSMasked128", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec128", resultInArg0: true},
{name: "VPSRLD128", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLDMasked128", argLength: 3, reg: wfpkw, asm: "VPSRLD", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSRLVD128", argLength: 2, reg: v21, asm: "VPSRLVD", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -705,12 +713,12 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUD256", argLength: 2, reg: v21, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUDMasked256", argLength: 3, reg: w2kw, asm: "VPMINUD", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQ256", argLength: 2, reg: v21, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMD256", argLength: 2, reg: v21, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMPS256", argLength: 2, reg: v21, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMI2PS256", argLength: 3, reg: w31, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
- {name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2D256", argLength: 3, reg: w31, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PSMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PS", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2DMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2D", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMPSMasked256", argLength: 3, reg: w2kw, asm: "VPERMPS", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMDMasked256", argLength: 3, reg: w2kw, asm: "VPERMD", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSRLD256", argLength: 2, reg: vfpv, asm: "VPSRLD", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -735,10 +743,10 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPMINUQ256", argLength: 2, reg: w21, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMINUQMasked256", argLength: 3, reg: w2kw, asm: "VPMINUQ", commutative: true, typ: "Vec256", resultInArg0: false},
{name: "VPMULUDQMasked256", argLength: 3, reg: w2kw, asm: "VPMULUDQ", commutative: true, typ: "Vec256", resultInArg0: false},
- {name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMPD256", argLength: 2, reg: w21, asm: "VPERMPD", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMQ256", argLength: 2, reg: w21, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPERMI2PD256", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
+ {name: "VPERMI2Q256", argLength: 3, reg: w31, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2PDMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMI2QMasked256", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec256", resultInArg0: true},
{name: "VPERMQMasked256", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec256", resultInArg0: false},
@@ -759,8 +767,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPERMI2PD512", argLength: 3, reg: w31, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2QMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2Q", commutative: false, typ: "Vec512", resultInArg0: true},
{name: "VPERMI2PDMasked512", argLength: 4, reg: w3kw, asm: "VPERMI2PD", commutative: false, typ: "Vec512", resultInArg0: true},
- {name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPERMPDMasked512", argLength: 3, reg: w2kw, asm: "VPERMPD", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPERMQMasked512", argLength: 3, reg: w2kw, asm: "VPERMQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLQ512", argLength: 2, reg: wfpw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLQMasked512", argLength: 3, reg: wfpkw, asm: "VPSRLQ", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSRLVQ512", argLength: 2, reg: w21, asm: "VPSRLVQ", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -858,8 +866,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHLDWMasked256", argLength: 3, reg: w2kw, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSHRDW256", argLength: 2, reg: w21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSHRDWMasked256", argLength: 3, reg: w2kw, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPW512", argLength: 2, reg: w2k, asm: "VPCMPW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPSHLDW512", argLength: 2, reg: w21, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSHLDWMasked512", argLength: 3, reg: w2kw, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPSHRDW512", argLength: 2, reg: w21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -872,8 +880,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHLDWMasked128", argLength: 3, reg: w2kw, asm: "VPSHLDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSHRDW128", argLength: 2, reg: w21, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPSHRDWMasked128", argLength: 3, reg: w2kw, asm: "VPSHRDW", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
- {name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPD512", argLength: 2, reg: w2k, asm: "VPCMPD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPROLD512", argLength: 1, reg: w11, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPROLDMasked512", argLength: 2, reg: wkw, asm: "VPROLD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPRORD512", argLength: 1, reg: w11, asm: "VPRORD", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -926,8 +934,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPSHLDQMasked256", argLength: 3, reg: w2kw, asm: "VPSHLDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSHRDQ256", argLength: 2, reg: w21, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPSHRDQMasked256", argLength: 3, reg: w2kw, asm: "VPSHRDQ", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPQ512", argLength: 2, reg: w2k, asm: "VPCMPQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPROLQ512", argLength: 1, reg: w11, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPROLQMasked512", argLength: 2, reg: wkw, asm: "VPROLQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VPRORQ512", argLength: 1, reg: w11, asm: "VPRORQ", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
@@ -944,16 +952,16 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VEXTRACTI128128", argLength: 1, reg: v11, asm: "VEXTRACTI128", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VPCMPB256", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VINSERTI128256", argLength: 2, reg: v21, asm: "VINSERTI128", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
- {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPB512", argLength: 2, reg: w2k, asm: "VPCMPB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUWMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW256", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUWMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUW512", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUWMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUW", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUW128", argLength: 2, reg: w2k, asm: "VPCMPUW", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUDMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUD512", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUDMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUD128", argLength: 2, reg: w2k, asm: "VPCMPUD", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUDMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUD", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
@@ -962,8 +970,8 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VPCMPUQ128", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQMasked256", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQ256", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUQMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUQ", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
+ {name: "VPCMPUQ512", argLength: 2, reg: w2k, asm: "VPCMPUQ", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
{name: "VPCMPUBMasked128", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VGF2P8AFFINEQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB128", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec128", resultInArg0: false},
@@ -976,11 +984,11 @@ func simdAMD64Ops(v11, v21, v2k, vkv, v2kv, v2kk, v31, v3kv, vgpv, vgp, vfpv, vf
{name: "VGF2P8AFFINEINVQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked256", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec256", resultInArg0: false},
{name: "VPCMPUB256", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
- {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VPCMPUBMasked512", argLength: 3, reg: w2kk, asm: "VPCMPUB", aux: "Int8", commutative: true, typ: "Mask", resultInArg0: false},
{name: "VGF2P8AFFINEQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VGF2P8AFFINEINVQB512", argLength: 2, reg: w21, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VGF2P8AFFINEINVQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEINVQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
{name: "VGF2P8AFFINEQBMasked512", argLength: 3, reg: w2kw, asm: "VGF2P8AFFINEQB", aux: "Int8", commutative: false, typ: "Vec512", resultInArg0: false},
+ {name: "VPCMPUB512", argLength: 2, reg: w2k, asm: "VPCMPUB", aux: "Int8", commutative: false, typ: "Mask", resultInArg0: false},
}
}
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index c8fe1e9eee..7b016b517d 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -912,10 +912,10 @@ func simdGenericOps() []opData {
{name: "PermuteUint16x16", argLength: 2, commutative: false},
{name: "Permute2Uint16x16", argLength: 3, commutative: false},
{name: "Permute2Int16x16", argLength: 3, commutative: false},
- {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
{name: "Permute2MaskedUint16x16", argLength: 4, commutative: false},
- {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
+ {name: "Permute2MaskedInt16x16", argLength: 4, commutative: false},
{name: "PermuteMaskedInt16x16", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint16x16", argLength: 3, commutative: false},
{name: "PopCountUint16x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x16", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x16", argLength: 2, commutative: true},
@@ -966,8 +966,8 @@ func simdGenericOps() []opData {
{name: "Permute2Int16x32", argLength: 3, commutative: false},
{name: "Permute2MaskedUint16x32", argLength: 4, commutative: false},
{name: "Permute2MaskedInt16x32", argLength: 4, commutative: false},
- {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
{name: "PermuteMaskedInt16x32", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint16x32", argLength: 3, commutative: false},
{name: "PopCountUint16x32", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x32", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x32", argLength: 2, commutative: true},
@@ -1018,12 +1018,12 @@ func simdGenericOps() []opData {
{name: "PairwiseSubUint16x8", argLength: 2, commutative: false},
{name: "PermuteInt16x8", argLength: 2, commutative: false},
{name: "PermuteUint16x8", argLength: 2, commutative: false},
- {name: "Permute2Int16x8", argLength: 3, commutative: false},
{name: "Permute2Uint16x8", argLength: 3, commutative: false},
+ {name: "Permute2Int16x8", argLength: 3, commutative: false},
{name: "Permute2MaskedInt16x8", argLength: 4, commutative: false},
{name: "Permute2MaskedUint16x8", argLength: 4, commutative: false},
- {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt16x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint16x8", argLength: 3, commutative: false},
{name: "PopCountUint16x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint16x8", argLength: 2, commutative: false},
{name: "SaturatedAddUint16x8", argLength: 2, commutative: true},
@@ -1070,17 +1070,17 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint32x16", argLength: 3, commutative: true},
{name: "OrUint32x16", argLength: 2, commutative: true},
{name: "OrMaskedUint32x16", argLength: 3, commutative: true},
- {name: "PermuteFloat32x16", argLength: 2, commutative: false},
{name: "PermuteInt32x16", argLength: 2, commutative: false},
+ {name: "PermuteFloat32x16", argLength: 2, commutative: false},
{name: "PermuteUint32x16", argLength: 2, commutative: false},
{name: "Permute2Uint32x16", argLength: 3, commutative: false},
{name: "Permute2Float32x16", argLength: 3, commutative: false},
{name: "Permute2Int32x16", argLength: 3, commutative: false},
+ {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedInt32x16", argLength: 4, commutative: false},
{name: "Permute2MaskedFloat32x16", argLength: 4, commutative: false},
- {name: "Permute2MaskedUint32x16", argLength: 4, commutative: false},
- {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat32x16", argLength: 3, commutative: false},
+ {name: "PermuteMaskedInt32x16", argLength: 3, commutative: false},
{name: "PermuteMaskedUint32x16", argLength: 3, commutative: false},
{name: "PopCountUint32x16", argLength: 1, commutative: false},
{name: "PopCountMaskedUint32x16", argLength: 2, commutative: false},
@@ -1307,15 +1307,15 @@ func simdGenericOps() []opData {
{name: "PermuteUint64x4", argLength: 2, commutative: false},
{name: "PermuteInt64x4", argLength: 2, commutative: false},
{name: "PermuteFloat64x4", argLength: 2, commutative: false},
- {name: "Permute2Float64x4", argLength: 3, commutative: false},
- {name: "Permute2Int64x4", argLength: 3, commutative: false},
{name: "Permute2Uint64x4", argLength: 3, commutative: false},
- {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
+ {name: "Permute2Int64x4", argLength: 3, commutative: false},
+ {name: "Permute2Float64x4", argLength: 3, commutative: false},
{name: "Permute2MaskedUint64x4", argLength: 4, commutative: false},
+ {name: "Permute2MaskedFloat64x4", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x4", argLength: 4, commutative: false},
+ {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedFloat64x4", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x4", argLength: 3, commutative: false},
- {name: "PermuteMaskedUint64x4", argLength: 3, commutative: false},
{name: "PopCountUint64x4", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x4", argLength: 2, commutative: false},
{name: "RotateLeftUint64x4", argLength: 2, commutative: false},
@@ -1365,18 +1365,18 @@ func simdGenericOps() []opData {
{name: "NotEqualMaskedUint64x8", argLength: 3, commutative: true},
{name: "OrUint64x8", argLength: 2, commutative: true},
{name: "OrMaskedUint64x8", argLength: 3, commutative: true},
+ {name: "PermuteUint64x8", argLength: 2, commutative: false},
{name: "PermuteFloat64x8", argLength: 2, commutative: false},
{name: "PermuteInt64x8", argLength: 2, commutative: false},
- {name: "PermuteUint64x8", argLength: 2, commutative: false},
- {name: "Permute2Int64x8", argLength: 3, commutative: false},
{name: "Permute2Float64x8", argLength: 3, commutative: false},
{name: "Permute2Uint64x8", argLength: 3, commutative: false},
+ {name: "Permute2Int64x8", argLength: 3, commutative: false},
+ {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedUint64x8", argLength: 4, commutative: false},
{name: "Permute2MaskedInt64x8", argLength: 4, commutative: false},
- {name: "Permute2MaskedFloat64x8", argLength: 4, commutative: false},
- {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
- {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
{name: "PermuteMaskedInt64x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedFloat64x8", argLength: 3, commutative: false},
+ {name: "PermuteMaskedUint64x8", argLength: 3, commutative: false},
{name: "PopCountUint64x8", argLength: 1, commutative: false},
{name: "PopCountMaskedUint64x8", argLength: 2, commutative: false},
{name: "RotateLeftUint64x8", argLength: 2, commutative: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index d69e714082..9db3dbaf57 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1199,6 +1199,7 @@ const (
OpAMD64VZEROUPPER
OpAMD64VZEROALL
OpAMD64KMOVQload
+ OpAMD64KMOVQstore
OpAMD64VADDPS512
OpAMD64VADDPSMasked512
OpAMD64VRCP14PS512
@@ -1229,7 +1230,7 @@ const (
OpAMD64VADDPS128
OpAMD64VADDPSMasked128
OpAMD64VADDSUBPS128
- OpAMD64VRCP14PS128
+ OpAMD64VRCPPS128
OpAMD64VRCP14PSMasked128
OpAMD64VRSQRTPS128
OpAMD64VRSQRT14PSMasked128
@@ -1259,7 +1260,7 @@ const (
OpAMD64VADDPS256
OpAMD64VADDPSMasked256
OpAMD64VADDSUBPS256
- OpAMD64VRCP14PS256
+ OpAMD64VRCPPS256
OpAMD64VRCP14PSMasked256
OpAMD64VRSQRTPS256
OpAMD64VRSQRT14PSMasked256
@@ -1420,6 +1421,8 @@ const (
OpAMD64VPADDW512
OpAMD64VPADDWMasked512
OpAMD64VPCOMPRESSWMasked512
+ OpAMD64VPCMPEQW512
+ OpAMD64VPCMPGTW512
OpAMD64VPMAXSW512
OpAMD64VPMAXSWMasked512
OpAMD64VPMINSW512
@@ -1501,6 +1504,8 @@ const (
OpAMD64VPANDND512
OpAMD64VPANDNDMasked512
OpAMD64VPCOMPRESSDMasked512
+ OpAMD64VPCMPEQD512
+ OpAMD64VPCMPGTD512
OpAMD64VPMAXSD512
OpAMD64VPMAXSDMasked512
OpAMD64VPMINSD512
@@ -1722,6 +1727,8 @@ const (
OpAMD64VPANDNQ512
OpAMD64VPANDNQMasked512
OpAMD64VPCOMPRESSQMasked512
+ OpAMD64VPCMPEQQ512
+ OpAMD64VPCMPGTQ512
OpAMD64VPMAXSQ512
OpAMD64VPMAXSQMasked512
OpAMD64VPMINSQ512
@@ -1807,6 +1814,8 @@ const (
OpAMD64VPADDB512
OpAMD64VPADDBMasked512
OpAMD64VPCOMPRESSBMasked512
+ OpAMD64VPCMPEQB512
+ OpAMD64VPCMPGTB512
OpAMD64VPMAXSB512
OpAMD64VPMAXSBMasked512
OpAMD64VPMINSB512
@@ -1888,10 +1897,10 @@ const (
OpAMD64VPMINUD128
OpAMD64VPMINUDMasked128
OpAMD64VPMULUDQ128
- OpAMD64VPERMI2D128
OpAMD64VPERMI2PS128
- OpAMD64VPERMI2PSMasked128
+ OpAMD64VPERMI2D128
OpAMD64VPERMI2DMasked128
+ OpAMD64VPERMI2PSMasked128
OpAMD64VPSRLD128
OpAMD64VPSRLDMasked128
OpAMD64VPSRLVD128
@@ -1901,12 +1910,12 @@ const (
OpAMD64VPMINUD256
OpAMD64VPMINUDMasked256
OpAMD64VPMULUDQ256
- OpAMD64VPERMPS256
OpAMD64VPERMD256
- OpAMD64VPERMI2D256
+ OpAMD64VPERMPS256
OpAMD64VPERMI2PS256
- OpAMD64VPERMI2DMasked256
+ OpAMD64VPERMI2D256
OpAMD64VPERMI2PSMasked256
+ OpAMD64VPERMI2DMasked256
OpAMD64VPERMPSMasked256
OpAMD64VPERMDMasked256
OpAMD64VPSRLD256
@@ -1931,10 +1940,10 @@ const (
OpAMD64VPMINUQ256
OpAMD64VPMINUQMasked256
OpAMD64VPMULUDQMasked256
- OpAMD64VPERMQ256
OpAMD64VPERMPD256
- OpAMD64VPERMI2Q256
+ OpAMD64VPERMQ256
OpAMD64VPERMI2PD256
+ OpAMD64VPERMI2Q256
OpAMD64VPERMI2PDMasked256
OpAMD64VPERMI2QMasked256
OpAMD64VPERMQMasked256
@@ -1955,8 +1964,8 @@ const (
OpAMD64VPERMI2PD512
OpAMD64VPERMI2QMasked512
OpAMD64VPERMI2PDMasked512
- OpAMD64VPERMQMasked512
OpAMD64VPERMPDMasked512
+ OpAMD64VPERMQMasked512
OpAMD64VPSRLQ512
OpAMD64VPSRLQMasked512
OpAMD64VPSRLVQ512
@@ -2054,8 +2063,8 @@ const (
OpAMD64VPSHLDWMasked256
OpAMD64VPSHRDW256
OpAMD64VPSHRDWMasked256
- OpAMD64VPCMPW512
OpAMD64VPCMPWMasked512
+ OpAMD64VPCMPW512
OpAMD64VPSHLDW512
OpAMD64VPSHLDWMasked512
OpAMD64VPSHRDW512
@@ -2068,8 +2077,8 @@ const (
OpAMD64VPSHLDWMasked128
OpAMD64VPSHRDW128
OpAMD64VPSHRDWMasked128
- OpAMD64VPCMPD512
OpAMD64VPCMPDMasked512
+ OpAMD64VPCMPD512
OpAMD64VPROLD512
OpAMD64VPROLDMasked512
OpAMD64VPRORD512
@@ -2122,8 +2131,8 @@ const (
OpAMD64VPSHLDQMasked256
OpAMD64VPSHRDQ256
OpAMD64VPSHRDQMasked256
- OpAMD64VPCMPQ512
OpAMD64VPCMPQMasked512
+ OpAMD64VPCMPQ512
OpAMD64VPROLQ512
OpAMD64VPROLQMasked512
OpAMD64VPRORQ512
@@ -2140,16 +2149,16 @@ const (
OpAMD64VEXTRACTI128128
OpAMD64VPCMPB256
OpAMD64VINSERTI128256
- OpAMD64VPCMPB512
OpAMD64VPCMPBMasked512
+ OpAMD64VPCMPB512
OpAMD64VPCMPUWMasked256
OpAMD64VPCMPUW256
- OpAMD64VPCMPUW512
OpAMD64VPCMPUWMasked512
+ OpAMD64VPCMPUW512
OpAMD64VPCMPUWMasked128
OpAMD64VPCMPUW128
- OpAMD64VPCMPUD512
OpAMD64VPCMPUDMasked512
+ OpAMD64VPCMPUD512
OpAMD64VPCMPUDMasked128
OpAMD64VPCMPUD128
OpAMD64VPCMPUDMasked256
@@ -2158,8 +2167,8 @@ const (
OpAMD64VPCMPUQ128
OpAMD64VPCMPUQMasked256
OpAMD64VPCMPUQ256
- OpAMD64VPCMPUQ512
OpAMD64VPCMPUQMasked512
+ OpAMD64VPCMPUQ512
OpAMD64VPCMPUBMasked128
OpAMD64VGF2P8AFFINEQB128
OpAMD64VGF2P8AFFINEINVQB128
@@ -2172,12 +2181,12 @@ const (
OpAMD64VGF2P8AFFINEINVQBMasked256
OpAMD64VGF2P8AFFINEQBMasked256
OpAMD64VPCMPUB256
- OpAMD64VPCMPUB512
OpAMD64VPCMPUBMasked512
OpAMD64VGF2P8AFFINEQB512
OpAMD64VGF2P8AFFINEINVQB512
OpAMD64VGF2P8AFFINEINVQBMasked512
OpAMD64VGF2P8AFFINEQBMasked512
+ OpAMD64VPCMPUB512
OpARMADD
OpARMADDconst
@@ -4416,6 +4425,18 @@ const (
OpLoadMask64x2
OpLoadMask64x4
OpLoadMask64x8
+ OpStoreMask8x16
+ OpStoreMask8x32
+ OpStoreMask8x64
+ OpStoreMask16x8
+ OpStoreMask16x16
+ OpStoreMask16x32
+ OpStoreMask32x4
+ OpStoreMask32x8
+ OpStoreMask32x16
+ OpStoreMask64x2
+ OpStoreMask64x4
+ OpStoreMask64x8
OpAddFloat32x16
OpAddMaskedFloat32x16
OpApproximateReciprocalFloat32x16
@@ -5325,10 +5346,10 @@ const (
OpPermuteUint16x16
OpPermute2Uint16x16
OpPermute2Int16x16
- OpPermute2MaskedInt16x16
OpPermute2MaskedUint16x16
- OpPermuteMaskedUint16x16
+ OpPermute2MaskedInt16x16
OpPermuteMaskedInt16x16
+ OpPermuteMaskedUint16x16
OpPopCountUint16x16
OpPopCountMaskedUint16x16
OpSaturatedAddUint16x16
@@ -5379,8 +5400,8 @@ const (
OpPermute2Int16x32
OpPermute2MaskedUint16x32
OpPermute2MaskedInt16x32
- OpPermuteMaskedUint16x32
OpPermuteMaskedInt16x32
+ OpPermuteMaskedUint16x32
OpPopCountUint16x32
OpPopCountMaskedUint16x32
OpSaturatedAddUint16x32
@@ -5431,12 +5452,12 @@ const (
OpPairwiseSubUint16x8
OpPermuteInt16x8
OpPermuteUint16x8
- OpPermute2Int16x8
OpPermute2Uint16x8
+ OpPermute2Int16x8
OpPermute2MaskedInt16x8
OpPermute2MaskedUint16x8
- OpPermuteMaskedUint16x8
OpPermuteMaskedInt16x8
+ OpPermuteMaskedUint16x8
OpPopCountUint16x8
OpPopCountMaskedUint16x8
OpSaturatedAddUint16x8
@@ -5483,17 +5504,17 @@ const (
OpNotEqualMaskedUint32x16
OpOrUint32x16
OpOrMaskedUint32x16
- OpPermuteFloat32x16
OpPermuteInt32x16
+ OpPermuteFloat32x16
OpPermuteUint32x16
OpPermute2Uint32x16
OpPermute2Float32x16
OpPermute2Int32x16
+ OpPermute2MaskedUint32x16
OpPermute2MaskedInt32x16
OpPermute2MaskedFloat32x16
- OpPermute2MaskedUint32x16
- OpPermuteMaskedInt32x16
OpPermuteMaskedFloat32x16
+ OpPermuteMaskedInt32x16
OpPermuteMaskedUint32x16
OpPopCountUint32x16
OpPopCountMaskedUint32x16
@@ -5720,15 +5741,15 @@ const (
OpPermuteUint64x4
OpPermuteInt64x4
OpPermuteFloat64x4
- OpPermute2Float64x4
- OpPermute2Int64x4
OpPermute2Uint64x4
- OpPermute2MaskedFloat64x4
+ OpPermute2Int64x4
+ OpPermute2Float64x4
OpPermute2MaskedUint64x4
+ OpPermute2MaskedFloat64x4
OpPermute2MaskedInt64x4
+ OpPermuteMaskedUint64x4
OpPermuteMaskedFloat64x4
OpPermuteMaskedInt64x4
- OpPermuteMaskedUint64x4
OpPopCountUint64x4
OpPopCountMaskedUint64x4
OpRotateLeftUint64x4
@@ -5778,18 +5799,18 @@ const (
OpNotEqualMaskedUint64x8
OpOrUint64x8
OpOrMaskedUint64x8
+ OpPermuteUint64x8
OpPermuteFloat64x8
OpPermuteInt64x8
- OpPermuteUint64x8
- OpPermute2Int64x8
OpPermute2Float64x8
OpPermute2Uint64x8
+ OpPermute2Int64x8
+ OpPermute2MaskedFloat64x8
OpPermute2MaskedUint64x8
OpPermute2MaskedInt64x8
- OpPermute2MaskedFloat64x8
- OpPermuteMaskedUint64x8
- OpPermuteMaskedFloat64x8
OpPermuteMaskedInt64x8
+ OpPermuteMaskedFloat64x8
+ OpPermuteMaskedUint64x8
OpPopCountUint64x8
OpPopCountMaskedUint64x8
OpRotateLeftUint64x8
@@ -18831,6 +18852,20 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "KMOVQstore",
+ auxType: auxSymOff,
+ argLen: 3,
+ faultOnNilArg0: true,
+ symEffect: SymWrite,
+ asm: x86.AKMOVQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {1, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 72057594037977087}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 SB
+ },
+ },
+ },
+ {
name: "VADDPS512",
argLen: 2,
commutative: true,
@@ -19281,15 +19316,15 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VRCP14PS128",
+ name: "VRCPPS128",
argLen: 1,
- asm: x86.AVRCP14PS,
+ asm: x86.AVRCPPS,
reg: regInfo{
inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
@@ -19728,15 +19763,15 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VRCP14PS256",
+ name: "VRCPPS256",
argLen: 1,
- asm: x86.AVRCP14PS,
+ asm: x86.AVRCPPS,
reg: regInfo{
inputs: []inputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
- {0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
},
},
@@ -22123,6 +22158,35 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPCMPEQW512",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPCMPEQW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
+ name: "VPCMPGTW512",
+ argLen: 2,
+ asm: x86.AVPCMPGTW,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
name: "VPMAXSW512",
argLen: 2,
commutative: true,
@@ -23328,6 +23392,35 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPCMPEQD512",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPCMPEQD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
+ name: "VPCMPGTD512",
+ argLen: 2,
+ asm: x86.AVPCMPGTD,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
name: "VPMAXSD512",
argLen: 2,
commutative: true,
@@ -26665,6 +26758,35 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPCMPEQQ512",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPCMPEQQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
+ name: "VPCMPGTQ512",
+ argLen: 2,
+ asm: x86.AVPCMPGTQ,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
name: "VPMAXSQ512",
argLen: 2,
commutative: true,
@@ -27923,6 +28045,35 @@ var opcodeTable = [...]opInfo{
},
},
{
+ name: "VPCMPEQB512",
+ argLen: 2,
+ commutative: true,
+ asm: x86.AVPCMPEQB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
+ name: "VPCMPGTB512",
+ argLen: 2,
+ asm: x86.AVPCMPGTB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
+ {
name: "VPMAXSB512",
argLen: 2,
commutative: true,
@@ -29154,10 +29305,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2D128",
+ name: "VPERMI2PS128",
argLen: 3,
resultInArg0: true,
- asm: x86.AVPERMI2D,
+ asm: x86.AVPERMI2PS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29170,10 +29321,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2PS128",
+ name: "VPERMI2D128",
argLen: 3,
resultInArg0: true,
- asm: x86.AVPERMI2PS,
+ asm: x86.AVPERMI2D,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29186,10 +29337,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2PSMasked128",
+ name: "VPERMI2DMasked128",
argLen: 4,
resultInArg0: true,
- asm: x86.AVPERMI2PS,
+ asm: x86.AVPERMI2D,
reg: regInfo{
inputs: []inputInfo{
{3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -29203,10 +29354,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2DMasked128",
+ name: "VPERMI2PSMasked128",
argLen: 4,
resultInArg0: true,
- asm: x86.AVPERMI2D,
+ asm: x86.AVPERMI2PS,
reg: regInfo{
inputs: []inputInfo{
{3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -29355,9 +29506,9 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMPS256",
+ name: "VPERMD256",
argLen: 2,
- asm: x86.AVPERMPS,
+ asm: x86.AVPERMD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29369,9 +29520,9 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMD256",
+ name: "VPERMPS256",
argLen: 2,
- asm: x86.AVPERMD,
+ asm: x86.AVPERMPS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29383,10 +29534,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2D256",
+ name: "VPERMI2PS256",
argLen: 3,
resultInArg0: true,
- asm: x86.AVPERMI2D,
+ asm: x86.AVPERMI2PS,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29399,10 +29550,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2PS256",
+ name: "VPERMI2D256",
argLen: 3,
resultInArg0: true,
- asm: x86.AVPERMI2PS,
+ asm: x86.AVPERMI2D,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29415,10 +29566,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2DMasked256",
+ name: "VPERMI2PSMasked256",
argLen: 4,
resultInArg0: true,
- asm: x86.AVPERMI2D,
+ asm: x86.AVPERMI2PS,
reg: regInfo{
inputs: []inputInfo{
{3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -29432,10 +29583,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2PSMasked256",
+ name: "VPERMI2DMasked256",
argLen: 4,
resultInArg0: true,
- asm: x86.AVPERMI2PS,
+ asm: x86.AVPERMI2D,
reg: regInfo{
inputs: []inputInfo{
{3, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -29817,9 +29968,9 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMQ256",
+ name: "VPERMPD256",
argLen: 2,
- asm: x86.AVPERMQ,
+ asm: x86.AVPERMPD,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
@@ -29831,9 +29982,9 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMPD256",
+ name: "VPERMQ256",
argLen: 2,
- asm: x86.AVPERMPD,
+ asm: x86.AVPERMQ,
reg: regInfo{
inputs: []inputInfo{
{0, 281472829161472}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X27 X28 X29 X30 X31
@@ -29845,10 +29996,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2Q256",
+ name: "VPERMI2PD256",
argLen: 3,
resultInArg0: true,
- asm: x86.AVPERMI2Q,
+ asm: x86.AVPERMI2PD,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -29861,10 +30012,10 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMI2PD256",
+ name: "VPERMI2Q256",
argLen: 3,
resultInArg0: true,
- asm: x86.AVPERMI2PD,
+ asm: x86.AVPERMI2Q,
reg: regInfo{
inputs: []inputInfo{
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
@@ -30186,9 +30337,9 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMQMasked512",
+ name: "VPERMPDMasked512",
argLen: 3,
- asm: x86.AVPERMQ,
+ asm: x86.AVPERMPD,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -30201,9 +30352,9 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPERMPDMasked512",
+ name: "VPERMQMasked512",
argLen: 3,
- asm: x86.AVPERMPD,
+ asm: x86.AVPERMQ,
reg: regInfo{
inputs: []inputInfo{
{2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31686,15 +31837,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPW512",
+ name: "VPCMPWMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPW,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31702,16 +31854,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPWMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPW,
+ name: "VPCMPW512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPW,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31904,15 +32054,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPD512",
+ name: "VPCMPDMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPD,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -31920,16 +32071,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPDMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPD,
+ name: "VPCMPD512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPD,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -32723,15 +32872,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPQ512",
+ name: "VPCMPQMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPQ,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -32739,16 +32889,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPQMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPQ,
+ name: "VPCMPQ512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPQ,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -32998,15 +33146,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPB512",
+ name: "VPCMPBMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPB,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33014,16 +33163,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPBMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPB,
+ name: "VPCMPB512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPB,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33063,15 +33210,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUW512",
+ name: "VPCMPUWMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33079,16 +33227,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUWMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUW,
+ name: "VPCMPUW512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPUW,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33128,15 +33274,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUD512",
+ name: "VPCMPUDMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33144,16 +33291,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUDMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUD,
+ name: "VPCMPUD512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPUD,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33289,15 +33434,16 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUQ512",
+ name: "VPCMPUQMasked512",
auxType: auxInt8,
- argLen: 2,
+ argLen: 3,
commutative: true,
asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33305,16 +33451,14 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUQMasked512",
- auxType: auxInt8,
- argLen: 3,
- commutative: true,
- asm: x86.AVPCMPUQ,
+ name: "VPCMPUQ512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPUQ,
reg: regInfo{
inputs: []inputInfo{
- {2, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
},
outputs: []outputInfo{
{0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
@@ -33510,22 +33654,6 @@ var opcodeTable = [...]opInfo{
},
},
{
- name: "VPCMPUB512",
- auxType: auxInt8,
- argLen: 2,
- commutative: true,
- asm: x86.AVPCMPUB,
- reg: regInfo{
- inputs: []inputInfo{
- {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
- },
- outputs: []outputInfo{
- {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
- },
- },
- },
- {
name: "VPCMPUBMasked512",
auxType: auxInt8,
argLen: 3,
@@ -33604,6 +33732,21 @@ var opcodeTable = [...]opInfo{
},
},
},
+ {
+ name: "VPCMPUB512",
+ auxType: auxInt8,
+ argLen: 2,
+ asm: x86.AVPCMPUB,
+ reg: regInfo{
+ inputs: []inputInfo{
+ {0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ {1, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
+ },
+ outputs: []outputInfo{
+ {0, 71494644084506624}, // K1 K2 K3 K4 K5 K6 K7
+ },
+ },
+ },
{
name: "ADD",
@@ -60817,6 +60960,78 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
+ name: "StoreMask8x16",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask8x32",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask8x64",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask16x8",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask16x16",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask16x32",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask32x4",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask32x8",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask32x16",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask64x2",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask64x4",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
+ name: "StoreMask64x8",
+ auxType: auxTyp,
+ argLen: 3,
+ generic: true,
+ },
+ {
name: "AddFloat32x16",
argLen: 2,
commutative: true,
@@ -65677,22 +65892,22 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "Permute2MaskedInt16x16",
+ name: "Permute2MaskedUint16x16",
argLen: 4,
generic: true,
},
{
- name: "Permute2MaskedUint16x16",
+ name: "Permute2MaskedInt16x16",
argLen: 4,
generic: true,
},
{
- name: "PermuteMaskedUint16x16",
+ name: "PermuteMaskedInt16x16",
argLen: 3,
generic: true,
},
{
- name: "PermuteMaskedInt16x16",
+ name: "PermuteMaskedUint16x16",
argLen: 3,
generic: true,
},
@@ -65964,12 +66179,12 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "PermuteMaskedUint16x32",
+ name: "PermuteMaskedInt16x32",
argLen: 3,
generic: true,
},
{
- name: "PermuteMaskedInt16x32",
+ name: "PermuteMaskedUint16x32",
argLen: 3,
generic: true,
},
@@ -66242,12 +66457,12 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "Permute2Int16x8",
+ name: "Permute2Uint16x8",
argLen: 3,
generic: true,
},
{
- name: "Permute2Uint16x8",
+ name: "Permute2Int16x8",
argLen: 3,
generic: true,
},
@@ -66262,12 +66477,12 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "PermuteMaskedUint16x8",
+ name: "PermuteMaskedInt16x8",
argLen: 3,
generic: true,
},
{
- name: "PermuteMaskedInt16x8",
+ name: "PermuteMaskedUint16x8",
argLen: 3,
generic: true,
},
@@ -66519,12 +66734,12 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "PermuteFloat32x16",
+ name: "PermuteInt32x16",
argLen: 2,
generic: true,
},
{
- name: "PermuteInt32x16",
+ name: "PermuteFloat32x16",
argLen: 2,
generic: true,
},
@@ -66549,27 +66764,27 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "Permute2MaskedInt32x16",
+ name: "Permute2MaskedUint32x16",
argLen: 4,
generic: true,
},
{
- name: "Permute2MaskedFloat32x16",
+ name: "Permute2MaskedInt32x16",
argLen: 4,
generic: true,
},
{
- name: "Permute2MaskedUint32x16",
+ name: "Permute2MaskedFloat32x16",
argLen: 4,
generic: true,
},
{
- name: "PermuteMaskedInt32x16",
+ name: "PermuteMaskedFloat32x16",
argLen: 3,
generic: true,
},
{
- name: "PermuteMaskedFloat32x16",
+ name: "PermuteMaskedInt32x16",
argLen: 3,
generic: true,
},
@@ -67774,7 +67989,7 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "Permute2Float64x4",
+ name: "Permute2Uint64x4",
argLen: 3,
generic: true,
},
@@ -67784,17 +67999,17 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "Permute2Uint64x4",
+ name: "Permute2Float64x4",
argLen: 3,
generic: true,
},
{
- name: "Permute2MaskedFloat64x4",
+ name: "Permute2MaskedUint64x4",
argLen: 4,
generic: true,
},
{
- name: "Permute2MaskedUint64x4",
+ name: "Permute2MaskedFloat64x4",
argLen: 4,
generic: true,
},
@@ -67804,17 +68019,17 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "PermuteMaskedFloat64x4",
+ name: "PermuteMaskedUint64x4",
argLen: 3,
generic: true,
},
{
- name: "PermuteMaskedInt64x4",
+ name: "PermuteMaskedFloat64x4",
argLen: 3,
generic: true,
},
{
- name: "PermuteMaskedUint64x4",
+ name: "PermuteMaskedInt64x4",
argLen: 3,
generic: true,
},
@@ -68082,52 +68297,52 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "PermuteFloat64x8",
+ name: "PermuteUint64x8",
argLen: 2,
generic: true,
},
{
- name: "PermuteInt64x8",
+ name: "PermuteFloat64x8",
argLen: 2,
generic: true,
},
{
- name: "PermuteUint64x8",
+ name: "PermuteInt64x8",
argLen: 2,
generic: true,
},
{
- name: "Permute2Int64x8",
+ name: "Permute2Float64x8",
argLen: 3,
generic: true,
},
{
- name: "Permute2Float64x8",
+ name: "Permute2Uint64x8",
argLen: 3,
generic: true,
},
{
- name: "Permute2Uint64x8",
+ name: "Permute2Int64x8",
argLen: 3,
generic: true,
},
{
- name: "Permute2MaskedUint64x8",
+ name: "Permute2MaskedFloat64x8",
argLen: 4,
generic: true,
},
{
- name: "Permute2MaskedInt64x8",
+ name: "Permute2MaskedUint64x8",
argLen: 4,
generic: true,
},
{
- name: "Permute2MaskedFloat64x8",
+ name: "Permute2MaskedInt64x8",
argLen: 4,
generic: true,
},
{
- name: "PermuteMaskedUint64x8",
+ name: "PermuteMaskedInt64x8",
argLen: 3,
generic: true,
},
@@ -68137,7 +68352,7 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "PermuteMaskedInt64x8",
+ name: "PermuteMaskedUint64x8",
argLen: 3,
generic: true,
},
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 0ff19a680e..ecd4a21f43 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -985,10 +985,10 @@ func rewriteValueAMD64(v *Value) bool {
v.Op = OpAMD64VRCP14PS512
return true
case OpApproximateReciprocalFloat32x4:
- v.Op = OpAMD64VRCP14PS128
+ v.Op = OpAMD64VRCPPS128
return true
case OpApproximateReciprocalFloat32x8:
- v.Op = OpAMD64VRCP14PS256
+ v.Op = OpAMD64VRCPPS256
return true
case OpApproximateReciprocalFloat64x2:
v.Op = OpAMD64VRCP14PD128
@@ -5184,6 +5184,30 @@ func rewriteValueAMD64(v *Value) bool {
return true
case OpStore:
return rewriteValueAMD64_OpStore(v)
+ case OpStoreMask16x16:
+ return rewriteValueAMD64_OpStoreMask16x16(v)
+ case OpStoreMask16x32:
+ return rewriteValueAMD64_OpStoreMask16x32(v)
+ case OpStoreMask16x8:
+ return rewriteValueAMD64_OpStoreMask16x8(v)
+ case OpStoreMask32x16:
+ return rewriteValueAMD64_OpStoreMask32x16(v)
+ case OpStoreMask32x4:
+ return rewriteValueAMD64_OpStoreMask32x4(v)
+ case OpStoreMask32x8:
+ return rewriteValueAMD64_OpStoreMask32x8(v)
+ case OpStoreMask64x2:
+ return rewriteValueAMD64_OpStoreMask64x2(v)
+ case OpStoreMask64x4:
+ return rewriteValueAMD64_OpStoreMask64x4(v)
+ case OpStoreMask64x8:
+ return rewriteValueAMD64_OpStoreMask64x8(v)
+ case OpStoreMask8x16:
+ return rewriteValueAMD64_OpStoreMask8x16(v)
+ case OpStoreMask8x32:
+ return rewriteValueAMD64_OpStoreMask8x32(v)
+ case OpStoreMask8x64:
+ return rewriteValueAMD64_OpStoreMask8x64(v)
case OpSub16:
v.Op = OpAMD64SUBL
return true
@@ -33388,13 +33412,12 @@ func rewriteValueAMD64_OpEqualInt16x32(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualInt16x32 x y)
- // result: (VPMOVMToVec16x32 (VPCMPW512 [0] x y))
+ // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec16x32)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -33406,13 +33429,12 @@ func rewriteValueAMD64_OpEqualInt32x16(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualInt32x16 x y)
- // result: (VPMOVMToVec32x16 (VPCMPD512 [0] x y))
+ // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec32x16)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -33424,13 +33446,12 @@ func rewriteValueAMD64_OpEqualInt64x8(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualInt64x8 x y)
- // result: (VPMOVMToVec64x8 (VPCMPQ512 [0] x y))
+ // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec64x8)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -33442,13 +33463,12 @@ func rewriteValueAMD64_OpEqualInt8x64(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualInt8x64 x y)
- // result: (VPMOVMToVec8x64 (VPCMPB512 [0] x y))
+ // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec8x64)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -34120,13 +34140,12 @@ func rewriteValueAMD64_OpEqualUint16x32(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualUint16x32 x y)
- // result: (VPMOVMToVec16x32 (VPCMPUW512 [0] x y))
+ // result: (VPMOVMToVec16x32 (VPCMPEQW512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec16x32)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUW512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQW512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -34138,13 +34157,12 @@ func rewriteValueAMD64_OpEqualUint32x16(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualUint32x16 x y)
- // result: (VPMOVMToVec32x16 (VPCMPUD512 [0] x y))
+ // result: (VPMOVMToVec32x16 (VPCMPEQD512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec32x16)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUD512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQD512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -34156,13 +34174,12 @@ func rewriteValueAMD64_OpEqualUint64x8(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualUint64x8 x y)
- // result: (VPMOVMToVec64x8 (VPCMPUQ512 [0] x y))
+ // result: (VPMOVMToVec64x8 (VPCMPEQQ512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec64x8)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUQ512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQQ512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -34174,13 +34191,12 @@ func rewriteValueAMD64_OpEqualUint8x64(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (EqualUint8x64 x y)
- // result: (VPMOVMToVec8x64 (VPCMPUB512 [0] x y))
+ // result: (VPMOVMToVec8x64 (VPCMPEQB512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec8x64)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPUB512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(0)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPEQB512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -36279,13 +36295,12 @@ func rewriteValueAMD64_OpGreaterInt16x32(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (GreaterInt16x32 x y)
- // result: (VPMOVMToVec16x32 (VPCMPW512 [14] x y))
+ // result: (VPMOVMToVec16x32 (VPCMPGTW512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec16x32)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPW512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(14)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTW512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -36297,13 +36312,12 @@ func rewriteValueAMD64_OpGreaterInt32x16(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (GreaterInt32x16 x y)
- // result: (VPMOVMToVec32x16 (VPCMPD512 [14] x y))
+ // result: (VPMOVMToVec32x16 (VPCMPGTD512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec32x16)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPD512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(14)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTD512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -36315,13 +36329,12 @@ func rewriteValueAMD64_OpGreaterInt64x8(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (GreaterInt64x8 x y)
- // result: (VPMOVMToVec64x8 (VPCMPQ512 [14] x y))
+ // result: (VPMOVMToVec64x8 (VPCMPGTQ512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec64x8)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPQ512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(14)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTQ512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -36333,13 +36346,12 @@ func rewriteValueAMD64_OpGreaterInt8x64(v *Value) bool {
b := v.Block
typ := &b.Func.Config.Types
// match: (GreaterInt8x64 x y)
- // result: (VPMOVMToVec8x64 (VPCMPB512 [14] x y))
+ // result: (VPMOVMToVec8x64 (VPCMPGTB512 x y))
for {
x := v_0
y := v_1
v.reset(OpAMD64VPMOVMToVec8x64)
- v0 := b.NewValue0(v.Pos, OpAMD64VPCMPB512, typ.Mask)
- v0.AuxInt = int8ToAuxInt(14)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPCMPGTB512, typ.Mask)
v0.AddArg2(x, y)
v.AddArg(v0)
return true
@@ -53277,6 +53289,234 @@ func rewriteValueAMD64_OpStore(v *Value) bool {
}
return false
}
+// rewriteValueAMD64_OpStoreMask16x16 lowers the generic StoreMask16x16 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec16x16ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask16x16 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask16x32 lowers the generic StoreMask16x32 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec16x32ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask16x32 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask16x8 lowers the generic StoreMask16x8 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec16x8ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask16x8 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask32x16 lowers the generic StoreMask32x16 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec32x16ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask32x16 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask32x4 lowers the generic StoreMask32x4 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec32x4ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask32x4 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask32x8 lowers the generic StoreMask32x8 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec32x8ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask32x8 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask64x2 lowers the generic StoreMask64x2 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec64x2ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask64x2 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask64x4 lowers the generic StoreMask64x4 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec64x4ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask64x4 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask64x8 lowers the generic StoreMask64x8 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec64x8ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask64x8 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask8x16 lowers the generic StoreMask8x16 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec8x16ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask8x16 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask8x32 lowers the generic StoreMask8x32 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec8x32ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+// NOTE(review): every StoreMask shape lowers to the same KMOVQstore op;
+// presumably the destination always has room for a quadword store - confirm.
+func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask8x32 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
+// rewriteValueAMD64_OpStoreMask8x64 lowers the generic StoreMask8x64 op for
+// AMD64. v is rewritten in place into a KMOVQstore whose value operand is a
+// newly created VPMOVVec8x64ToM wrapping the original val argument; the new
+// value is given the store's aux type t. The single rule has no condition, so
+// the function always returns true.
+func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
+ v_2 := v.Args[2]
+ v_1 := v.Args[1]
+ v_0 := v.Args[0]
+ b := v.Block
+ // match: (StoreMask8x64 {t} ptr val mem)
+ // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
+ for {
+ t := auxToType(v.Aux)
+ ptr := v_0
+ val := v_1
+ mem := v_2
+ // v becomes the store; v0 is the converted operand that gets stored.
+ v.reset(OpAMD64KMOVQstore)
+ v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
+ v0.AddArg(val)
+ v.AddArg3(ptr, v0, mem)
+ return true
+ }
+}
func rewriteValueAMD64_OpSubMaskedFloat32x16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index e012b536b5..0284729a52 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -1791,6 +1791,23 @@ func simdLoadMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ss
}
}
+// simdStoreMask returns the intrinsic builder used for the StoreToBits method
+// of a mask type with the given element width (elemBits) and lane count
+// (lanes). The returned builder emits the matching generic StoreMask op and
+// records its result as the current memory state; it yields no SSA value
+// (it returns nil). The builder panics when invoked for an elemBits/lanes
+// pair that has no StoreMask op in the table below.
+func simdStoreMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
+ // Generic store op per supported shape, keyed by element width and then
+ // by lane count.
+ // NOTE(review): the table is rebuilt on every call of the builder; the
+ // lookup depends only on elemBits and lanes, so it could presumably be
+ // hoisted out of the closure - confirm that an earlier panic is acceptable.
+ opCodes := map[int]map[int]ssa.Op{
+ 8: {16: ssa.OpStoreMask8x16, 32: ssa.OpStoreMask8x32, 64: ssa.OpStoreMask8x64},
+ 16: {8: ssa.OpStoreMask16x8, 16: ssa.OpStoreMask16x16, 32: ssa.OpStoreMask16x32},
+ 32: {4: ssa.OpStoreMask32x4, 8: ssa.OpStoreMask32x8, 16: ssa.OpStoreMask32x16},
+ 64: {2: ssa.OpStoreMask64x2, 4: ssa.OpStoreMask64x4, 8: ssa.OpStoreMask64x8},
+ }
+ op := opCodes[elemBits][lanes]
+ // A missing map entry yields the zero ssa.Op, which marks an unknown shape.
+ if op == 0 {
+ panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
+ }
+ // Operands are passed as (args[1], args[0], current memory), with
+ // types.TypeMask as the op's aux type; the result replaces the memory variable.
+ s.vars[memVar] = s.newValue3A(op, types.TypeMem, types.TypeMask, args[1], args[0], s.mem())
+ return nil
+ }
+}
+
// findIntrinsic returns a function which builds the SSA equivalent of the
// function identified by the symbol sym. If sym is not an intrinsic call, returns nil.
func findIntrinsic(sym *types.Sym) intrinsicBuilder {
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 8040a187bd..8b3b08f886 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -310,34 +310,34 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Float64x2.DotProdBroadcast", opLen2(ssa.OpDotProdBroadcastFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x16.Equal", opLen2(ssa.OpEqualInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Equal", opLen2(ssa.OpEqualInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Equal", opLen2(ssa.OpEqualInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Equal", opLen2(ssa.OpEqualInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Equal", opLen2(ssa.OpEqualInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Equal", opLen2(ssa.OpEqualInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Equal", opLen2(ssa.OpEqualInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Equal", opLen2(ssa.OpEqualInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Equal", opLen2(ssa.OpEqualUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Equal", opLen2(ssa.OpEqualUint8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint16x8.Equal", opLen2(ssa.OpEqualUint16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint16x16.Equal", opLen2(ssa.OpEqualUint16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint32x4.Equal", opLen2(ssa.OpEqualUint32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint32x8.Equal", opLen2(ssa.OpEqualUint32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint64x2.Equal", opLen2(ssa.OpEqualUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Equal", opLen2(ssa.OpEqualUint64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Equal", opLen2(ssa.OpEqualFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Equal", opLen2(ssa.OpEqualFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Equal", opLen2(ssa.OpEqualFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Equal", opLen2(ssa.OpEqualFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Equal", opLen2(ssa.OpEqualFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Equal", opLen2(ssa.OpEqualFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x64.Equal", opLen2(ssa.OpEqualInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x32.Equal", opLen2(ssa.OpEqualInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x16.Equal", opLen2(ssa.OpEqualInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x8.Equal", opLen2(ssa.OpEqualInt64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x64.Equal", opLen2(ssa.OpEqualUint8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint16x32.Equal", opLen2(ssa.OpEqualUint16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint32x16.Equal", opLen2(ssa.OpEqualUint32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint64x8.Equal", opLen2(ssa.OpEqualUint64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.EqualMasked", opLen3(ssa.OpEqualMaskedFloat32x16, types.TypeVec512), sys.AMD64)
@@ -458,22 +458,22 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.GetElem", opLen1Imm8(ssa.OpGetElemUint64x2, types.Types[types.TUINT64], 0), sys.AMD64)
addF(simdPackage, "Int8x16.Greater", opLen2(ssa.OpGreaterInt8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int8x32.Greater", opLen2(ssa.OpGreaterInt8x32, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int16x8.Greater", opLen2(ssa.OpGreaterInt16x8, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int16x16.Greater", opLen2(ssa.OpGreaterInt16x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int32x4.Greater", opLen2(ssa.OpGreaterInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int32x8.Greater", opLen2(ssa.OpGreaterInt32x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int64x2.Greater", opLen2(ssa.OpGreaterInt64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Int64x4.Greater", opLen2(ssa.OpGreaterInt64x4, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float32x4.Greater", opLen2(ssa.OpGreaterFloat32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float32x8.Greater", opLen2(ssa.OpGreaterFloat32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float32x16.Greater", opLen2(ssa.OpGreaterFloat32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Float64x2.Greater", opLen2(ssa.OpGreaterFloat64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Float64x4.Greater", opLen2(ssa.OpGreaterFloat64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Float64x8.Greater", opLen2(ssa.OpGreaterFloat64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x64.Greater", opLen2(ssa.OpGreaterInt8x64, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int16x32.Greater", opLen2(ssa.OpGreaterInt16x32, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int32x16.Greater", opLen2(ssa.OpGreaterInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int64x8.Greater", opLen2(ssa.OpGreaterInt64x8, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Uint8x16.Greater", opLen2(ssa.OpGreaterUint8x16, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint8x32.Greater", opLen2(ssa.OpGreaterUint8x32, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint8x64.Greater", opLen2(ssa.OpGreaterUint8x64, types.TypeVec512), sys.AMD64)
@@ -2137,59 +2137,71 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
+ addF(simdPackage, "Mask8x16.StoreToBits", simdStoreMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x32.AsMask8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
+ addF(simdPackage, "Mask8x32.StoreToBits", simdStoreMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x64.AsMask8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
+ addF(simdPackage, "Mask8x64.StoreToBits", simdStoreMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x8.AsMask16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
+ addF(simdPackage, "Mask16x8.StoreToBits", simdStoreMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x16.AsMask16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
+ addF(simdPackage, "Mask16x16.StoreToBits", simdStoreMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x32.AsMask16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
+ addF(simdPackage, "Mask16x32.StoreToBits", simdStoreMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x4.AsMask32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
+ addF(simdPackage, "Mask32x4.StoreToBits", simdStoreMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x8.AsMask32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
+ addF(simdPackage, "Mask32x8.StoreToBits", simdStoreMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x16.AsMask32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
+ addF(simdPackage, "Mask32x16.StoreToBits", simdStoreMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x2.AsMask64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
+ addF(simdPackage, "Mask64x2.StoreToBits", simdStoreMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x4.AsMask64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
+ addF(simdPackage, "Mask64x4.StoreToBits", simdStoreMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x8.AsMask64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
+ addF(simdPackage, "Mask64x8.StoreToBits", simdStoreMask(64, 8), sys.AMD64)
}