aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/compile
diff options
context:
space:
mode:
authorJunyang Shao <shaojunyang@google.com>2025-10-10 17:42:59 +0000
committerJunyang Shao <shaojunyang@google.com>2025-10-14 12:26:37 -0700
commit2e71cf1a2a6f289cb0d5e1acaca472394d95600e (patch)
treedb34852fac313466997ab818d5b4714e6a7c92e5 /src/cmd/compile
parentc4fbf3b4cff14c1a0208b45101e4955414ab1c03 (diff)
downloadgo-2e71cf1a2a6f289cb0d5e1acaca472394d95600e.tar.xz
[dev.simd] cmd/compile, simd: remove mask load and stores
We already have convert-mask-to-bits operations; the API of mask loads and stores is inconsistent with them, and mask loads and stores could simply be hidden behind peepholes. So this CL removes them; the next CL will add the peephole for them. Change-Id: Ifa7d23fb52bb0efd1785935ead4d703927f16d2b Reviewed-on: https://go-review.googlesource.com/c/go/+/710915 Reviewed-by: Cherry Mui <cherryyz@google.com> LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Diffstat (limited to 'src/cmd/compile')
-rw-r--r--src/cmd/compile/internal/ssa/_gen/AMD64.rules35
-rw-r--r--src/cmd/compile/internal/ssa/_gen/genericOps.go27
-rw-r--r--src/cmd/compile/internal/ssa/opGen.go156
-rw-r--r--src/cmd/compile/internal/ssa/rewriteAMD64.go492
-rw-r--r--src/cmd/compile/internal/ssagen/intrinsics.go34
-rw-r--r--src/cmd/compile/internal/ssagen/simdintrinsics.go24
6 files changed, 1 insertions, 767 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64.rules b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
index 3689c12411..2b44871960 100644
--- a/src/cmd/compile/internal/ssa/_gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/AMD64.rules
@@ -1641,41 +1641,6 @@
// SIMD lowering rules
-// Mask loads
-(LoadMask8x16 <t> ptr mem) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask8x32 <t> ptr mem) => (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask8x64 <t> ptr mem) => (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask16x8 <t> ptr mem) => (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask16x16 <t> ptr mem) => (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask16x32 <t> ptr mem) => (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask32x4 <t> ptr mem) => (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask32x8 <t> ptr mem) => (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask32x16 <t> ptr mem) => (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(LoadMask64x2 <t> ptr mem) => (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
-(LoadMask64x4 <t> ptr mem) => (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
-(LoadMask64x8 <t> ptr mem) => (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
-
-(StoreMask8x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
-(StoreMask8x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
-(StoreMask8x64 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
-
-(StoreMask16x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
-(StoreMask16x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
-(StoreMask16x32 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
-
-(StoreMask32x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
-(StoreMask32x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
-(StoreMask32x16 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
-
-(StoreMask64x2 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
-(StoreMask64x4 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
-(StoreMask64x8 {t} ptr val mem) => (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
-
-// TODO is this correct? Should we just do it all from 64-bits?
-
// Mask conversions
// integers to masks
(Cvt16toMask8x16 <t> x) => (VPMOVMToVec8x16 <types.TypeVec128> (KMOVWk <t> x))
diff --git a/src/cmd/compile/internal/ssa/_gen/genericOps.go b/src/cmd/compile/internal/ssa/_gen/genericOps.go
index 6b94fea819..18bd8d7fe9 100644
--- a/src/cmd/compile/internal/ssa/_gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/genericOps.go
@@ -676,32 +676,7 @@ var genericOps = []opData{
{name: "PrefetchCacheStreamed", argLength: 2, hasSideEffects: true}, // Do non-temporal or streamed prefetch arg0 to cache. arg0=addr, arg1=memory.
// SIMD
- {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
- {name: "LoadMask8x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask8x32", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask8x64", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x8", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask16x32", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x4", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x8", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask32x16", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x2", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x4", argLength: 2}, // arg0 = ptr, arg1 = mem
- {name: "LoadMask64x8", argLength: 2}, // arg0 = ptr, arg1 = mem
-
- {name: "StoreMask8x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask8x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask8x64", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask16x32", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask32x16", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x2", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x4", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
- {name: "StoreMask64x8", argLength: 3, typ: "Mem", aux: "Typ"}, // Store arg1 to arg0. arg2=memory, aux=type. Returns memory.
+ {name: "ZeroSIMD", argLength: 0}, // zero value of a vector
// Convert integers to masks
{name: "Cvt16toMask8x16", argLength: 1}, // arg0 = integer mask value
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index aef077bb8e..08b6bffd0e 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -5364,30 +5364,6 @@ const (
OpPrefetchCache
OpPrefetchCacheStreamed
OpZeroSIMD
- OpLoadMask8x16
- OpLoadMask8x32
- OpLoadMask8x64
- OpLoadMask16x8
- OpLoadMask16x16
- OpLoadMask16x32
- OpLoadMask32x4
- OpLoadMask32x8
- OpLoadMask32x16
- OpLoadMask64x2
- OpLoadMask64x4
- OpLoadMask64x8
- OpStoreMask8x16
- OpStoreMask8x32
- OpStoreMask8x64
- OpStoreMask16x8
- OpStoreMask16x16
- OpStoreMask16x32
- OpStoreMask32x4
- OpStoreMask32x8
- OpStoreMask32x16
- OpStoreMask64x2
- OpStoreMask64x4
- OpStoreMask64x8
OpCvt16toMask8x16
OpCvt32toMask8x32
OpCvt64toMask8x64
@@ -75966,138 +75942,6 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "LoadMask8x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask8x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask8x64",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask16x32",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask32x16",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x2",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x4",
- argLen: 2,
- generic: true,
- },
- {
- name: "LoadMask64x8",
- argLen: 2,
- generic: true,
- },
- {
- name: "StoreMask8x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask8x32",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask8x64",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask16x32",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x4",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask32x16",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x2",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x4",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
- name: "StoreMask64x8",
- auxType: auxTyp,
- argLen: 3,
- generic: true,
- },
- {
name: "Cvt16toMask8x16",
argLen: 1,
generic: true,
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 84bb4c1148..5220a0a73c 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -3769,30 +3769,6 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpLessUint8x64(v)
case OpLoad:
return rewriteValueAMD64_OpLoad(v)
- case OpLoadMask16x16:
- return rewriteValueAMD64_OpLoadMask16x16(v)
- case OpLoadMask16x32:
- return rewriteValueAMD64_OpLoadMask16x32(v)
- case OpLoadMask16x8:
- return rewriteValueAMD64_OpLoadMask16x8(v)
- case OpLoadMask32x16:
- return rewriteValueAMD64_OpLoadMask32x16(v)
- case OpLoadMask32x4:
- return rewriteValueAMD64_OpLoadMask32x4(v)
- case OpLoadMask32x8:
- return rewriteValueAMD64_OpLoadMask32x8(v)
- case OpLoadMask64x2:
- return rewriteValueAMD64_OpLoadMask64x2(v)
- case OpLoadMask64x4:
- return rewriteValueAMD64_OpLoadMask64x4(v)
- case OpLoadMask64x8:
- return rewriteValueAMD64_OpLoadMask64x8(v)
- case OpLoadMask8x16:
- return rewriteValueAMD64_OpLoadMask8x16(v)
- case OpLoadMask8x32:
- return rewriteValueAMD64_OpLoadMask8x32(v)
- case OpLoadMask8x64:
- return rewriteValueAMD64_OpLoadMask8x64(v)
case OpLoadMasked16:
return rewriteValueAMD64_OpLoadMasked16(v)
case OpLoadMasked32:
@@ -5636,30 +5612,6 @@ func rewriteValueAMD64(v *Value) bool {
return true
case OpStore:
return rewriteValueAMD64_OpStore(v)
- case OpStoreMask16x16:
- return rewriteValueAMD64_OpStoreMask16x16(v)
- case OpStoreMask16x32:
- return rewriteValueAMD64_OpStoreMask16x32(v)
- case OpStoreMask16x8:
- return rewriteValueAMD64_OpStoreMask16x8(v)
- case OpStoreMask32x16:
- return rewriteValueAMD64_OpStoreMask32x16(v)
- case OpStoreMask32x4:
- return rewriteValueAMD64_OpStoreMask32x4(v)
- case OpStoreMask32x8:
- return rewriteValueAMD64_OpStoreMask32x8(v)
- case OpStoreMask64x2:
- return rewriteValueAMD64_OpStoreMask64x2(v)
- case OpStoreMask64x4:
- return rewriteValueAMD64_OpStoreMask64x4(v)
- case OpStoreMask64x8:
- return rewriteValueAMD64_OpStoreMask64x8(v)
- case OpStoreMask8x16:
- return rewriteValueAMD64_OpStoreMask8x16(v)
- case OpStoreMask8x32:
- return rewriteValueAMD64_OpStoreMask8x32(v)
- case OpStoreMask8x64:
- return rewriteValueAMD64_OpStoreMask8x64(v)
case OpStoreMasked16:
return rewriteValueAMD64_OpStoreMasked16(v)
case OpStoreMasked32:
@@ -54997,222 +54949,6 @@ func rewriteValueAMD64_OpLoad(v *Value) bool {
}
return false
}
-func rewriteValueAMD64_OpLoadMask16x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x16 <t> ptr mem)
- // result: (VPMOVMToVec16x16 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x16)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask16x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x32 <t> ptr mem)
- // result: (VPMOVMToVec16x32 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x32)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask16x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask16x8 <t> ptr mem)
- // result: (VPMOVMToVec16x8 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec16x8)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x16 <t> ptr mem)
- // result: (VPMOVMToVec32x16 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x16)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x4 <t> ptr mem)
- // result: (VPMOVMToVec32x4 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x4)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask32x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask32x8 <t> ptr mem)
- // result: (VPMOVMToVec32x8 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec32x8)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x2(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x2 <t> ptr mem)
- // result: (VPMOVMToVec64x2 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x2)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x4(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x4 <t> ptr mem)
- // result: (VPMOVMToVec64x4 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x4)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask64x8(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask64x8 <t> ptr mem)
- // result: (VPMOVMToVec64x8 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec64x8)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x16(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x16 <t> ptr mem)
- // result: (VPMOVMToVec8x16 <types.TypeVec128> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x16)
- v.Type = types.TypeVec128
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x32(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x32 <t> ptr mem)
- // result: (VPMOVMToVec8x32 <types.TypeVec256> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x32)
- v.Type = types.TypeVec256
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
-func rewriteValueAMD64_OpLoadMask8x64(v *Value) bool {
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (LoadMask8x64 <t> ptr mem)
- // result: (VPMOVMToVec8x64 <types.TypeVec512> (KMOVQload <t> ptr mem))
- for {
- t := v.Type
- ptr := v_0
- mem := v_1
- v.reset(OpAMD64VPMOVMToVec8x64)
- v.Type = types.TypeVec512
- v0 := b.NewValue0(v.Pos, OpAMD64KMOVQload, t)
- v0.AddArg2(ptr, mem)
- v.AddArg(v0)
- return true
- }
-}
func rewriteValueAMD64_OpLoadMasked16(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -59830,234 +59566,6 @@ func rewriteValueAMD64_OpStore(v *Value) bool {
}
return false
}
-func rewriteValueAMD64_OpStoreMask16x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask16x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x32 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x32ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x32ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask16x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask16x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec16x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec16x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x4 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x4ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x4ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask32x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask32x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec32x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec32x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x2(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x2 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x2ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x2ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x4(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x4 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x4ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x4ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask64x8(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask64x8 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec64x8ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec64x8ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x16(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x16 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x16ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x16ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x32(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x32 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x32ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x32ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
-func rewriteValueAMD64_OpStoreMask8x64(v *Value) bool {
- v_2 := v.Args[2]
- v_1 := v.Args[1]
- v_0 := v.Args[0]
- b := v.Block
- // match: (StoreMask8x64 {t} ptr val mem)
- // result: (KMOVQstore ptr (VPMOVVec8x64ToM <t> val) mem)
- for {
- t := auxToType(v.Aux)
- ptr := v_0
- val := v_1
- mem := v_2
- v.reset(OpAMD64KMOVQstore)
- v0 := b.NewValue0(v.Pos, OpAMD64VPMOVVec8x64ToM, t)
- v0.AddArg(val)
- v.AddArg3(ptr, v0, mem)
- return true
- }
-}
func rewriteValueAMD64_OpStoreMasked16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
diff --git a/src/cmd/compile/internal/ssagen/intrinsics.go b/src/cmd/compile/internal/ssagen/intrinsics.go
index 6561cbe9a2..f663680fc4 100644
--- a/src/cmd/compile/internal/ssagen/intrinsics.go
+++ b/src/cmd/compile/internal/ssagen/intrinsics.go
@@ -2024,13 +2024,6 @@ func simdStore() func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
}
}
-var loadMaskOpcodes = map[int]map[int]ssa.Op{
- 8: {16: ssa.OpLoadMask8x16, 32: ssa.OpLoadMask8x32, 64: ssa.OpLoadMask8x64},
- 16: {8: ssa.OpLoadMask16x8, 16: ssa.OpLoadMask16x16, 32: ssa.OpLoadMask16x32},
- 32: {4: ssa.OpLoadMask32x4, 8: ssa.OpLoadMask32x8, 16: ssa.OpLoadMask32x16},
- 64: {2: ssa.OpLoadMask64x2, 4: ssa.OpLoadMask64x4, 8: ssa.OpLoadMask64x8},
-}
-
var cvtVToMaskOpcodes = map[int]map[int]ssa.Op{
8: {16: ssa.OpCvt16toMask8x16, 32: ssa.OpCvt32toMask8x32, 64: ssa.OpCvt64toMask8x64},
16: {8: ssa.OpCvt8toMask16x8, 16: ssa.OpCvt16toMask16x16, 32: ssa.OpCvt32toMask16x32},
@@ -2045,33 +2038,6 @@ var cvtMaskToVOpcodes = map[int]map[int]ssa.Op{
64: {2: ssa.OpCvtMask64x2to8, 4: ssa.OpCvtMask64x4to8, 8: ssa.OpCvtMask64x8to8},
}
-func simdLoadMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- op := loadMaskOpcodes[elemBits][lanes]
- if op == 0 {
- panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
- }
- return s.newValue2(op, types.TypeMask, args[0], s.mem())
- }
-}
-
-func simdStoreMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
- opCodes := map[int]map[int]ssa.Op{
- 8: {16: ssa.OpStoreMask8x16, 32: ssa.OpStoreMask8x32, 64: ssa.OpStoreMask8x64},
- 16: {8: ssa.OpStoreMask16x8, 16: ssa.OpStoreMask16x16, 32: ssa.OpStoreMask16x32},
- 32: {4: ssa.OpStoreMask32x4, 8: ssa.OpStoreMask32x8, 16: ssa.OpStoreMask32x16},
- 64: {2: ssa.OpStoreMask64x2, 4: ssa.OpStoreMask64x4, 8: ssa.OpStoreMask64x8},
- }
- op := opCodes[elemBits][lanes]
- if op == 0 {
- panic(fmt.Sprintf("Unknown mask shape: Mask%dx%d", elemBits, lanes))
- }
- s.vars[memVar] = s.newValue3A(op, types.TypeMem, types.TypeMask, args[1], args[0], s.mem())
- return nil
- }
-}
-
func simdCvtVToMask(elemBits, lanes int) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
op := cvtVToMaskOpcodes[elemBits][lanes]
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index f2e82d234c..47be7d67a4 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -1685,96 +1685,72 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Int8x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x16.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask8x16.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask8x16FromBits", simdLoadMask(8, 16), sys.AMD64)
- addF(simdPackage, "Mask8x16.StoreToBits", simdStoreMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x16FromBits", simdCvtVToMask(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x16.ToBits", simdCvtMaskToV(8, 16), sys.AMD64)
addF(simdPackage, "Mask8x32.AsInt8x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x32.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask8x32.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask8x32FromBits", simdLoadMask(8, 32), sys.AMD64)
- addF(simdPackage, "Mask8x32.StoreToBits", simdStoreMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x32FromBits", simdCvtVToMask(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x32.ToBits", simdCvtMaskToV(8, 32), sys.AMD64)
addF(simdPackage, "Mask8x64.AsInt8x64", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int8x64.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask8x64.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask8x64.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask8x64FromBits", simdLoadMask(8, 64), sys.AMD64)
- addF(simdPackage, "Mask8x64.StoreToBits", simdStoreMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask8x64FromBits", simdCvtVToMask(8, 64), sys.AMD64)
addF(simdPackage, "Mask8x64.ToBits", simdCvtMaskToV(8, 64), sys.AMD64)
addF(simdPackage, "Mask16x8.AsInt16x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x8.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask16x8.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask16x8FromBits", simdLoadMask(16, 8), sys.AMD64)
- addF(simdPackage, "Mask16x8.StoreToBits", simdStoreMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x8FromBits", simdCvtVToMask(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x8.ToBits", simdCvtMaskToV(16, 8), sys.AMD64)
addF(simdPackage, "Mask16x16.AsInt16x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x16.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask16x16.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask16x16FromBits", simdLoadMask(16, 16), sys.AMD64)
- addF(simdPackage, "Mask16x16.StoreToBits", simdStoreMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x16FromBits", simdCvtVToMask(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x16.ToBits", simdCvtMaskToV(16, 16), sys.AMD64)
addF(simdPackage, "Mask16x32.AsInt16x32", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int16x32.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask16x32.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask16x32.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask16x32FromBits", simdLoadMask(16, 32), sys.AMD64)
- addF(simdPackage, "Mask16x32.StoreToBits", simdStoreMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask16x32FromBits", simdCvtVToMask(16, 32), sys.AMD64)
addF(simdPackage, "Mask16x32.ToBits", simdCvtMaskToV(16, 32), sys.AMD64)
addF(simdPackage, "Mask32x4.AsInt32x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x4.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask32x4.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask32x4FromBits", simdLoadMask(32, 4), sys.AMD64)
- addF(simdPackage, "Mask32x4.StoreToBits", simdStoreMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x4FromBits", simdCvtVToMask(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x4.ToBits", simdCvtMaskToV(32, 4), sys.AMD64)
addF(simdPackage, "Mask32x8.AsInt32x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x8.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask32x8.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask32x8FromBits", simdLoadMask(32, 8), sys.AMD64)
- addF(simdPackage, "Mask32x8.StoreToBits", simdStoreMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x8FromBits", simdCvtVToMask(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x8.ToBits", simdCvtMaskToV(32, 8), sys.AMD64)
addF(simdPackage, "Mask32x16.AsInt32x16", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int32x16.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask32x16.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask32x16.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask32x16FromBits", simdLoadMask(32, 16), sys.AMD64)
- addF(simdPackage, "Mask32x16.StoreToBits", simdStoreMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask32x16FromBits", simdCvtVToMask(32, 16), sys.AMD64)
addF(simdPackage, "Mask32x16.ToBits", simdCvtMaskToV(32, 16), sys.AMD64)
addF(simdPackage, "Mask64x2.AsInt64x2", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x2.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x2.And", opLen2(ssa.OpAndInt32x4, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Mask64x2.Or", opLen2(ssa.OpOrInt32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "LoadMask64x2FromBits", simdLoadMask(64, 2), sys.AMD64)
- addF(simdPackage, "Mask64x2.StoreToBits", simdStoreMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x2FromBits", simdCvtVToMask(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x2.ToBits", simdCvtMaskToV(64, 2), sys.AMD64)
addF(simdPackage, "Mask64x4.AsInt64x4", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x4.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x4.And", opLen2(ssa.OpAndInt32x8, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Mask64x4.Or", opLen2(ssa.OpOrInt32x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "LoadMask64x4FromBits", simdLoadMask(64, 4), sys.AMD64)
- addF(simdPackage, "Mask64x4.StoreToBits", simdStoreMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x4FromBits", simdCvtVToMask(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x4.ToBits", simdCvtMaskToV(64, 4), sys.AMD64)
addF(simdPackage, "Mask64x8.AsInt64x8", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Int64x8.asMask", func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value { return args[0] }, sys.AMD64)
addF(simdPackage, "Mask64x8.And", opLen2(ssa.OpAndInt32x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Mask64x8.Or", opLen2(ssa.OpOrInt32x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "LoadMask64x8FromBits", simdLoadMask(64, 8), sys.AMD64)
- addF(simdPackage, "Mask64x8.StoreToBits", simdStoreMask(64, 8), sys.AMD64)
addF(simdPackage, "Mask64x8FromBits", simdCvtVToMask(64, 8), sys.AMD64)
addF(simdPackage, "Mask64x8.ToBits", simdCvtMaskToV(64, 8), sys.AMD64)
}