aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Cherry Mui <cherryyz@google.com> 2025-12-29 11:30:33 -0500
committer: Cherry Mui <cherryyz@google.com> 2025-12-29 16:20:42 -0800
commit: 76dddce29302681bdddafd4cbc27db66802414dd (patch)
tree: a3170ce19a1c23e87bc59e9b5374a172189da693
parent: 6ecdd2fc6e58c0cd2b788032a85f7845af10c922 (diff)
download: go-76dddce29302681bdddafd4cbc27db66802414dd.tar.xz
simd/archsimd: remove redundant suffix of ExtendLo operations
For methods like ExtendLo2ToInt64x2, the last "x2" is redundant, as it is already mentioned in "Lo2". Remove it, so it is just ExtendLo2ToInt64.

Change-Id: I490afd818c40bb7a4ef15c249723895735bd6488
Reviewed-on: https://go-review.googlesource.com/c/go/+/733100
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdAMD64.rules | 40
-rw-r--r-- src/cmd/compile/internal/ssa/_gen/simdgenericOps.go | 40
-rw-r--r-- src/cmd/compile/internal/ssa/opGen.go | 80
-rw-r--r-- src/cmd/compile/internal/ssa/rewriteAMD64.go | 40
-rw-r--r-- src/cmd/compile/internal/ssagen/simdintrinsics.go | 40
-rw-r--r-- src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml | 40
-rw-r--r-- src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml | 40
-rw-r--r-- src/simd/archsimd/ops_amd64.go | 104
8 files changed, 212 insertions, 212 deletions
diff --git a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
index a7a6a3f7a1..8a5b70da30 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
+++ b/src/cmd/compile/internal/ssa/_gen/simdAMD64.rules
@@ -376,26 +376,26 @@
(ExpandUint64x2 x mask) => (VPEXPANDQMasked128 x (VPMOVVec64x2ToM <types.TypeMask> mask))
(ExpandUint64x4 x mask) => (VPEXPANDQMasked256 x (VPMOVVec64x4ToM <types.TypeMask> mask))
(ExpandUint64x8 x mask) => (VPEXPANDQMasked512 x (VPMOVVec64x8ToM <types.TypeMask> mask))
-(ExtendLo2ToInt64x2Int8x16 ...) => (VPMOVSXBQ128 ...)
-(ExtendLo2ToInt64x2Int16x8 ...) => (VPMOVSXWQ128 ...)
-(ExtendLo2ToInt64x2Int32x4 ...) => (VPMOVSXDQ128 ...)
-(ExtendLo2ToUint64x2Uint8x16 ...) => (VPMOVZXBQ128 ...)
-(ExtendLo2ToUint64x2Uint16x8 ...) => (VPMOVZXWQ128 ...)
-(ExtendLo2ToUint64x2Uint32x4 ...) => (VPMOVZXDQ128 ...)
-(ExtendLo4ToInt32x4Int8x16 ...) => (VPMOVSXBD128 ...)
-(ExtendLo4ToInt32x4Int16x8 ...) => (VPMOVSXWD128 ...)
-(ExtendLo4ToInt64x4Int8x16 ...) => (VPMOVSXBQ256 ...)
-(ExtendLo4ToInt64x4Int16x8 ...) => (VPMOVSXWQ256 ...)
-(ExtendLo4ToUint32x4Uint8x16 ...) => (VPMOVZXBD128 ...)
-(ExtendLo4ToUint32x4Uint16x8 ...) => (VPMOVZXWD128 ...)
-(ExtendLo4ToUint64x4Uint8x16 ...) => (VPMOVZXBQ256 ...)
-(ExtendLo4ToUint64x4Uint16x8 ...) => (VPMOVZXWQ256 ...)
-(ExtendLo8ToInt16x8Int8x16 ...) => (VPMOVSXBW128 ...)
-(ExtendLo8ToInt32x8Int8x16 ...) => (VPMOVSXBD256 ...)
-(ExtendLo8ToInt64x8Int8x16 ...) => (VPMOVSXBQ512 ...)
-(ExtendLo8ToUint16x8Uint8x16 ...) => (VPMOVZXBW128 ...)
-(ExtendLo8ToUint32x8Uint8x16 ...) => (VPMOVZXBD256 ...)
-(ExtendLo8ToUint64x8Uint8x16 ...) => (VPMOVZXBQ512 ...)
+(ExtendLo2ToInt64Int8x16 ...) => (VPMOVSXBQ128 ...)
+(ExtendLo2ToInt64Int16x8 ...) => (VPMOVSXWQ128 ...)
+(ExtendLo2ToInt64Int32x4 ...) => (VPMOVSXDQ128 ...)
+(ExtendLo2ToUint64Uint8x16 ...) => (VPMOVZXBQ128 ...)
+(ExtendLo2ToUint64Uint16x8 ...) => (VPMOVZXWQ128 ...)
+(ExtendLo2ToUint64Uint32x4 ...) => (VPMOVZXDQ128 ...)
+(ExtendLo4ToInt32Int8x16 ...) => (VPMOVSXBD128 ...)
+(ExtendLo4ToInt32Int16x8 ...) => (VPMOVSXWD128 ...)
+(ExtendLo4ToInt64Int8x16 ...) => (VPMOVSXBQ256 ...)
+(ExtendLo4ToInt64Int16x8 ...) => (VPMOVSXWQ256 ...)
+(ExtendLo4ToUint32Uint8x16 ...) => (VPMOVZXBD128 ...)
+(ExtendLo4ToUint32Uint16x8 ...) => (VPMOVZXWD128 ...)
+(ExtendLo4ToUint64Uint8x16 ...) => (VPMOVZXBQ256 ...)
+(ExtendLo4ToUint64Uint16x8 ...) => (VPMOVZXWQ256 ...)
+(ExtendLo8ToInt16Int8x16 ...) => (VPMOVSXBW128 ...)
+(ExtendLo8ToInt32Int8x16 ...) => (VPMOVSXBD256 ...)
+(ExtendLo8ToInt64Int8x16 ...) => (VPMOVSXBQ512 ...)
+(ExtendLo8ToUint16Uint8x16 ...) => (VPMOVZXBW128 ...)
+(ExtendLo8ToUint32Uint8x16 ...) => (VPMOVZXBD256 ...)
+(ExtendLo8ToUint64Uint8x16 ...) => (VPMOVZXBQ512 ...)
(ExtendToInt16Int8x16 ...) => (VPMOVSXBW256 ...)
(ExtendToInt16Int8x32 ...) => (VPMOVSXBW512 ...)
(ExtendToInt32Int8x16 ...) => (VPMOVSXBD512 ...)
diff --git a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
index fd43f3d61f..af1007cd54 100644
--- a/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
+++ b/src/cmd/compile/internal/ssa/_gen/simdgenericOps.go
@@ -364,26 +364,26 @@ func simdGenericOps() []opData {
{name: "ExpandUint64x2", argLength: 2, commutative: false},
{name: "ExpandUint64x4", argLength: 2, commutative: false},
{name: "ExpandUint64x8", argLength: 2, commutative: false},
- {name: "ExtendLo2ToInt64x2Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo2ToInt64x2Int16x8", argLength: 1, commutative: false},
- {name: "ExtendLo2ToInt64x2Int32x4", argLength: 1, commutative: false},
- {name: "ExtendLo2ToUint64x2Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo2ToUint64x2Uint16x8", argLength: 1, commutative: false},
- {name: "ExtendLo2ToUint64x2Uint32x4", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt32x4Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt32x4Int16x8", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt64x4Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToInt64x4Int16x8", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint32x4Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint32x4Uint16x8", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint64x4Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo4ToUint64x4Uint16x8", argLength: 1, commutative: false},
- {name: "ExtendLo8ToInt16x8Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToInt32x8Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToInt64x8Int8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToUint16x8Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToUint32x8Uint8x16", argLength: 1, commutative: false},
- {name: "ExtendLo8ToUint64x8Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToInt64Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToInt64Int16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToInt64Int32x4", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToUint64Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToUint64Uint16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo2ToUint64Uint32x4", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt32Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt32Int16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt64Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToInt64Int16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint32Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint32Uint16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint64Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo4ToUint64Uint16x8", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToInt16Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToInt32Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToInt64Int8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToUint16Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToUint32Uint8x16", argLength: 1, commutative: false},
+ {name: "ExtendLo8ToUint64Uint8x16", argLength: 1, commutative: false},
{name: "ExtendToInt16Int8x16", argLength: 1, commutative: false},
{name: "ExtendToInt16Int8x32", argLength: 1, commutative: false},
{name: "ExtendToInt32Int8x16", argLength: 1, commutative: false},
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index f318adfd2f..d2ba15f740 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -6518,26 +6518,26 @@ const (
OpExpandUint64x2
OpExpandUint64x4
OpExpandUint64x8
- OpExtendLo2ToInt64x2Int8x16
- OpExtendLo2ToInt64x2Int16x8
- OpExtendLo2ToInt64x2Int32x4
- OpExtendLo2ToUint64x2Uint8x16
- OpExtendLo2ToUint64x2Uint16x8
- OpExtendLo2ToUint64x2Uint32x4
- OpExtendLo4ToInt32x4Int8x16
- OpExtendLo4ToInt32x4Int16x8
- OpExtendLo4ToInt64x4Int8x16
- OpExtendLo4ToInt64x4Int16x8
- OpExtendLo4ToUint32x4Uint8x16
- OpExtendLo4ToUint32x4Uint16x8
- OpExtendLo4ToUint64x4Uint8x16
- OpExtendLo4ToUint64x4Uint16x8
- OpExtendLo8ToInt16x8Int8x16
- OpExtendLo8ToInt32x8Int8x16
- OpExtendLo8ToInt64x8Int8x16
- OpExtendLo8ToUint16x8Uint8x16
- OpExtendLo8ToUint32x8Uint8x16
- OpExtendLo8ToUint64x8Uint8x16
+ OpExtendLo2ToInt64Int8x16
+ OpExtendLo2ToInt64Int16x8
+ OpExtendLo2ToInt64Int32x4
+ OpExtendLo2ToUint64Uint8x16
+ OpExtendLo2ToUint64Uint16x8
+ OpExtendLo2ToUint64Uint32x4
+ OpExtendLo4ToInt32Int8x16
+ OpExtendLo4ToInt32Int16x8
+ OpExtendLo4ToInt64Int8x16
+ OpExtendLo4ToInt64Int16x8
+ OpExtendLo4ToUint32Uint8x16
+ OpExtendLo4ToUint32Uint16x8
+ OpExtendLo4ToUint64Uint8x16
+ OpExtendLo4ToUint64Uint16x8
+ OpExtendLo8ToInt16Int8x16
+ OpExtendLo8ToInt32Int8x16
+ OpExtendLo8ToInt64Int8x16
+ OpExtendLo8ToUint16Uint8x16
+ OpExtendLo8ToUint32Uint8x16
+ OpExtendLo8ToUint64Uint8x16
OpExtendToInt16Int8x16
OpExtendToInt16Int8x32
OpExtendToInt32Int8x16
@@ -90896,102 +90896,102 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
- name: "ExtendLo2ToInt64x2Int8x16",
+ name: "ExtendLo2ToInt64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToInt64x2Int16x8",
+ name: "ExtendLo2ToInt64Int16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToInt64x2Int32x4",
+ name: "ExtendLo2ToInt64Int32x4",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToUint64x2Uint8x16",
+ name: "ExtendLo2ToUint64Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToUint64x2Uint16x8",
+ name: "ExtendLo2ToUint64Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo2ToUint64x2Uint32x4",
+ name: "ExtendLo2ToUint64Uint32x4",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt32x4Int8x16",
+ name: "ExtendLo4ToInt32Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt32x4Int16x8",
+ name: "ExtendLo4ToInt32Int16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt64x4Int8x16",
+ name: "ExtendLo4ToInt64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToInt64x4Int16x8",
+ name: "ExtendLo4ToInt64Int16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint32x4Uint8x16",
+ name: "ExtendLo4ToUint32Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint32x4Uint16x8",
+ name: "ExtendLo4ToUint32Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint64x4Uint8x16",
+ name: "ExtendLo4ToUint64Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo4ToUint64x4Uint16x8",
+ name: "ExtendLo4ToUint64Uint16x8",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToInt16x8Int8x16",
+ name: "ExtendLo8ToInt16Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToInt32x8Int8x16",
+ name: "ExtendLo8ToInt32Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToInt64x8Int8x16",
+ name: "ExtendLo8ToInt64Int8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToUint16x8Uint8x16",
+ name: "ExtendLo8ToUint16Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToUint32x8Uint8x16",
+ name: "ExtendLo8ToUint32Uint8x16",
argLen: 1,
generic: true,
},
{
- name: "ExtendLo8ToUint64x8Uint8x16",
+ name: "ExtendLo8ToUint64Uint8x16",
argLen: 1,
generic: true,
},
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 5fed6a8063..0c04410074 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -3274,64 +3274,64 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpExpandUint8x32(v)
case OpExpandUint8x64:
return rewriteValueAMD64_OpExpandUint8x64(v)
- case OpExtendLo2ToInt64x2Int16x8:
+ case OpExtendLo2ToInt64Int16x8:
v.Op = OpAMD64VPMOVSXWQ128
return true
- case OpExtendLo2ToInt64x2Int32x4:
+ case OpExtendLo2ToInt64Int32x4:
v.Op = OpAMD64VPMOVSXDQ128
return true
- case OpExtendLo2ToInt64x2Int8x16:
+ case OpExtendLo2ToInt64Int8x16:
v.Op = OpAMD64VPMOVSXBQ128
return true
- case OpExtendLo2ToUint64x2Uint16x8:
+ case OpExtendLo2ToUint64Uint16x8:
v.Op = OpAMD64VPMOVZXWQ128
return true
- case OpExtendLo2ToUint64x2Uint32x4:
+ case OpExtendLo2ToUint64Uint32x4:
v.Op = OpAMD64VPMOVZXDQ128
return true
- case OpExtendLo2ToUint64x2Uint8x16:
+ case OpExtendLo2ToUint64Uint8x16:
v.Op = OpAMD64VPMOVZXBQ128
return true
- case OpExtendLo4ToInt32x4Int16x8:
+ case OpExtendLo4ToInt32Int16x8:
v.Op = OpAMD64VPMOVSXWD128
return true
- case OpExtendLo4ToInt32x4Int8x16:
+ case OpExtendLo4ToInt32Int8x16:
v.Op = OpAMD64VPMOVSXBD128
return true
- case OpExtendLo4ToInt64x4Int16x8:
+ case OpExtendLo4ToInt64Int16x8:
v.Op = OpAMD64VPMOVSXWQ256
return true
- case OpExtendLo4ToInt64x4Int8x16:
+ case OpExtendLo4ToInt64Int8x16:
v.Op = OpAMD64VPMOVSXBQ256
return true
- case OpExtendLo4ToUint32x4Uint16x8:
+ case OpExtendLo4ToUint32Uint16x8:
v.Op = OpAMD64VPMOVZXWD128
return true
- case OpExtendLo4ToUint32x4Uint8x16:
+ case OpExtendLo4ToUint32Uint8x16:
v.Op = OpAMD64VPMOVZXBD128
return true
- case OpExtendLo4ToUint64x4Uint16x8:
+ case OpExtendLo4ToUint64Uint16x8:
v.Op = OpAMD64VPMOVZXWQ256
return true
- case OpExtendLo4ToUint64x4Uint8x16:
+ case OpExtendLo4ToUint64Uint8x16:
v.Op = OpAMD64VPMOVZXBQ256
return true
- case OpExtendLo8ToInt16x8Int8x16:
+ case OpExtendLo8ToInt16Int8x16:
v.Op = OpAMD64VPMOVSXBW128
return true
- case OpExtendLo8ToInt32x8Int8x16:
+ case OpExtendLo8ToInt32Int8x16:
v.Op = OpAMD64VPMOVSXBD256
return true
- case OpExtendLo8ToInt64x8Int8x16:
+ case OpExtendLo8ToInt64Int8x16:
v.Op = OpAMD64VPMOVSXBQ512
return true
- case OpExtendLo8ToUint16x8Uint8x16:
+ case OpExtendLo8ToUint16Uint8x16:
v.Op = OpAMD64VPMOVZXBW128
return true
- case OpExtendLo8ToUint32x8Uint8x16:
+ case OpExtendLo8ToUint32Uint8x16:
v.Op = OpAMD64VPMOVZXBD256
return true
- case OpExtendLo8ToUint64x8Uint8x16:
+ case OpExtendLo8ToUint64Uint8x16:
v.Op = OpAMD64VPMOVZXBQ512
return true
case OpExtendToInt16Int8x16:
diff --git a/src/cmd/compile/internal/ssagen/simdintrinsics.go b/src/cmd/compile/internal/ssagen/simdintrinsics.go
index 8b9fe3afdc..5a95761228 100644
--- a/src/cmd/compile/internal/ssagen/simdintrinsics.go
+++ b/src/cmd/compile/internal/ssagen/simdintrinsics.go
@@ -388,26 +388,26 @@ func simdIntrinsics(addF func(pkg, fn string, b intrinsicBuilder, archFamilies .
addF(simdPackage, "Uint64x2.Expand", opLen2(ssa.OpExpandUint64x2, types.TypeVec128), sys.AMD64)
addF(simdPackage, "Uint64x4.Expand", opLen2(ssa.OpExpandUint64x4, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Uint64x8.Expand", opLen2(ssa.OpExpandUint64x8, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x8.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int32x4.ExtendLo2ToInt64x2", opLen1(ssa.OpExtendLo2ToInt64x2Int32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x8.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint32x4.ExtendLo2ToUint64x2", opLen1(ssa.OpExtendLo2ToUint64x2Uint32x4, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int16x8.ExtendLo4ToInt32x4", opLen1(ssa.OpExtendLo4ToInt32x4Int16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int16x8.ExtendLo4ToInt64x4", opLen1(ssa.OpExtendLo4ToInt64x4Int16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint16x8.ExtendLo4ToUint32x4", opLen1(ssa.OpExtendLo4ToUint32x4Uint16x8, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint16x8.ExtendLo4ToUint64x4", opLen1(ssa.OpExtendLo4ToUint64x4Uint16x8, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo8ToInt16x8", opLen1(ssa.OpExtendLo8ToInt16x8Int8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo8ToInt32x8", opLen1(ssa.OpExtendLo8ToInt32x8Int8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Int8x16.ExtendLo8ToInt64x8", opLen1(ssa.OpExtendLo8ToInt64x8Int8x16, types.TypeVec512), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo8ToUint16x8", opLen1(ssa.OpExtendLo8ToUint16x8Uint8x16, types.TypeVec128), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo8ToUint32x8", opLen1(ssa.OpExtendLo8ToUint32x8Uint8x16, types.TypeVec256), sys.AMD64)
- addF(simdPackage, "Uint8x16.ExtendLo8ToUint64x8", opLen1(ssa.OpExtendLo8ToUint64x8Uint8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int32x4.ExtendLo2ToInt64", opLen1(ssa.OpExtendLo2ToInt64Int32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint32x4.ExtendLo2ToUint64", opLen1(ssa.OpExtendLo2ToUint64Uint32x4, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int16x8.ExtendLo4ToInt32", opLen1(ssa.OpExtendLo4ToInt32Int16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int16x8.ExtendLo4ToInt64", opLen1(ssa.OpExtendLo4ToInt64Int16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint16x8.ExtendLo4ToUint32", opLen1(ssa.OpExtendLo4ToUint32Uint16x8, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint16x8.ExtendLo4ToUint64", opLen1(ssa.OpExtendLo4ToUint64Uint16x8, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo8ToInt16", opLen1(ssa.OpExtendLo8ToInt16Int8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo8ToInt32", opLen1(ssa.OpExtendLo8ToInt32Int8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Int8x16.ExtendLo8ToInt64", opLen1(ssa.OpExtendLo8ToInt64Int8x16, types.TypeVec512), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo8ToUint16", opLen1(ssa.OpExtendLo8ToUint16Uint8x16, types.TypeVec128), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo8ToUint32", opLen1(ssa.OpExtendLo8ToUint32Uint8x16, types.TypeVec256), sys.AMD64)
+ addF(simdPackage, "Uint8x16.ExtendLo8ToUint64", opLen1(ssa.OpExtendLo8ToUint64Uint8x16, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x16, types.TypeVec256), sys.AMD64)
addF(simdPackage, "Int8x32.ExtendToInt16", opLen1(ssa.OpExtendToInt16Int8x32, types.TypeVec512), sys.AMD64)
addF(simdPackage, "Int8x16.ExtendToInt32", opLen1(ssa.OpExtendToInt32Int8x16, types.TypeVec512), sys.AMD64)
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
index 88e4840920..698e6d9956 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/categories.yaml
@@ -135,83 +135,83 @@
documentation: !string |-
// NAME zero-extends element values to uint64.
# low-part only Int <-> Int conversions
-- go: ExtendLo8ToUint16x8
+- go: ExtendLo8ToUint16
commutative: false
documentation: !string |-
// NAME zero-extends 8 lowest vector element values to uint16.
-- go: ExtendLo8ToInt16x8
+- go: ExtendLo8ToInt16
commutative: false
documentation: !string |-
// NAME sign-extends 8 lowest vector element values to int16.
-- go: ExtendLo4ToUint32x4
+- go: ExtendLo4ToUint32
commutative: false
documentation: !string |-
// NAME zero-extends 4 lowest vector element values to uint32.
-- go: ExtendLo4ToInt32x4
+- go: ExtendLo4ToInt32
commutative: false
documentation: !string |-
// NAME sign-extends 4 lowest vector element values to int32.
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
commutative: false
documentation: !string |-
// NAME zero-extends 2 lowest vector element values to uint64.
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
commutative: false
documentation: !string |-
// NAME sign-extends 2 lowest vector element values to int64.
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
commutative: false
documentation: !string |-
// NAME zero-extends 2 lowest vector element values to uint64.
-- go: ExtendLo4ToUint64x4
+- go: ExtendLo4ToUint64
commutative: false
documentation: !string |-
// NAME zero-extends 4 lowest vector element values to uint64.
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
commutative: false
documentation: !string |-
// NAME sign-extends 2 lowest vector element values to int64.
-- go: ExtendLo4ToInt64x4
+- go: ExtendLo4ToInt64
commutative: false
documentation: !string |-
// NAME sign-extends 4 lowest vector element values to int64.
-- go: ExtendLo4ToUint32x4
+- go: ExtendLo4ToUint32
commutative: false
documentation: !string |-
// NAME zero-extends 4 lowest vector element values to uint32.
-- go: ExtendLo8ToUint32x8
+- go: ExtendLo8ToUint32
commutative: false
documentation: !string |-
// NAME zero-extends 8 lowest vector element values to uint32.
-- go: ExtendLo4ToInt32x4
+- go: ExtendLo4ToInt32
commutative: false
documentation: !string |-
// NAME sign-extends 4 lowest vector element values to int32.
-- go: ExtendLo8ToInt32x8
+- go: ExtendLo8ToInt32
commutative: false
documentation: !string |-
// NAME sign-extends 8 lowest vector element values to int32.
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
commutative: false
documentation: !string |-
// NAME zero-extends 2 lowest vector element values to uint64.
-- go: ExtendLo4ToUint64x4
+- go: ExtendLo4ToUint64
commutative: false
documentation: !string |-
// NAME zero-extends 4 lowest vector element values to uint64.
-- go: ExtendLo8ToUint64x8
+- go: ExtendLo8ToUint64
commutative: false
documentation: !string |-
// NAME zero-extends 8 lowest vector element values to uint64.
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
commutative: false
documentation: !string |-
// NAME sign-extends 2 lowest vector element values to int64.
-- go: ExtendLo4ToInt64x4
+- go: ExtendLo4ToInt64
commutative: false
documentation: !string |-
// NAME sign-extends 4 lowest vector element values to int64.
-- go: ExtendLo8ToInt64x8
+- go: ExtendLo8ToInt64
commutative: false
documentation: !string |-
// NAME sign-extends 8 lowest vector element values to int64.
diff --git a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
index f436be23ef..2f19d12616 100644
--- a/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
+++ b/src/simd/archsimd/_gen/simdgen/ops/Converts/go.yaml
@@ -491,7 +491,7 @@
# low-part only conversions.
# uint8->uint16
-- go: ExtendLo8ToUint16x8
+- go: ExtendLo8ToUint16
regexpTag: "convert"
asm: "VPMOVZXBW"
in:
@@ -499,7 +499,7 @@
out:
- *u16x8
# int8->int16
-- go: ExtendLo8ToInt16x8
+- go: ExtendLo8ToInt16
regexpTag: "convert"
asm: "VPMOVSXBW"
in:
@@ -507,7 +507,7 @@
out:
- *i16x8
# uint16->uint32
-- go: ExtendLo4ToUint32x4
+- go: ExtendLo4ToUint32
regexpTag: "convert"
asm: "VPMOVZXWD"
in:
@@ -515,7 +515,7 @@
out:
- *u32x4
# int16->int32
-- go: ExtendLo4ToInt32x4
+- go: ExtendLo4ToInt32
regexpTag: "convert"
asm: "VPMOVSXWD"
in:
@@ -523,7 +523,7 @@
out:
- *i32x4
# uint32 -> uint64
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
regexpTag: "convert"
asm: "VPMOVZXDQ"
in:
@@ -534,7 +534,7 @@
elemBits: 64
bits: 128
# int32 -> int64
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
regexpTag: "convert"
asm: "VPMOVSXDQ"
in:
@@ -545,14 +545,14 @@
elemBits: 64
bits: 128
# uint16 -> uint64
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
regexpTag: "convert"
asm: "VPMOVZXWQ"
in:
- *u16x8
out:
- *u64x2
-- go: ExtendLo4ToUint64x4
+- go: ExtendLo4ToUint64
regexpTag: "convert"
asm: "VPMOVZXWQ"
in:
@@ -560,14 +560,14 @@
out:
- *u64x4
# int16 -> int64
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
regexpTag: "convert"
asm: "VPMOVSXWQ"
in:
- *i16x8
out:
- *i64x2
-- go: ExtendLo4ToInt64x4
+- go: ExtendLo4ToInt64
regexpTag: "convert"
asm: "VPMOVSXWQ"
in:
@@ -575,14 +575,14 @@
out:
- *i64x4
# uint8 -> uint32
-- go: ExtendLo4ToUint32x4
+- go: ExtendLo4ToUint32
regexpTag: "convert"
asm: "VPMOVZXBD"
in:
- *u8x16
out:
- *u32x4
-- go: ExtendLo8ToUint32x8
+- go: ExtendLo8ToUint32
regexpTag: "convert"
asm: "VPMOVZXBD"
in:
@@ -590,14 +590,14 @@
out:
- *u32x8
# int8 -> int32
-- go: ExtendLo4ToInt32x4
+- go: ExtendLo4ToInt32
regexpTag: "convert"
asm: "VPMOVSXBD"
in:
- *i8x16
out:
- *i32x4
-- go: ExtendLo8ToInt32x8
+- go: ExtendLo8ToInt32
regexpTag: "convert"
asm: "VPMOVSXBD"
in:
@@ -605,21 +605,21 @@
out:
- *i32x8
# uint8 -> uint64
-- go: ExtendLo2ToUint64x2
+- go: ExtendLo2ToUint64
regexpTag: "convert"
asm: "VPMOVZXBQ"
in:
- *u8x16
out:
- *u64x2
-- go: ExtendLo4ToUint64x4
+- go: ExtendLo4ToUint64
regexpTag: "convert"
asm: "VPMOVZXBQ"
in:
- *u8x16
out:
- *u64x4
-- go: ExtendLo8ToUint64x8
+- go: ExtendLo8ToUint64
regexpTag: "convert"
asm: "VPMOVZXBQ"
in:
@@ -627,21 +627,21 @@
out:
- *u64x8
# int8 -> int64
-- go: ExtendLo2ToInt64x2
+- go: ExtendLo2ToInt64
regexpTag: "convert"
asm: "VPMOVSXBQ"
in:
- *i8x16
out:
- *i64x2
-- go: ExtendLo4ToInt64x4
+- go: ExtendLo4ToInt64
regexpTag: "convert"
asm: "VPMOVSXBQ"
in:
- *i8x16
out:
- *i64x4
-- go: ExtendLo8ToInt64x8
+- go: ExtendLo8ToInt64
regexpTag: "convert"
asm: "VPMOVSXBQ"
in:
diff --git a/src/simd/archsimd/ops_amd64.go b/src/simd/archsimd/ops_amd64.go
index 20ae3b1cc2..522a98caea 100644
--- a/src/simd/archsimd/ops_amd64.go
+++ b/src/simd/archsimd/ops_amd64.go
@@ -2325,129 +2325,129 @@ func (x Uint64x4) Expand(mask Mask64x4) Uint64x4
// Asm: VPEXPANDQ, CPU Feature: AVX512
func (x Uint64x8) Expand(mask Mask64x8) Uint64x8
-/* ExtendLo2ToInt64x2 */
+/* ExtendLo2ToInt64 */
-// ExtendLo2ToInt64x2 sign-extends 2 lowest vector element values to int64.
+// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX
-func (x Int8x16) ExtendLo2ToInt64x2() Int64x2
+func (x Int8x16) ExtendLo2ToInt64() Int64x2
-// ExtendLo2ToInt64x2 sign-extends 2 lowest vector element values to int64.
+// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX
-func (x Int16x8) ExtendLo2ToInt64x2() Int64x2
+func (x Int16x8) ExtendLo2ToInt64() Int64x2
-// ExtendLo2ToInt64x2 sign-extends 2 lowest vector element values to int64.
+// ExtendLo2ToInt64 sign-extends 2 lowest vector element values to int64.
//
// Asm: VPMOVSXDQ, CPU Feature: AVX
-func (x Int32x4) ExtendLo2ToInt64x2() Int64x2
+func (x Int32x4) ExtendLo2ToInt64() Int64x2
-/* ExtendLo2ToUint64x2 */
+/* ExtendLo2ToUint64 */
-// ExtendLo2ToUint64x2 zero-extends 2 lowest vector element values to uint64.
+// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX
-func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2
+func (x Uint8x16) ExtendLo2ToUint64() Uint64x2
-// ExtendLo2ToUint64x2 zero-extends 2 lowest vector element values to uint64.
+// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX
-func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2
+func (x Uint16x8) ExtendLo2ToUint64() Uint64x2
-// ExtendLo2ToUint64x2 zero-extends 2 lowest vector element values to uint64.
+// ExtendLo2ToUint64 zero-extends 2 lowest vector element values to uint64.
//
// Asm: VPMOVZXDQ, CPU Feature: AVX
-func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2
+func (x Uint32x4) ExtendLo2ToUint64() Uint64x2
-/* ExtendLo4ToInt32x4 */
+/* ExtendLo4ToInt32 */
-// ExtendLo4ToInt32x4 sign-extends 4 lowest vector element values to int32.
+// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX
-func (x Int8x16) ExtendLo4ToInt32x4() Int32x4
+func (x Int8x16) ExtendLo4ToInt32() Int32x4
-// ExtendLo4ToInt32x4 sign-extends 4 lowest vector element values to int32.
+// ExtendLo4ToInt32 sign-extends 4 lowest vector element values to int32.
//
// Asm: VPMOVSXWD, CPU Feature: AVX
-func (x Int16x8) ExtendLo4ToInt32x4() Int32x4
+func (x Int16x8) ExtendLo4ToInt32() Int32x4
-/* ExtendLo4ToInt64x4 */
+/* ExtendLo4ToInt64 */
-// ExtendLo4ToInt64x4 sign-extends 4 lowest vector element values to int64.
+// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX2
-func (x Int8x16) ExtendLo4ToInt64x4() Int64x4
+func (x Int8x16) ExtendLo4ToInt64() Int64x4
-// ExtendLo4ToInt64x4 sign-extends 4 lowest vector element values to int64.
+// ExtendLo4ToInt64 sign-extends 4 lowest vector element values to int64.
//
// Asm: VPMOVSXWQ, CPU Feature: AVX2
-func (x Int16x8) ExtendLo4ToInt64x4() Int64x4
+func (x Int16x8) ExtendLo4ToInt64() Int64x4
-/* ExtendLo4ToUint32x4 */
+/* ExtendLo4ToUint32 */
-// ExtendLo4ToUint32x4 zero-extends 4 lowest vector element values to uint32.
+// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX
-func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4
+func (x Uint8x16) ExtendLo4ToUint32() Uint32x4
-// ExtendLo4ToUint32x4 zero-extends 4 lowest vector element values to uint32.
+// ExtendLo4ToUint32 zero-extends 4 lowest vector element values to uint32.
//
// Asm: VPMOVZXWD, CPU Feature: AVX
-func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4
+func (x Uint16x8) ExtendLo4ToUint32() Uint32x4
-/* ExtendLo4ToUint64x4 */
+/* ExtendLo4ToUint64 */
-// ExtendLo4ToUint64x4 zero-extends 4 lowest vector element values to uint64.
+// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX2
-func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4
+func (x Uint8x16) ExtendLo4ToUint64() Uint64x4
-// ExtendLo4ToUint64x4 zero-extends 4 lowest vector element values to uint64.
+// ExtendLo4ToUint64 zero-extends 4 lowest vector element values to uint64.
//
// Asm: VPMOVZXWQ, CPU Feature: AVX2
-func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4
+func (x Uint16x8) ExtendLo4ToUint64() Uint64x4
-/* ExtendLo8ToInt16x8 */
+/* ExtendLo8ToInt16 */
-// ExtendLo8ToInt16x8 sign-extends 8 lowest vector element values to int16.
+// ExtendLo8ToInt16 sign-extends 8 lowest vector element values to int16.
//
// Asm: VPMOVSXBW, CPU Feature: AVX
-func (x Int8x16) ExtendLo8ToInt16x8() Int16x8
+func (x Int8x16) ExtendLo8ToInt16() Int16x8
-/* ExtendLo8ToInt32x8 */
+/* ExtendLo8ToInt32 */
-// ExtendLo8ToInt32x8 sign-extends 8 lowest vector element values to int32.
+// ExtendLo8ToInt32 sign-extends 8 lowest vector element values to int32.
//
// Asm: VPMOVSXBD, CPU Feature: AVX2
-func (x Int8x16) ExtendLo8ToInt32x8() Int32x8
+func (x Int8x16) ExtendLo8ToInt32() Int32x8
-/* ExtendLo8ToInt64x8 */
+/* ExtendLo8ToInt64 */
-// ExtendLo8ToInt64x8 sign-extends 8 lowest vector element values to int64.
+// ExtendLo8ToInt64 sign-extends 8 lowest vector element values to int64.
//
// Asm: VPMOVSXBQ, CPU Feature: AVX512
-func (x Int8x16) ExtendLo8ToInt64x8() Int64x8
+func (x Int8x16) ExtendLo8ToInt64() Int64x8
-/* ExtendLo8ToUint16x8 */
+/* ExtendLo8ToUint16 */
-// ExtendLo8ToUint16x8 zero-extends 8 lowest vector element values to uint16.
+// ExtendLo8ToUint16 zero-extends 8 lowest vector element values to uint16.
//
// Asm: VPMOVZXBW, CPU Feature: AVX
-func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8
+func (x Uint8x16) ExtendLo8ToUint16() Uint16x8
-/* ExtendLo8ToUint32x8 */
+/* ExtendLo8ToUint32 */
-// ExtendLo8ToUint32x8 zero-extends 8 lowest vector element values to uint32.
+// ExtendLo8ToUint32 zero-extends 8 lowest vector element values to uint32.
//
// Asm: VPMOVZXBD, CPU Feature: AVX2
-func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8
+func (x Uint8x16) ExtendLo8ToUint32() Uint32x8
-/* ExtendLo8ToUint64x8 */
+/* ExtendLo8ToUint64 */
-// ExtendLo8ToUint64x8 zero-extends 8 lowest vector element values to uint64.
+// ExtendLo8ToUint64 zero-extends 8 lowest vector element values to uint64.
//
// Asm: VPMOVZXBQ, CPU Feature: AVX512
-func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8
+func (x Uint8x16) ExtendLo8ToUint64() Uint64x8
/* ExtendToInt16 */