aboutsummaryrefslogtreecommitdiff
path: root/src/simd/shuffles_amd64.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/simd/shuffles_amd64.go')
-rw-r--r--src/simd/shuffles_amd64.go798
1 files changed, 544 insertions, 254 deletions
diff --git a/src/simd/shuffles_amd64.go b/src/simd/shuffles_amd64.go
index 68c840730b..c46a2d06fe 100644
--- a/src/simd/shuffles_amd64.go
+++ b/src/simd/shuffles_amd64.go
@@ -44,6 +44,16 @@ const (
_HHHH // a:y, b:y, c:y, d:y
)
+// These constants represent the source pattern for the two parameters
+// (a, b) passed to SelectFromPair and SelectFromPairGrouped for
+// two-element vectors (L: element taken from x, H: element taken from y).
+const (
+ _LL = iota // a:x, b:x
+ _HL // a:y, b:x
+ _LH // a:x, b:y
+ _HH // a:y, b:y
+)
+
// SelectFromPair returns the selection of four elements from the two
// vectors x and y, where selector values in the range 0-3 specify
// elements from x and values in the range 4-7 specify the 0-3 elements
@@ -72,53 +82,53 @@ func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -144,53 +154,53 @@ func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -216,53 +226,53 @@ func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -291,53 +301,53 @@ func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -366,53 +376,53 @@ func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -441,53 +451,53 @@ func (x Float32x8) SelectFromPairGrouped(a, b, c, d uint8, y Float32x8) Float32x
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -511,53 +521,53 @@ func (x Int32x16) SelectFromPairGrouped(a, b, c, d uint8, y Int32x16) Int32x16 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -581,53 +591,53 @@ func (x Uint32x16) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x16) Uint32x1
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -651,59 +661,339 @@ func (x Float32x16) SelectFromPairGrouped(a, b, c, d uint8, y Float32x16) Float3
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
-// cscimm converts the 4 vector element indices into a single
+// cscimm4 converts the 4 vector element indices (a lowest, each next one 2 bits higher) into a single
// uint8 for use as an immediate.
-func cscimm(a, b, c, d uint8) uint8 {
+func cscimm4(a, b, c, d uint8) uint8 {
return uint8(a + b<<2 + c<<4 + d<<6)
}
+
+// cscimm2 converts the 2 vector element indices into a single
+// uint8 for use as an immediate; for a, b in 0-1, a is bit 0 and b is bit 1.
+func cscimm2(a, b uint8) uint8 {
+ return uint8(a + b<<1) // << binds tighter than +, so this is a + (b<<1)
+}
+
+// cscimm2g2 converts the 2 vector element indices into a single
+// uint8 for use as an immediate, but duplicated for VSHUFPD
+// to emulate grouped behavior of VSHUFPS.
+func cscimm2g2(a, b uint8) uint8 {
+ g := cscimm2(a, b) // one copy of the selector bits
+ return g + g<<2 // add a second copy shifted two bits higher
+}
+
+// cscimm2g4 converts the 2 vector element indices into a single
+// uint8 for use as an immediate, but with four copies for VSHUFPD
+// to emulate grouped behavior of VSHUFPS.
+func cscimm2g4(a, b uint8) uint8 {
+ g := cscimm2g2(a, b) // two copies of the selector bits
+ return g + g<<4 // add the same pair shifted four bits higher, giving four copies
+}
+
+// SelectFromPair returns the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Uint64x2) SelectFromPair(a, b uint8, y Uint64x2) Uint64x2 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern {
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstant(cscimm2(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstant(cscimm2(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstant(cscimm2(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstant(cscimm2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPairGrouped returns, for each of the two 128-bit halves of
+// the vectors x and y, the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Uint64x4) SelectFromPairGrouped(a, b uint8, y Uint64x4) Uint64x4 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern { // every immediate below carries the selector bits twice (cscimm2g2)
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPairGrouped returns, for each of the four 128-bit subvectors
+// of the vectors x and y, the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX512
+func (x Uint64x8) SelectFromPairGrouped(a, b uint8, y Uint64x8) Uint64x8 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern { // every immediate below carries the selector bits four times (cscimm2g4)
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPair returns the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Float64x2) SelectFromPair(a, b uint8, y Float64x2) Float64x2 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern {
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstant(cscimm2(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstant(cscimm2(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstant(cscimm2(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstant(cscimm2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPairGrouped returns, for each of the two 128-bit halves of
+// the vectors x and y, the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Float64x4) SelectFromPairGrouped(a, b uint8, y Float64x4) Float64x4 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern { // every immediate below carries the selector bits twice (cscimm2g2)
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPairGrouped returns, for each of the four 128-bit subvectors
+// of the vectors x and y, the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX512
+func (x Float64x8) SelectFromPairGrouped(a, b uint8, y Float64x8) Float64x8 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern { // every immediate below carries the selector bits four times (cscimm2g4)
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPair returns the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Int64x2) SelectFromPair(a, b uint8, y Int64x2) Int64x2 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern {
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstant(cscimm2(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstant(cscimm2(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstant(cscimm2(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstant(cscimm2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPairGrouped returns, for each of the two 128-bit halves of
+// the vectors x and y, the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Int64x4) SelectFromPairGrouped(a, b uint8, y Int64x4) Int64x4 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern { // every immediate below carries the selector bits twice (cscimm2g2)
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}
+
+// SelectFromPairGrouped returns, for each of the four 128-bit subvectors
+// of the vectors x and y, the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX512
+func (x Int64x8) SelectFromPairGrouped(a, b uint8, y Int64x8) Int64x8 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // reduce each selector to an index within its source vector
+
+ switch pattern { // every immediate below carries the selector bits four times (cscimm2g4)
+ case _LL: // a from x, b from x
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ case _HH: // a from y, b from y
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _LH: // a from x, b from y
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _HL: // a from y, b from x
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3, so not expected to run
+}