aboutsummaryrefslogtreecommitdiff
path: root/src/simd
diff options
context:
space:
mode:
authorDavid Chase <drchase@google.com>2025-09-20 16:52:07 -0400
committerDavid Chase <drchase@google.com>2025-09-26 13:11:24 -0700
commitea3b2ecd2878a694f9f42011eccb1312feb82bca (patch)
tree88b9d9075fadcb19dc676e070232b74d3e6b1d0e /src/simd
parent25c36b95d1523f22d4c46ec237acc03e00540e0a (diff)
downloadgo-ea3b2ecd2878a694f9f42011eccb1312feb82bca.tar.xz
[dev.simd] cmd/compile, simd: add 64-bit select-from-pair methods
These are in the same style as the 32-bit select-from-pair methods, including the grouped variant. This does not quite capture the full awesome power of VSHUFPD, which can select differently in each group; that will be a separate, more complex method. Change-Id: I807ddd7c1256103b5b0d7c5d60bd70b185e3aaf0 Reviewed-on: https://go-review.googlesource.com/c/go/+/705695 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: Junyang Shao <shaojunyang@google.com>
Diffstat (limited to 'src/simd')
-rw-r--r--src/simd/internal/simd_test/simd_test.go120
-rw-r--r--src/simd/pkginternal_test.go112
-rw-r--r--src/simd/shuffles_amd64.go798
3 files changed, 717 insertions, 313 deletions
diff --git a/src/simd/internal/simd_test/simd_test.go b/src/simd/internal/simd_test/simd_test.go
index e38f7eea01..d00fcf5dd3 100644
--- a/src/simd/internal/simd_test/simd_test.go
+++ b/src/simd/internal/simd_test/simd_test.go
@@ -595,7 +595,7 @@ func TestIsZero(t *testing.T) {
}
}
-func TestSelectFromPairConst(t *testing.T) {
+func TestSelect4FromPairConst(t *testing.T) {
x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
@@ -652,7 +652,7 @@ func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) sim
return x.SelectFromPair(a, b, c, d, y)
}
-func TestSelectFromPairVar(t *testing.T) {
+func TestSelect4FromPairVar(t *testing.T) {
x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3})
y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7})
@@ -704,7 +704,7 @@ func TestSelectFromPairVar(t *testing.T) {
foo(hllh, 4, 0, 1, 5)
}
-func TestSelectFromPairConstGroupedFloat32x8(t *testing.T) {
+func TestSelect4FromPairConstGrouped(t *testing.T) {
x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13})
y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17})
@@ -887,5 +887,119 @@ func TestSelect128FromPairVar(t *testing.T) {
foo(cd, 2, 3)
foo(da, 3, 0)
foo(dc, 3, 2)
+}
+
+func TestSelect2FromPairConst(t *testing.T) {
+ x := simd.LoadUint64x2Slice([]uint64{0, 1})
+ y := simd.LoadUint64x2Slice([]uint64{2, 3})
+
+ ll := x.SelectFromPair(0, 1, y)
+ hh := x.SelectFromPair(3, 2, y)
+ lh := x.SelectFromPair(0, 3, y)
+ hl := x.SelectFromPair(2, 1, y)
+
+ r := make([]uint64, 2, 2)
+
+ foo := func(v simd.Uint64x2, a, b uint64) {
+ v.StoreSlice(r)
+ checkSlices[uint64](t, r, []uint64{a, b})
+ }
+
+ foo(ll, 0, 1)
+ foo(hh, 3, 2)
+ foo(lh, 0, 3)
+ foo(hl, 2, 1)
+}
+
+func TestSelect2FromPairConstGroupedUint(t *testing.T) {
+ x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11})
+ y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13})
+
+ ll := x.SelectFromPairGrouped(0, 1, y)
+ hh := x.SelectFromPairGrouped(3, 2, y)
+ lh := x.SelectFromPairGrouped(0, 3, y)
+ hl := x.SelectFromPairGrouped(2, 1, y)
+
+ r := make([]uint64, 4, 4)
+
+ foo := func(v simd.Uint64x4, a, b uint64) {
+ v.StoreSlice(r)
+ checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10})
+ }
+
+ foo(ll, 0, 1)
+ foo(hh, 3, 2)
+ foo(lh, 0, 3)
+ foo(hl, 2, 1)
+}
+
+func TestSelect2FromPairConstGroupedFloat(t *testing.T) {
+ x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11})
+ y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13})
+
+ ll := x.SelectFromPairGrouped(0, 1, y)
+ hh := x.SelectFromPairGrouped(3, 2, y)
+ lh := x.SelectFromPairGrouped(0, 3, y)
+ hl := x.SelectFromPairGrouped(2, 1, y)
+
+ r := make([]float64, 4, 4)
+
+ foo := func(v simd.Float64x4, a, b float64) {
+ v.StoreSlice(r)
+ checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10})
+ }
+
+ foo(ll, 0, 1)
+ foo(hh, 3, 2)
+ foo(lh, 0, 3)
+ foo(hl, 2, 1)
+}
+
+func TestSelect2FromPairConstGroupedInt(t *testing.T) {
+ x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11})
+ y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13})
+
+ ll := x.SelectFromPairGrouped(0, 1, y)
+ hh := x.SelectFromPairGrouped(3, 2, y)
+ lh := x.SelectFromPairGrouped(0, 3, y)
+ hl := x.SelectFromPairGrouped(2, 1, y)
+
+ r := make([]int64, 4, 4)
+
+ foo := func(v simd.Int64x4, a, b int64) {
+ v.StoreSlice(r)
+ checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10})
+ }
+
+ foo(ll, 0, 1)
+ foo(hh, 3, 2)
+ foo(lh, 0, 3)
+ foo(hl, 2, 1)
+}
+
+func TestSelect2FromPairConstGroupedInt512(t *testing.T) {
+ if !simd.HasAVX512() {
+ t.Skip("Test requires HasAVX512, not available on this hardware")
+ return
+ }
+
+ x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31})
+ y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33})
+
+ ll := x.SelectFromPairGrouped(0, 1, y)
+ hh := x.SelectFromPairGrouped(3, 2, y)
+ lh := x.SelectFromPairGrouped(0, 3, y)
+ hl := x.SelectFromPairGrouped(2, 1, y)
+
+ r := make([]int64, 8, 8)
+
+ foo := func(v simd.Int64x8, a, b int64) {
+ v.StoreSlice(r)
+ checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30})
+ }
+ foo(ll, 0, 1)
+ foo(hh, 3, 2)
+ foo(lh, 0, 3)
+ foo(hl, 2, 1)
}
diff --git a/src/simd/pkginternal_test.go b/src/simd/pkginternal_test.go
index 557a0537b4..632e24d9d9 100644
--- a/src/simd/pkginternal_test.go
+++ b/src/simd/pkginternal_test.go
@@ -99,53 +99,53 @@ func select2x4x32(x Int32x4, a, b, c, d uint8, y Int32x4) Int32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -180,53 +180,53 @@ func select2x8x32Grouped(x Int32x8, a, b, c, d uint8, y Int32x8) Int32x8 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
diff --git a/src/simd/shuffles_amd64.go b/src/simd/shuffles_amd64.go
index 68c840730b..c46a2d06fe 100644
--- a/src/simd/shuffles_amd64.go
+++ b/src/simd/shuffles_amd64.go
@@ -44,6 +44,16 @@ const (
_HHHH // a:y, b:y, c:y, d:y
)
+// These constants represent the source pattern for the two parameters
+// (a, b) passed to SelectFromPair and SelectFromPairGrouped for
+// two-element vectors.
+const (
+ _LL = iota
+ _HL
+ _LH
+ _HH
+)
+
// SelectFromPair returns the selection of four elements from the two
// vectors x and y, where selector values in the range 0-3 specify
// elements from x and values in the range 4-7 specify the 0-3 elements
@@ -72,53 +82,53 @@ func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -144,53 +154,53 @@ func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -216,53 +226,53 @@ func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstant(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstant(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstant(cscimm(a, a, b, b), x)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstant(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstant(cscimm(a, a, b, b), y)
- return z.concatSelectedConstant(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstant(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstant(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return x.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstant(cscimm(c, c, d, d), y)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstant(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstant(cscimm(c, c, d, d), x)
- return y.concatSelectedConstant(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstant(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstant(cscimm(a, c, b, d), y)
- return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstant(cscimm(b, d, a, c), y)
- return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstant(cscimm(b, c, a, d), y)
- return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstant(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstant(cscimm(a, d, b, c), y)
- return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstant(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -291,53 +301,53 @@ func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -366,53 +376,53 @@ func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -441,53 +451,53 @@ func (x Float32x8) SelectFromPairGrouped(a, b, c, d uint8, y Float32x8) Float32x
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -511,53 +521,53 @@ func (x Int32x16) SelectFromPairGrouped(a, b, c, d uint8, y Int32x16) Int32x16 {
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -581,53 +591,53 @@ func (x Uint32x16) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x16) Uint32x1
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
@@ -651,59 +661,339 @@ func (x Float32x16) SelectFromPairGrouped(a, b, c, d uint8, y Float32x16) Float3
switch pattern {
case _LLLL:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HHHH:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _LLHH:
- return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y)
case _HHLL:
- return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x)
case _HLLL:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _LHLL:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x)
case _HLHH:
- z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LHHH:
- z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y)
- return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y)
+ return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y)
case _LLLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LLHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHLH:
- z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _HHHL:
- z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x)
- return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z)
+ z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x)
+ return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z)
case _LHLH:
- z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z)
case _HLHL:
- z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z)
case _HLLH:
- z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y)
- return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y)
+ return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z)
case _LHHL:
- z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y)
- return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z)
+ z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y)
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z)
}
panic("missing case, switch should be exhaustive")
}
-// cscimm converts the 4 vector element indices into a single
+// cscimm4 packs the 4 vector element indices, 2 bits apiece, into a single
// uint8 for use as an immediate.
-func cscimm(a, b, c, d uint8) uint8 {
+func cscimm4(a, b, c, d uint8) uint8 { // a at bit 0, b at bit 2, c at bit 4, d at bit 6; assumes each index is 0-3
return uint8(a + b<<2 + c<<4 + d<<6)
}
+
+// cscimm2 packs the 2 vector element indices into a single uint8 for
+// use as an immediate: a goes in bit 0 and b in bit 1 (each assumed 0 or 1).
+func cscimm2(a, b uint8) uint8 {
+ return uint8(a + b<<1)
+}
+
+// cscimm2g2 converts the 2 vector element indices into a single
+// uint8 for use as an immediate, with the 2-bit selector duplicated
+// so that VSHUFPD emulates the grouped behavior of VSHUFPS.
+func cscimm2g2(a, b uint8) uint8 {
+ g := cscimm2(a, b) // 2-bit selector for a single group
+ return g + g<<2 // same selector in bits 0-1 and bits 2-3
+}
+
+// cscimm2g4 converts the 2 vector element indices into a single
+// uint8 for use as an immediate, with four copies of the 2-bit selector
+// so that VSHUFPD emulates the grouped behavior of VSHUFPS.
+func cscimm2g4(a, b uint8) uint8 {
+ g := cscimm2g2(a, b) // selector already duplicated into bits 0-3
+ return g + g<<4 // repeat those 4 bits in bits 4-7, giving four copies
+}
+
+// SelectFromPair returns the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. Selector bits above the low two are ignored. When the selectors
+// are constants the selection can be implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Uint64x2) SelectFromPair(a, b uint8, y Uint64x2) Uint64x2 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstant(cscimm2(a, b), x)
+ case _HH:
+ return y.concatSelectedConstant(cscimm2(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstant
+ return x.concatSelectedConstant(cscimm2(a, b), y)
+ case _HL:
+ return y.concatSelectedConstant(cscimm2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPairGrouped returns, for each of the two 128-bit halves of
+// the vectors x and y, the selection of two elements from the two
+// vectors x and y. The same selectors are used for every half: values
+// in the range 0-1 specify elements from x and values in the range 2-3
+// specify the 0-1 elements of y. Selector bits above the low two are
+// ignored. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Uint64x4) SelectFromPairGrouped(a, b uint8, y Uint64x4) Uint64x4 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ case _HH:
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstantGrouped
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _HL:
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPairGrouped returns, for each of the four 128-bit subvectors
+// of the vectors x and y, the selection of two elements from the two
+// vectors x and y. The same selectors are used for every subvector:
+// values in the range 0-1 specify elements from x and values in the
+// range 2-3 specify the 0-1 elements of y. Selector bits above the low
+// two are ignored. When the selectors are constants the selection can
+// be implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX512
+func (x Uint64x8) SelectFromPairGrouped(a, b uint8, y Uint64x8) Uint64x8 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ case _HH:
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstantGrouped
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _HL:
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPair returns the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. Selector bits above the low two are ignored. When the selectors
+// are constants the selection can be implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Float64x2) SelectFromPair(a, b uint8, y Float64x2) Float64x2 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstant(cscimm2(a, b), x)
+ case _HH:
+ return y.concatSelectedConstant(cscimm2(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstant
+ return x.concatSelectedConstant(cscimm2(a, b), y)
+ case _HL:
+ return y.concatSelectedConstant(cscimm2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPairGrouped returns, for each of the two 128-bit halves of
+// the vectors x and y, the selection of two elements from the two
+// vectors x and y. The same selectors are used for every half: values
+// in the range 0-1 specify elements from x and values in the range 2-3
+// specify the 0-1 elements of y. Selector bits above the low two are
+// ignored. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Float64x4) SelectFromPairGrouped(a, b uint8, y Float64x4) Float64x4 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ case _HH:
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstantGrouped
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _HL:
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPairGrouped returns, for each of the four 128-bit subvectors
+// of the vectors x and y, the selection of two elements from the two
+// vectors x and y. The same selectors are used for every subvector:
+// values in the range 0-1 specify elements from x and values in the
+// range 2-3 specify the 0-1 elements of y. Selector bits above the low
+// two are ignored. When the selectors are constants the selection can
+// be implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX512
+func (x Float64x8) SelectFromPairGrouped(a, b uint8, y Float64x8) Float64x8 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ case _HH:
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstantGrouped
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _HL:
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPair returns the selection of two elements from the two
+// vectors x and y, where selector values in the range 0-1 specify
+// elements from x and values in the range 2-3 specify the 0-1 elements
+// of y. Selector bits above the low two are ignored. When the selectors
+// are constants the selection can be implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function
+// call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Int64x2) SelectFromPair(a, b uint8, y Int64x2) Int64x2 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstant(cscimm2(a, b), x)
+ case _HH:
+ return y.concatSelectedConstant(cscimm2(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstant
+ return x.concatSelectedConstant(cscimm2(a, b), y)
+ case _HL:
+ return y.concatSelectedConstant(cscimm2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPairGrouped returns, for each of the two 128-bit halves of
+// the vectors x and y, the selection of two elements from the two
+// vectors x and y. The same selectors are used for every half: values
+// in the range 0-1 specify elements from x and values in the range 2-3
+// specify the 0-1 elements of y. Selector bits above the low two are
+// ignored. When the selectors are constants the selection can be
+// implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX
+func (x Int64x4) SelectFromPairGrouped(a, b uint8, y Int64x4) Int64x4 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ case _HH:
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstantGrouped
+ return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y)
+ case _HL:
+ return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}
+
+// SelectFromPairGrouped returns, for each of the four 128-bit subvectors
+// of the vectors x and y, the selection of two elements from the two
+// vectors x and y. The same selectors are used for every subvector:
+// values in the range 0-1 specify elements from x and values in the
+// range 2-3 specify the 0-1 elements of y. Selector bits above the low
+// two are ignored. When the selectors are constants the selection can
+// be implemented in a single instruction.
+//
+// If the selectors are not constant this will translate to a function call.
+//
+// Asm: VSHUFPD, CPU Feature: AVX512
+func (x Int64x8) SelectFromPairGrouped(a, b uint8, y Int64x8) Int64x8 {
+ pattern := (a&2)>>1 + (b & 2) // bit 0 set: a selects from y; bit 1 set: b selects from y
+
+ a, b = a&1, b&1 // keep only the element index within the chosen vector
+
+ switch pattern {
+ case _LL:
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ case _HH:
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _LH: // NOTE(review): receiver appears to supply a's element and the argument b's — confirm against concatSelectedConstantGrouped
+ return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y)
+ case _HL:
+ return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x)
+ }
+ panic("missing case, switch should be exhaustive") // pattern is always 0-3; reached only if the cases above leave a gap
+}