diff options
Diffstat (limited to 'src/simd/shuffles_amd64.go')
| -rw-r--r-- | src/simd/shuffles_amd64.go | 798 |
1 files changed, 544 insertions, 254 deletions
diff --git a/src/simd/shuffles_amd64.go b/src/simd/shuffles_amd64.go index 68c840730b..c46a2d06fe 100644 --- a/src/simd/shuffles_amd64.go +++ b/src/simd/shuffles_amd64.go @@ -44,6 +44,16 @@ const ( _HHHH // a:y, b:y, c:y, d:y ) +// These constants represent the source pattern for the two parameters +// (a, b) passed to SelectFromPair and SelectFromPairGrouped for +// two-element vectors. +const ( + _LL = iota + _HL + _LH + _HH +) + // SelectFromPair returns the selection of four elements from the two // vectors x and y, where selector values in the range 0-3 specify // elements from x and values in the range 4-7 specify the 0-3 elements @@ -72,53 +82,53 @@ func (x Int32x4) SelectFromPair(a, b, c, d uint8, y Int32x4) Int32x4 { switch pattern { case _LLLL: - return x.concatSelectedConstant(cscimm(a, b, c, d), x) + return x.concatSelectedConstant(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstant(cscimm(a, b, c, d), y) + return y.concatSelectedConstant(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstant(cscimm(a, b, c, d), y) + return x.concatSelectedConstant(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstant(cscimm(a, b, c, d), x) + return y.concatSelectedConstant(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstant(cscimm(a, a, b, b), x) - return z.concatSelectedConstant(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstant(cscimm4(a, a, b, b), x) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstant(cscimm(a, a, b, b), y) - return z.concatSelectedConstant(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstant(cscimm4(a, a, b, b), y) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstant(cscimm(a, a, b, b), x) - return z.concatSelectedConstant(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstant(cscimm4(a, a, b, b), x) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), y) case _LHHH: - z 
:= x.concatSelectedConstant(cscimm(a, a, b, b), y) - return z.concatSelectedConstant(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstant(cscimm4(a, a, b, b), y) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstant(cscimm(c, c, d, d), y) - return x.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstant(cscimm4(c, c, d, d), y) + return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstant(cscimm(c, c, d, d), x) - return x.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstant(cscimm4(c, c, d, d), x) + return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstant(cscimm(c, c, d, d), y) - return y.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstant(cscimm4(c, c, d, d), y) + return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstant(cscimm(c, c, d, d), x) - return y.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstant(cscimm4(c, c, d, d), x) + return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstant(cscimm(a, c, b, d), y) - return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstant(cscimm4(a, c, b, d), y) + return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstant(cscimm(b, d, a, c), y) - return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstant(cscimm4(b, d, a, c), y) + return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstant(cscimm(b, c, a, d), y) - return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstant(cscimm4(b, c, a, d), y) + return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 
0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstant(cscimm(a, d, b, c), y) - return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstant(cscimm4(a, d, b, c), y) + return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") } @@ -144,53 +154,53 @@ func (x Uint32x4) SelectFromPair(a, b, c, d uint8, y Uint32x4) Uint32x4 { switch pattern { case _LLLL: - return x.concatSelectedConstant(cscimm(a, b, c, d), x) + return x.concatSelectedConstant(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstant(cscimm(a, b, c, d), y) + return y.concatSelectedConstant(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstant(cscimm(a, b, c, d), y) + return x.concatSelectedConstant(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstant(cscimm(a, b, c, d), x) + return y.concatSelectedConstant(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstant(cscimm(a, a, b, b), x) - return z.concatSelectedConstant(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstant(cscimm4(a, a, b, b), x) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstant(cscimm(a, a, b, b), y) - return z.concatSelectedConstant(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstant(cscimm4(a, a, b, b), y) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstant(cscimm(a, a, b, b), x) - return z.concatSelectedConstant(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstant(cscimm4(a, a, b, b), x) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstant(cscimm(a, a, b, b), y) - return z.concatSelectedConstant(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstant(cscimm4(a, a, b, b), y) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstant(cscimm(c, c, d, d), y) - return 
x.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstant(cscimm4(c, c, d, d), y) + return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstant(cscimm(c, c, d, d), x) - return x.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstant(cscimm4(c, c, d, d), x) + return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstant(cscimm(c, c, d, d), y) - return y.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstant(cscimm4(c, c, d, d), y) + return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstant(cscimm(c, c, d, d), x) - return y.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstant(cscimm4(c, c, d, d), x) + return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstant(cscimm(a, c, b, d), y) - return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstant(cscimm4(a, c, b, d), y) + return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstant(cscimm(b, d, a, c), y) - return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstant(cscimm4(b, d, a, c), y) + return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstant(cscimm(b, c, a, d), y) - return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstant(cscimm4(b, c, a, d), y) + return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstant(cscimm(a, d, b, c), y) - return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstant(cscimm4(a, d, b, c), y) + return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } 
panic("missing case, switch should be exhaustive") } @@ -216,53 +226,53 @@ func (x Float32x4) SelectFromPair(a, b, c, d uint8, y Float32x4) Float32x4 { switch pattern { case _LLLL: - return x.concatSelectedConstant(cscimm(a, b, c, d), x) + return x.concatSelectedConstant(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstant(cscimm(a, b, c, d), y) + return y.concatSelectedConstant(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstant(cscimm(a, b, c, d), y) + return x.concatSelectedConstant(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstant(cscimm(a, b, c, d), x) + return y.concatSelectedConstant(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstant(cscimm(a, a, b, b), x) - return z.concatSelectedConstant(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstant(cscimm4(a, a, b, b), x) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstant(cscimm(a, a, b, b), y) - return z.concatSelectedConstant(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstant(cscimm4(a, a, b, b), y) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstant(cscimm(a, a, b, b), x) - return z.concatSelectedConstant(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstant(cscimm4(a, a, b, b), x) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstant(cscimm(a, a, b, b), y) - return z.concatSelectedConstant(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstant(cscimm4(a, a, b, b), y) + return z.concatSelectedConstant(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstant(cscimm(c, c, d, d), y) - return x.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstant(cscimm4(c, c, d, d), y) + return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstant(cscimm(c, c, d, d), x) - return x.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := 
y.concatSelectedConstant(cscimm4(c, c, d, d), x) + return x.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstant(cscimm(c, c, d, d), y) - return y.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstant(cscimm4(c, c, d, d), y) + return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstant(cscimm(c, c, d, d), x) - return y.concatSelectedConstant(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstant(cscimm4(c, c, d, d), x) + return y.concatSelectedConstant(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstant(cscimm(a, c, b, d), y) - return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstant(cscimm4(a, c, b, d), y) + return z.concatSelectedConstant(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstant(cscimm(b, d, a, c), y) - return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstant(cscimm4(b, d, a, c), y) + return z.concatSelectedConstant(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstant(cscimm(b, c, a, d), y) - return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstant(cscimm4(b, c, a, d), y) + return z.concatSelectedConstant(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstant(cscimm(a, d, b, c), y) - return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstant(cscimm4(a, d, b, c), y) + return z.concatSelectedConstant(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") } @@ -291,53 +301,53 @@ func (x Int32x8) SelectFromPairGrouped(a, b, c, d uint8, y Int32x8) Int32x8 { switch pattern { case _LLLL: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return 
x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, 
d, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y) - return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y) + return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y) - return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y) + return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y) - return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y) + return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y) - return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y) + return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") } @@ -366,53 +376,53 @@ func (x Uint32x8) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x8) Uint32x8 { 
switch pattern { case _LLLL: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return 
x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y) - return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y) + return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y) - return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y) + return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y) - return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y) + return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y) - return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y) + return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") 
} @@ -441,53 +451,53 @@ func (x Float32x8) SelectFromPairGrouped(a, b, c, d uint8, y Float32x8) Float32x switch pattern { case _LLLL: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return 
x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y) - return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y) + return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y) - return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y) + return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y) - return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y) + return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y) - return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y) 
+ return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") } @@ -511,53 +521,53 @@ func (x Int32x16) SelectFromPairGrouped(a, b, c, d uint8, y Int32x16) Int32x16 { switch pattern { case _LLLL: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return 
x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y) - return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y) + return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y) - return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y) + return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y) - return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y) + return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y) - return 
z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y) + return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") } @@ -581,53 +591,53 @@ func (x Uint32x16) SelectFromPairGrouped(a, b, c, d uint8, y Uint32x16) Uint32x1 switch pattern { case _LLLL: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return 
z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y) - return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y) + return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y) - return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y) + return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y) - return z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm(2, 0, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, c, a, d), y) + return 
z.concatSelectedConstantGrouped(0b11_01_00_10 /* =cscimm4(2, 0, 1, 3) */, z) case _LHHL: - z := x.concatSelectedConstantGrouped(cscimm(a, d, b, c), y) - return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm(0, 2, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, d, b, c), y) + return z.concatSelectedConstantGrouped(0b01_11_10_00 /* =cscimm4(0, 2, 3, 1) */, z) } panic("missing case, switch should be exhaustive") } @@ -651,59 +661,339 @@ func (x Float32x16) SelectFromPairGrouped(a, b, c, d uint8, y Float32x16) Float3 switch pattern { case _LLLL: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HHHH: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _LLHH: - return x.concatSelectedConstantGrouped(cscimm(a, b, c, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, c, d), y) case _HHLL: - return y.concatSelectedConstantGrouped(cscimm(a, b, c, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, c, d), x) case _HLLL: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _LHLL: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), x) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), x) case _HLHH: - z := y.concatSelectedConstantGrouped(cscimm(a, a, b, b), x) - return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := y.concatSelectedConstantGrouped(cscimm4(a, a, b, b), x) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LHHH: - z := x.concatSelectedConstantGrouped(cscimm(a, a, b, b), y) - 
return z.concatSelectedConstantGrouped(cscimm(0, 2, c, d), y) + z := x.concatSelectedConstantGrouped(cscimm4(a, a, b, b), y) + return z.concatSelectedConstantGrouped(cscimm4(0, 2, c, d), y) case _LLLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LLHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return x.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return x.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHLH: - z := x.concatSelectedConstantGrouped(cscimm(c, c, d, d), y) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := x.concatSelectedConstantGrouped(cscimm4(c, c, d, d), y) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _HHHL: - z := y.concatSelectedConstantGrouped(cscimm(c, c, d, d), x) - return y.concatSelectedConstantGrouped(cscimm(a, b, 0, 2), z) + z := y.concatSelectedConstantGrouped(cscimm4(c, c, d, d), x) + return y.concatSelectedConstantGrouped(cscimm4(a, b, 0, 2), z) case _LHLH: - z := x.concatSelectedConstantGrouped(cscimm(a, c, b, d), y) - return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm(0, 2, 1, 3) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(a, c, b, d), y) + return z.concatSelectedConstantGrouped(0b11_01_10_00 /* =cscimm4(0, 2, 1, 3) */, z) case _HLHL: - z := x.concatSelectedConstantGrouped(cscimm(b, d, a, c), y) - return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm(2, 0, 3, 1) */, z) + z := x.concatSelectedConstantGrouped(cscimm4(b, d, a, c), y) + return z.concatSelectedConstantGrouped(0b01_11_00_10 /* =cscimm4(2, 0, 3, 1) */, z) case _HLLH: - z := x.concatSelectedConstantGrouped(cscimm(b, c, a, d), y) - return z.concatSelectedConstantGrouped(0b11_01_00_10 
// cscimm4 packs four 2-bit element indices into the 8-bit immediate
// passed to the four-element concatSelectedConstant shuffles: a
// occupies bits 0-1, b bits 2-3, c bits 4-5 and d bits 6-7. For
// example, cscimm4(2, 0, 1, 3) is 0b11_01_00_10.
//
// The indices are not masked here; each must already be in the range
// 0-3, otherwise it spills into the neighboring fields.
func cscimm4(a, b, c, d uint8) uint8 {
	return a + b<<2 + c<<4 + d<<6
}

// cscimm2 packs two 1-bit element indices into an immediate: a is
// bit 0 and b is bit 1. Each index must already be 0 or 1.
func cscimm2(a, b uint8) uint8 {
	return a + b<<1
}

// cscimm2g2 returns the cscimm2 immediate for a and b repeated in two
// adjacent 2-bit fields (bits 0-1 and bits 2-3). The copy is needed
// for VSHUFPD so that both 128-bit groups use the same selection,
// emulating the grouped behavior of VSHUFPS.
func cscimm2g2(a, b uint8) uint8 {
	g := cscimm2(a, b)
	return g + g<<2
}

// cscimm2g4 returns the cscimm2 immediate for a and b repeated in all
// four 2-bit fields of the byte, so that VSHUFPD applies the same
// selection to each of four 128-bit groups, emulating the grouped
// behavior of VSHUFPS.
func cscimm2g4(a, b uint8) uint8 {
	g := cscimm2g2(a, b)
	return g + g<<4
}

// SelectFromPair returns the selection of two elements from the two
// vectors x and y, where selector values in the range 0-1 specify
// elements from x and values in the range 2-3 specify the 0-1 elements
// of y. When the selectors are constants the selection can be
// implemented in a single instruction.
//
// If the selectors are not constant this will translate to a function
// call.
+// +// Asm: VSHUFPD, CPU Feature: AVX +func (x Uint64x2) SelectFromPair(a, b uint8, y Uint64x2) Uint64x2 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstant(cscimm2(a, b), x) + case _HH: + return y.concatSelectedConstant(cscimm2(a, b), y) + case _LH: + return x.concatSelectedConstant(cscimm2(a, b), y) + case _HL: + return y.concatSelectedConstant(cscimm2(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPairGrouped returns, for each of the two 128-bit halves of +// the vectors x and y, the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. +// +// Asm: VSHUFPD, CPU Feature: AVX +func (x Uint64x4) SelectFromPairGrouped(a, b uint8, y Uint64x4) Uint64x4 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x) + case _HH: + return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y) + case _LH: + return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y) + case _HL: + return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPairGrouped returns, for each of the four 128-bit subvectors +// of the vectors x and y, the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. 
+// +// Asm: VSHUFPD, CPU Feature: AVX512 +func (x Uint64x8) SelectFromPairGrouped(a, b uint8, y Uint64x8) Uint64x8 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x) + case _HH: + return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y) + case _LH: + return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y) + case _HL: + return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPair returns the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. +// +// Asm: VSHUFPD, CPU Feature: AVX +func (x Float64x2) SelectFromPair(a, b uint8, y Float64x2) Float64x2 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstant(cscimm2(a, b), x) + case _HH: + return y.concatSelectedConstant(cscimm2(a, b), y) + case _LH: + return x.concatSelectedConstant(cscimm2(a, b), y) + case _HL: + return y.concatSelectedConstant(cscimm2(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPairGrouped returns, for each of the two 128-bit halves of +// the vectors x and y, the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. 
+// +// Asm: VSHUFPD, CPU Feature: AVX +func (x Float64x4) SelectFromPairGrouped(a, b uint8, y Float64x4) Float64x4 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x) + case _HH: + return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y) + case _LH: + return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y) + case _HL: + return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPairGrouped returns, for each of the four 128-bit subvectors +// of the vectors x and y, the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. +// +// Asm: VSHUFPD, CPU Feature: AVX512 +func (x Float64x8) SelectFromPairGrouped(a, b uint8, y Float64x8) Float64x8 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x) + case _HH: + return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y) + case _LH: + return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y) + case _HL: + return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPair returns the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. 
+// +// Asm: VSHUFPD, CPU Feature: AVX +func (x Int64x2) SelectFromPair(a, b uint8, y Int64x2) Int64x2 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstant(cscimm2(a, b), x) + case _HH: + return y.concatSelectedConstant(cscimm2(a, b), y) + case _LH: + return x.concatSelectedConstant(cscimm2(a, b), y) + case _HL: + return y.concatSelectedConstant(cscimm2(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPairGrouped returns, for each of the two 128-bit halves of +// the vectors x and y, the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. +// +// Asm: VSHUFPD, CPU Feature: AVX +func (x Int64x4) SelectFromPairGrouped(a, b uint8, y Int64x4) Int64x4 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstantGrouped(cscimm2g2(a, b), x) + case _HH: + return y.concatSelectedConstantGrouped(cscimm2g2(a, b), y) + case _LH: + return x.concatSelectedConstantGrouped(cscimm2g2(a, b), y) + case _HL: + return y.concatSelectedConstantGrouped(cscimm2g2(a, b), x) + } + panic("missing case, switch should be exhaustive") +} + +// SelectFromPairGrouped returns, for each of the four 128-bit subvectors +// of the vectors x and y, the selection of two elements from the two +// vectors x and y, where selector values in the range 0-1 specify +// elements from x and values in the range 2-3 specify the 0-1 elements +// of y. When the selectors are constants the selection can be +// implemented in a single instruction. +// +// If the selectors are not constant this will translate to a function +// call. 
+// +// Asm: VSHUFPD, CPU Feature: AVX512 +func (x Int64x8) SelectFromPairGrouped(a, b uint8, y Int64x8) Int64x8 { + pattern := (a&2)>>1 + (b & 2) + + a, b = a&1, b&1 + + switch pattern { + case _LL: + return x.concatSelectedConstantGrouped(cscimm2g4(a, b), x) + case _HH: + return y.concatSelectedConstantGrouped(cscimm2g4(a, b), y) + case _LH: + return x.concatSelectedConstantGrouped(cscimm2g4(a, b), y) + case _HL: + return y.concatSelectedConstantGrouped(cscimm2g4(a, b), x) + } + panic("missing case, switch should be exhaustive") +} |
