diff options
Diffstat (limited to 'src/simd/ops_amd64.go')
| -rw-r--r-- | src/simd/ops_amd64.go | 8481 |
1 files changed, 8481 insertions, 0 deletions
diff --git a/src/simd/ops_amd64.go b/src/simd/ops_amd64.go new file mode 100644 index 0000000000..38d984622d --- /dev/null +++ b/src/simd/ops_amd64.go @@ -0,0 +1,8481 @@ +// Code generated by x/arch/internal/simdgen using 'go run . -xedPath $XED_PATH -o godefs -goroot $GOROOT go.yaml types.yaml categories.yaml'; DO NOT EDIT. + +//go:build goexperiment.simd + +package simd + +/* AESDecryptLastRound */ + +// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +// +// Asm: VAESDECLAST, CPU Feature: AVX, AES +func (x Uint8x16) AESDecryptLastRound(y Uint32x4) Uint8x16 + +// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +// +// Asm: VAESDECLAST, CPU Feature: AVX512VAES +func (x Uint8x32) AESDecryptLastRound(y Uint32x8) Uint8x32 + +// AESDecryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvShiftRows(InvSubBytes(x)), y) +// +// Asm: VAESDECLAST, CPU Feature: AVX512VAES +func (x Uint8x64) AESDecryptLastRound(y Uint32x16) Uint8x64 + +/* AESDecryptOneRound */ + +// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. 
+// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +// +// Asm: VAESDEC, CPU Feature: AVX, AES +func (x Uint8x16) AESDecryptOneRound(y Uint32x4) Uint8x16 + +// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +// +// Asm: VAESDEC, CPU Feature: AVX512VAES +func (x Uint8x32) AESDecryptOneRound(y Uint32x8) Uint8x32 + +// AESDecryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of dw array in use. +// result = AddRoundKey(InvMixColumns(InvShiftRows(InvSubBytes(x))), y) +// +// Asm: VAESDEC, CPU Feature: AVX512VAES +func (x Uint8x64) AESDecryptOneRound(y Uint32x16) Uint8x64 + +/* AESEncryptLastRound */ + +// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(ShiftRows(SubBytes(x)), y) +// +// Asm: VAESENCLAST, CPU Feature: AVX, AES +func (x Uint8x16) AESEncryptLastRound(y Uint32x4) Uint8x16 + +// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(ShiftRows(SubBytes(x)), y) +// +// Asm: VAESENCLAST, CPU Feature: AVX512VAES +func (x Uint8x32) AESEncryptLastRound(y Uint32x8) Uint8x32 + +// AESEncryptLastRound performs a series of operations in AES cipher algorithm defined in FIPS 197. 
+// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(ShiftRows(SubBytes(x)), y) +// +// Asm: VAESENCLAST, CPU Feature: AVX512VAES +func (x Uint8x64) AESEncryptLastRound(y Uint32x16) Uint8x64 + +/* AESEncryptOneRound */ + +// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENC, CPU Feature: AVX, AES +func (x Uint8x16) AESEncryptOneRound(y Uint32x4) Uint8x16 + +// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENC, CPU Feature: AVX512VAES +func (x Uint8x32) AESEncryptOneRound(y Uint32x8) Uint8x32 + +// AESEncryptOneRound performs a series of operations in AES cipher algorithm defined in FIPS 197. +// x is the state array, starting from low index to high are s00, s10, s20, s30, s01, ..., s33. +// y is the chunk of w array in use. +// result = AddRoundKey(MixColumns(ShiftRows(SubBytes(x))), y) +// +// Asm: VAESENC, CPU Feature: AVX512VAES +func (x Uint8x64) AESEncryptOneRound(y Uint32x16) Uint8x64 + +/* AESInvMixColumns */ + +// AESInvMixColumns performs the InvMixColumns operation in AES cipher algorithm defined in FIPS 197. +// x is the chunk of w array in use. +// result = InvMixColumns(x) +// +// Asm: VAESIMC, CPU Feature: AVX, AES +func (x Uint32x4) AESInvMixColumns() Uint32x4 + +/* AESRoundKeyGenAssist */ + +// AESRoundKeyGenAssist performs some components of KeyExpansion in AES cipher algorithm defined in FIPS 197. 
+// x is an array of AES words, but only x[0] and x[2] are used. +// rconVal is a value from the Rcon constant array. +// result[0] = XOR(SubWord(RotWord(x[0])), rconVal) +// result[1] = SubWord(x[1]) +// result[2] = XOR(SubWord(RotWord(x[2])), rconVal) +// result[3] = SubWord(x[3]) +// +// NOTE(review): the sentence about x says only x[0] and x[2] are used, but the formulas above also read x[1] and x[3]; confirm the element indices against the Intel SDM pseudocode for AESKEYGENASSIST and correct this text in the generator input. +// +// rconVal results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VAESKEYGENASSIST, CPU Feature: AVX, AES +func (x Uint32x4) AESRoundKeyGenAssist(rconVal uint8) Uint32x4 + +/* Abs */ + +// Abs computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX +func (x Int8x16) Abs() Int8x16 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX2 +func (x Int8x32) Abs() Int8x32 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSB, CPU Feature: AVX512 +func (x Int8x64) Abs() Int8x64 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX +func (x Int16x8) Abs() Int16x8 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX2 +func (x Int16x16) Abs() Int16x16 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSW, CPU Feature: AVX512 +func (x Int16x32) Abs() Int16x32 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX +func (x Int32x4) Abs() Int32x4 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX2 +func (x Int32x8) Abs() Int32x8 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSD, CPU Feature: AVX512 +func (x Int32x16) Abs() Int32x16 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512 +func (x Int64x2) Abs() Int64x2 + +// Abs computes the absolute value of each element. +// +// Asm: VPABSQ, CPU Feature: AVX512 +func (x Int64x4) Abs() Int64x4 + +// Abs computes the absolute value of each element. 
+// +// Asm: VPABSQ, CPU Feature: AVX512 +func (x Int64x8) Abs() Int64x8 + +/* Add */ + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX +func (x Float32x4) Add(y Float32x4) Float32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX +func (x Float32x8) Add(y Float32x8) Float32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPS, CPU Feature: AVX512 +func (x Float32x16) Add(y Float32x16) Float32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX +func (x Float64x2) Add(y Float64x2) Float64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX +func (x Float64x4) Add(y Float64x4) Float64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VADDPD, CPU Feature: AVX512 +func (x Float64x8) Add(y Float64x8) Float64x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX +func (x Int8x16) Add(y Int8x16) Int8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX2 +func (x Int8x32) Add(y Int8x32) Int8x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512 +func (x Int8x64) Add(y Int8x64) Int8x64 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX +func (x Int16x8) Add(y Int16x8) Int16x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX2 +func (x Int16x16) Add(y Int16x16) Int16x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512 +func (x Int16x32) Add(y Int16x32) Int16x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX +func (x Int32x4) Add(y Int32x4) Int32x4 + +// Add adds corresponding elements of two vectors. 
+// +// Asm: VPADDD, CPU Feature: AVX2 +func (x Int32x8) Add(y Int32x8) Int32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512 +func (x Int32x16) Add(y Int32x16) Int32x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX +func (x Int64x2) Add(y Int64x2) Int64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX2 +func (x Int64x4) Add(y Int64x4) Int64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512 +func (x Int64x8) Add(y Int64x8) Int64x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX +func (x Uint8x16) Add(y Uint8x16) Uint8x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX2 +func (x Uint8x32) Add(y Uint8x32) Uint8x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDB, CPU Feature: AVX512 +func (x Uint8x64) Add(y Uint8x64) Uint8x64 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX +func (x Uint16x8) Add(y Uint16x8) Uint16x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX2 +func (x Uint16x16) Add(y Uint16x16) Uint16x16 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDW, CPU Feature: AVX512 +func (x Uint16x32) Add(y Uint16x32) Uint16x32 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX +func (x Uint32x4) Add(y Uint32x4) Uint32x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX2 +func (x Uint32x8) Add(y Uint32x8) Uint32x8 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDD, CPU Feature: AVX512 +func (x Uint32x16) Add(y Uint32x16) Uint32x16 + +// Add adds corresponding elements of two vectors. 
+// +// Asm: VPADDQ, CPU Feature: AVX +func (x Uint64x2) Add(y Uint64x2) Uint64x2 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX2 +func (x Uint64x4) Add(y Uint64x4) Uint64x4 + +// Add adds corresponding elements of two vectors. +// +// Asm: VPADDQ, CPU Feature: AVX512 +func (x Uint64x8) Add(y Uint64x8) Uint64x8 + +/* AddPairs */ + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VHADDPS, CPU Feature: AVX +func (x Float32x4) AddPairs(y Float32x4) Float32x4 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VHADDPS, CPU Feature: AVX +func (x Float32x8) AddPairs(y Float32x8) Float32x8 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VHADDPD, CPU Feature: AVX +func (x Float64x2) AddPairs(y Float64x2) Float64x2 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VHADDPD, CPU Feature: AVX +func (x Float64x4) AddPairs(y Float64x4) Float64x4 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDW, CPU Feature: AVX +func (x Int16x8) AddPairs(y Int16x8) Int16x8 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
+// +// Asm: VPHADDW, CPU Feature: AVX2 +func (x Int16x16) AddPairs(y Int16x16) Int16x16 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDD, CPU Feature: AVX +func (x Int32x4) AddPairs(y Int32x4) Int32x4 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDD, CPU Feature: AVX2 +func (x Int32x8) AddPairs(y Int32x8) Int32x8 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDW, CPU Feature: AVX +func (x Uint16x8) AddPairs(y Uint16x8) Uint16x8 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDW, CPU Feature: AVX2 +func (x Uint16x16) AddPairs(y Uint16x16) Uint16x16 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDD, CPU Feature: AVX +func (x Uint32x4) AddPairs(y Uint32x4) Uint32x4 + +// AddPairs horizontally adds adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDD, CPU Feature: AVX2 +func (x Uint32x8) AddPairs(y Uint32x8) Uint32x8 + +/* AddPairsSaturated */ + +// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. 
+// +// Asm: VPHADDSW, CPU Feature: AVX +func (x Int16x8) AddPairsSaturated(y Int16x8) Int16x8 + +// AddPairsSaturated horizontally adds adjacent pairs of elements with saturation. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0+y1, y2+y3, ..., x0+x1, x2+x3, ...]. +// +// Asm: VPHADDSW, CPU Feature: AVX2 +func (x Int16x16) AddPairsSaturated(y Int16x16) Int16x16 + +/* AddSaturated */ + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX +func (x Int8x16) AddSaturated(y Int8x16) Int8x16 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX2 +func (x Int8x32) AddSaturated(y Int8x32) Int8x32 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSB, CPU Feature: AVX512 +func (x Int8x64) AddSaturated(y Int8x64) Int8x64 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX +func (x Int16x8) AddSaturated(y Int16x8) Int16x8 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX2 +func (x Int16x16) AddSaturated(y Int16x16) Int16x16 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDSW, CPU Feature: AVX512 +func (x Int16x32) AddSaturated(y Int16x32) Int16x32 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDUSB, CPU Feature: AVX +func (x Uint8x16) AddSaturated(y Uint8x16) Uint8x16 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDUSB, CPU Feature: AVX2 +func (x Uint8x32) AddSaturated(y Uint8x32) Uint8x32 + +// AddSaturated adds corresponding elements of two vectors with saturation. 
+// +// Asm: VPADDUSB, CPU Feature: AVX512 +func (x Uint8x64) AddSaturated(y Uint8x64) Uint8x64 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDUSW, CPU Feature: AVX +func (x Uint16x8) AddSaturated(y Uint16x8) Uint16x8 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDUSW, CPU Feature: AVX2 +func (x Uint16x16) AddSaturated(y Uint16x16) Uint16x16 + +// AddSaturated adds corresponding elements of two vectors with saturation. +// +// Asm: VPADDUSW, CPU Feature: AVX512 +func (x Uint16x32) AddSaturated(y Uint16x32) Uint16x32 + +/* AddSub */ + +// AddSub subtracts even elements and adds odd elements of two vectors. +// +// Asm: VADDSUBPS, CPU Feature: AVX +func (x Float32x4) AddSub(y Float32x4) Float32x4 + +// AddSub subtracts even elements and adds odd elements of two vectors. +// +// Asm: VADDSUBPS, CPU Feature: AVX +func (x Float32x8) AddSub(y Float32x8) Float32x8 + +// AddSub subtracts even elements and adds odd elements of two vectors. +// +// Asm: VADDSUBPD, CPU Feature: AVX +func (x Float64x2) AddSub(y Float64x2) Float64x2 + +// AddSub subtracts even elements and adds odd elements of two vectors. +// +// Asm: VADDSUBPD, CPU Feature: AVX +func (x Float64x4) AddSub(y Float64x4) Float64x4 + +/* And */ + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Int8x16) And(y Int8x16) Int8x16 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Int8x32) And(y Int8x32) Int8x32 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512 +func (x Int8x64) And(y Int8x64) Int8x64 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Int16x8) And(y Int16x8) Int16x8 + +// And performs a bitwise AND operation between two vectors. 
+// +// Asm: VPAND, CPU Feature: AVX2 +func (x Int16x16) And(y Int16x16) Int16x16 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512 +func (x Int16x32) And(y Int16x32) Int16x32 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Int32x4) And(y Int32x4) Int32x4 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Int32x8) And(y Int32x8) Int32x8 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512 +func (x Int32x16) And(y Int32x16) Int32x16 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Int64x2) And(y Int64x2) Int64x2 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Int64x4) And(y Int64x4) Int64x4 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512 +func (x Int64x8) And(y Int64x8) Int64x8 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Uint8x16) And(y Uint8x16) Uint8x16 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Uint8x32) And(y Uint8x32) Uint8x32 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512 +func (x Uint8x64) And(y Uint8x64) Uint8x64 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Uint16x8) And(y Uint16x8) Uint16x8 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Uint16x16) And(y Uint16x16) Uint16x16 + +// And performs a bitwise AND operation between two vectors. 
+// +// Asm: VPANDD, CPU Feature: AVX512 +func (x Uint16x32) And(y Uint16x32) Uint16x32 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Uint32x4) And(y Uint32x4) Uint32x4 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Uint32x8) And(y Uint32x8) Uint32x8 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDD, CPU Feature: AVX512 +func (x Uint32x16) And(y Uint32x16) Uint32x16 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX +func (x Uint64x2) And(y Uint64x2) Uint64x2 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPAND, CPU Feature: AVX2 +func (x Uint64x4) And(y Uint64x4) Uint64x4 + +// And performs a bitwise AND operation between two vectors. +// +// Asm: VPANDQ, CPU Feature: AVX512 +func (x Uint64x8) And(y Uint64x8) Uint64x8 + +/* AndNot */ + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Int8x16) AndNot(y Int8x16) Int8x16 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Int8x32) AndNot(y Int8x32) Int8x32 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512 +func (x Int8x64) AndNot(y Int8x64) Int8x64 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Int16x8) AndNot(y Int16x8) Int16x8 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Int16x16) AndNot(y Int16x16) Int16x16 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512 +func (x Int16x32) AndNot(y Int16x32) Int16x32 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Int32x4) AndNot(y Int32x4) Int32x4 + +// AndNot performs a bitwise x &^ y. 
+// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Int32x8) AndNot(y Int32x8) Int32x8 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512 +func (x Int32x16) AndNot(y Int32x16) Int32x16 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Int64x2) AndNot(y Int64x2) Int64x2 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Int64x4) AndNot(y Int64x4) Int64x4 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDNQ, CPU Feature: AVX512 +func (x Int64x8) AndNot(y Int64x8) Int64x8 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Uint8x16) AndNot(y Uint8x16) Uint8x16 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Uint8x32) AndNot(y Uint8x32) Uint8x32 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512 +func (x Uint8x64) AndNot(y Uint8x64) Uint8x64 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Uint16x8) AndNot(y Uint16x8) Uint16x8 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Uint16x16) AndNot(y Uint16x16) Uint16x16 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512 +func (x Uint16x32) AndNot(y Uint16x32) Uint16x32 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Uint32x4) AndNot(y Uint32x4) Uint32x4 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Uint32x8) AndNot(y Uint32x8) Uint32x8 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDND, CPU Feature: AVX512 +func (x Uint32x16) AndNot(y Uint32x16) Uint32x16 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDN, CPU Feature: AVX +func (x Uint64x2) AndNot(y Uint64x2) Uint64x2 + +// AndNot performs a bitwise x &^ y. 
+// +// Asm: VPANDN, CPU Feature: AVX2 +func (x Uint64x4) AndNot(y Uint64x4) Uint64x4 + +// AndNot performs a bitwise x &^ y. +// +// Asm: VPANDNQ, CPU Feature: AVX512 +func (x Uint64x8) AndNot(y Uint64x8) Uint64x8 + +/* Average */ + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX +func (x Uint8x16) Average(y Uint8x16) Uint8x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX2 +func (x Uint8x32) Average(y Uint8x32) Uint8x32 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGB, CPU Feature: AVX512 +func (x Uint8x64) Average(y Uint8x64) Uint8x64 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX +func (x Uint16x8) Average(y Uint16x8) Uint16x8 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX2 +func (x Uint16x16) Average(y Uint16x16) Uint16x16 + +// Average computes the rounded average of corresponding elements. +// +// Asm: VPAVGW, CPU Feature: AVX512 +func (x Uint16x32) Average(y Uint16x32) Uint16x32 + +/* Broadcast128 */ + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VBROADCASTSS, CPU Feature: AVX2 +func (x Float32x4) Broadcast128() Float32x4 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Float64x2) Broadcast128() Float64x2 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Int8x16) Broadcast128() Int8x16 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. 
+// +// Asm: VPBROADCASTW, CPU Feature: AVX2 +func (x Int16x8) Broadcast128() Int16x8 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTD, CPU Feature: AVX2 +func (x Int32x4) Broadcast128() Int32x4 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Int64x2) Broadcast128() Int64x2 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Uint8x16) Broadcast128() Uint8x16 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTW, CPU Feature: AVX2 +func (x Uint16x8) Broadcast128() Uint16x8 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTD, CPU Feature: AVX2 +func (x Uint32x4) Broadcast128() Uint32x4 + +// Broadcast128 copies element zero of its (128-bit) input to all elements of +// the 128-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Uint64x2) Broadcast128() Uint64x2 + +/* Broadcast256 */ + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VBROADCASTSS, CPU Feature: AVX2 +func (x Float32x4) Broadcast256() Float32x8 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VBROADCASTSD, CPU Feature: AVX2 +func (x Float64x2) Broadcast256() Float64x4 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. 
+// +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Int8x16) Broadcast256() Int8x32 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTW, CPU Feature: AVX2 +func (x Int16x8) Broadcast256() Int16x16 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTD, CPU Feature: AVX2 +func (x Int32x4) Broadcast256() Int32x8 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Int64x2) Broadcast256() Int64x4 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTB, CPU Feature: AVX2 +func (x Uint8x16) Broadcast256() Uint8x32 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTW, CPU Feature: AVX2 +func (x Uint16x8) Broadcast256() Uint16x16 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTD, CPU Feature: AVX2 +func (x Uint32x4) Broadcast256() Uint32x8 + +// Broadcast256 copies element zero of its (128-bit) input to all elements of +// the 256-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX2 +func (x Uint64x2) Broadcast256() Uint64x4 + +/* Broadcast512 */ + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VBROADCASTSS, CPU Feature: AVX512 +func (x Float32x4) Broadcast512() Float32x16 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. 
+// +// Asm: VBROADCASTSD, CPU Feature: AVX512 +func (x Float64x2) Broadcast512() Float64x8 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTB, CPU Feature: AVX512 +func (x Int8x16) Broadcast512() Int8x64 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTW, CPU Feature: AVX512 +func (x Int16x8) Broadcast512() Int16x32 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTD, CPU Feature: AVX512 +func (x Int32x4) Broadcast512() Int32x16 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX512 +func (x Int64x2) Broadcast512() Int64x8 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTB, CPU Feature: AVX512 +func (x Uint8x16) Broadcast512() Uint8x64 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTW, CPU Feature: AVX512 +func (x Uint16x8) Broadcast512() Uint16x32 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTD, CPU Feature: AVX512 +func (x Uint32x4) Broadcast512() Uint32x16 + +// Broadcast512 copies element zero of its (128-bit) input to all elements of +// the 512-bit output vector. +// +// Asm: VPBROADCASTQ, CPU Feature: AVX512 +func (x Uint64x2) Broadcast512() Uint64x8 + +/* Ceil */ + +// Ceil rounds elements up to the nearest integer. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x4) Ceil() Float32x4 + +// Ceil rounds elements up to the nearest integer. 
+// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x8) Ceil() Float32x8 + +// Ceil rounds the elements of x up to the nearest integer. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x2) Ceil() Float64x2 + +// Ceil rounds the elements of x up to the nearest integer. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x4) Ceil() Float64x4 + +/* CeilScaled */ + +// CeilScaled rounds elements up with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x4) CeilScaled(prec uint8) Float32x4 + +// CeilScaled rounds elements up with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x8) CeilScaled(prec uint8) Float32x8 + +// CeilScaled rounds elements up with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x16) CeilScaled(prec uint8) Float32x16 + +// CeilScaled rounds elements up with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x2) CeilScaled(prec uint8) Float64x2 + +// CeilScaled rounds elements up with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x4) CeilScaled(prec uint8) Float64x4 + +// CeilScaled rounds elements up with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. 
+// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x8) CeilScaled(prec uint8) Float64x8 + +/* CeilScaledResidue */ + +// CeilScaledResidue computes the difference after ceiling with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x4) CeilScaledResidue(prec uint8) Float32x4 + +// CeilScaledResidue computes the difference after ceiling with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x8) CeilScaledResidue(prec uint8) Float32x8 + +// CeilScaledResidue computes the difference after ceiling with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x16) CeilScaledResidue(prec uint8) Float32x16 + +// CeilScaledResidue computes the difference after ceiling with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x2) CeilScaledResidue(prec uint8) Float64x2 + +// CeilScaledResidue computes the difference after ceiling with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x4) CeilScaledResidue(prec uint8) Float64x4 + +// CeilScaledResidue computes the difference after ceiling with the specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. 
+// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x8) CeilScaledResidue(prec uint8) Float64x8 + +/* Compress */ + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VCOMPRESSPS, CPU Feature: AVX512 +func (x Float32x4) Compress(mask Mask32x4) Float32x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VCOMPRESSPS, CPU Feature: AVX512 +func (x Float32x8) Compress(mask Mask32x8) Float32x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VCOMPRESSPS, CPU Feature: AVX512 +func (x Float32x16) Compress(mask Mask32x16) Float32x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VCOMPRESSPD, CPU Feature: AVX512 +func (x Float64x2) Compress(mask Mask64x2) Float64x2 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VCOMPRESSPD, CPU Feature: AVX512 +func (x Float64x4) Compress(mask Mask64x4) Float64x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VCOMPRESSPD, CPU Feature: AVX512 +func (x Float64x8) Compress(mask Mask64x8) Float64x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. 
+// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Int8x16) Compress(mask Mask8x16) Int8x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Int8x32) Compress(mask Mask8x32) Int8x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Int8x64) Compress(mask Mask8x64) Int8x64 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Int16x8) Compress(mask Mask16x8) Int16x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Int16x16) Compress(mask Mask16x16) Int16x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Int16x32) Compress(mask Mask16x32) Int16x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512 +func (x Int32x4) Compress(mask Mask32x4) Int32x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. 
+// +// Asm: VPCOMPRESSD, CPU Feature: AVX512 +func (x Int32x8) Compress(mask Mask32x8) Int32x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512 +func (x Int32x16) Compress(mask Mask32x16) Int32x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512 +func (x Int64x2) Compress(mask Mask64x2) Int64x2 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512 +func (x Int64x4) Compress(mask Mask64x4) Int64x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512 +func (x Int64x8) Compress(mask Mask64x8) Int64x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Uint8x16) Compress(mask Mask8x16) Uint8x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Uint8x32) Compress(mask Mask8x32) Uint8x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSB, CPU Feature: AVX512VBMI2 +func (x Uint8x64) Compress(mask Mask8x64) Uint8x64 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. 
+// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) Compress(mask Mask16x8) Uint16x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) Compress(mask Mask16x16) Uint16x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) Compress(mask Mask16x32) Uint16x32 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512 +func (x Uint32x4) Compress(mask Mask32x4) Uint32x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512 +func (x Uint32x8) Compress(mask Mask32x8) Uint32x8 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSD, CPU Feature: AVX512 +func (x Uint32x16) Compress(mask Mask32x16) Uint32x16 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512 +func (x Uint64x2) Compress(mask Mask64x2) Uint64x2 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. 
+// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512 +func (x Uint64x4) Compress(mask Mask64x4) Uint64x4 + +// Compress performs a compression on vector x using mask by +// selecting elements as indicated by mask and packing them into the lower-indexed elements. +// +// Asm: VPCOMPRESSQ, CPU Feature: AVX512 +func (x Uint64x8) Compress(mask Mask64x8) Uint64x8 + +/* ConcatPermute */ + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2B, CPU Feature: AVX512VBMI +func (x Int8x16) ConcatPermute(y Int8x16, indices Uint8x16) Int8x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2B, CPU Feature: AVX512VBMI +func (x Uint8x16) ConcatPermute(y Uint8x16, indices Uint8x16) Uint8x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2B, CPU Feature: AVX512VBMI +func (x Int8x32) ConcatPermute(y Int8x32, indices Uint8x32) Int8x32 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. 
+// +// Asm: VPERMI2B, CPU Feature: AVX512VBMI +func (x Uint8x32) ConcatPermute(y Uint8x32, indices Uint8x32) Uint8x32 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2B, CPU Feature: AVX512VBMI +func (x Int8x64) ConcatPermute(y Int8x64, indices Uint8x64) Int8x64 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2B, CPU Feature: AVX512VBMI +func (x Uint8x64) ConcatPermute(y Uint8x64, indices Uint8x64) Uint8x64 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2W, CPU Feature: AVX512 +func (x Int16x8) ConcatPermute(y Int16x8, indices Uint16x8) Int16x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2W, CPU Feature: AVX512 +func (x Uint16x8) ConcatPermute(y Uint16x8, indices Uint16x8) Uint16x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). 
+// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2W, CPU Feature: AVX512 +func (x Int16x16) ConcatPermute(y Int16x16, indices Uint16x16) Int16x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2W, CPU Feature: AVX512 +func (x Uint16x16) ConcatPermute(y Uint16x16, indices Uint16x16) Uint16x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2W, CPU Feature: AVX512 +func (x Int16x32) ConcatPermute(y Int16x32, indices Uint16x32) Int16x32 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2W, CPU Feature: AVX512 +func (x Uint16x32) ConcatPermute(y Uint16x32, indices Uint16x32) Uint16x32 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. 
+// +// Asm: VPERMI2PS, CPU Feature: AVX512 +func (x Float32x4) ConcatPermute(y Float32x4, indices Uint32x4) Float32x4 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2D, CPU Feature: AVX512 +func (x Int32x4) ConcatPermute(y Int32x4, indices Uint32x4) Int32x4 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2D, CPU Feature: AVX512 +func (x Uint32x4) ConcatPermute(y Uint32x4, indices Uint32x4) Uint32x4 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2PS, CPU Feature: AVX512 +func (x Float32x8) ConcatPermute(y Float32x8, indices Uint32x8) Float32x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2D, CPU Feature: AVX512 +func (x Int32x8) ConcatPermute(y Int32x8, indices Uint32x8) Int32x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). 
+// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2D, CPU Feature: AVX512 +func (x Uint32x8) ConcatPermute(y Uint32x8, indices Uint32x8) Uint32x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2PS, CPU Feature: AVX512 +func (x Float32x16) ConcatPermute(y Float32x16, indices Uint32x16) Float32x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2D, CPU Feature: AVX512 +func (x Int32x16) ConcatPermute(y Int32x16, indices Uint32x16) Int32x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2D, CPU Feature: AVX512 +func (x Uint32x16) ConcatPermute(y Uint32x16, indices Uint32x16) Uint32x16 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. 
+// +// Asm: VPERMI2PD, CPU Feature: AVX512 +func (x Float64x2) ConcatPermute(y Float64x2, indices Uint64x2) Float64x2 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2Q, CPU Feature: AVX512 +func (x Int64x2) ConcatPermute(y Int64x2, indices Uint64x2) Int64x2 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2Q, CPU Feature: AVX512 +func (x Uint64x2) ConcatPermute(y Uint64x2, indices Uint64x2) Uint64x2 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2PD, CPU Feature: AVX512 +func (x Float64x4) ConcatPermute(y Float64x4, indices Uint64x4) Float64x4 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2Q, CPU Feature: AVX512 +func (x Int64x4) ConcatPermute(y Int64x4, indices Uint64x4) Int64x4 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). 
+// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2Q, CPU Feature: AVX512 +func (x Uint64x4) ConcatPermute(y Uint64x4, indices Uint64x4) Uint64x4 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2PD, CPU Feature: AVX512 +func (x Float64x8) ConcatPermute(y Float64x8, indices Uint64x8) Float64x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2Q, CPU Feature: AVX512 +func (x Int64x8) ConcatPermute(y Int64x8, indices Uint64x8) Int64x8 + +// ConcatPermute performs a full permutation of vectors x and y using indices: +// result := {xy[indices[0]], xy[indices[1]], ..., xy[indices[n]]} +// where xy is the concatenation of x (lower half) and y (upper half). +// Only the bits needed to represent an index into xy are used from each element of indices. +// +// Asm: VPERMI2Q, CPU Feature: AVX512 +func (x Uint64x8) ConcatPermute(y Uint64x8, indices Uint64x8) Uint64x8 + +/* ConcatShiftBytesRight */ + +// ConcatShiftBytesRight concatenates x and y and shifts the result right by constant bytes. +// The result vector will be the lower half of the concatenated vector. +// +// constant results in better performance when it's a constant; a non-constant value will be translated into a jump table. 
+// +// Asm: VPALIGNR, CPU Feature: AVX +func (x Uint8x16) ConcatShiftBytesRight(constant uint8, y Uint8x16) Uint8x16 + +/* ConcatShiftBytesRightGrouped */ + +// ConcatShiftBytesRightGrouped concatenates x and y and shifts the result right by constant bytes. +// The result vector will be the lower half of the concatenated vector. +// This operation is performed within each 16-byte group. +// +// constant results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPALIGNR, CPU Feature: AVX2 +func (x Uint8x32) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x32) Uint8x32 + +// ConcatShiftBytesRightGrouped concatenates x and y and shifts the result right by constant bytes. +// The result vector will be the lower half of the concatenated vector. +// This operation is performed within each 16-byte group. +// +// constant results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPALIGNR, CPU Feature: AVX512 +func (x Uint8x64) ConcatShiftBytesRightGrouped(constant uint8, y Uint8x64) Uint8x64 + +/* ConvertToInt32 */ + +// ConvertToInt32 converts element values to int32. +// +// Asm: VCVTTPS2DQ, CPU Feature: AVX +func (x Float32x4) ConvertToInt32() Int32x4 + +// ConvertToInt32 converts element values to int32. +// +// Asm: VCVTTPS2DQ, CPU Feature: AVX +func (x Float32x8) ConvertToInt32() Int32x8 + +// ConvertToInt32 converts element values to int32. +// +// Asm: VCVTTPS2DQ, CPU Feature: AVX512 +func (x Float32x16) ConvertToInt32() Int32x16 + +/* ConvertToUint32 */ + +// ConvertToUint32 converts element values to uint32. +// +// Asm: VCVTPS2UDQ, CPU Feature: AVX512 +func (x Float32x4) ConvertToUint32() Uint32x4 + +// ConvertToUint32 converts element values to uint32. +// +// Asm: VCVTPS2UDQ, CPU Feature: AVX512 +func (x Float32x8) ConvertToUint32() Uint32x8 + +// ConvertToUint32 converts element values to uint32. 
+// +// Asm: VCVTPS2UDQ, CPU Feature: AVX512 +func (x Float32x16) ConvertToUint32() Uint32x16 + +/* CopySign */ + +// CopySign returns the product of the first operand (x) with -1, 0, or 1, +// whichever constant is nearest to the value of the second operand (y). +// +// Asm: VPSIGNB, CPU Feature: AVX +func (x Int8x16) CopySign(y Int8x16) Int8x16 + +// CopySign returns the product of the first operand (x) with -1, 0, or 1, +// whichever constant is nearest to the value of the second operand (y). +// +// Asm: VPSIGNB, CPU Feature: AVX2 +func (x Int8x32) CopySign(y Int8x32) Int8x32 + +// CopySign returns the product of the first operand (x) with -1, 0, or 1, +// whichever constant is nearest to the value of the second operand (y). +// +// Asm: VPSIGNW, CPU Feature: AVX +func (x Int16x8) CopySign(y Int16x8) Int16x8 + +// CopySign returns the product of the first operand (x) with -1, 0, or 1, +// whichever constant is nearest to the value of the second operand (y). +// +// Asm: VPSIGNW, CPU Feature: AVX2 +func (x Int16x16) CopySign(y Int16x16) Int16x16 + +// CopySign returns the product of the first operand (x) with -1, 0, or 1, +// whichever constant is nearest to the value of the second operand (y). +// +// Asm: VPSIGND, CPU Feature: AVX +func (x Int32x4) CopySign(y Int32x4) Int32x4 + +// CopySign returns the product of the first operand (x) with -1, 0, or 1, +// whichever constant is nearest to the value of the second operand (y). +// +// Asm: VPSIGND, CPU Feature: AVX2 +func (x Int32x8) CopySign(y Int32x8) Int32x8 + +/* Div */ + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX +func (x Float32x4) Div(y Float32x4) Float32x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX +func (x Float32x8) Div(y Float32x8) Float32x8 + +// Div divides elements of two vectors. +// +// Asm: VDIVPS, CPU Feature: AVX512 +func (x Float32x16) Div(y Float32x16) Float32x16 + +// Div divides elements of two vectors. 
+// +// Asm: VDIVPD, CPU Feature: AVX +func (x Float64x2) Div(y Float64x2) Float64x2 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX +func (x Float64x4) Div(y Float64x4) Float64x4 + +// Div divides elements of two vectors. +// +// Asm: VDIVPD, CPU Feature: AVX512 +func (x Float64x8) Div(y Float64x8) Float64x8 + +/* DotProductPairs */ + +// DotProductPairs multiplies the elements and adds the pairs together, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDWD, CPU Feature: AVX +func (x Int16x8) DotProductPairs(y Int16x8) Int32x4 + +// DotProductPairs multiplies the elements and adds the pairs together, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDWD, CPU Feature: AVX2 +func (x Int16x16) DotProductPairs(y Int16x16) Int32x8 + +// DotProductPairs multiplies the elements and adds the pairs together, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDWD, CPU Feature: AVX512 +func (x Int16x32) DotProductPairs(y Int16x32) Int32x16 + +/* DotProductPairsSaturated */ + +// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDUBSW, CPU Feature: AVX +func (x Uint8x16) DotProductPairsSaturated(y Int8x16) Int16x8 + +// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation, +// yielding a vector of half as many elements with twice the input element size. +// +// Asm: VPMADDUBSW, CPU Feature: AVX2 +func (x Uint8x32) DotProductPairsSaturated(y Int8x32) Int16x16 + +// DotProductPairsSaturated multiplies the elements and adds the pairs together with saturation, +// yielding a vector of half as many elements with twice the input element size. 
+// +// Asm: VPMADDUBSW, CPU Feature: AVX512 +func (x Uint8x64) DotProductPairsSaturated(y Int8x64) Int16x32 + +/* DotProductQuadruple */ + +// DotProductQuadruple performs dot products on groups of 4 elements of x and y. +// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. +// +// Asm: VPDPBUSD, CPU Feature: AVXVNNI +func (x Int8x16) DotProductQuadruple(y Uint8x16) Int32x4 + +// DotProductQuadruple performs dot products on groups of 4 elements of x and y. +// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. +// +// Asm: VPDPBUSD, CPU Feature: AVXVNNI +func (x Int8x32) DotProductQuadruple(y Uint8x32) Int32x8 + +// DotProductQuadruple performs dot products on groups of 4 elements of x and y. +// DotProductQuadruple(x, y).Add(z) will be optimized to the full form of the underlying instruction. +// +// Asm: VPDPBUSD, CPU Feature: AVX512VNNI +func (x Int8x64) DotProductQuadruple(y Uint8x64) Int32x16 + +/* DotProductQuadrupleSaturated */ + +// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y. +// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. +// +// Asm: VPDPBUSDS, CPU Feature: AVXVNNI +func (x Int8x16) DotProductQuadrupleSaturated(y Uint8x16) Int32x4 + +// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y. +// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. +// +// Asm: VPDPBUSDS, CPU Feature: AVXVNNI +func (x Int8x32) DotProductQuadrupleSaturated(y Uint8x32) Int32x8 + +// DotProductQuadrupleSaturated performs dot products on groups of 4 elements of x and y. +// DotProductQuadrupleSaturated(x, y).Add(z) will be optimized to the full form of the underlying instruction. 
+// +// Asm: VPDPBUSDS, CPU Feature: AVX512VNNI +func (x Int8x64) DotProductQuadrupleSaturated(y Uint8x64) Int32x16 + +/* Equal */ + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX +func (x Int8x16) Equal(y Int8x16) Mask8x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX2 +func (x Int8x32) Equal(y Int8x32) Mask8x32 + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX512 +func (x Int8x64) Equal(y Int8x64) Mask8x64 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX +func (x Int16x8) Equal(y Int16x8) Mask16x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX2 +func (x Int16x16) Equal(y Int16x16) Mask16x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX512 +func (x Int16x32) Equal(y Int16x32) Mask16x32 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX +func (x Int32x4) Equal(y Int32x4) Mask32x4 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX2 +func (x Int32x8) Equal(y Int32x8) Mask32x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX512 +func (x Int32x16) Equal(y Int32x16) Mask32x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX +func (x Int64x2) Equal(y Int64x2) Mask64x2 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX2 +func (x Int64x4) Equal(y Int64x4) Mask64x4 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX512 +func (x Int64x8) Equal(y Int64x8) Mask64x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX +func (x Uint8x16) Equal(y Uint8x16) Mask8x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX2 +func (x Uint8x32) Equal(y Uint8x32) Mask8x32 + +// Equal compares for equality. +// +// Asm: VPCMPEQB, CPU Feature: AVX512 +func (x Uint8x64) Equal(y Uint8x64) Mask8x64 + +// Equal compares for equality. 
+// +// Asm: VPCMPEQW, CPU Feature: AVX +func (x Uint16x8) Equal(y Uint16x8) Mask16x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX2 +func (x Uint16x16) Equal(y Uint16x16) Mask16x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQW, CPU Feature: AVX512 +func (x Uint16x32) Equal(y Uint16x32) Mask16x32 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX +func (x Uint32x4) Equal(y Uint32x4) Mask32x4 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX2 +func (x Uint32x8) Equal(y Uint32x8) Mask32x8 + +// Equal compares for equality. +// +// Asm: VPCMPEQD, CPU Feature: AVX512 +func (x Uint32x16) Equal(y Uint32x16) Mask32x16 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX +func (x Uint64x2) Equal(y Uint64x2) Mask64x2 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX2 +func (x Uint64x4) Equal(y Uint64x4) Mask64x4 + +// Equal compares for equality. +// +// Asm: VPCMPEQQ, CPU Feature: AVX512 +func (x Uint64x8) Equal(y Uint64x8) Mask64x8 + +// Equal compares for equality. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Equal(y Float32x4) Mask32x4 + +// Equal compares for equality. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Equal(y Float32x8) Mask32x8 + +// Equal compares for equality. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) Equal(y Float32x16) Mask32x16 + +// Equal compares for equality. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Equal(y Float64x2) Mask64x2 + +// Equal compares for equality. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Equal(y Float64x4) Mask64x4 + +// Equal compares for equality. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) Equal(y Float64x8) Mask64x8 + +/* Expand */ + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. 
+// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VEXPANDPS, CPU Feature: AVX512 +func (x Float32x4) Expand(mask Mask32x4) Float32x4 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VEXPANDPS, CPU Feature: AVX512 +func (x Float32x8) Expand(mask Mask32x8) Float32x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VEXPANDPS, CPU Feature: AVX512 +func (x Float32x16) Expand(mask Mask32x16) Float32x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VEXPANDPD, CPU Feature: AVX512 +func (x Float64x2) Expand(mask Mask64x2) Float64x2 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VEXPANDPD, CPU Feature: AVX512 +func (x Float64x4) Expand(mask Mask64x4) Float64x4 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VEXPANDPD, CPU Feature: AVX512 +func (x Float64x8) Expand(mask Mask64x8) Float64x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. 
+// +// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2 +func (x Int8x16) Expand(mask Mask8x16) Int8x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2 +func (x Int8x32) Expand(mask Mask8x32) Int8x32 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2 +func (x Int8x64) Expand(mask Mask8x64) Int8x64 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2 +func (x Int16x8) Expand(mask Mask16x8) Int16x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2 +func (x Int16x16) Expand(mask Mask16x16) Int16x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2 +func (x Int16x32) Expand(mask Mask16x32) Int16x32 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDD, CPU Feature: AVX512 +func (x Int32x4) Expand(mask Mask32x4) Int32x4 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. 
+// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDD, CPU Feature: AVX512 +func (x Int32x8) Expand(mask Mask32x8) Int32x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDD, CPU Feature: AVX512 +func (x Int32x16) Expand(mask Mask32x16) Int32x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDQ, CPU Feature: AVX512 +func (x Int64x2) Expand(mask Mask64x2) Int64x2 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDQ, CPU Feature: AVX512 +func (x Int64x4) Expand(mask Mask64x4) Int64x4 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDQ, CPU Feature: AVX512 +func (x Int64x8) Expand(mask Mask64x8) Int64x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2 +func (x Uint8x16) Expand(mask Mask8x16) Uint8x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. 
+// +// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2 +func (x Uint8x32) Expand(mask Mask8x32) Uint8x32 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDB, CPU Feature: AVX512VBMI2 +func (x Uint8x64) Expand(mask Mask8x64) Uint8x64 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) Expand(mask Mask16x8) Uint16x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) Expand(mask Mask16x16) Uint16x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) Expand(mask Mask16x32) Uint16x32 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDD, CPU Feature: AVX512 +func (x Uint32x4) Expand(mask Mask32x4) Uint32x4 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDD, CPU Feature: AVX512 +func (x Uint32x8) Expand(mask Mask32x8) Uint32x8 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. 
+// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDD, CPU Feature: AVX512 +func (x Uint32x16) Expand(mask Mask32x16) Uint32x16 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDQ, CPU Feature: AVX512 +func (x Uint64x2) Expand(mask Mask64x2) Uint64x2 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDQ, CPU Feature: AVX512 +func (x Uint64x4) Expand(mask Mask64x4) Uint64x4 + +// Expand performs an expansion on a vector x whose elements are packed to lower parts. +// The expansion is to distribute elements as indexed by mask, from lower mask elements to upper in order. +// +// Asm: VPEXPANDQ, CPU Feature: AVX512 +func (x Uint64x8) Expand(mask Mask64x8) Uint64x8 + +/* ExtendLo2ToInt64x2 */ + +// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBQ, CPU Feature: AVX +func (x Int8x16) ExtendLo2ToInt64x2() Int64x2 + +// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXWQ, CPU Feature: AVX +func (x Int16x8) ExtendLo2ToInt64x2() Int64x2 + +// ExtendLo2ToInt64x2 converts 2 lowest vector element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXDQ, CPU Feature: AVX +func (x Int32x4) ExtendLo2ToInt64x2() Int64x2 + +/* ExtendLo2ToUint64x2 */ + +// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64. +// The result vector's elements are zero-extended. 
+// +// Asm: VPMOVZXBQ, CPU Feature: AVX +func (x Uint8x16) ExtendLo2ToUint64x2() Uint64x2 + +// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXWQ, CPU Feature: AVX +func (x Uint16x8) ExtendLo2ToUint64x2() Uint64x2 + +// ExtendLo2ToUint64x2 converts 2 lowest vector element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXDQ, CPU Feature: AVX +func (x Uint32x4) ExtendLo2ToUint64x2() Uint64x2 + +/* ExtendLo4ToInt32x4 */ + +// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBD, CPU Feature: AVX +func (x Int8x16) ExtendLo4ToInt32x4() Int32x4 + +// ExtendLo4ToInt32x4 converts 4 lowest vector element values to int32. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXWD, CPU Feature: AVX +func (x Int16x8) ExtendLo4ToInt32x4() Int32x4 + +/* ExtendLo4ToInt64x4 */ + +// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBQ, CPU Feature: AVX2 +func (x Int8x16) ExtendLo4ToInt64x4() Int64x4 + +// ExtendLo4ToInt64x4 converts 4 lowest vector element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXWQ, CPU Feature: AVX2 +func (x Int16x8) ExtendLo4ToInt64x4() Int64x4 + +/* ExtendLo4ToUint32x4 */ + +// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBD, CPU Feature: AVX +func (x Uint8x16) ExtendLo4ToUint32x4() Uint32x4 + +// ExtendLo4ToUint32x4 converts 4 lowest vector element values to uint32. +// The result vector's elements are zero-extended. 
+// +// Asm: VPMOVZXWD, CPU Feature: AVX +func (x Uint16x8) ExtendLo4ToUint32x4() Uint32x4 + +/* ExtendLo4ToUint64x4 */ + +// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBQ, CPU Feature: AVX2 +func (x Uint8x16) ExtendLo4ToUint64x4() Uint64x4 + +// ExtendLo4ToUint64x4 converts 4 lowest vector element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXWQ, CPU Feature: AVX2 +func (x Uint16x8) ExtendLo4ToUint64x4() Uint64x4 + +/* ExtendLo8ToInt16x8 */ + +// ExtendLo8ToInt16x8 converts 8 lowest vector element values to int16. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBW, CPU Feature: AVX +func (x Int8x16) ExtendLo8ToInt16x8() Int16x8 + +/* ExtendLo8ToInt32x8 */ + +// ExtendLo8ToInt32x8 converts 8 lowest vector element values to int32. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBD, CPU Feature: AVX2 +func (x Int8x16) ExtendLo8ToInt32x8() Int32x8 + +/* ExtendLo8ToInt64x8 */ + +// ExtendLo8ToInt64x8 converts 8 lowest vector element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBQ, CPU Feature: AVX512 +func (x Int8x16) ExtendLo8ToInt64x8() Int64x8 + +/* ExtendLo8ToUint16x8 */ + +// ExtendLo8ToUint16x8 converts 8 lowest vector element values to uint16. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBW, CPU Feature: AVX +func (x Uint8x16) ExtendLo8ToUint16x8() Uint16x8 + +/* ExtendLo8ToUint32x8 */ + +// ExtendLo8ToUint32x8 converts 8 lowest vector element values to uint32. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBD, CPU Feature: AVX2 +func (x Uint8x16) ExtendLo8ToUint32x8() Uint32x8 + +/* ExtendLo8ToUint64x8 */ + +// ExtendLo8ToUint64x8 converts 8 lowest vector element values to uint64. +// The result vector's elements are zero-extended. 
+// +// Asm: VPMOVZXBQ, CPU Feature: AVX512 +func (x Uint8x16) ExtendLo8ToUint64x8() Uint64x8 + +/* ExtendToInt16 */ + +// ExtendToInt16 converts element values to int16. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBW, CPU Feature: AVX2 +func (x Int8x16) ExtendToInt16() Int16x16 + +// ExtendToInt16 converts element values to int16. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBW, CPU Feature: AVX512 +func (x Int8x32) ExtendToInt16() Int16x32 + +/* ExtendToInt32 */ + +// ExtendToInt32 converts element values to int32. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXBD, CPU Feature: AVX512 +func (x Int8x16) ExtendToInt32() Int32x16 + +// ExtendToInt32 converts element values to int32. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXWD, CPU Feature: AVX2 +func (x Int16x8) ExtendToInt32() Int32x8 + +// ExtendToInt32 converts element values to int32. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXWD, CPU Feature: AVX512 +func (x Int16x16) ExtendToInt32() Int32x16 + +/* ExtendToInt64 */ + +// ExtendToInt64 converts element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXWQ, CPU Feature: AVX512 +func (x Int16x8) ExtendToInt64() Int64x8 + +// ExtendToInt64 converts element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXDQ, CPU Feature: AVX2 +func (x Int32x4) ExtendToInt64() Int64x4 + +// ExtendToInt64 converts element values to int64. +// The result vector's elements are sign-extended. +// +// Asm: VPMOVSXDQ, CPU Feature: AVX512 +func (x Int32x8) ExtendToInt64() Int64x8 + +/* ExtendToUint16 */ + +// ExtendToUint16 converts element values to uint16. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBW, CPU Feature: AVX2 +func (x Uint8x16) ExtendToUint16() Uint16x16 + +// ExtendToUint16 converts element values to uint16. 
+// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBW, CPU Feature: AVX512 +func (x Uint8x32) ExtendToUint16() Uint16x32 + +/* ExtendToUint32 */ + +// ExtendToUint32 converts element values to uint32. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXBD, CPU Feature: AVX512 +func (x Uint8x16) ExtendToUint32() Uint32x16 + +// ExtendToUint32 converts element values to uint32. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXWD, CPU Feature: AVX2 +func (x Uint16x8) ExtendToUint32() Uint32x8 + +// ExtendToUint32 converts element values to uint32. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXWD, CPU Feature: AVX512 +func (x Uint16x16) ExtendToUint32() Uint32x16 + +/* ExtendToUint64 */ + +// ExtendToUint64 converts element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXWQ, CPU Feature: AVX512 +func (x Uint16x8) ExtendToUint64() Uint64x8 + +// ExtendToUint64 converts element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXDQ, CPU Feature: AVX2 +func (x Uint32x4) ExtendToUint64() Uint64x4 + +// ExtendToUint64 converts element values to uint64. +// The result vector's elements are zero-extended. +// +// Asm: VPMOVZXDQ, CPU Feature: AVX512 +func (x Uint32x8) ExtendToUint64() Uint64x8 + +/* Floor */ + +// Floor rounds elements down to the nearest integer. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x4) Floor() Float32x4 + +// Floor rounds elements down to the nearest integer. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x8) Floor() Float32x8 + +// Floor rounds elements down to the nearest integer. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x2) Floor() Float64x2 + +// Floor rounds elements down to the nearest integer. 
+// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x4) Floor() Float64x4 + +/* FloorScaled */ + +// FloorScaled rounds elements down with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x4) FloorScaled(prec uint8) Float32x4 + +// FloorScaled rounds elements down with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x8) FloorScaled(prec uint8) Float32x8 + +// FloorScaled rounds elements down with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x16) FloorScaled(prec uint8) Float32x16 + +// FloorScaled rounds elements down with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x2) FloorScaled(prec uint8) Float64x2 + +// FloorScaled rounds elements down with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x4) FloorScaled(prec uint8) Float64x4 + +// FloorScaled rounds elements down with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x8) FloorScaled(prec uint8) Float64x8 + +/* FloorScaledResidue */ + +// FloorScaledResidue computes the difference after flooring with specified precision. 
+// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x4) FloorScaledResidue(prec uint8) Float32x4 + +// FloorScaledResidue computes the difference after flooring with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x8) FloorScaledResidue(prec uint8) Float32x8 + +// FloorScaledResidue computes the difference after flooring with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x16) FloorScaledResidue(prec uint8) Float32x16 + +// FloorScaledResidue computes the difference after flooring with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x2) FloorScaledResidue(prec uint8) Float64x2 + +// FloorScaledResidue computes the difference after flooring with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x4) FloorScaledResidue(prec uint8) Float64x4 + +// FloorScaledResidue computes the difference after flooring with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
+// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x8) FloorScaledResidue(prec uint8) Float64x8 + +/* GaloisFieldAffineTransform */ + +// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI +func (x Uint8x16) GaloisFieldAffineTransform(y Uint64x2, b uint8) Uint8x16 + +// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI +func (x Uint8x32) GaloisFieldAffineTransform(y Uint64x4, b uint8) Uint8x32 + +// GaloisFieldAffineTransform computes an affine transformation in GF(2^8): +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. 
+// +// Asm: VGF2P8AFFINEQB, CPU Feature: AVX512GFNI +func (x Uint8x64) GaloisFieldAffineTransform(y Uint64x8, b uint8) Uint8x64 + +/* GaloisFieldAffineTransformInverse */ + +// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8), +// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI +func (x Uint8x16) GaloisFieldAffineTransformInverse(y Uint64x2, b uint8) Uint8x16 + +// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8), +// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. +// +// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI +func (x Uint8x32) GaloisFieldAffineTransformInverse(y Uint64x4, b uint8) Uint8x32 + +// GaloisFieldAffineTransformInverse computes an affine transformation in GF(2^8), +// with x inverted with respect to reduction polynomial x^8 + x^4 + x^3 + x + 1: +// x is a vector of 8-bit vectors, with each adjacent 8 as a group; y is a vector of 8x8 1-bit matrixes; +// b is an 8-bit vector. The affine transformation is y * x + b, with each element of y +// corresponding to a group of 8 elements in x. 
+// +// b results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VGF2P8AFFINEINVQB, CPU Feature: AVX512GFNI +func (x Uint8x64) GaloisFieldAffineTransformInverse(y Uint64x8, b uint8) Uint8x64 + +/* GaloisFieldMul */ + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// +// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI +func (x Uint8x16) GaloisFieldMul(y Uint8x16) Uint8x16 + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// +// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI +func (x Uint8x32) GaloisFieldMul(y Uint8x32) Uint8x32 + +// GaloisFieldMul computes element-wise GF(2^8) multiplication with +// reduction polynomial x^8 + x^4 + x^3 + x + 1. +// +// Asm: VGF2P8MULB, CPU Feature: AVX512GFNI +func (x Uint8x64) GaloisFieldMul(y Uint8x64) Uint8x64 + +/* GetElem */ + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRD, CPU Feature: AVX +func (x Float32x4) GetElem(index uint8) float32 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Float64x2) GetElem(index uint8) float64 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRB, CPU Feature: AVX512 +func (x Int8x16) GetElem(index uint8) int8 + +// GetElem retrieves a single constant-indexed element's value. 
+// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRW, CPU Feature: AVX512 +func (x Int16x8) GetElem(index uint8) int16 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRD, CPU Feature: AVX +func (x Int32x4) GetElem(index uint8) int32 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Int64x2) GetElem(index uint8) int64 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRB, CPU Feature: AVX512 +func (x Uint8x16) GetElem(index uint8) uint8 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRW, CPU Feature: AVX512 +func (x Uint16x8) GetElem(index uint8) uint16 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRD, CPU Feature: AVX +func (x Uint32x4) GetElem(index uint8) uint32 + +// GetElem retrieves a single constant-indexed element's value. +// +// index results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPEXTRQ, CPU Feature: AVX +func (x Uint64x2) GetElem(index uint8) uint64 + +/* GetHi */ + +// GetHi returns the upper half of x. 
+// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float32x8) GetHi() Float32x4 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF64X4, CPU Feature: AVX512 +func (x Float32x16) GetHi() Float32x8 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float64x4) GetHi() Float64x2 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTF64X4, CPU Feature: AVX512 +func (x Float64x8) GetHi() Float64x4 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int8x32) GetHi() Int8x16 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int8x64) GetHi() Int8x32 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int16x16) GetHi() Int16x8 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int16x32) GetHi() Int16x16 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int32x8) GetHi() Int32x4 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int32x16) GetHi() Int32x8 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int64x4) GetHi() Int64x2 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int64x8) GetHi() Int64x4 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint8x32) GetHi() Uint8x16 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint8x64) GetHi() Uint8x32 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint16x16) GetHi() Uint16x8 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint16x32) GetHi() Uint16x16 + +// GetHi returns the upper half of x. 
+// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint32x8) GetHi() Uint32x4 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint32x16) GetHi() Uint32x8 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint64x4) GetHi() Uint64x2 + +// GetHi returns the upper half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint64x8) GetHi() Uint64x4 + +/* GetLo */ + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float32x8) GetLo() Float32x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTF64X4, CPU Feature: AVX512 +func (x Float32x16) GetLo() Float32x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTF128, CPU Feature: AVX +func (x Float64x4) GetLo() Float64x2 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTF64X4, CPU Feature: AVX512 +func (x Float64x8) GetLo() Float64x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int8x32) GetLo() Int8x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int8x64) GetLo() Int8x32 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int16x16) GetLo() Int16x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int16x32) GetLo() Int16x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int32x8) GetLo() Int32x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int32x16) GetLo() Int32x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Int64x4) GetLo() Int64x2 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Int64x8) GetLo() Int64x4 + +// GetLo returns the lower half of x. 
+// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint8x32) GetLo() Uint8x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint8x64) GetLo() Uint8x32 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint16x16) GetLo() Uint16x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint16x32) GetLo() Uint16x16 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint32x8) GetLo() Uint32x4 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint32x16) GetLo() Uint32x8 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI128, CPU Feature: AVX2 +func (x Uint64x4) GetLo() Uint64x2 + +// GetLo returns the lower half of x. +// +// Asm: VEXTRACTI64X4, CPU Feature: AVX512 +func (x Uint64x8) GetLo() Uint64x4 + +/* Greater */ + +// Greater compares for greater than. +// +// Asm: VPCMPGTB, CPU Feature: AVX +func (x Int8x16) Greater(y Int8x16) Mask8x16 + +// Greater compares for greater than. +// +// Asm: VPCMPGTB, CPU Feature: AVX2 +func (x Int8x32) Greater(y Int8x32) Mask8x32 + +// Greater compares for greater than. +// +// Asm: VPCMPGTB, CPU Feature: AVX512 +func (x Int8x64) Greater(y Int8x64) Mask8x64 + +// Greater compares for greater than. +// +// Asm: VPCMPGTW, CPU Feature: AVX +func (x Int16x8) Greater(y Int16x8) Mask16x8 + +// Greater compares for greater than. +// +// Asm: VPCMPGTW, CPU Feature: AVX2 +func (x Int16x16) Greater(y Int16x16) Mask16x16 + +// Greater compares for greater than. +// +// Asm: VPCMPGTW, CPU Feature: AVX512 +func (x Int16x32) Greater(y Int16x32) Mask16x32 + +// Greater compares for greater than. +// +// Asm: VPCMPGTD, CPU Feature: AVX +func (x Int32x4) Greater(y Int32x4) Mask32x4 + +// Greater compares for greater than. 
+// +// Asm: VPCMPGTD, CPU Feature: AVX2 +func (x Int32x8) Greater(y Int32x8) Mask32x8 + +// Greater compares for greater than. +// +// Asm: VPCMPGTD, CPU Feature: AVX512 +func (x Int32x16) Greater(y Int32x16) Mask32x16 + +// Greater compares for greater than. +// +// Asm: VPCMPGTQ, CPU Feature: AVX +func (x Int64x2) Greater(y Int64x2) Mask64x2 + +// Greater compares for greater than. +// +// Asm: VPCMPGTQ, CPU Feature: AVX2 +func (x Int64x4) Greater(y Int64x4) Mask64x4 + +// Greater compares for greater than. +// +// Asm: VPCMPGTQ, CPU Feature: AVX512 +func (x Int64x8) Greater(y Int64x8) Mask64x8 + +// Greater compares for greater than. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Greater(y Float32x4) Mask32x4 + +// Greater compares for greater than. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Greater(y Float32x8) Mask32x8 + +// Greater compares for greater than. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) Greater(y Float32x16) Mask32x16 + +// Greater compares for greater than. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Greater(y Float64x2) Mask64x2 + +// Greater compares for greater than. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Greater(y Float64x4) Mask64x4 + +// Greater compares for greater than. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) Greater(y Float64x8) Mask64x8 + +// Greater compares for greater than. +// +// Asm: VPCMPUB, CPU Feature: AVX512 +func (x Uint8x64) Greater(y Uint8x64) Mask8x64 + +// Greater compares for greater than. +// +// Asm: VPCMPUW, CPU Feature: AVX512 +func (x Uint16x32) Greater(y Uint16x32) Mask16x32 + +// Greater compares for greater than. +// +// Asm: VPCMPUD, CPU Feature: AVX512 +func (x Uint32x16) Greater(y Uint32x16) Mask32x16 + +// Greater compares for greater than. +// +// Asm: VPCMPUQ, CPU Feature: AVX512 +func (x Uint64x8) Greater(y Uint64x8) Mask64x8 + +/* GreaterEqual */ + +// GreaterEqual compares for greater than or equal. 
+// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) GreaterEqual(y Float32x4) Mask32x4 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) GreaterEqual(y Float32x8) Mask32x8 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) GreaterEqual(y Float32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) GreaterEqual(y Float64x2) Mask64x2 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) GreaterEqual(y Float64x4) Mask64x4 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) GreaterEqual(y Float64x8) Mask64x8 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPB, CPU Feature: AVX512 +func (x Int8x64) GreaterEqual(y Int8x64) Mask8x64 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPW, CPU Feature: AVX512 +func (x Int16x32) GreaterEqual(y Int16x32) Mask16x32 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPD, CPU Feature: AVX512 +func (x Int32x16) GreaterEqual(y Int32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPQ, CPU Feature: AVX512 +func (x Int64x8) GreaterEqual(y Int64x8) Mask64x8 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPUB, CPU Feature: AVX512 +func (x Uint8x64) GreaterEqual(y Uint8x64) Mask8x64 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPUW, CPU Feature: AVX512 +func (x Uint16x32) GreaterEqual(y Uint16x32) Mask16x32 + +// GreaterEqual compares for greater than or equal. +// +// Asm: VPCMPUD, CPU Feature: AVX512 +func (x Uint32x16) GreaterEqual(y Uint32x16) Mask32x16 + +// GreaterEqual compares for greater than or equal. 
+// +// Asm: VPCMPUQ, CPU Feature: AVX512 +func (x Uint64x8) GreaterEqual(y Uint64x8) Mask64x8 + +/* InterleaveHi */ + +// InterleaveHi interleaves the elements of the high halves of x and y. +// +// Asm: VPUNPCKHWD, CPU Feature: AVX +func (x Int16x8) InterleaveHi(y Int16x8) Int16x8 + +// InterleaveHi interleaves the elements of the high halves of x and y. +// +// Asm: VPUNPCKHDQ, CPU Feature: AVX +func (x Int32x4) InterleaveHi(y Int32x4) Int32x4 + +// InterleaveHi interleaves the elements of the high halves of x and y. +// +// Asm: VPUNPCKHQDQ, CPU Feature: AVX +func (x Int64x2) InterleaveHi(y Int64x2) Int64x2 + +// InterleaveHi interleaves the elements of the high halves of x and y. +// +// Asm: VPUNPCKHWD, CPU Feature: AVX +func (x Uint16x8) InterleaveHi(y Uint16x8) Uint16x8 + +// InterleaveHi interleaves the elements of the high halves of x and y. +// +// Asm: VPUNPCKHDQ, CPU Feature: AVX +func (x Uint32x4) InterleaveHi(y Uint32x4) Uint32x4 + +// InterleaveHi interleaves the elements of the high halves of x and y. +// +// Asm: VPUNPCKHQDQ, CPU Feature: AVX +func (x Uint64x2) InterleaveHi(y Uint64x2) Uint64x2 + +/* InterleaveHiGrouped */ + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHWD, CPU Feature: AVX2 +func (x Int16x16) InterleaveHiGrouped(y Int16x16) Int16x16 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHWD, CPU Feature: AVX512 +func (x Int16x32) InterleaveHiGrouped(y Int16x32) Int16x32 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHDQ, CPU Feature: AVX2 +func (x Int32x8) InterleaveHiGrouped(y Int32x8) Int32x8 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. 
+// +// Asm: VPUNPCKHDQ, CPU Feature: AVX512 +func (x Int32x16) InterleaveHiGrouped(y Int32x16) Int32x16 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHQDQ, CPU Feature: AVX2 +func (x Int64x4) InterleaveHiGrouped(y Int64x4) Int64x4 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHQDQ, CPU Feature: AVX512 +func (x Int64x8) InterleaveHiGrouped(y Int64x8) Int64x8 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHWD, CPU Feature: AVX2 +func (x Uint16x16) InterleaveHiGrouped(y Uint16x16) Uint16x16 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHWD, CPU Feature: AVX512 +func (x Uint16x32) InterleaveHiGrouped(y Uint16x32) Uint16x32 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHDQ, CPU Feature: AVX2 +func (x Uint32x8) InterleaveHiGrouped(y Uint32x8) Uint32x8 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHDQ, CPU Feature: AVX512 +func (x Uint32x16) InterleaveHiGrouped(y Uint32x16) Uint32x16 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHQDQ, CPU Feature: AVX2 +func (x Uint64x4) InterleaveHiGrouped(y Uint64x4) Uint64x4 + +// InterleaveHiGrouped interleaves the elements of the high half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKHQDQ, CPU Feature: AVX512 +func (x Uint64x8) InterleaveHiGrouped(y Uint64x8) Uint64x8 + +/* InterleaveLo */ + +// InterleaveLo interleaves the elements of the low halves of x and y. 
+// +// Asm: VPUNPCKLWD, CPU Feature: AVX +func (x Int16x8) InterleaveLo(y Int16x8) Int16x8 + +// InterleaveLo interleaves the elements of the low halves of x and y. +// +// Asm: VPUNPCKLDQ, CPU Feature: AVX +func (x Int32x4) InterleaveLo(y Int32x4) Int32x4 + +// InterleaveLo interleaves the elements of the low halves of x and y. +// +// Asm: VPUNPCKLQDQ, CPU Feature: AVX +func (x Int64x2) InterleaveLo(y Int64x2) Int64x2 + +// InterleaveLo interleaves the elements of the low halves of x and y. +// +// Asm: VPUNPCKLWD, CPU Feature: AVX +func (x Uint16x8) InterleaveLo(y Uint16x8) Uint16x8 + +// InterleaveLo interleaves the elements of the low halves of x and y. +// +// Asm: VPUNPCKLDQ, CPU Feature: AVX +func (x Uint32x4) InterleaveLo(y Uint32x4) Uint32x4 + +// InterleaveLo interleaves the elements of the low halves of x and y. +// +// Asm: VPUNPCKLQDQ, CPU Feature: AVX +func (x Uint64x2) InterleaveLo(y Uint64x2) Uint64x2 + +/* InterleaveLoGrouped */ + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLWD, CPU Feature: AVX2 +func (x Int16x16) InterleaveLoGrouped(y Int16x16) Int16x16 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLWD, CPU Feature: AVX512 +func (x Int16x32) InterleaveLoGrouped(y Int16x32) Int16x32 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLDQ, CPU Feature: AVX2 +func (x Int32x8) InterleaveLoGrouped(y Int32x8) Int32x8 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLDQ, CPU Feature: AVX512 +func (x Int32x16) InterleaveLoGrouped(y Int32x16) Int32x16 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. 
+// +// Asm: VPUNPCKLQDQ, CPU Feature: AVX2 +func (x Int64x4) InterleaveLoGrouped(y Int64x4) Int64x4 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLQDQ, CPU Feature: AVX512 +func (x Int64x8) InterleaveLoGrouped(y Int64x8) Int64x8 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLWD, CPU Feature: AVX2 +func (x Uint16x16) InterleaveLoGrouped(y Uint16x16) Uint16x16 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLWD, CPU Feature: AVX512 +func (x Uint16x32) InterleaveLoGrouped(y Uint16x32) Uint16x32 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLDQ, CPU Feature: AVX2 +func (x Uint32x8) InterleaveLoGrouped(y Uint32x8) Uint32x8 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLDQ, CPU Feature: AVX512 +func (x Uint32x16) InterleaveLoGrouped(y Uint32x16) Uint32x16 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLQDQ, CPU Feature: AVX2 +func (x Uint64x4) InterleaveLoGrouped(y Uint64x4) Uint64x4 + +// InterleaveLoGrouped interleaves the elements of the low half of each 128-bit subvector of x and y. +// +// Asm: VPUNPCKLQDQ, CPU Feature: AVX512 +func (x Uint64x8) InterleaveLoGrouped(y Uint64x8) Uint64x8 + +/* IsNan */ + +// IsNan reports, as a mask, which elements are NaN. Use as x.IsNan(x), passing the receiver itself as y. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) IsNan(y Float32x4) Mask32x4 + +// IsNan reports, as a mask, which elements are NaN. Use as x.IsNan(x), passing the receiver itself as y. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) IsNan(y Float32x8) Mask32x8 + +// IsNan reports, as a mask, which elements are NaN. Use as x.IsNan(x), passing the receiver itself as y. 
+// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) IsNan(y Float32x16) Mask32x16 + +// IsNan reports, as a mask, which elements are NaN. Use as x.IsNan(x), passing the receiver itself as y. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) IsNan(y Float64x2) Mask64x2 + +// IsNan reports, as a mask, which elements are NaN. Use as x.IsNan(x), passing the receiver itself as y. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) IsNan(y Float64x4) Mask64x4 + +// IsNan reports, as a mask, which elements are NaN. Use as x.IsNan(x), passing the receiver itself as y. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) IsNan(y Float64x8) Mask64x8 + +/* LeadingZeros */ + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTD, CPU Feature: AVX512 +func (x Int32x4) LeadingZeros() Int32x4 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTD, CPU Feature: AVX512 +func (x Int32x8) LeadingZeros() Int32x8 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTD, CPU Feature: AVX512 +func (x Int32x16) LeadingZeros() Int32x16 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTQ, CPU Feature: AVX512 +func (x Int64x2) LeadingZeros() Int64x2 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTQ, CPU Feature: AVX512 +func (x Int64x4) LeadingZeros() Int64x4 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTQ, CPU Feature: AVX512 +func (x Int64x8) LeadingZeros() Int64x8 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTD, CPU Feature: AVX512 +func (x Uint32x4) LeadingZeros() Uint32x4 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTD, CPU Feature: AVX512 +func (x Uint32x8) LeadingZeros() Uint32x8 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTD, CPU Feature: AVX512 +func (x Uint32x16) LeadingZeros() Uint32x16 + +// LeadingZeros counts the leading zeros of each element in x. 
+// +// Asm: VPLZCNTQ, CPU Feature: AVX512 +func (x Uint64x2) LeadingZeros() Uint64x2 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTQ, CPU Feature: AVX512 +func (x Uint64x4) LeadingZeros() Uint64x4 + +// LeadingZeros counts the leading zeros of each element in x. +// +// Asm: VPLZCNTQ, CPU Feature: AVX512 +func (x Uint64x8) LeadingZeros() Uint64x8 + +/* Less */ + +// Less compares for less than. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) Less(y Float32x4) Mask32x4 + +// Less compares for less than. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) Less(y Float32x8) Mask32x8 + +// Less compares for less than. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) Less(y Float32x16) Mask32x16 + +// Less compares for less than. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) Less(y Float64x2) Mask64x2 + +// Less compares for less than. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) Less(y Float64x4) Mask64x4 + +// Less compares for less than. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) Less(y Float64x8) Mask64x8 + +// Less compares for less than. +// +// Asm: VPCMPB, CPU Feature: AVX512 +func (x Int8x64) Less(y Int8x64) Mask8x64 + +// Less compares for less than. +// +// Asm: VPCMPW, CPU Feature: AVX512 +func (x Int16x32) Less(y Int16x32) Mask16x32 + +// Less compares for less than. +// +// Asm: VPCMPD, CPU Feature: AVX512 +func (x Int32x16) Less(y Int32x16) Mask32x16 + +// Less compares for less than. +// +// Asm: VPCMPQ, CPU Feature: AVX512 +func (x Int64x8) Less(y Int64x8) Mask64x8 + +// Less compares for less than. +// +// Asm: VPCMPUB, CPU Feature: AVX512 +func (x Uint8x64) Less(y Uint8x64) Mask8x64 + +// Less compares for less than. +// +// Asm: VPCMPUW, CPU Feature: AVX512 +func (x Uint16x32) Less(y Uint16x32) Mask16x32 + +// Less compares for less than. 
+// +// Asm: VPCMPUD, CPU Feature: AVX512 +func (x Uint32x16) Less(y Uint32x16) Mask32x16 + +// Less compares for less than. +// +// Asm: VPCMPUQ, CPU Feature: AVX512 +func (x Uint64x8) Less(y Uint64x8) Mask64x8 + +/* LessEqual */ + +// LessEqual compares for less than or equal. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) LessEqual(y Float32x4) Mask32x4 + +// LessEqual compares for less than or equal. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) LessEqual(y Float32x8) Mask32x8 + +// LessEqual compares for less than or equal. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) LessEqual(y Float32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) LessEqual(y Float64x2) Mask64x2 + +// LessEqual compares for less than or equal. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) LessEqual(y Float64x4) Mask64x4 + +// LessEqual compares for less than or equal. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) LessEqual(y Float64x8) Mask64x8 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPB, CPU Feature: AVX512 +func (x Int8x64) LessEqual(y Int8x64) Mask8x64 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPW, CPU Feature: AVX512 +func (x Int16x32) LessEqual(y Int16x32) Mask16x32 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPD, CPU Feature: AVX512 +func (x Int32x16) LessEqual(y Int32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPQ, CPU Feature: AVX512 +func (x Int64x8) LessEqual(y Int64x8) Mask64x8 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPUB, CPU Feature: AVX512 +func (x Uint8x64) LessEqual(y Uint8x64) Mask8x64 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPUW, CPU Feature: AVX512 +func (x Uint16x32) LessEqual(y Uint16x32) Mask16x32 + +// LessEqual compares for less than or equal. 
+// +// Asm: VPCMPUD, CPU Feature: AVX512 +func (x Uint32x16) LessEqual(y Uint32x16) Mask32x16 + +// LessEqual compares for less than or equal. +// +// Asm: VPCMPUQ, CPU Feature: AVX512 +func (x Uint64x8) LessEqual(y Uint64x8) Mask64x8 + +/* Max */ + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX +func (x Float32x4) Max(y Float32x4) Float32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX +func (x Float32x8) Max(y Float32x8) Float32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPS, CPU Feature: AVX512 +func (x Float32x16) Max(y Float32x16) Float32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX +func (x Float64x2) Max(y Float64x2) Float64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX +func (x Float64x4) Max(y Float64x4) Float64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VMAXPD, CPU Feature: AVX512 +func (x Float64x8) Max(y Float64x8) Float64x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX +func (x Int8x16) Max(y Int8x16) Int8x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX2 +func (x Int8x32) Max(y Int8x32) Int8x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSB, CPU Feature: AVX512 +func (x Int8x64) Max(y Int8x64) Int8x64 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX +func (x Int16x8) Max(y Int16x8) Int16x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSW, CPU Feature: AVX2 +func (x Int16x16) Max(y Int16x16) Int16x16 + +// Max computes the maximum of corresponding elements. 
+// +// Asm: VPMAXSW, CPU Feature: AVX512 +func (x Int16x32) Max(y Int16x32) Int16x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX +func (x Int32x4) Max(y Int32x4) Int32x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX2 +func (x Int32x8) Max(y Int32x8) Int32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSD, CPU Feature: AVX512 +func (x Int32x16) Max(y Int32x16) Int32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512 +func (x Int64x2) Max(y Int64x2) Int64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512 +func (x Int64x4) Max(y Int64x4) Int64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXSQ, CPU Feature: AVX512 +func (x Int64x8) Max(y Int64x8) Int64x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX +func (x Uint8x16) Max(y Uint8x16) Uint8x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX2 +func (x Uint8x32) Max(y Uint8x32) Uint8x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUB, CPU Feature: AVX512 +func (x Uint8x64) Max(y Uint8x64) Uint8x64 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX +func (x Uint16x8) Max(y Uint16x8) Uint16x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX2 +func (x Uint16x16) Max(y Uint16x16) Uint16x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUW, CPU Feature: AVX512 +func (x Uint16x32) Max(y Uint16x32) Uint16x32 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX +func (x Uint32x4) Max(y Uint32x4) Uint32x4 + +// Max computes the maximum of corresponding elements. 
+// +// Asm: VPMAXUD, CPU Feature: AVX2 +func (x Uint32x8) Max(y Uint32x8) Uint32x8 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUD, CPU Feature: AVX512 +func (x Uint32x16) Max(y Uint32x16) Uint32x16 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512 +func (x Uint64x2) Max(y Uint64x2) Uint64x2 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512 +func (x Uint64x4) Max(y Uint64x4) Uint64x4 + +// Max computes the maximum of corresponding elements. +// +// Asm: VPMAXUQ, CPU Feature: AVX512 +func (x Uint64x8) Max(y Uint64x8) Uint64x8 + +/* Min */ + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX +func (x Float32x4) Min(y Float32x4) Float32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX +func (x Float32x8) Min(y Float32x8) Float32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPS, CPU Feature: AVX512 +func (x Float32x16) Min(y Float32x16) Float32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX +func (x Float64x2) Min(y Float64x2) Float64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX +func (x Float64x4) Min(y Float64x4) Float64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VMINPD, CPU Feature: AVX512 +func (x Float64x8) Min(y Float64x8) Float64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX +func (x Int8x16) Min(y Int8x16) Int8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSB, CPU Feature: AVX2 +func (x Int8x32) Min(y Int8x32) Int8x32 + +// Min computes the minimum of corresponding elements. 
+// +// Asm: VPMINSB, CPU Feature: AVX512 +func (x Int8x64) Min(y Int8x64) Int8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX +func (x Int16x8) Min(y Int16x8) Int16x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX2 +func (x Int16x16) Min(y Int16x16) Int16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSW, CPU Feature: AVX512 +func (x Int16x32) Min(y Int16x32) Int16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX +func (x Int32x4) Min(y Int32x4) Int32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX2 +func (x Int32x8) Min(y Int32x8) Int32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSD, CPU Feature: AVX512 +func (x Int32x16) Min(y Int32x16) Int32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512 +func (x Int64x2) Min(y Int64x2) Int64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512 +func (x Int64x4) Min(y Int64x4) Int64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINSQ, CPU Feature: AVX512 +func (x Int64x8) Min(y Int64x8) Int64x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX +func (x Uint8x16) Min(y Uint8x16) Uint8x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX2 +func (x Uint8x32) Min(y Uint8x32) Uint8x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUB, CPU Feature: AVX512 +func (x Uint8x64) Min(y Uint8x64) Uint8x64 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX +func (x Uint16x8) Min(y Uint16x8) Uint16x8 + +// Min computes the minimum of corresponding elements. 
+// +// Asm: VPMINUW, CPU Feature: AVX2 +func (x Uint16x16) Min(y Uint16x16) Uint16x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUW, CPU Feature: AVX512 +func (x Uint16x32) Min(y Uint16x32) Uint16x32 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX +func (x Uint32x4) Min(y Uint32x4) Uint32x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX2 +func (x Uint32x8) Min(y Uint32x8) Uint32x8 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUD, CPU Feature: AVX512 +func (x Uint32x16) Min(y Uint32x16) Uint32x16 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512 +func (x Uint64x2) Min(y Uint64x2) Uint64x2 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512 +func (x Uint64x4) Min(y Uint64x4) Uint64x4 + +// Min computes the minimum of corresponding elements. +// +// Asm: VPMINUQ, CPU Feature: AVX512 +func (x Uint64x8) Min(y Uint64x8) Uint64x8 + +/* Mul */ + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VMULPS, CPU Feature: AVX +func (x Float32x4) Mul(y Float32x4) Float32x4 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VMULPS, CPU Feature: AVX +func (x Float32x8) Mul(y Float32x8) Float32x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VMULPS, CPU Feature: AVX512 +func (x Float32x16) Mul(y Float32x16) Float32x16 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VMULPD, CPU Feature: AVX +func (x Float64x2) Mul(y Float64x2) Float64x2 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VMULPD, CPU Feature: AVX +func (x Float64x4) Mul(y Float64x4) Float64x4 + +// Mul multiplies corresponding elements of two vectors. 
+// +// Asm: VMULPD, CPU Feature: AVX512 +func (x Float64x8) Mul(y Float64x8) Float64x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLW, CPU Feature: AVX +func (x Int16x8) Mul(y Int16x8) Int16x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLW, CPU Feature: AVX2 +func (x Int16x16) Mul(y Int16x16) Int16x16 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLW, CPU Feature: AVX512 +func (x Int16x32) Mul(y Int16x32) Int16x32 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLD, CPU Feature: AVX +func (x Int32x4) Mul(y Int32x4) Int32x4 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLD, CPU Feature: AVX2 +func (x Int32x8) Mul(y Int32x8) Int32x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLD, CPU Feature: AVX512 +func (x Int32x16) Mul(y Int32x16) Int32x16 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLQ, CPU Feature: AVX512 +func (x Int64x2) Mul(y Int64x2) Int64x2 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLQ, CPU Feature: AVX512 +func (x Int64x4) Mul(y Int64x4) Int64x4 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLQ, CPU Feature: AVX512 +func (x Int64x8) Mul(y Int64x8) Int64x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLW, CPU Feature: AVX +func (x Uint16x8) Mul(y Uint16x8) Uint16x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLW, CPU Feature: AVX2 +func (x Uint16x16) Mul(y Uint16x16) Uint16x16 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLW, CPU Feature: AVX512 +func (x Uint16x32) Mul(y Uint16x32) Uint16x32 + +// Mul multiplies corresponding elements of two vectors. 
+// +// Asm: VPMULLD, CPU Feature: AVX +func (x Uint32x4) Mul(y Uint32x4) Uint32x4 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLD, CPU Feature: AVX2 +func (x Uint32x8) Mul(y Uint32x8) Uint32x8 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLD, CPU Feature: AVX512 +func (x Uint32x16) Mul(y Uint32x16) Uint32x16 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLQ, CPU Feature: AVX512 +func (x Uint64x2) Mul(y Uint64x2) Uint64x2 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLQ, CPU Feature: AVX512 +func (x Uint64x4) Mul(y Uint64x4) Uint64x4 + +// Mul multiplies corresponding elements of two vectors. +// +// Asm: VPMULLQ, CPU Feature: AVX512 +func (x Uint64x8) Mul(y Uint64x8) Uint64x8 + +/* MulAdd */ + +// MulAdd performs a fused (x * y) + z. +// +// Asm: VFMADD213PS, CPU Feature: AVX512 +func (x Float32x4) MulAdd(y Float32x4, z Float32x4) Float32x4 + +// MulAdd performs a fused (x * y) + z. +// +// Asm: VFMADD213PS, CPU Feature: AVX512 +func (x Float32x8) MulAdd(y Float32x8, z Float32x8) Float32x8 + +// MulAdd performs a fused (x * y) + z. +// +// Asm: VFMADD213PS, CPU Feature: AVX512 +func (x Float32x16) MulAdd(y Float32x16, z Float32x16) Float32x16 + +// MulAdd performs a fused (x * y) + z. +// +// Asm: VFMADD213PD, CPU Feature: AVX512 +func (x Float64x2) MulAdd(y Float64x2, z Float64x2) Float64x2 + +// MulAdd performs a fused (x * y) + z. +// +// Asm: VFMADD213PD, CPU Feature: AVX512 +func (x Float64x4) MulAdd(y Float64x4, z Float64x4) Float64x4 + +// MulAdd performs a fused (x * y) + z. +// +// Asm: VFMADD213PD, CPU Feature: AVX512 +func (x Float64x8) MulAdd(y Float64x8, z Float64x8) Float64x8 + +/* MulAddSub */ + +// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. 
+// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512 +func (x Float32x4) MulAddSub(y Float32x4, z Float32x4) Float32x4 + +// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512 +func (x Float32x8) MulAddSub(y Float32x8, z Float32x8) Float32x8 + +// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +// +// Asm: VFMADDSUB213PS, CPU Feature: AVX512 +func (x Float32x16) MulAddSub(y Float32x16, z Float32x16) Float32x16 + +// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512 +func (x Float64x2) MulAddSub(y Float64x2, z Float64x2) Float64x2 + +// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512 +func (x Float64x4) MulAddSub(y Float64x4, z Float64x4) Float64x4 + +// MulAddSub performs a fused (x * y) - z for odd-indexed elements, and (x * y) + z for even-indexed elements. +// +// Asm: VFMADDSUB213PD, CPU Feature: AVX512 +func (x Float64x8) MulAddSub(y Float64x8, z Float64x8) Float64x8 + +/* MulEvenWiden */ + +// MulEvenWiden multiplies even-indexed elements, widening the result. +// Result[i] = x.Even[i] * y.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX +func (x Int32x4) MulEvenWiden(y Int32x4) Int64x2 + +// MulEvenWiden multiplies even-indexed elements, widening the result. +// Result[i] = x.Even[i] * y.Even[i]. +// +// Asm: VPMULDQ, CPU Feature: AVX2 +func (x Int32x8) MulEvenWiden(y Int32x8) Int64x4 + +// MulEvenWiden multiplies even-indexed elements, widening the result. +// Result[i] = x.Even[i] * y.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX +func (x Uint32x4) MulEvenWiden(y Uint32x4) Uint64x2 + +// MulEvenWiden multiplies even-indexed elements, widening the result. 
+// Result[i] = x.Even[i] * y.Even[i]. +// +// Asm: VPMULUDQ, CPU Feature: AVX2 +func (x Uint32x8) MulEvenWiden(y Uint32x8) Uint64x4 + +/* MulHigh */ + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHW, CPU Feature: AVX +func (x Int16x8) MulHigh(y Int16x8) Int16x8 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHW, CPU Feature: AVX2 +func (x Int16x16) MulHigh(y Int16x16) Int16x16 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHW, CPU Feature: AVX512 +func (x Int16x32) MulHigh(y Int16x32) Int16x32 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHUW, CPU Feature: AVX +func (x Uint16x8) MulHigh(y Uint16x8) Uint16x8 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHUW, CPU Feature: AVX2 +func (x Uint16x16) MulHigh(y Uint16x16) Uint16x16 + +// MulHigh multiplies elements and stores the high part of the result. +// +// Asm: VPMULHUW, CPU Feature: AVX512 +func (x Uint16x32) MulHigh(y Uint16x32) Uint16x32 + +/* MulSubAdd */ + +// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512 +func (x Float32x4) MulSubAdd(y Float32x4, z Float32x4) Float32x4 + +// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512 +func (x Float32x8) MulSubAdd(y Float32x8, z Float32x8) Float32x8 + +// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. +// +// Asm: VFMSUBADD213PS, CPU Feature: AVX512 +func (x Float32x16) MulSubAdd(y Float32x16, z Float32x16) Float32x16 + +// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. 
+// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512 +func (x Float64x2) MulSubAdd(y Float64x2, z Float64x2) Float64x2 + +// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512 +func (x Float64x4) MulSubAdd(y Float64x4, z Float64x4) Float64x4 + +// MulSubAdd performs a fused (x * y) + z for odd-indexed elements, and (x * y) - z for even-indexed elements. +// +// Asm: VFMSUBADD213PD, CPU Feature: AVX512 +func (x Float64x8) MulSubAdd(y Float64x8, z Float64x8) Float64x8 + +/* NotEqual */ + +// NotEqual compares for inequality. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x4) NotEqual(y Float32x4) Mask32x4 + +// NotEqual compares for inequality. +// +// Asm: VCMPPS, CPU Feature: AVX +func (x Float32x8) NotEqual(y Float32x8) Mask32x8 + +// NotEqual compares for inequality. +// +// Asm: VCMPPS, CPU Feature: AVX512 +func (x Float32x16) NotEqual(y Float32x16) Mask32x16 + +// NotEqual compares for inequality. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x2) NotEqual(y Float64x2) Mask64x2 + +// NotEqual compares for inequality. +// +// Asm: VCMPPD, CPU Feature: AVX +func (x Float64x4) NotEqual(y Float64x4) Mask64x4 + +// NotEqual compares for inequality. +// +// Asm: VCMPPD, CPU Feature: AVX512 +func (x Float64x8) NotEqual(y Float64x8) Mask64x8 + +// NotEqual compares for inequality. +// +// Asm: VPCMPB, CPU Feature: AVX512 +func (x Int8x64) NotEqual(y Int8x64) Mask8x64 + +// NotEqual compares for inequality. +// +// Asm: VPCMPW, CPU Feature: AVX512 +func (x Int16x32) NotEqual(y Int16x32) Mask16x32 + +// NotEqual compares for inequality. +// +// Asm: VPCMPD, CPU Feature: AVX512 +func (x Int32x16) NotEqual(y Int32x16) Mask32x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPQ, CPU Feature: AVX512 +func (x Int64x8) NotEqual(y Int64x8) Mask64x8 + +// NotEqual compares for inequality. 
+// +// Asm: VPCMPUB, CPU Feature: AVX512 +func (x Uint8x64) NotEqual(y Uint8x64) Mask8x64 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUW, CPU Feature: AVX512 +func (x Uint16x32) NotEqual(y Uint16x32) Mask16x32 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUD, CPU Feature: AVX512 +func (x Uint32x16) NotEqual(y Uint32x16) Mask32x16 + +// NotEqual compares for inequality. +// +// Asm: VPCMPUQ, CPU Feature: AVX512 +func (x Uint64x8) NotEqual(y Uint64x8) Mask64x8 + +/* OnesCount */ + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512BITALG +func (x Int8x16) OnesCount() Int8x16 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512BITALG +func (x Int8x32) OnesCount() Int8x32 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512BITALG +func (x Int8x64) OnesCount() Int8x64 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512BITALG +func (x Int16x8) OnesCount() Int16x8 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512BITALG +func (x Int16x16) OnesCount() Int16x16 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512BITALG +func (x Int16x32) OnesCount() Int16x32 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ +func (x Int32x4) OnesCount() Int32x4 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ +func (x Int32x8) OnesCount() Int32x8 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ +func (x Int32x16) OnesCount() Int32x16 + +// OnesCount counts the number of set bits in each element. 
+// +// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ +func (x Int64x2) OnesCount() Int64x2 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ +func (x Int64x4) OnesCount() Int64x4 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ +func (x Int64x8) OnesCount() Int64x8 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512BITALG +func (x Uint8x16) OnesCount() Uint8x16 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512BITALG +func (x Uint8x32) OnesCount() Uint8x32 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTB, CPU Feature: AVX512BITALG +func (x Uint8x64) OnesCount() Uint8x64 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512BITALG +func (x Uint16x8) OnesCount() Uint16x8 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512BITALG +func (x Uint16x16) OnesCount() Uint16x16 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTW, CPU Feature: AVX512BITALG +func (x Uint16x32) OnesCount() Uint16x32 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ +func (x Uint32x4) OnesCount() Uint32x4 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ +func (x Uint32x8) OnesCount() Uint32x8 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTD, CPU Feature: AVX512VPOPCNTDQ +func (x Uint32x16) OnesCount() Uint32x16 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ +func (x Uint64x2) OnesCount() Uint64x2 + +// OnesCount counts the number of set bits in each element. 
+// +// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ +func (x Uint64x4) OnesCount() Uint64x4 + +// OnesCount counts the number of set bits in each element. +// +// Asm: VPOPCNTQ, CPU Feature: AVX512VPOPCNTDQ +func (x Uint64x8) OnesCount() Uint64x8 + +/* Or */ + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Int8x16) Or(y Int8x16) Int8x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Int8x32) Or(y Int8x32) Int8x32 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512 +func (x Int8x64) Or(y Int8x64) Int8x64 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Int16x8) Or(y Int16x8) Int16x8 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Int16x16) Or(y Int16x16) Int16x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512 +func (x Int16x32) Or(y Int16x32) Int16x32 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Int32x4) Or(y Int32x4) Int32x4 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Int32x8) Or(y Int32x8) Int32x8 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512 +func (x Int32x16) Or(y Int32x16) Int32x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Int64x2) Or(y Int64x2) Int64x2 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Int64x4) Or(y Int64x4) Int64x4 + +// Or performs a bitwise OR operation between two vectors. 
+// +// Asm: VPORQ, CPU Feature: AVX512 +func (x Int64x8) Or(y Int64x8) Int64x8 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Uint8x16) Or(y Uint8x16) Uint8x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint8x32) Or(y Uint8x32) Uint8x32 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512 +func (x Uint8x64) Or(y Uint8x64) Uint8x64 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Uint16x8) Or(y Uint16x8) Uint16x8 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint16x16) Or(y Uint16x16) Uint16x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512 +func (x Uint16x32) Or(y Uint16x32) Uint16x32 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Uint32x4) Or(y Uint32x4) Uint32x4 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint32x8) Or(y Uint32x8) Uint32x8 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPORD, CPU Feature: AVX512 +func (x Uint32x16) Or(y Uint32x16) Uint32x16 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX +func (x Uint64x2) Or(y Uint64x2) Uint64x2 + +// Or performs a bitwise OR operation between two vectors. +// +// Asm: VPOR, CPU Feature: AVX2 +func (x Uint64x4) Or(y Uint64x4) Uint64x4 + +// Or performs a bitwise OR operation between two vectors. 
+// +// Asm: VPORQ, CPU Feature: AVX512 +func (x Uint64x8) Or(y Uint64x8) Uint64x8 + +/* Permute */ + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMB, CPU Feature: AVX512VBMI +func (x Int8x16) Permute(indices Uint8x16) Int8x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMB, CPU Feature: AVX512VBMI +func (x Uint8x16) Permute(indices Uint8x16) Uint8x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 5 bits (values 0-31) of each element of indices are used. +// +// Asm: VPERMB, CPU Feature: AVX512VBMI +func (x Int8x32) Permute(indices Uint8x32) Int8x32 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 5 bits (values 0-31) of each element of indices are used. +// +// Asm: VPERMB, CPU Feature: AVX512VBMI +func (x Uint8x32) Permute(indices Uint8x32) Uint8x32 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 6 bits (values 0-63) of each element of indices are used. +// +// Asm: VPERMB, CPU Feature: AVX512VBMI +func (x Int8x64) Permute(indices Uint8x64) Int8x64 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 6 bits (values 0-63) of each element of indices are used. +// +// Asm: VPERMB, CPU Feature: AVX512VBMI +func (x Uint8x64) Permute(indices Uint8x64) Uint8x64 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], 
x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMW, CPU Feature: AVX512 +func (x Int16x8) Permute(indices Uint16x8) Int16x8 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMW, CPU Feature: AVX512 +func (x Uint16x8) Permute(indices Uint16x8) Uint16x8 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMW, CPU Feature: AVX512 +func (x Int16x16) Permute(indices Uint16x16) Int16x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMW, CPU Feature: AVX512 +func (x Uint16x16) Permute(indices Uint16x16) Uint16x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 5 bits (values 0-31) of each element of indices are used. +// +// Asm: VPERMW, CPU Feature: AVX512 +func (x Int16x32) Permute(indices Uint16x32) Int16x32 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 5 bits (values 0-31) of each element of indices are used. +// +// Asm: VPERMW, CPU Feature: AVX512 +func (x Uint16x32) Permute(indices Uint16x32) Uint16x32 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMPS, CPU Feature: AVX2 +func (x Float32x8) Permute(indices Uint32x8) Float32x8 + +// Permute 
performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMD, CPU Feature: AVX2 +func (x Int32x8) Permute(indices Uint32x8) Int32x8 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMD, CPU Feature: AVX2 +func (x Uint32x8) Permute(indices Uint32x8) Uint32x8 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMPS, CPU Feature: AVX512 +func (x Float32x16) Permute(indices Uint32x16) Float32x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMD, CPU Feature: AVX512 +func (x Int32x16) Permute(indices Uint32x16) Int32x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 4 bits (values 0-15) of each element of indices are used. +// +// Asm: VPERMD, CPU Feature: AVX512 +func (x Uint32x16) Permute(indices Uint32x16) Uint32x16 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 2 bits (values 0-3) of each element of indices are used. +// +// Asm: VPERMPD, CPU Feature: AVX512 +func (x Float64x4) Permute(indices Uint64x4) Float64x4 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 2 bits (values 0-3) of each element of indices are used. +// +// Asm: VPERMQ, CPU 
Feature: AVX512 +func (x Int64x4) Permute(indices Uint64x4) Int64x4 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 2 bits (values 0-3) of each element of indices are used. +// +// Asm: VPERMQ, CPU Feature: AVX512 +func (x Uint64x4) Permute(indices Uint64x4) Uint64x4 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMPD, CPU Feature: AVX512 +func (x Float64x8) Permute(indices Uint64x8) Float64x8 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMQ, CPU Feature: AVX512 +func (x Int64x8) Permute(indices Uint64x8) Int64x8 + +// Permute performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The low 3 bits (values 0-7) of each element of indices are used. +// +// Asm: VPERMQ, CPU Feature: AVX512 +func (x Uint64x8) Permute(indices Uint64x8) Uint64x8 + +/* PermuteOrZero */ + +// PermuteOrZero performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The lower four bits of each byte-sized index in indices select an element from x, +// unless the index's sign bit is set in which case zero is used instead. +// +// Asm: VPSHUFB, CPU Feature: AVX +func (x Int8x16) PermuteOrZero(indices Int8x16) Int8x16 + +// PermuteOrZero performs a full permutation of vector x using indices: +// result := {x[indices[0]], x[indices[1]], ..., x[indices[n]]} +// The lower four bits of each byte-sized index in indices select an element from x, +// unless the index's sign bit is set in which case zero is used instead. 
+// +// Asm: VPSHUFB, CPU Feature: AVX +func (x Uint8x16) PermuteOrZero(indices Int8x16) Uint8x16 + +/* PermuteOrZeroGrouped */ + +// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, +// unless the index's sign bit is set in which case zero is used instead. +// Each group is 128 bits wide. +// +// Asm: VPSHUFB, CPU Feature: AVX2 +func (x Int8x32) PermuteOrZeroGrouped(indices Int8x32) Int8x32 + +// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, +// unless the index's sign bit is set in which case zero is used instead. +// Each group is 128 bits wide. +// +// Asm: VPSHUFB, CPU Feature: AVX512 +func (x Int8x64) PermuteOrZeroGrouped(indices Int8x64) Int8x64 + +// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, +// unless the index's sign bit is set in which case zero is used instead. +// Each group is 128 bits wide. 
+// +// Asm: VPSHUFB, CPU Feature: AVX2 +func (x Uint8x32) PermuteOrZeroGrouped(indices Int8x32) Uint8x32 + +// PermuteOrZeroGrouped performs a grouped permutation of vector x using indices: +// result = {x_group0[indices[0]], x_group0[indices[1]], ..., x_group1[indices[16]], x_group1[indices[17]], ...} +// The lower four bits of each byte-sized index in indices select an element from its corresponding group in x, +// unless the index's sign bit is set in which case zero is used instead. +// Each group is 128 bits wide. +// +// Asm: VPSHUFB, CPU Feature: AVX512 +func (x Uint8x64) PermuteOrZeroGrouped(indices Int8x64) Uint8x64 + +/* Reciprocal */ + +// Reciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCPPS, CPU Feature: AVX +func (x Float32x4) Reciprocal() Float32x4 + +// Reciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCPPS, CPU Feature: AVX +func (x Float32x8) Reciprocal() Float32x8 + +// Reciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PS, CPU Feature: AVX512 +func (x Float32x16) Reciprocal() Float32x16 + +// Reciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512 +func (x Float64x2) Reciprocal() Float64x2 + +// Reciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512 +func (x Float64x4) Reciprocal() Float64x4 + +// Reciprocal computes an approximate reciprocal of each element. +// +// Asm: VRCP14PD, CPU Feature: AVX512 +func (x Float64x8) Reciprocal() Float64x8 + +/* ReciprocalSqrt */ + +// ReciprocalSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRTPS, CPU Feature: AVX +func (x Float32x4) ReciprocalSqrt() Float32x4 + +// ReciprocalSqrt computes an approximate reciprocal of the square root of each element. 
+// +// Asm: VRSQRTPS, CPU Feature: AVX +func (x Float32x8) ReciprocalSqrt() Float32x8 + +// ReciprocalSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PS, CPU Feature: AVX512 +func (x Float32x16) ReciprocalSqrt() Float32x16 + +// ReciprocalSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512 +func (x Float64x2) ReciprocalSqrt() Float64x2 + +// ReciprocalSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512 +func (x Float64x4) ReciprocalSqrt() Float64x4 + +// ReciprocalSqrt computes an approximate reciprocal of the square root of each element. +// +// Asm: VRSQRT14PD, CPU Feature: AVX512 +func (x Float64x8) ReciprocalSqrt() Float64x8 + +/* RotateAllLeft */ + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLD, CPU Feature: AVX512 +func (x Int32x4) RotateAllLeft(shift uint8) Int32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLD, CPU Feature: AVX512 +func (x Int32x8) RotateAllLeft(shift uint8) Int32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLD, CPU Feature: AVX512 +func (x Int32x16) RotateAllLeft(shift uint8) Int32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
+// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLQ, CPU Feature: AVX512 +func (x Int64x2) RotateAllLeft(shift uint8) Int64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLQ, CPU Feature: AVX512 +func (x Int64x4) RotateAllLeft(shift uint8) Int64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLQ, CPU Feature: AVX512 +func (x Int64x8) RotateAllLeft(shift uint8) Int64x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLD, CPU Feature: AVX512 +func (x Uint32x4) RotateAllLeft(shift uint8) Uint32x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLD, CPU Feature: AVX512 +func (x Uint32x8) RotateAllLeft(shift uint8) Uint32x8 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPROLD, CPU Feature: AVX512 +func (x Uint32x16) RotateAllLeft(shift uint8) Uint32x16 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. 
+// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPROLQ, CPU Feature: AVX512 +func (x Uint64x2) RotateAllLeft(shift uint8) Uint64x2 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPROLQ, CPU Feature: AVX512 +func (x Uint64x4) RotateAllLeft(shift uint8) Uint64x4 + +// RotateAllLeft rotates each element to the left by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPROLQ, CPU Feature: AVX512 +func (x Uint64x8) RotateAllLeft(shift uint8) Uint64x8 + +/* RotateAllRight */ + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORD, CPU Feature: AVX512 +func (x Int32x4) RotateAllRight(shift uint8) Int32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORD, CPU Feature: AVX512 +func (x Int32x8) RotateAllRight(shift uint8) Int32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORD, CPU Feature: AVX512 +func (x Int32x16) RotateAllRight(shift uint8) Int32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORQ, CPU Feature: AVX512 +func (x Int64x2) RotateAllRight(shift uint8) Int64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORQ, CPU Feature: AVX512 +func (x Int64x4) RotateAllRight(shift uint8) Int64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORQ, CPU Feature: AVX512 +func (x Int64x8) RotateAllRight(shift uint8) Int64x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORD, CPU Feature: AVX512 +func (x Uint32x4) RotateAllRight(shift uint8) Uint32x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORD, CPU Feature: AVX512 +func (x Uint32x8) RotateAllRight(shift uint8) Uint32x8 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORD, CPU Feature: AVX512 +func (x Uint32x16) RotateAllRight(shift uint8) Uint32x16 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. 
+// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORQ, CPU Feature: AVX512 +func (x Uint64x2) RotateAllRight(shift uint8) Uint64x2 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORQ, CPU Feature: AVX512 +func (x Uint64x4) RotateAllRight(shift uint8) Uint64x4 + +// RotateAllRight rotates each element to the right by the number of bits specified by the immediate. +// +// shift results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VPRORQ, CPU Feature: AVX512 +func (x Uint64x8) RotateAllRight(shift uint8) Uint64x8 + +/* RotateLeft */ + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512 +func (x Int32x4) RotateLeft(y Int32x4) Int32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512 +func (x Int32x8) RotateLeft(y Int32x8) Int32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512 +func (x Int32x16) RotateLeft(y Int32x16) Int32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512 +func (x Int64x2) RotateLeft(y Int64x2) Int64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. 
+// +// Asm: VPROLVQ, CPU Feature: AVX512 +func (x Int64x4) RotateLeft(y Int64x4) Int64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512 +func (x Int64x8) RotateLeft(y Int64x8) Int64x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512 +func (x Uint32x4) RotateLeft(y Uint32x4) Uint32x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512 +func (x Uint32x8) RotateLeft(y Uint32x8) Uint32x8 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVD, CPU Feature: AVX512 +func (x Uint32x16) RotateLeft(y Uint32x16) Uint32x16 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512 +func (x Uint64x2) RotateLeft(y Uint64x2) Uint64x2 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512 +func (x Uint64x4) RotateLeft(y Uint64x4) Uint64x4 + +// RotateLeft rotates each element in x to the left by the number of bits specified by y's corresponding elements. +// +// Asm: VPROLVQ, CPU Feature: AVX512 +func (x Uint64x8) RotateLeft(y Uint64x8) Uint64x8 + +/* RotateRight */ + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512 +func (x Int32x4) RotateRight(y Int32x4) Int32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
+// +// Asm: VPRORVD, CPU Feature: AVX512 +func (x Int32x8) RotateRight(y Int32x8) Int32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512 +func (x Int32x16) RotateRight(y Int32x16) Int32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512 +func (x Int64x2) RotateRight(y Int64x2) Int64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512 +func (x Int64x4) RotateRight(y Int64x4) Int64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512 +func (x Int64x8) RotateRight(y Int64x8) Int64x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512 +func (x Uint32x4) RotateRight(y Uint32x4) Uint32x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512 +func (x Uint32x8) RotateRight(y Uint32x8) Uint32x8 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVD, CPU Feature: AVX512 +func (x Uint32x16) RotateRight(y Uint32x16) Uint32x16 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512 +func (x Uint64x2) RotateRight(y Uint64x2) Uint64x2 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. 
+// +// Asm: VPRORVQ, CPU Feature: AVX512 +func (x Uint64x4) RotateRight(y Uint64x4) Uint64x4 + +// RotateRight rotates each element in x to the right by the number of bits specified by y's corresponding elements. +// +// Asm: VPRORVQ, CPU Feature: AVX512 +func (x Uint64x8) RotateRight(y Uint64x8) Uint64x8 + +/* RoundToEven */ + +// RoundToEven rounds elements to the nearest integer. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x4) RoundToEven() Float32x4 + +// RoundToEven rounds elements to the nearest integer. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x8) RoundToEven() Float32x8 + +// RoundToEven rounds elements to the nearest integer. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x2) RoundToEven() Float64x2 + +// RoundToEven rounds elements to the nearest integer. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x4) RoundToEven() Float64x4 + +/* RoundToEvenScaled */ + +// RoundToEvenScaled rounds elements with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x4) RoundToEvenScaled(prec uint8) Float32x4 + +// RoundToEvenScaled rounds elements with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x8) RoundToEvenScaled(prec uint8) Float32x8 + +// RoundToEvenScaled rounds elements with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x16) RoundToEvenScaled(prec uint8) Float32x16 + +// RoundToEvenScaled rounds elements with specified precision. 
+// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x2) RoundToEvenScaled(prec uint8) Float64x2 + +// RoundToEvenScaled rounds elements with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x4) RoundToEvenScaled(prec uint8) Float64x4 + +// RoundToEvenScaled rounds elements with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x8) RoundToEvenScaled(prec uint8) Float64x8 + +/* RoundToEvenScaledResidue */ + +// RoundToEvenScaledResidue computes the difference after rounding with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x4) RoundToEvenScaledResidue(prec uint8) Float32x4 + +// RoundToEvenScaledResidue computes the difference after rounding with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x8) RoundToEvenScaledResidue(prec uint8) Float32x8 + +// RoundToEvenScaledResidue computes the difference after rounding with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x16) RoundToEvenScaledResidue(prec uint8) Float32x16 + +// RoundToEvenScaledResidue computes the difference after rounding with specified precision. 
+// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x2) RoundToEvenScaledResidue(prec uint8) Float64x2 + +// RoundToEvenScaledResidue computes the difference after rounding with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x4) RoundToEvenScaledResidue(prec uint8) Float64x4 + +// RoundToEvenScaledResidue computes the difference after rounding with specified precision. +// +// prec results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x8) RoundToEvenScaledResidue(prec uint8) Float64x8 + +/* SHA1FourRounds */ + +// SHA1FourRounds performs 4 rounds of B loop in SHA1 algorithm defined in FIPS 180-4. +// x contains the state variables a, b, c and d from upper to lower order. +// y contains the W array elements (with the state variable e added to the upper element) from upper to lower order. +// result = the state variables a', b', c', d' updated after 4 rounds. +// constant = 0 for the first 20 rounds of the loop, 1 for the next 20 rounds of the loop..., 3 for the last 20 rounds of the loop. +// +// constant results in better performance when it's a constant, a non-constant value will be translated into a jump table. +// +// Asm: SHA1RNDS4, CPU Feature: SHA +func (x Uint32x4) SHA1FourRounds(constant uint8, y Uint32x4) Uint32x4 + +/* SHA1Message1 */ + +// SHA1Message1 does the XORing of 1 in SHA1 algorithm defined in FIPS 180-4. +// x = {W3, W2, W1, W0} +// y = {0, 0, W5, W4} +// result = {W3^W5, W2^W4, W1^W3, W0^W2}. 
+// +// Asm: SHA1MSG1, CPU Feature: SHA +func (x Uint32x4) SHA1Message1(y Uint32x4) Uint32x4 + +/* SHA1Message2 */ + +// SHA1Message2 does the calculation of 3 and 4 in SHA1 algorithm defined in FIPS 180-4. +// x = result of 2. +// y = {W15, W14, W13} +// result = {W19, W18, W17, W16} +// +// Asm: SHA1MSG2, CPU Feature: SHA +func (x Uint32x4) SHA1Message2(y Uint32x4) Uint32x4 + +/* SHA1NextE */ + +// SHA1NextE calculates the state variable e' updated after 4 rounds in SHA1 algorithm defined in FIPS 180-4. +// x contains the state variable a (before the 4 rounds), placed in the upper element. +// y is the elements of W array for next 4 rounds from upper to lower order. +// result = the elements of the W array for the next 4 rounds, with the updated state variable e' added to the upper element, +// from upper to lower order. +// For the last round of the loop, you can specify zero for y to obtain the e' value itself, or better off specifying H4:0:0:0 +// for y to get e' added to H4. (Note that the value of e' is computed only from x, and values of y don't affect the +// computation of the value of e'.) +// +// Asm: SHA1NEXTE, CPU Feature: SHA +func (x Uint32x4) SHA1NextE(y Uint32x4) Uint32x4 + +/* SHA256Message1 */ + +// SHA256Message1 does the sigma and addition of 1 in SHA256 algorithm defined in FIPS 180-4. +// x = {W0, W1, W2, W3} +// y = {W4, 0, 0, 0} +// result = {W0+σ(W1), W1+σ(W2), W2+σ(W3), W3+σ(W4)} +// +// Asm: SHA256MSG1, CPU Feature: SHA +func (x Uint32x4) SHA256Message1(y Uint32x4) Uint32x4 + +/* SHA256Message2 */ + +// SHA256Message2 does the sigma and addition of 3 in SHA256 algorithm defined in FIPS 180-4. +// x = result of 2 +// y = {0, 0, W14, W15} +// result = {W16, W17, W18, W19} +// +// Asm: SHA256MSG2, CPU Feature: SHA +func (x Uint32x4) SHA256Message2(y Uint32x4) Uint32x4 + +/* SHA256TwoRounds */ + +// SHA256TwoRounds does 2 rounds of B loop to calculate updated state variables in SHA256 algorithm defined in FIPS 180-4. 
+// x = {h, g, d, c} +// y = {f, e, b, a} +// z = {W0+K0, W1+K1} +// result = {f', e', b', a'} +// The K array is a 64-DWORD constant array defined in page 11 of FIPS 180-4. Each element of the K array is to be added to +// the corresponding element of the W array to make the input data z. +// The updated state variables c', d', g', h' are not returned by this instruction, because they are equal to the input data +// y (the state variables a, b, e, f before the 2 rounds). +// +// Asm: SHA256RNDS2, CPU Feature: SHA +func (x Uint32x4) SHA256TwoRounds(y Uint32x4, z Uint32x4) Uint32x4 + +/* SaturateToInt8 */ + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSWB, CPU Feature: AVX512 +func (x Int16x8) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSWB, CPU Feature: AVX512 +func (x Int16x16) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSWB, CPU Feature: AVX512 +func (x Int16x32) SaturateToInt8() Int8x32 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSDB, CPU Feature: AVX512 +func (x Int32x4) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. 
+// +// Asm: VPMOVSDB, CPU Feature: AVX512 +func (x Int32x8) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSDB, CPU Feature: AVX512 +func (x Int32x16) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSQB, CPU Feature: AVX512 +func (x Int64x2) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSQB, CPU Feature: AVX512 +func (x Int64x4) SaturateToInt8() Int8x16 + +// SaturateToInt8 converts element values to int8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVSQB, CPU Feature: AVX512 +func (x Int64x8) SaturateToInt8() Int8x16 + +/* SaturateToInt16 */ + +// SaturateToInt16 converts element values to int16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSDW, CPU Feature: AVX512 +func (x Int32x4) SaturateToInt16() Int16x8 + +// SaturateToInt16 converts element values to int16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSDW, CPU Feature: AVX512 +func (x Int32x8) SaturateToInt16() Int16x8 + +// SaturateToInt16 converts element values to int16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSDW, CPU Feature: AVX512 +func (x Int32x16) SaturateToInt16() Int16x16 + +// SaturateToInt16 converts element values to int16. 
+// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSQW, CPU Feature: AVX512 +func (x Int64x2) SaturateToInt16() Int16x8 + +// SaturateToInt16 converts element values to int16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSQW, CPU Feature: AVX512 +func (x Int64x4) SaturateToInt16() Int16x8 + +// SaturateToInt16 converts element values to int16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSQW, CPU Feature: AVX512 +func (x Int64x8) SaturateToInt16() Int16x8 + +/* SaturateToInt16Concat */ + +// SaturateToInt16Concat converts element values to int16. +// With each 128-bit as a group: +// The converted group from the first input vector will be packed to the lower part of the result vector, +// the converted group from the second input vector will be packed to the upper part of the result vector. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPACKSSDW, CPU Feature: AVX +func (x Int32x4) SaturateToInt16Concat(y Int32x4) Int16x8 + +// SaturateToInt16Concat converts element values to int16. +// With each 128-bit as a group: +// The converted group from the first input vector will be packed to the lower part of the result vector, +// the converted group from the second input vector will be packed to the upper part of the result vector. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPACKSSDW, CPU Feature: AVX2 +func (x Int32x8) SaturateToInt16Concat(y Int32x8) Int16x16 + +// SaturateToInt16Concat converts element values to int16. +// With each 128-bit as a group: +// The converted group from the first input vector will be packed to the lower part of the result vector, +// the converted group from the second input vector will be packed to the upper part of the result vector. +// Conversion is done with saturation on the vector elements. 
+// +// Asm: VPACKSSDW, CPU Feature: AVX512 +func (x Int32x16) SaturateToInt16Concat(y Int32x16) Int16x32 + +/* SaturateToInt32 */ + +// SaturateToInt32 converts element values to int32. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSQD, CPU Feature: AVX512 +func (x Int64x2) SaturateToInt32() Int32x4 + +// SaturateToInt32 converts element values to int32. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSQD, CPU Feature: AVX512 +func (x Int64x4) SaturateToInt32() Int32x4 + +// SaturateToInt32 converts element values to int32. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVSQD, CPU Feature: AVX512 +func (x Int64x8) SaturateToInt32() Int32x8 + +/* SaturateToUint8 */ + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSWB, CPU Feature: AVX512 +func (x Int16x8) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSWB, CPU Feature: AVX512 +func (x Int16x16) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSDB, CPU Feature: AVX512 +func (x Int32x4) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. 
+// +// Asm: VPMOVSDB, CPU Feature: AVX512 +func (x Int32x8) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSDB, CPU Feature: AVX512 +func (x Int32x16) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSQB, CPU Feature: AVX512 +func (x Int64x2) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSQB, CPU Feature: AVX512 +func (x Int64x4) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// NOTE(review): the Asm and the signed result type are the same as SaturateToInt8 on this receiver, which does not match the uint8 wording above; +// the Uint16x32 variant uses VPMOVUSWB and returns Uint8x32. Confirm which behavior the generator input intends. +// +// Asm: VPMOVSQB, CPU Feature: AVX512 +func (x Int64x8) SaturateToUint8() Int8x16 + +// SaturateToUint8 converts element values to uint8. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSWB, CPU Feature: AVX512 +func (x Uint16x32) SaturateToUint8() Uint8x32 + +/* SaturateToUint16 */ + +// SaturateToUint16 converts element values to uint16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSDW, CPU Feature: AVX512 +func (x Uint32x4) SaturateToUint16() Uint16x8 + +// SaturateToUint16 converts element values to uint16. +// Conversion is done with saturation on the vector elements. 
+// +// Asm: VPMOVUSDW, CPU Feature: AVX512 +func (x Uint32x8) SaturateToUint16() Uint16x8 + +// SaturateToUint16 converts element values to uint16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSDW, CPU Feature: AVX512 +func (x Uint32x16) SaturateToUint16() Uint16x16 + +// SaturateToUint16 converts element values to uint16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSQW, CPU Feature: AVX512 +func (x Uint64x2) SaturateToUint16() Uint16x8 + +// SaturateToUint16 converts element values to uint16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSQW, CPU Feature: AVX512 +func (x Uint64x4) SaturateToUint16() Uint16x8 + +// SaturateToUint16 converts element values to uint16. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSQW, CPU Feature: AVX512 +func (x Uint64x8) SaturateToUint16() Uint16x8 + +/* SaturateToUint16Concat */ + +// SaturateToUint16Concat converts element values to uint16. +// With each 128-bit as a group: +// The converted group from the first input vector will be packed to the lower part of the result vector, +// the converted group from the second input vector will be packed to the upper part of the result vector. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPACKUSDW, CPU Feature: AVX +func (x Uint32x4) SaturateToUint16Concat(y Uint32x4) Uint16x8 + +// SaturateToUint16Concat converts element values to uint16. +// With each 128-bit as a group: +// The converted group from the first input vector will be packed to the lower part of the result vector, +// the converted group from the second input vector will be packed to the upper part of the result vector. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPACKUSDW, CPU Feature: AVX2 +func (x Uint32x8) SaturateToUint16Concat(y Uint32x8) Uint16x16 + +// SaturateToUint16Concat converts element values to uint16. 
+// With each 128-bit as a group: +// The converted group from the first input vector will be packed to the lower part of the result vector, +// the converted group from the second input vector will be packed to the upper part of the result vector. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPACKUSDW, CPU Feature: AVX512 +func (x Uint32x16) SaturateToUint16Concat(y Uint32x16) Uint16x32 + +/* SaturateToUint32 */ + +// SaturateToUint32 converts element values to uint32. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSQD, CPU Feature: AVX512 +func (x Uint64x2) SaturateToUint32() Uint32x4 + +// SaturateToUint32 converts element values to uint32. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSQD, CPU Feature: AVX512 +func (x Uint64x4) SaturateToUint32() Uint32x4 + +// SaturateToUint32 converts element values to uint32. +// Conversion is done with saturation on the vector elements. +// +// Asm: VPMOVUSQD, CPU Feature: AVX512 +func (x Uint64x8) SaturateToUint32() Uint32x8 + +/* Scale */ + +// Scale multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512 +func (x Float32x4) Scale(y Float32x4) Float32x4 + +// Scale multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512 +func (x Float32x8) Scale(y Float32x8) Float32x8 + +// Scale multiplies elements by a power of 2. +// +// Asm: VSCALEFPS, CPU Feature: AVX512 +func (x Float32x16) Scale(y Float32x16) Float32x16 + +// Scale multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512 +func (x Float64x2) Scale(y Float64x2) Float64x2 + +// Scale multiplies elements by a power of 2. +// +// Asm: VSCALEFPD, CPU Feature: AVX512 +func (x Float64x4) Scale(y Float64x4) Float64x4 + +// Scale multiplies elements by a power of 2. 
+// +// Asm: VSCALEFPD, CPU Feature: AVX512 +func (x Float64x8) Scale(y Float64x8) Float64x8 + +/* Select128FromPair */ + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// For example, +// +// {40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73}) +// +// returns {70, 71, 72, 73, 40, 41, 42, 43}. +// +// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table. +// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2F128, CPU Feature: AVX +func (x Float32x8) Select128FromPair(lo, hi uint8, y Float32x8) Float32x8 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// For example, +// +// {40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71}) +// +// returns {70, 71, 40, 41}. +// +// lo, hi result in better performance when they are constants, non-constant values will be translated into a jump table. +// lo, hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2F128, CPU Feature: AVX +func (x Float64x4) Select128FromPair(lo, hi uint8, y Float64x4) Float64x4 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// For example, +// +// {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0, +// {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f}) +// +// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}. 
+// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Int8x32) Select128FromPair(lo, hi uint8, y Int8x32) Int8x32 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0, +// {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77}) +// +// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}. +// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Int16x16) Select128FromPair(lo, hi uint8, y Int16x16) Int16x16 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73}) +// +// returns {70, 71, 72, 73, 40, 41, 42, 43}. +// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic.
+// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Int32x8) Select128FromPair(lo, hi uint8, y Int32x8) Int32x8 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71}) +// +// returns {70, 71, 40, 41}. +// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Int64x4) Select128FromPair(lo, hi uint8, y Int64x4) Int64x4 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {0x40, 0x41, ..., 0x4f, 0x50, 0x51, ..., 0x5f}.Select128FromPair(3, 0, +// {0x60, 0x61, ..., 0x6f, 0x70, 0x71, ..., 0x7f}) +// +// returns {0x70, 0x71, ..., 0x7f, 0x40, 0x41, ..., 0x4f}. +// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Uint8x32) Select128FromPair(lo, hi uint8, y Uint8x32) Uint8x32 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {40, 41, 42, 43, 44, 45, 46, 47, 50, 51, 52, 53, 54, 55, 56, 57}.Select128FromPair(3, 0, +// {60, 61, 62, 63, 64, 65, 66, 67, 70, 71, 72, 73, 74, 75, 76, 77}) +// +// returns {70, 71, 72, 73, 74, 75, 76, 77, 40, 41, 42, 43, 44, 45, 46, 47}.
+// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Uint16x16) Select128FromPair(lo, hi uint8, y Uint16x16) Uint16x16 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {40, 41, 42, 43, 50, 51, 52, 53}.Select128FromPair(3, 0, {60, 61, 62, 63, 70, 71, 72, 73}) +// +// returns {70, 71, 72, 73, 40, 41, 42, 43}. +// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Uint32x8) Select128FromPair(lo, hi uint8, y Uint32x8) Uint32x8 + +// Select128FromPair treats the 256-bit vectors x and y as a single vector of four +// 128-bit elements, and returns a 256-bit result formed by +// concatenating the two elements specified by lo and hi. +// Elements 0 and 1 are the lower and upper halves of x, and elements 2 and 3 are those of y; +// lo selects the lower half of the result and hi selects the upper half. +// For example, +// +// {40, 41, 50, 51}.Select128FromPair(3, 0, {60, 61, 70, 71}) +// +// returns {70, 71, 40, 41}. +// +// lo and hi result in better performance when they are constants; non-constant values will be translated into a jump table. +// lo and hi should be between 0 and 3, inclusive; other values may result in a runtime panic. +// +// Asm: VPERM2I128, CPU Feature: AVX2 +func (x Uint64x4) Select128FromPair(lo, hi uint8, y Uint64x4) Uint64x4 + +/* SetElem */ + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+// +// Asm: VPINSRD, CPU Feature: AVX +func (x Float32x4) SetElem(index uint8, y float32) Float32x4 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRQ, CPU Feature: AVX +func (x Float64x2) SetElem(index uint8, y float64) Float64x2 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRB, CPU Feature: AVX +func (x Int8x16) SetElem(index uint8, y int8) Int8x16 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRW, CPU Feature: AVX +func (x Int16x8) SetElem(index uint8, y int16) Int16x8 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRD, CPU Feature: AVX +func (x Int32x4) SetElem(index uint8, y int32) Int32x4 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRQ, CPU Feature: AVX +func (x Int64x2) SetElem(index uint8, y int64) Int64x2 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRB, CPU Feature: AVX +func (x Uint8x16) SetElem(index uint8, y uint8) Uint8x16 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+// +// Asm: VPINSRW, CPU Feature: AVX +func (x Uint16x8) SetElem(index uint8, y uint16) Uint16x8 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRD, CPU Feature: AVX +func (x Uint32x4) SetElem(index uint8, y uint32) Uint32x4 + +// SetElem returns x with the element at position index set to y. +// +// index results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPINSRQ, CPU Feature: AVX +func (x Uint64x2) SetElem(index uint8, y uint64) Uint64x2 + +/* SetHi */ + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float32x8) SetHi(y Float32x4) Float32x8 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTF64X4, CPU Feature: AVX512 +func (x Float32x16) SetHi(y Float32x8) Float32x16 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float64x4) SetHi(y Float64x2) Float64x4 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTF64X4, CPU Feature: AVX512 +func (x Float64x8) SetHi(y Float64x4) Float64x8 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int8x32) SetHi(y Int8x16) Int8x32 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int8x64) SetHi(y Int8x32) Int8x64 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int16x16) SetHi(y Int16x8) Int16x16 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int16x32) SetHi(y Int16x16) Int16x32 + +// SetHi returns x with its upper half set to y; the lower half is unchanged.
+// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int32x8) SetHi(y Int32x4) Int32x8 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int32x16) SetHi(y Int32x8) Int32x16 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int64x4) SetHi(y Int64x2) Int64x4 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int64x8) SetHi(y Int64x4) Int64x8 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint8x32) SetHi(y Uint8x16) Uint8x32 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint8x64) SetHi(y Uint8x32) Uint8x64 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint16x16) SetHi(y Uint16x8) Uint16x16 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint16x32) SetHi(y Uint16x16) Uint16x32 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint32x8) SetHi(y Uint32x4) Uint32x8 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint32x16) SetHi(y Uint32x8) Uint32x16 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint64x4) SetHi(y Uint64x2) Uint64x4 + +// SetHi returns x with its upper half set to y; the lower half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint64x8) SetHi(y Uint64x4) Uint64x8 + +/* SetLo */ + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float32x8) SetLo(y Float32x4) Float32x8 + +// SetLo returns x with its lower half set to y; the upper half is unchanged.
+// +// Asm: VINSERTF64X4, CPU Feature: AVX512 +func (x Float32x16) SetLo(y Float32x8) Float32x16 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTF128, CPU Feature: AVX +func (x Float64x4) SetLo(y Float64x2) Float64x4 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTF64X4, CPU Feature: AVX512 +func (x Float64x8) SetLo(y Float64x4) Float64x8 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int8x32) SetLo(y Int8x16) Int8x32 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int8x64) SetLo(y Int8x32) Int8x64 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int16x16) SetLo(y Int16x8) Int16x16 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int16x32) SetLo(y Int16x16) Int16x32 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int32x8) SetLo(y Int32x4) Int32x8 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int32x16) SetLo(y Int32x8) Int32x16 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Int64x4) SetLo(y Int64x2) Int64x4 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Int64x8) SetLo(y Int64x4) Int64x8 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint8x32) SetLo(y Uint8x16) Uint8x32 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint8x64) SetLo(y Uint8x32) Uint8x64 + +// SetLo returns x with its lower half set to y; the upper half is unchanged.
+// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint16x16) SetLo(y Uint16x8) Uint16x16 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint16x32) SetLo(y Uint16x16) Uint16x32 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint32x8) SetLo(y Uint32x4) Uint32x8 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint32x16) SetLo(y Uint32x8) Uint32x16 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI128, CPU Feature: AVX2 +func (x Uint64x4) SetLo(y Uint64x2) Uint64x4 + +// SetLo returns x with its lower half set to y; the upper half is unchanged. +// +// Asm: VINSERTI64X4, CPU Feature: AVX512 +func (x Uint64x8) SetLo(y Uint64x4) Uint64x8 + +/* ShiftAllLeft */ + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX +func (x Int16x8) ShiftAllLeft(y uint64) Int16x8 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllLeft(y uint64) Int16x16 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512 +func (x Int16x32) ShiftAllLeft(y uint64) Int16x32 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX +func (x Int32x4) ShiftAllLeft(y uint64) Int32x4 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllLeft(y uint64) Int32x8 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed.
+// +// Asm: VPSLLD, CPU Feature: AVX512 +func (x Int32x16) ShiftAllLeft(y uint64) Int32x16 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX +func (x Int64x2) ShiftAllLeft(y uint64) Int64x2 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX2 +func (x Int64x4) ShiftAllLeft(y uint64) Int64x4 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512 +func (x Int64x8) ShiftAllLeft(y uint64) Int64x8 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX +func (x Uint16x8) ShiftAllLeft(y uint64) Uint16x8 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX2 +func (x Uint16x16) ShiftAllLeft(y uint64) Uint16x16 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLW, CPU Feature: AVX512 +func (x Uint16x32) ShiftAllLeft(y uint64) Uint16x32 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX +func (x Uint32x4) ShiftAllLeft(y uint64) Uint32x4 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllLeft(y uint64) Uint32x8 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed.
+// +// Asm: VPSLLD, CPU Feature: AVX512 +func (x Uint32x16) ShiftAllLeft(y uint64) Uint32x16 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllLeft(y uint64) Uint64x2 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllLeft(y uint64) Uint64x4 + +// ShiftAllLeft shifts each element of x to the left by y bits, using the same count for every element. Emptied lower bits are zeroed. +// +// Asm: VPSLLQ, CPU Feature: AVX512 +func (x Uint64x8) ShiftAllLeft(y uint64) Uint64x8 + +/* ShiftAllLeftConcat */ + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 +func (x Int16x8) ShiftAllLeftConcat(shift uint8, y Int16x8) Int16x8 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 +func (x Int16x16) ShiftAllLeftConcat(shift uint8, y Int16x16) Int16x16 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 +func (x Int16x32) ShiftAllLeftConcat(shift uint8, y Int16x32) Int16x32 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 +func (x Int32x4) ShiftAllLeftConcat(shift uint8, y Int32x4) Int32x4 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 +func (x Int32x8) ShiftAllLeftConcat(shift uint8, y Int32x8) Int32x8 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 +func (x Int32x16) ShiftAllLeftConcat(shift uint8, y Int32x16) Int32x16 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 +func (x Int64x2) ShiftAllLeftConcat(shift uint8, y Int64x2) Int64x2 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 +func (x Int64x4) ShiftAllLeftConcat(shift uint8, y Int64x4) Int64x4 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 +func (x Int64x8) ShiftAllLeftConcat(shift uint8, y Int64x8) Int64x8 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) ShiftAllLeftConcat(shift uint8, y Uint16x8) Uint16x8 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) ShiftAllLeftConcat(shift uint8, y Uint16x16) Uint16x16 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) ShiftAllLeftConcat(shift uint8, y Uint16x32) Uint16x32 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 +func (x Uint32x4) ShiftAllLeftConcat(shift uint8, y Uint32x4) Uint32x4 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 +func (x Uint32x8) ShiftAllLeftConcat(shift uint8, y Uint32x8) Uint32x8 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x.
+// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDD, CPU Feature: AVX512VBMI2 +func (x Uint32x16) ShiftAllLeftConcat(shift uint8, y Uint32x16) Uint32x16 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 +func (x Uint64x2) ShiftAllLeftConcat(shift uint8, y Uint64x2) Uint64x2 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 +func (x Uint64x4) ShiftAllLeftConcat(shift uint8, y Uint64x4) Uint64x4 + +// ShiftAllLeftConcat shifts each element of x to the left by the number of bits specified by the +// immediate shift (only the lower 6 bits are used for 64-bit elements), and then copies the upper bits of y to the emptied lower bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHLDQ, CPU Feature: AVX512VBMI2 +func (x Uint64x8) ShiftAllLeftConcat(shift uint8, y Uint64x8) Uint64x8 + +/* ShiftAllRight */ + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX +func (x Int16x8) ShiftAllRight(y uint64) Int16x8 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element.
Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX2 +func (x Int16x16) ShiftAllRight(y uint64) Int16x16 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAW, CPU Feature: AVX512 +func (x Int16x32) ShiftAllRight(y uint64) Int16x32 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX +func (x Int32x4) ShiftAllRight(y uint64) Int32x4 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX2 +func (x Int32x8) ShiftAllRight(y uint64) Int32x8 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAD, CPU Feature: AVX512 +func (x Int32x16) ShiftAllRight(y uint64) Int32x16 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512 +func (x Int64x2) ShiftAllRight(y uint64) Int64x2 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512 +func (x Int64x4) ShiftAllRight(y uint64) Int64x4 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAQ, CPU Feature: AVX512 +func (x Int64x8) ShiftAllRight(y uint64) Int64x8 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed.
+// +// Asm: VPSRLW, CPU Feature: AVX +func (x Uint16x8) ShiftAllRight(y uint64) Uint16x8 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX2 +func (x Uint16x16) ShiftAllRight(y uint64) Uint16x16 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLW, CPU Feature: AVX512 +func (x Uint16x32) ShiftAllRight(y uint64) Uint16x32 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX +func (x Uint32x4) ShiftAllRight(y uint64) Uint32x4 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX2 +func (x Uint32x8) ShiftAllRight(y uint64) Uint32x8 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLD, CPU Feature: AVX512 +func (x Uint32x16) ShiftAllRight(y uint64) Uint32x16 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX +func (x Uint64x2) ShiftAllRight(y uint64) Uint64x2 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed. +// +// Asm: VPSRLQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftAllRight(y uint64) Uint64x4 + +// ShiftAllRight shifts each element of x to the right by y bits, using the same count for every element. Emptied upper bits are zeroed.
+// +// Asm: VPSRLQ, CPU Feature: AVX512 +func (x Uint64x8) ShiftAllRight(y uint64) Uint64x8 + +/* ShiftAllRightConcat */ + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 +func (x Int16x8) ShiftAllRightConcat(shift uint8, y Int16x8) Int16x8 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 +func (x Int16x16) ShiftAllRightConcat(shift uint8, y Int16x16) Int16x16 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate shift (only the lower 4 bits are used for 16-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 +func (x Int16x32) ShiftAllRightConcat(shift uint8, y Int16x32) Int16x32 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate shift (only the lower 5 bits are used for 32-bit elements), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+// +// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 +func (x Int32x4) ShiftAllRightConcat(shift uint8, y Int32x4) Int32x4 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 +func (x Int32x8) ShiftAllRightConcat(shift uint8, y Int32x8) Int32x8 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 +func (x Int32x16) ShiftAllRightConcat(shift uint8, y Int32x16) Int32x16 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 6 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 +func (x Int64x2) ShiftAllRightConcat(shift uint8, y Int64x2) Int64x2 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 6 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+// +// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 +func (x Int64x4) ShiftAllRightConcat(shift uint8, y Int64x4) Int64x4 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 6 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 +func (x Int64x8) ShiftAllRightConcat(shift uint8, y Int64x8) Int64x8 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 4 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) ShiftAllRightConcat(shift uint8, y Uint16x8) Uint16x8 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 4 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) ShiftAllRightConcat(shift uint8, y Uint16x16) Uint16x16 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 4 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+// +// Asm: VPSHRDW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) ShiftAllRightConcat(shift uint8, y Uint16x32) Uint16x32 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 +func (x Uint32x4) ShiftAllRightConcat(shift uint8, y Uint32x4) Uint32x4 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 +func (x Uint32x8) ShiftAllRightConcat(shift uint8, y Uint32x8) Uint32x8 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 5 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDD, CPU Feature: AVX512VBMI2 +func (x Uint32x16) ShiftAllRightConcat(shift uint8, y Uint32x16) Uint32x16 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 6 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table.
+// +// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 +func (x Uint64x2) ShiftAllRightConcat(shift uint8, y Uint64x2) Uint64x2 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 6 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 +func (x Uint64x4) ShiftAllRightConcat(shift uint8, y Uint64x4) Uint64x4 + +// ShiftAllRightConcat shifts each element of x to the right by the number of bits specified by the +// immediate (only the lower 6 bits are used), and then copies the lower bits of y to the emptied upper bits of the shifted x. +// +// shift results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VPSHRDQ, CPU Feature: AVX512VBMI2 +func (x Uint64x8) ShiftAllRightConcat(shift uint8, y Uint64x8) Uint64x8 + +/* ShiftLeft */ + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512 +func (x Int16x8) ShiftLeft(y Int16x8) Int16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512 +func (x Int16x16) ShiftLeft(y Int16x16) Int16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512 +func (x Int16x32) ShiftLeft(y Int16x32) Int16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed.
+// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Int32x4) ShiftLeft(y Int32x4) Int32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Int32x8) ShiftLeft(y Int32x8) Int32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512 +func (x Int32x16) ShiftLeft(y Int32x16) Int32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Int64x2) ShiftLeft(y Int64x2) Int64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Int64x4) ShiftLeft(y Int64x4) Int64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512 +func (x Int64x8) ShiftLeft(y Int64x8) Int64x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512 +func (x Uint16x8) ShiftLeft(y Uint16x8) Uint16x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVW, CPU Feature: AVX512 +func (x Uint16x16) ShiftLeft(y Uint16x16) Uint16x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. 
+// +// Asm: VPSLLVW, CPU Feature: AVX512 +func (x Uint16x32) ShiftLeft(y Uint16x32) Uint16x32 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Uint32x4) ShiftLeft(y Uint32x4) Uint32x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX2 +func (x Uint32x8) ShiftLeft(y Uint32x8) Uint32x8 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVD, CPU Feature: AVX512 +func (x Uint32x16) ShiftLeft(y Uint32x16) Uint32x16 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Uint64x2) ShiftLeft(y Uint64x2) Uint64x2 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftLeft(y Uint64x4) Uint64x4 + +// ShiftLeft shifts each element in x to the left by the number of bits specified in y's corresponding elements. Emptied lower bits are zeroed. +// +// Asm: VPSLLVQ, CPU Feature: AVX512 +func (x Uint64x8) ShiftLeft(y Uint64x8) Uint64x8 + +/* ShiftLeftConcat */ + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// +// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 +func (x Int16x8) ShiftLeftConcat(y Int16x8, z Int16x8) Int16x8 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 +func (x Int16x16) ShiftLeftConcat(y Int16x16, z Int16x16) Int16x16 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 +func (x Int16x32) ShiftLeftConcat(y Int16x32, z Int16x32) Int16x32 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 +func (x Int32x4) ShiftLeftConcat(y Int32x4, z Int32x4) Int32x4 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 +func (x Int32x8) ShiftLeftConcat(y Int32x8, z Int32x8) Int32x8 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// +// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 +func (x Int32x16) ShiftLeftConcat(y Int32x16, z Int32x16) Int32x16 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 +func (x Int64x2) ShiftLeftConcat(y Int64x2, z Int64x2) Int64x2 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 +func (x Int64x4) ShiftLeftConcat(y Int64x4, z Int64x4) Int64x4 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 +func (x Int64x8) ShiftLeftConcat(y Int64x8, z Int64x8) Int64x8 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) ShiftLeftConcat(y Uint16x8, z Uint16x8) Uint16x8 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// +// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) ShiftLeftConcat(y Uint16x16, z Uint16x16) Uint16x16 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) ShiftLeftConcat(y Uint16x32, z Uint16x32) Uint16x32 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 +func (x Uint32x4) ShiftLeftConcat(y Uint32x4, z Uint32x4) Uint32x4 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 +func (x Uint32x8) ShiftLeftConcat(y Uint32x8, z Uint32x8) Uint32x8 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVD, CPU Feature: AVX512VBMI2 +func (x Uint32x16) ShiftLeftConcat(y Uint32x16, z Uint32x16) Uint32x16 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x.
+// +// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 +func (x Uint64x2) ShiftLeftConcat(y Uint64x2, z Uint64x2) Uint64x2 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 +func (x Uint64x4) ShiftLeftConcat(y Uint64x4, z Uint64x4) Uint64x4 + +// ShiftLeftConcat shifts each element of x to the left by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the upper bits of z to the emptied lower bits of the shifted x. +// +// Asm: VPSHLDVQ, CPU Feature: AVX512VBMI2 +func (x Uint64x8) ShiftLeftConcat(y Uint64x8, z Uint64x8) Uint64x8 + +/* ShiftRight */ + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512 +func (x Int16x8) ShiftRight(y Int16x8) Int16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512 +func (x Int16x16) ShiftRight(y Int16x16) Int16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVW, CPU Feature: AVX512 +func (x Int16x32) ShiftRight(y Int16x32) Int16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit.
+// +// Asm: VPSRAVD, CPU Feature: AVX2 +func (x Int32x4) ShiftRight(y Int32x4) Int32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX2 +func (x Int32x8) ShiftRight(y Int32x8) Int32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVD, CPU Feature: AVX512 +func (x Int32x16) ShiftRight(y Int32x16) Int32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512 +func (x Int64x2) ShiftRight(y Int64x2) Int64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512 +func (x Int64x4) ShiftRight(y Int64x4) Int64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are filled with the sign bit. +// +// Asm: VPSRAVQ, CPU Feature: AVX512 +func (x Int64x8) ShiftRight(y Int64x8) Int64x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512 +func (x Uint16x8) ShiftRight(y Uint16x8) Uint16x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512 +func (x Uint16x16) ShiftRight(y Uint16x16) Uint16x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. 
Emptied upper bits are zeroed. +// +// Asm: VPSRLVW, CPU Feature: AVX512 +func (x Uint16x32) ShiftRight(y Uint16x32) Uint16x32 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX2 +func (x Uint32x4) ShiftRight(y Uint32x4) Uint32x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX2 +func (x Uint32x8) ShiftRight(y Uint32x8) Uint32x8 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVD, CPU Feature: AVX512 +func (x Uint32x16) ShiftRight(y Uint32x16) Uint32x16 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX2 +func (x Uint64x2) ShiftRight(y Uint64x2) Uint64x2 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX2 +func (x Uint64x4) ShiftRight(y Uint64x4) Uint64x4 + +// ShiftRight shifts each element in x to the right by the number of bits specified in y's corresponding elements. Emptied upper bits are zeroed. +// +// Asm: VPSRLVQ, CPU Feature: AVX512 +func (x Uint64x8) ShiftRight(y Uint64x8) Uint64x8 + +/* ShiftRightConcat */ + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// +// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 +func (x Int16x8) ShiftRightConcat(y Int16x8, z Int16x8) Int16x8 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 +func (x Int16x16) ShiftRightConcat(y Int16x16, z Int16x16) Int16x16 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 +func (x Int16x32) ShiftRightConcat(y Int16x32, z Int16x32) Int16x32 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 +func (x Int32x4) ShiftRightConcat(y Int32x4, z Int32x4) Int32x4 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 +func (x Int32x8) ShiftRightConcat(y Int32x8, z Int32x8) Int32x8 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// +// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 +func (x Int32x16) ShiftRightConcat(y Int32x16, z Int32x16) Int32x16 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 +func (x Int64x2) ShiftRightConcat(y Int64x2, z Int64x2) Int64x2 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 +func (x Int64x4) ShiftRightConcat(y Int64x4, z Int64x4) Int64x4 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 +func (x Int64x8) ShiftRightConcat(y Int64x8, z Int64x8) Int64x8 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 +func (x Uint16x8) ShiftRightConcat(y Uint16x8, z Uint16x8) Uint16x8 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// +// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 +func (x Uint16x16) ShiftRightConcat(y Uint16x16, z Uint16x16) Uint16x16 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 4 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVW, CPU Feature: AVX512VBMI2 +func (x Uint16x32) ShiftRightConcat(y Uint16x32, z Uint16x32) Uint16x32 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 +func (x Uint32x4) ShiftRightConcat(y Uint32x4, z Uint32x4) Uint32x4 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 +func (x Uint32x8) ShiftRightConcat(y Uint32x8, z Uint32x8) Uint32x8 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 5 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVD, CPU Feature: AVX512VBMI2 +func (x Uint32x16) ShiftRightConcat(y Uint32x16, z Uint32x16) Uint32x16 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x.
+// +// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 +func (x Uint64x2) ShiftRightConcat(y Uint64x2, z Uint64x2) Uint64x2 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 +func (x Uint64x4) ShiftRightConcat(y Uint64x4, z Uint64x4) Uint64x4 + +// ShiftRightConcat shifts each element of x to the right by the number of bits specified by the +// corresponding elements in y (only the lower 6 bits are used), and then copies the lower bits of z to the emptied upper bits of the shifted x. +// +// Asm: VPSHRDVQ, CPU Feature: AVX512VBMI2 +func (x Uint64x8) ShiftRightConcat(y Uint64x8, z Uint64x8) Uint64x8 + +/* Sqrt */ + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPS, CPU Feature: AVX +func (x Float32x4) Sqrt() Float32x4 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPS, CPU Feature: AVX +func (x Float32x8) Sqrt() Float32x8 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPS, CPU Feature: AVX512 +func (x Float32x16) Sqrt() Float32x16 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPD, CPU Feature: AVX +func (x Float64x2) Sqrt() Float64x2 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPD, CPU Feature: AVX +func (x Float64x4) Sqrt() Float64x4 + +// Sqrt computes the square root of each element. +// +// Asm: VSQRTPD, CPU Feature: AVX512 +func (x Float64x8) Sqrt() Float64x8 + +/* Sub */ + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPS, CPU Feature: AVX +func (x Float32x4) Sub(y Float32x4) Float32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPS, CPU Feature: AVX +func (x Float32x8) Sub(y Float32x8) Float32x8 + +// Sub subtracts corresponding elements of two vectors.
+// +// Asm: VSUBPS, CPU Feature: AVX512 +func (x Float32x16) Sub(y Float32x16) Float32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPD, CPU Feature: AVX +func (x Float64x2) Sub(y Float64x2) Float64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPD, CPU Feature: AVX +func (x Float64x4) Sub(y Float64x4) Float64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VSUBPD, CPU Feature: AVX512 +func (x Float64x8) Sub(y Float64x8) Float64x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX +func (x Int8x16) Sub(y Int8x16) Int8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX2 +func (x Int8x32) Sub(y Int8x32) Int8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512 +func (x Int8x64) Sub(y Int8x64) Int8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX +func (x Int16x8) Sub(y Int16x8) Int16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX2 +func (x Int16x16) Sub(y Int16x16) Int16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512 +func (x Int16x32) Sub(y Int16x32) Int16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX +func (x Int32x4) Sub(y Int32x4) Int32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX2 +func (x Int32x8) Sub(y Int32x8) Int32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512 +func (x Int32x16) Sub(y Int32x16) Int32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX +func (x Int64x2) Sub(y Int64x2) Int64x2 + +// Sub subtracts corresponding elements of two vectors. 
+// +// Asm: VPSUBQ, CPU Feature: AVX2 +func (x Int64x4) Sub(y Int64x4) Int64x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX512 +func (x Int64x8) Sub(y Int64x8) Int64x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX +func (x Uint8x16) Sub(y Uint8x16) Uint8x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX2 +func (x Uint8x32) Sub(y Uint8x32) Uint8x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBB, CPU Feature: AVX512 +func (x Uint8x64) Sub(y Uint8x64) Uint8x64 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX +func (x Uint16x8) Sub(y Uint16x8) Uint16x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX2 +func (x Uint16x16) Sub(y Uint16x16) Uint16x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBW, CPU Feature: AVX512 +func (x Uint16x32) Sub(y Uint16x32) Uint16x32 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX +func (x Uint32x4) Sub(y Uint32x4) Uint32x4 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX2 +func (x Uint32x8) Sub(y Uint32x8) Uint32x8 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBD, CPU Feature: AVX512 +func (x Uint32x16) Sub(y Uint32x16) Uint32x16 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX +func (x Uint64x2) Sub(y Uint64x2) Uint64x2 + +// Sub subtracts corresponding elements of two vectors. +// +// Asm: VPSUBQ, CPU Feature: AVX2 +func (x Uint64x4) Sub(y Uint64x4) Uint64x4 + +// Sub subtracts corresponding elements of two vectors. 
+// +// Asm: VPSUBQ, CPU Feature: AVX512 +func (x Uint64x8) Sub(y Uint64x8) Uint64x8 + +/* SubPairs */ + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VHSUBPS, CPU Feature: AVX +func (x Float32x4) SubPairs(y Float32x4) Float32x4 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VHSUBPS, CPU Feature: AVX +func (x Float32x8) SubPairs(y Float32x8) Float32x8 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VHSUBPD, CPU Feature: AVX +func (x Float64x2) SubPairs(y Float64x2) Float64x2 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VHSUBPD, CPU Feature: AVX +func (x Float64x4) SubPairs(y Float64x4) Float64x4 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBW, CPU Feature: AVX +func (x Int16x8) SubPairs(y Int16x8) Int16x8 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBW, CPU Feature: AVX2 +func (x Int16x16) SubPairs(y Int16x16) Int16x16 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
+// +// Asm: VPHSUBD, CPU Feature: AVX +func (x Int32x4) SubPairs(y Int32x4) Int32x4 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBD, CPU Feature: AVX2 +func (x Int32x8) SubPairs(y Int32x8) Int32x8 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBW, CPU Feature: AVX +func (x Uint16x8) SubPairs(y Uint16x8) Uint16x8 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBW, CPU Feature: AVX2 +func (x Uint16x16) SubPairs(y Uint16x16) Uint16x16 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBD, CPU Feature: AVX +func (x Uint32x4) SubPairs(y Uint32x4) Uint32x4 + +// SubPairs horizontally subtracts adjacent pairs of elements. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBD, CPU Feature: AVX2 +func (x Uint32x8) SubPairs(y Uint32x8) Uint32x8 + +/* SubPairsSaturated */ + +// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. +// +// Asm: VPHSUBSW, CPU Feature: AVX +func (x Int16x8) SubPairsSaturated(y Int16x8) Int16x8 + +// SubPairsSaturated horizontally subtracts adjacent pairs of elements with saturation. +// For x = [x0, x1, x2, x3, ...] and y = [y0, y1, y2, y3, ...], the result is [y0-y1, y2-y3, ..., x0-x1, x2-x3, ...]. 
+// +// Asm: VPHSUBSW, CPU Feature: AVX2 +func (x Int16x16) SubPairsSaturated(y Int16x16) Int16x16 + +/* SubSaturated */ + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX +func (x Int8x16) SubSaturated(y Int8x16) Int8x16 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX2 +func (x Int8x32) SubSaturated(y Int8x32) Int8x32 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSB, CPU Feature: AVX512 +func (x Int8x64) SubSaturated(y Int8x64) Int8x64 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX +func (x Int16x8) SubSaturated(y Int16x8) Int16x8 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX2 +func (x Int16x16) SubSaturated(y Int16x16) Int16x16 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBSW, CPU Feature: AVX512 +func (x Int16x32) SubSaturated(y Int16x32) Int16x32 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBUSB, CPU Feature: AVX +func (x Uint8x16) SubSaturated(y Uint8x16) Uint8x16 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBUSB, CPU Feature: AVX2 +func (x Uint8x32) SubSaturated(y Uint8x32) Uint8x32 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBUSB, CPU Feature: AVX512 +func (x Uint8x64) SubSaturated(y Uint8x64) Uint8x64 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBUSW, CPU Feature: AVX +func (x Uint16x8) SubSaturated(y Uint16x8) Uint16x8 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. 
+// +// Asm: VPSUBUSW, CPU Feature: AVX2 +func (x Uint16x16) SubSaturated(y Uint16x16) Uint16x16 + +// SubSaturated subtracts corresponding elements of two vectors with saturation. +// +// Asm: VPSUBUSW, CPU Feature: AVX512 +func (x Uint16x32) SubSaturated(y Uint16x32) Uint16x32 + +/* SumAbsDiff */ + +// SumAbsDiff sums the absolute differences between corresponding bytes of the two input vectors, treating each adjacent 8 bytes as a group. The output is +// a vector of word-sized elements in which every 4*n-th element contains the sum of the n-th input group; the other elements in the result vector are zeroed. +// This method can be seen as computing the L1 distance between corresponding 8-byte groups of the two input vectors. +// +// Asm: VPSADBW, CPU Feature: AVX +func (x Uint8x16) SumAbsDiff(y Uint8x16) Uint16x8 + +// SumAbsDiff sums the absolute differences between corresponding bytes of the two input vectors, treating each adjacent 8 bytes as a group. The output is +// a vector of word-sized elements in which every 4*n-th element contains the sum of the n-th input group; the other elements in the result vector are zeroed. +// This method can be seen as computing the L1 distance between corresponding 8-byte groups of the two input vectors. +// +// Asm: VPSADBW, CPU Feature: AVX2 +func (x Uint8x32) SumAbsDiff(y Uint8x32) Uint16x16 + +// SumAbsDiff sums the absolute differences between corresponding bytes of the two input vectors, treating each adjacent 8 bytes as a group. The output is +// a vector of word-sized elements in which every 4*n-th element contains the sum of the n-th input group; the other elements in the result vector are zeroed. +// This method can be seen as computing the L1 distance between corresponding 8-byte groups of the two input vectors. +// +// Asm: VPSADBW, CPU Feature: AVX512 +func (x Uint8x64) SumAbsDiff(y Uint8x64) Uint16x32 + +/* Trunc */ + +// Trunc truncates elements towards zero. +// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x4) Trunc() Float32x4 + +// Trunc truncates elements towards zero. 
+// +// Asm: VROUNDPS, CPU Feature: AVX +func (x Float32x8) Trunc() Float32x8 + +// Trunc truncates elements towards zero. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x2) Trunc() Float64x2 + +// Trunc truncates elements towards zero. +// +// Asm: VROUNDPD, CPU Feature: AVX +func (x Float64x4) Trunc() Float64x4 + +/* TruncScaled */ + +// TruncScaled truncates elements with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x4) TruncScaled(prec uint8) Float32x4 + +// TruncScaled truncates elements with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x8) TruncScaled(prec uint8) Float32x8 + +// TruncScaled truncates elements with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPS, CPU Feature: AVX512 +func (x Float32x16) TruncScaled(prec uint8) Float32x16 + +// TruncScaled truncates elements with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x2) TruncScaled(prec uint8) Float64x2 + +// TruncScaled truncates elements with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x4) TruncScaled(prec uint8) Float64x4 + +// TruncScaled truncates elements with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. 
+// +// Asm: VRNDSCALEPD, CPU Feature: AVX512 +func (x Float64x8) TruncScaled(prec uint8) Float64x8 + +/* TruncScaledResidue */ + +// TruncScaledResidue computes the difference after truncating with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x4) TruncScaledResidue(prec uint8) Float32x4 + +// TruncScaledResidue computes the difference after truncating with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x8) TruncScaledResidue(prec uint8) Float32x8 + +// TruncScaledResidue computes the difference after truncating with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPS, CPU Feature: AVX512 +func (x Float32x16) TruncScaledResidue(prec uint8) Float32x16 + +// TruncScaledResidue computes the difference after truncating with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x2) TruncScaledResidue(prec uint8) Float64x2 + +// TruncScaledResidue computes the difference after truncating with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. +// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x4) TruncScaledResidue(prec uint8) Float64x4 + +// TruncScaledResidue computes the difference after truncating with specified precision. +// +// prec results in better performance when it's a constant; a non-constant value will be translated into a jump table. 
+// +// Asm: VREDUCEPD, CPU Feature: AVX512 +func (x Float64x8) TruncScaledResidue(prec uint8) Float64x8 + +/* TruncateToInt8 */ + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Int16x8) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Int16x16) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Int16x32) TruncateToInt8() Int8x32 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Int32x4) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Int32x8) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Int32x16) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. 
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Int64x2) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Int64x4) TruncateToInt8() Int8x16 + +// TruncateToInt8 converts element values to int8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Int64x8) TruncateToInt8() Int8x16 + +/* TruncateToInt16 */ + +// TruncateToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Int32x4) TruncateToInt16() Int16x8 + +// TruncateToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Int32x8) TruncateToInt16() Int16x8 + +// TruncateToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Int32x16) TruncateToInt16() Int16x16 + +// TruncateToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Int64x2) TruncateToInt16() Int16x8 + +// TruncateToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Int64x4) TruncateToInt16() Int16x8 + +// TruncateToInt16 converts element values to int16. +// Conversion is done with truncation on the vector elements. 
+// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Int64x8) TruncateToInt16() Int16x8 + +/* TruncateToInt32 */ + +// TruncateToInt32 converts element values to int32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Int64x2) TruncateToInt32() Int32x4 + +// TruncateToInt32 converts element values to int32. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Int64x4) TruncateToInt32() Int32x4 + +// TruncateToInt32 converts element values to int32. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Int64x8) TruncateToInt32() Int32x8 + +/* TruncateToUint8 */ + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Uint16x8) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Uint16x16) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVWB, CPU Feature: AVX512 +func (x Uint16x32) TruncateToUint8() Uint8x32 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Uint32x4) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. 
+// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Uint32x8) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDB, CPU Feature: AVX512 +func (x Uint32x16) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Uint64x2) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Uint64x4) TruncateToUint8() Uint8x16 + +// TruncateToUint8 converts element values to uint8. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQB, CPU Feature: AVX512 +func (x Uint64x8) TruncateToUint8() Uint8x16 + +/* TruncateToUint16 */ + +// TruncateToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Uint32x4) TruncateToUint16() Uint16x8 + +// TruncateToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Uint32x8) TruncateToUint16() Uint16x8 + +// TruncateToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. 
+// +// Asm: VPMOVDW, CPU Feature: AVX512 +func (x Uint32x16) TruncateToUint16() Uint16x16 + +// TruncateToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Uint64x2) TruncateToUint16() Uint16x8 + +// TruncateToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Uint64x4) TruncateToUint16() Uint16x8 + +// TruncateToUint16 converts element values to uint16. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVQW, CPU Feature: AVX512 +func (x Uint64x8) TruncateToUint16() Uint16x8 + +/* TruncateToUint32 */ + +// TruncateToUint32 converts element values to uint32. +// Conversion is done with truncation on the vector elements. +// Results are packed to low elements in the returned vector, its upper elements are zero-cleared. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Uint64x2) TruncateToUint32() Uint32x4 + +// TruncateToUint32 converts element values to uint32. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Uint64x4) TruncateToUint32() Uint32x4 + +// TruncateToUint32 converts element values to uint32. +// Conversion is done with truncation on the vector elements. +// +// Asm: VPMOVQD, CPU Feature: AVX512 +func (x Uint64x8) TruncateToUint32() Uint32x8 + +/* Xor */ + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Int8x16) Xor(y Int8x16) Int8x16 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Int8x32) Xor(y Int8x32) Int8x32 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512 +func (x Int8x64) Xor(y Int8x64) Int8x64 + +// Xor performs a bitwise XOR operation between two vectors. 
+// +// Asm: VPXOR, CPU Feature: AVX +func (x Int16x8) Xor(y Int16x8) Int16x8 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Int16x16) Xor(y Int16x16) Int16x16 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512 +func (x Int16x32) Xor(y Int16x32) Int16x32 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Int32x4) Xor(y Int32x4) Int32x4 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Int32x8) Xor(y Int32x8) Int32x8 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512 +func (x Int32x16) Xor(y Int32x16) Int32x16 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Int64x2) Xor(y Int64x2) Int64x2 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Int64x4) Xor(y Int64x4) Int64x4 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORQ, CPU Feature: AVX512 +func (x Int64x8) Xor(y Int64x8) Int64x8 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Uint8x16) Xor(y Uint8x16) Uint8x16 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Uint8x32) Xor(y Uint8x32) Uint8x32 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512 +func (x Uint8x64) Xor(y Uint8x64) Uint8x64 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Uint16x8) Xor(y Uint16x8) Uint16x8 + +// Xor performs a bitwise XOR operation between two vectors. 
+// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Uint16x16) Xor(y Uint16x16) Uint16x16 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512 +func (x Uint16x32) Xor(y Uint16x32) Uint16x32 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Uint32x4) Xor(y Uint32x4) Uint32x4 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Uint32x8) Xor(y Uint32x8) Uint32x8 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXORD, CPU Feature: AVX512 +func (x Uint32x16) Xor(y Uint32x16) Uint32x16 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX +func (x Uint64x2) Xor(y Uint64x2) Uint64x2 + +// Xor performs a bitwise XOR operation between two vectors. +// +// Asm: VPXOR, CPU Feature: AVX2 +func (x Uint64x4) Xor(y Uint64x4) Uint64x4 + +// Xor performs a bitwise XOR operation between two vectors. 
+// +// Asm: VPXORQ, CPU Feature: AVX512 +func (x Uint64x8) Xor(y Uint64x8) Uint64x8 + +// AsFloat64x2 converts from Float32x4 to Float64x2. +func (from Float32x4) AsFloat64x2() (to Float64x2) + +// AsInt8x16 converts from Float32x4 to Int8x16. +func (from Float32x4) AsInt8x16() (to Int8x16) + +// AsInt16x8 converts from Float32x4 to Int16x8. +func (from Float32x4) AsInt16x8() (to Int16x8) + +// AsInt32x4 converts from Float32x4 to Int32x4. +func (from Float32x4) AsInt32x4() (to Int32x4) + +// AsInt64x2 converts from Float32x4 to Int64x2. +func (from Float32x4) AsInt64x2() (to Int64x2) + +// AsUint8x16 converts from Float32x4 to Uint8x16. +func (from Float32x4) AsUint8x16() (to Uint8x16) + +// AsUint16x8 converts from Float32x4 to Uint16x8. +func (from Float32x4) AsUint16x8() (to Uint16x8) + +// AsUint32x4 converts from Float32x4 to Uint32x4. +func (from Float32x4) AsUint32x4() (to Uint32x4) + +// AsUint64x2 converts from Float32x4 to Uint64x2. +func (from Float32x4) AsUint64x2() (to Uint64x2) + +// AsFloat64x4 converts from Float32x8 to Float64x4. +func (from Float32x8) AsFloat64x4() (to Float64x4) + +// AsInt8x32 converts from Float32x8 to Int8x32. +func (from Float32x8) AsInt8x32() (to Int8x32) + +// AsInt16x16 converts from Float32x8 to Int16x16. +func (from Float32x8) AsInt16x16() (to Int16x16) + +// AsInt32x8 converts from Float32x8 to Int32x8. +func (from Float32x8) AsInt32x8() (to Int32x8) + +// AsInt64x4 converts from Float32x8 to Int64x4. +func (from Float32x8) AsInt64x4() (to Int64x4) + +// AsUint8x32 converts from Float32x8 to Uint8x32. +func (from Float32x8) AsUint8x32() (to Uint8x32) + +// AsUint16x16 converts from Float32x8 to Uint16x16. +func (from Float32x8) AsUint16x16() (to Uint16x16) + +// AsUint32x8 converts from Float32x8 to Uint32x8. +func (from Float32x8) AsUint32x8() (to Uint32x8) + +// AsUint64x4 converts from Float32x8 to Uint64x4. +func (from Float32x8) AsUint64x4() (to Uint64x4) + +// AsFloat64x8 converts from Float32x16 to Float64x8. +func (from Float32x16) AsFloat64x8() (to Float64x8) + +// 
Int8x64 converts from Float32x16 to Int8x64 +func (from Float32x16) AsInt8x64() (to Int8x64) + +// AsInt16x32 converts from Float32x16 to Int16x32. +func (from Float32x16) AsInt16x32() (to Int16x32) + +// AsInt32x16 converts from Float32x16 to Int32x16. +func (from Float32x16) AsInt32x16() (to Int32x16) + +// AsInt64x8 converts from Float32x16 to Int64x8. +func (from Float32x16) AsInt64x8() (to Int64x8) + +// AsUint8x64 converts from Float32x16 to Uint8x64. +func (from Float32x16) AsUint8x64() (to Uint8x64) + +// AsUint16x32 converts from Float32x16 to Uint16x32. +func (from Float32x16) AsUint16x32() (to Uint16x32) + +// AsUint32x16 converts from Float32x16 to Uint32x16. +func (from Float32x16) AsUint32x16() (to Uint32x16) + +// AsUint64x8 converts from Float32x16 to Uint64x8. +func (from Float32x16) AsUint64x8() (to Uint64x8) + +// AsFloat32x4 converts from Float64x2 to Float32x4. +func (from Float64x2) AsFloat32x4() (to Float32x4) + +// AsInt8x16 converts from Float64x2 to Int8x16. +func (from Float64x2) AsInt8x16() (to Int8x16) + +// AsInt16x8 converts from Float64x2 to Int16x8. +func (from Float64x2) AsInt16x8() (to Int16x8) + +// AsInt32x4 converts from Float64x2 to Int32x4. +func (from Float64x2) AsInt32x4() (to Int32x4) + +// AsInt64x2 converts from Float64x2 to Int64x2. +func (from Float64x2) AsInt64x2() (to Int64x2) + +// AsUint8x16 converts from Float64x2 to Uint8x16. +func (from Float64x2) AsUint8x16() (to Uint8x16) + +// AsUint16x8 converts from Float64x2 to Uint16x8. +func (from Float64x2) AsUint16x8() (to Uint16x8) + +// AsUint32x4 converts from Float64x2 to Uint32x4. +func (from Float64x2) AsUint32x4() (to Uint32x4) + +// AsUint64x2 converts from Float64x2 to Uint64x2. +func (from Float64x2) AsUint64x2() (to Uint64x2) + +// AsFloat32x8 converts from Float64x4 to Float32x8. +func (from Float64x4) AsFloat32x8() (to Float32x8) + +// AsInt8x32 converts from Float64x4 to Int8x32. +func (from Float64x4) AsInt8x32() (to Int8x32) + +// AsInt16x16 converts from Float64x4 to Int16x16. +func (from Float64x4) 
AsInt16x16() (to Int16x16) + +// AsInt32x8 converts from Float64x4 to Int32x8. +func (from Float64x4) AsInt32x8() (to Int32x8) + +// AsInt64x4 converts from Float64x4 to Int64x4. +func (from Float64x4) AsInt64x4() (to Int64x4) + +// AsUint8x32 converts from Float64x4 to Uint8x32. +func (from Float64x4) AsUint8x32() (to Uint8x32) + +// AsUint16x16 converts from Float64x4 to Uint16x16. +func (from Float64x4) AsUint16x16() (to Uint16x16) + +// AsUint32x8 converts from Float64x4 to Uint32x8. +func (from Float64x4) AsUint32x8() (to Uint32x8) + +// AsUint64x4 converts from Float64x4 to Uint64x4. +func (from Float64x4) AsUint64x4() (to Uint64x4) + +// AsFloat32x16 converts from Float64x8 to Float32x16. +func (from Float64x8) AsFloat32x16() (to Float32x16) + +// AsInt8x64 converts from Float64x8 to Int8x64. +func (from Float64x8) AsInt8x64() (to Int8x64) + +// AsInt16x32 converts from Float64x8 to Int16x32. +func (from Float64x8) AsInt16x32() (to Int16x32) + +// AsInt32x16 converts from Float64x8 to Int32x16. +func (from Float64x8) AsInt32x16() (to Int32x16) + +// AsInt64x8 converts from Float64x8 to Int64x8. +func (from Float64x8) AsInt64x8() (to Int64x8) + +// AsUint8x64 converts from Float64x8 to Uint8x64. +func (from Float64x8) AsUint8x64() (to Uint8x64) + +// AsUint16x32 converts from Float64x8 to Uint16x32. +func (from Float64x8) AsUint16x32() (to Uint16x32) + +// AsUint32x16 converts from Float64x8 to Uint32x16. +func (from Float64x8) AsUint32x16() (to Uint32x16) + +// AsUint64x8 converts from Float64x8 to Uint64x8. +func (from Float64x8) AsUint64x8() (to Uint64x8) + +// AsFloat32x4 converts from Int8x16 to Float32x4. +func (from Int8x16) AsFloat32x4() (to Float32x4) + +// AsFloat64x2 converts from Int8x16 to Float64x2. +func (from Int8x16) AsFloat64x2() (to Float64x2) + +// AsInt16x8 converts from Int8x16 to Int16x8. +func (from Int8x16) AsInt16x8() (to Int16x8) + +// AsInt32x4 converts from Int8x16 to Int32x4. +func (from Int8x16) AsInt32x4() (to Int32x4) + +// AsInt64x2 converts from Int8x16 to Int64x2. +func (from 
Int8x16) AsInt64x2() (to Int64x2) + +// AsUint8x16 converts from Int8x16 to Uint8x16. +func (from Int8x16) AsUint8x16() (to Uint8x16) + +// AsUint16x8 converts from Int8x16 to Uint16x8. +func (from Int8x16) AsUint16x8() (to Uint16x8) + +// AsUint32x4 converts from Int8x16 to Uint32x4. +func (from Int8x16) AsUint32x4() (to Uint32x4) + +// AsUint64x2 converts from Int8x16 to Uint64x2. +func (from Int8x16) AsUint64x2() (to Uint64x2) + +// AsFloat32x8 converts from Int8x32 to Float32x8. +func (from Int8x32) AsFloat32x8() (to Float32x8) + +// AsFloat64x4 converts from Int8x32 to Float64x4. +func (from Int8x32) AsFloat64x4() (to Float64x4) + +// AsInt16x16 converts from Int8x32 to Int16x16. +func (from Int8x32) AsInt16x16() (to Int16x16) + +// AsInt32x8 converts from Int8x32 to Int32x8. +func (from Int8x32) AsInt32x8() (to Int32x8) + +// AsInt64x4 converts from Int8x32 to Int64x4. +func (from Int8x32) AsInt64x4() (to Int64x4) + +// AsUint8x32 converts from Int8x32 to Uint8x32. +func (from Int8x32) AsUint8x32() (to Uint8x32) + +// AsUint16x16 converts from Int8x32 to Uint16x16. +func (from Int8x32) AsUint16x16() (to Uint16x16) + +// AsUint32x8 converts from Int8x32 to Uint32x8. +func (from Int8x32) AsUint32x8() (to Uint32x8) + +// AsUint64x4 converts from Int8x32 to Uint64x4. +func (from Int8x32) AsUint64x4() (to Uint64x4) + +// AsFloat32x16 converts from Int8x64 to Float32x16. +func (from Int8x64) AsFloat32x16() (to Float32x16) + +// AsFloat64x8 converts from Int8x64 to Float64x8. +func (from Int8x64) AsFloat64x8() (to Float64x8) + +// AsInt16x32 converts from Int8x64 to Int16x32. +func (from Int8x64) AsInt16x32() (to Int16x32) + +// AsInt32x16 converts from Int8x64 to Int32x16. +func (from Int8x64) AsInt32x16() (to Int32x16) + +// AsInt64x8 converts from Int8x64 to Int64x8. +func (from Int8x64) AsInt64x8() (to Int64x8) + +// AsUint8x64 converts from Int8x64 to Uint8x64. +func (from Int8x64) AsUint8x64() (to Uint8x64) + +// AsUint16x32 converts from Int8x64 to Uint16x32. +func (from Int8x64) AsUint16x32() (to Uint16x32) + +// 
Uint32x16 converts from Int8x64 to Uint32x16 +func (from Int8x64) AsUint32x16() (to Uint32x16) + +// Uint64x8 converts from Int8x64 to Uint64x8 +func (from Int8x64) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Int16x8 to Float32x4 +func (from Int16x8) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Int16x8 to Float64x2 +func (from Int16x8) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Int16x8 to Int8x16 +func (from Int16x8) AsInt8x16() (to Int8x16) + +// Int32x4 converts from Int16x8 to Int32x4 +func (from Int16x8) AsInt32x4() (to Int32x4) + +// Int64x2 converts from Int16x8 to Int64x2 +func (from Int16x8) AsInt64x2() (to Int64x2) + +// Uint8x16 converts from Int16x8 to Uint8x16 +func (from Int16x8) AsUint8x16() (to Uint8x16) + +// Uint16x8 converts from Int16x8 to Uint16x8 +func (from Int16x8) AsUint16x8() (to Uint16x8) + +// Uint32x4 converts from Int16x8 to Uint32x4 +func (from Int16x8) AsUint32x4() (to Uint32x4) + +// Uint64x2 converts from Int16x8 to Uint64x2 +func (from Int16x8) AsUint64x2() (to Uint64x2) + +// Float32x8 converts from Int16x16 to Float32x8 +func (from Int16x16) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Int16x16 to Float64x4 +func (from Int16x16) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Int16x16 to Int8x32 +func (from Int16x16) AsInt8x32() (to Int8x32) + +// Int32x8 converts from Int16x16 to Int32x8 +func (from Int16x16) AsInt32x8() (to Int32x8) + +// Int64x4 converts from Int16x16 to Int64x4 +func (from Int16x16) AsInt64x4() (to Int64x4) + +// Uint8x32 converts from Int16x16 to Uint8x32 +func (from Int16x16) AsUint8x32() (to Uint8x32) + +// Uint16x16 converts from Int16x16 to Uint16x16 +func (from Int16x16) AsUint16x16() (to Uint16x16) + +// Uint32x8 converts from Int16x16 to Uint32x8 +func (from Int16x16) AsUint32x8() (to Uint32x8) + +// Uint64x4 converts from Int16x16 to Uint64x4 +func (from Int16x16) AsUint64x4() (to Uint64x4) + +// Float32x16 converts from Int16x32 to 
Float32x16 +func (from Int16x32) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Int16x32 to Float64x8 +func (from Int16x32) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Int16x32 to Int8x64 +func (from Int16x32) AsInt8x64() (to Int8x64) + +// Int32x16 converts from Int16x32 to Int32x16 +func (from Int16x32) AsInt32x16() (to Int32x16) + +// Int64x8 converts from Int16x32 to Int64x8 +func (from Int16x32) AsInt64x8() (to Int64x8) + +// Uint8x64 converts from Int16x32 to Uint8x64 +func (from Int16x32) AsUint8x64() (to Uint8x64) + +// Uint16x32 converts from Int16x32 to Uint16x32 +func (from Int16x32) AsUint16x32() (to Uint16x32) + +// Uint32x16 converts from Int16x32 to Uint32x16 +func (from Int16x32) AsUint32x16() (to Uint32x16) + +// Uint64x8 converts from Int16x32 to Uint64x8 +func (from Int16x32) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Int32x4 to Float32x4 +func (from Int32x4) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Int32x4 to Float64x2 +func (from Int32x4) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Int32x4 to Int8x16 +func (from Int32x4) AsInt8x16() (to Int8x16) + +// Int16x8 converts from Int32x4 to Int16x8 +func (from Int32x4) AsInt16x8() (to Int16x8) + +// Int64x2 converts from Int32x4 to Int64x2 +func (from Int32x4) AsInt64x2() (to Int64x2) + +// Uint8x16 converts from Int32x4 to Uint8x16 +func (from Int32x4) AsUint8x16() (to Uint8x16) + +// Uint16x8 converts from Int32x4 to Uint16x8 +func (from Int32x4) AsUint16x8() (to Uint16x8) + +// Uint32x4 converts from Int32x4 to Uint32x4 +func (from Int32x4) AsUint32x4() (to Uint32x4) + +// Uint64x2 converts from Int32x4 to Uint64x2 +func (from Int32x4) AsUint64x2() (to Uint64x2) + +// Float32x8 converts from Int32x8 to Float32x8 +func (from Int32x8) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Int32x8 to Float64x4 +func (from Int32x8) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Int32x8 to Int8x32 +func (from Int32x8) 
AsInt8x32() (to Int8x32) + +// Int16x16 converts from Int32x8 to Int16x16 +func (from Int32x8) AsInt16x16() (to Int16x16) + +// Int64x4 converts from Int32x8 to Int64x4 +func (from Int32x8) AsInt64x4() (to Int64x4) + +// Uint8x32 converts from Int32x8 to Uint8x32 +func (from Int32x8) AsUint8x32() (to Uint8x32) + +// Uint16x16 converts from Int32x8 to Uint16x16 +func (from Int32x8) AsUint16x16() (to Uint16x16) + +// Uint32x8 converts from Int32x8 to Uint32x8 +func (from Int32x8) AsUint32x8() (to Uint32x8) + +// Uint64x4 converts from Int32x8 to Uint64x4 +func (from Int32x8) AsUint64x4() (to Uint64x4) + +// Float32x16 converts from Int32x16 to Float32x16 +func (from Int32x16) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Int32x16 to Float64x8 +func (from Int32x16) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Int32x16 to Int8x64 +func (from Int32x16) AsInt8x64() (to Int8x64) + +// Int16x32 converts from Int32x16 to Int16x32 +func (from Int32x16) AsInt16x32() (to Int16x32) + +// Int64x8 converts from Int32x16 to Int64x8 +func (from Int32x16) AsInt64x8() (to Int64x8) + +// Uint8x64 converts from Int32x16 to Uint8x64 +func (from Int32x16) AsUint8x64() (to Uint8x64) + +// Uint16x32 converts from Int32x16 to Uint16x32 +func (from Int32x16) AsUint16x32() (to Uint16x32) + +// Uint32x16 converts from Int32x16 to Uint32x16 +func (from Int32x16) AsUint32x16() (to Uint32x16) + +// Uint64x8 converts from Int32x16 to Uint64x8 +func (from Int32x16) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Int64x2 to Float32x4 +func (from Int64x2) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Int64x2 to Float64x2 +func (from Int64x2) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Int64x2 to Int8x16 +func (from Int64x2) AsInt8x16() (to Int8x16) + +// Int16x8 converts from Int64x2 to Int16x8 +func (from Int64x2) AsInt16x8() (to Int16x8) + +// Int32x4 converts from Int64x2 to Int32x4 +func (from Int64x2) AsInt32x4() (to Int32x4) + +// 
Uint8x16 converts from Int64x2 to Uint8x16 +func (from Int64x2) AsUint8x16() (to Uint8x16) + +// Uint16x8 converts from Int64x2 to Uint16x8 +func (from Int64x2) AsUint16x8() (to Uint16x8) + +// Uint32x4 converts from Int64x2 to Uint32x4 +func (from Int64x2) AsUint32x4() (to Uint32x4) + +// Uint64x2 converts from Int64x2 to Uint64x2 +func (from Int64x2) AsUint64x2() (to Uint64x2) + +// Float32x8 converts from Int64x4 to Float32x8 +func (from Int64x4) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Int64x4 to Float64x4 +func (from Int64x4) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Int64x4 to Int8x32 +func (from Int64x4) AsInt8x32() (to Int8x32) + +// Int16x16 converts from Int64x4 to Int16x16 +func (from Int64x4) AsInt16x16() (to Int16x16) + +// Int32x8 converts from Int64x4 to Int32x8 +func (from Int64x4) AsInt32x8() (to Int32x8) + +// Uint8x32 converts from Int64x4 to Uint8x32 +func (from Int64x4) AsUint8x32() (to Uint8x32) + +// Uint16x16 converts from Int64x4 to Uint16x16 +func (from Int64x4) AsUint16x16() (to Uint16x16) + +// Uint32x8 converts from Int64x4 to Uint32x8 +func (from Int64x4) AsUint32x8() (to Uint32x8) + +// Uint64x4 converts from Int64x4 to Uint64x4 +func (from Int64x4) AsUint64x4() (to Uint64x4) + +// Float32x16 converts from Int64x8 to Float32x16 +func (from Int64x8) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Int64x8 to Float64x8 +func (from Int64x8) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Int64x8 to Int8x64 +func (from Int64x8) AsInt8x64() (to Int8x64) + +// Int16x32 converts from Int64x8 to Int16x32 +func (from Int64x8) AsInt16x32() (to Int16x32) + +// Int32x16 converts from Int64x8 to Int32x16 +func (from Int64x8) AsInt32x16() (to Int32x16) + +// Uint8x64 converts from Int64x8 to Uint8x64 +func (from Int64x8) AsUint8x64() (to Uint8x64) + +// Uint16x32 converts from Int64x8 to Uint16x32 +func (from Int64x8) AsUint16x32() (to Uint16x32) + +// Uint32x16 converts from Int64x8 to 
Uint32x16 +func (from Int64x8) AsUint32x16() (to Uint32x16) + +// Uint64x8 converts from Int64x8 to Uint64x8 +func (from Int64x8) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Uint8x16 to Float32x4 +func (from Uint8x16) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Uint8x16 to Float64x2 +func (from Uint8x16) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Uint8x16 to Int8x16 +func (from Uint8x16) AsInt8x16() (to Int8x16) + +// Int16x8 converts from Uint8x16 to Int16x8 +func (from Uint8x16) AsInt16x8() (to Int16x8) + +// Int32x4 converts from Uint8x16 to Int32x4 +func (from Uint8x16) AsInt32x4() (to Int32x4) + +// Int64x2 converts from Uint8x16 to Int64x2 +func (from Uint8x16) AsInt64x2() (to Int64x2) + +// Uint16x8 converts from Uint8x16 to Uint16x8 +func (from Uint8x16) AsUint16x8() (to Uint16x8) + +// Uint32x4 converts from Uint8x16 to Uint32x4 +func (from Uint8x16) AsUint32x4() (to Uint32x4) + +// Uint64x2 converts from Uint8x16 to Uint64x2 +func (from Uint8x16) AsUint64x2() (to Uint64x2) + +// Float32x8 converts from Uint8x32 to Float32x8 +func (from Uint8x32) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Uint8x32 to Float64x4 +func (from Uint8x32) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Uint8x32 to Int8x32 +func (from Uint8x32) AsInt8x32() (to Int8x32) + +// Int16x16 converts from Uint8x32 to Int16x16 +func (from Uint8x32) AsInt16x16() (to Int16x16) + +// Int32x8 converts from Uint8x32 to Int32x8 +func (from Uint8x32) AsInt32x8() (to Int32x8) + +// Int64x4 converts from Uint8x32 to Int64x4 +func (from Uint8x32) AsInt64x4() (to Int64x4) + +// Uint16x16 converts from Uint8x32 to Uint16x16 +func (from Uint8x32) AsUint16x16() (to Uint16x16) + +// Uint32x8 converts from Uint8x32 to Uint32x8 +func (from Uint8x32) AsUint32x8() (to Uint32x8) + +// Uint64x4 converts from Uint8x32 to Uint64x4 +func (from Uint8x32) AsUint64x4() (to Uint64x4) + +// Float32x16 converts from Uint8x64 to Float32x16 +func (from 
Uint8x64) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Uint8x64 to Float64x8 +func (from Uint8x64) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Uint8x64 to Int8x64 +func (from Uint8x64) AsInt8x64() (to Int8x64) + +// Int16x32 converts from Uint8x64 to Int16x32 +func (from Uint8x64) AsInt16x32() (to Int16x32) + +// Int32x16 converts from Uint8x64 to Int32x16 +func (from Uint8x64) AsInt32x16() (to Int32x16) + +// Int64x8 converts from Uint8x64 to Int64x8 +func (from Uint8x64) AsInt64x8() (to Int64x8) + +// Uint16x32 converts from Uint8x64 to Uint16x32 +func (from Uint8x64) AsUint16x32() (to Uint16x32) + +// Uint32x16 converts from Uint8x64 to Uint32x16 +func (from Uint8x64) AsUint32x16() (to Uint32x16) + +// Uint64x8 converts from Uint8x64 to Uint64x8 +func (from Uint8x64) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Uint16x8 to Float32x4 +func (from Uint16x8) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Uint16x8 to Float64x2 +func (from Uint16x8) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Uint16x8 to Int8x16 +func (from Uint16x8) AsInt8x16() (to Int8x16) + +// Int16x8 converts from Uint16x8 to Int16x8 +func (from Uint16x8) AsInt16x8() (to Int16x8) + +// Int32x4 converts from Uint16x8 to Int32x4 +func (from Uint16x8) AsInt32x4() (to Int32x4) + +// Int64x2 converts from Uint16x8 to Int64x2 +func (from Uint16x8) AsInt64x2() (to Int64x2) + +// Uint8x16 converts from Uint16x8 to Uint8x16 +func (from Uint16x8) AsUint8x16() (to Uint8x16) + +// Uint32x4 converts from Uint16x8 to Uint32x4 +func (from Uint16x8) AsUint32x4() (to Uint32x4) + +// Uint64x2 converts from Uint16x8 to Uint64x2 +func (from Uint16x8) AsUint64x2() (to Uint64x2) + +// Float32x8 converts from Uint16x16 to Float32x8 +func (from Uint16x16) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Uint16x16 to Float64x4 +func (from Uint16x16) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Uint16x16 to Int8x32 +func (from 
Uint16x16) AsInt8x32() (to Int8x32) + +// Int16x16 converts from Uint16x16 to Int16x16 +func (from Uint16x16) AsInt16x16() (to Int16x16) + +// Int32x8 converts from Uint16x16 to Int32x8 +func (from Uint16x16) AsInt32x8() (to Int32x8) + +// Int64x4 converts from Uint16x16 to Int64x4 +func (from Uint16x16) AsInt64x4() (to Int64x4) + +// Uint8x32 converts from Uint16x16 to Uint8x32 +func (from Uint16x16) AsUint8x32() (to Uint8x32) + +// Uint32x8 converts from Uint16x16 to Uint32x8 +func (from Uint16x16) AsUint32x8() (to Uint32x8) + +// Uint64x4 converts from Uint16x16 to Uint64x4 +func (from Uint16x16) AsUint64x4() (to Uint64x4) + +// Float32x16 converts from Uint16x32 to Float32x16 +func (from Uint16x32) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Uint16x32 to Float64x8 +func (from Uint16x32) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Uint16x32 to Int8x64 +func (from Uint16x32) AsInt8x64() (to Int8x64) + +// Int16x32 converts from Uint16x32 to Int16x32 +func (from Uint16x32) AsInt16x32() (to Int16x32) + +// Int32x16 converts from Uint16x32 to Int32x16 +func (from Uint16x32) AsInt32x16() (to Int32x16) + +// Int64x8 converts from Uint16x32 to Int64x8 +func (from Uint16x32) AsInt64x8() (to Int64x8) + +// Uint8x64 converts from Uint16x32 to Uint8x64 +func (from Uint16x32) AsUint8x64() (to Uint8x64) + +// Uint32x16 converts from Uint16x32 to Uint32x16 +func (from Uint16x32) AsUint32x16() (to Uint32x16) + +// Uint64x8 converts from Uint16x32 to Uint64x8 +func (from Uint16x32) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Uint32x4 to Float32x4 +func (from Uint32x4) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Uint32x4 to Float64x2 +func (from Uint32x4) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Uint32x4 to Int8x16 +func (from Uint32x4) AsInt8x16() (to Int8x16) + +// Int16x8 converts from Uint32x4 to Int16x8 +func (from Uint32x4) AsInt16x8() (to Int16x8) + +// Int32x4 converts from Uint32x4 to Int32x4 
+func (from Uint32x4) AsInt32x4() (to Int32x4) + +// Int64x2 converts from Uint32x4 to Int64x2 +func (from Uint32x4) AsInt64x2() (to Int64x2) + +// Uint8x16 converts from Uint32x4 to Uint8x16 +func (from Uint32x4) AsUint8x16() (to Uint8x16) + +// Uint16x8 converts from Uint32x4 to Uint16x8 +func (from Uint32x4) AsUint16x8() (to Uint16x8) + +// Uint64x2 converts from Uint32x4 to Uint64x2 +func (from Uint32x4) AsUint64x2() (to Uint64x2) + +// Float32x8 converts from Uint32x8 to Float32x8 +func (from Uint32x8) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Uint32x8 to Float64x4 +func (from Uint32x8) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Uint32x8 to Int8x32 +func (from Uint32x8) AsInt8x32() (to Int8x32) + +// Int16x16 converts from Uint32x8 to Int16x16 +func (from Uint32x8) AsInt16x16() (to Int16x16) + +// Int32x8 converts from Uint32x8 to Int32x8 +func (from Uint32x8) AsInt32x8() (to Int32x8) + +// Int64x4 converts from Uint32x8 to Int64x4 +func (from Uint32x8) AsInt64x4() (to Int64x4) + +// Uint8x32 converts from Uint32x8 to Uint8x32 +func (from Uint32x8) AsUint8x32() (to Uint8x32) + +// Uint16x16 converts from Uint32x8 to Uint16x16 +func (from Uint32x8) AsUint16x16() (to Uint16x16) + +// Uint64x4 converts from Uint32x8 to Uint64x4 +func (from Uint32x8) AsUint64x4() (to Uint64x4) + +// Float32x16 converts from Uint32x16 to Float32x16 +func (from Uint32x16) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Uint32x16 to Float64x8 +func (from Uint32x16) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Uint32x16 to Int8x64 +func (from Uint32x16) AsInt8x64() (to Int8x64) + +// Int16x32 converts from Uint32x16 to Int16x32 +func (from Uint32x16) AsInt16x32() (to Int16x32) + +// Int32x16 converts from Uint32x16 to Int32x16 +func (from Uint32x16) AsInt32x16() (to Int32x16) + +// Int64x8 converts from Uint32x16 to Int64x8 +func (from Uint32x16) AsInt64x8() (to Int64x8) + +// Uint8x64 converts from Uint32x16 to Uint8x64 +func 
(from Uint32x16) AsUint8x64() (to Uint8x64) + +// Uint16x32 converts from Uint32x16 to Uint16x32 +func (from Uint32x16) AsUint16x32() (to Uint16x32) + +// Uint64x8 converts from Uint32x16 to Uint64x8 +func (from Uint32x16) AsUint64x8() (to Uint64x8) + +// Float32x4 converts from Uint64x2 to Float32x4 +func (from Uint64x2) AsFloat32x4() (to Float32x4) + +// Float64x2 converts from Uint64x2 to Float64x2 +func (from Uint64x2) AsFloat64x2() (to Float64x2) + +// Int8x16 converts from Uint64x2 to Int8x16 +func (from Uint64x2) AsInt8x16() (to Int8x16) + +// Int16x8 converts from Uint64x2 to Int16x8 +func (from Uint64x2) AsInt16x8() (to Int16x8) + +// Int32x4 converts from Uint64x2 to Int32x4 +func (from Uint64x2) AsInt32x4() (to Int32x4) + +// Int64x2 converts from Uint64x2 to Int64x2 +func (from Uint64x2) AsInt64x2() (to Int64x2) + +// Uint8x16 converts from Uint64x2 to Uint8x16 +func (from Uint64x2) AsUint8x16() (to Uint8x16) + +// Uint16x8 converts from Uint64x2 to Uint16x8 +func (from Uint64x2) AsUint16x8() (to Uint16x8) + +// Uint32x4 converts from Uint64x2 to Uint32x4 +func (from Uint64x2) AsUint32x4() (to Uint32x4) + +// Float32x8 converts from Uint64x4 to Float32x8 +func (from Uint64x4) AsFloat32x8() (to Float32x8) + +// Float64x4 converts from Uint64x4 to Float64x4 +func (from Uint64x4) AsFloat64x4() (to Float64x4) + +// Int8x32 converts from Uint64x4 to Int8x32 +func (from Uint64x4) AsInt8x32() (to Int8x32) + +// Int16x16 converts from Uint64x4 to Int16x16 +func (from Uint64x4) AsInt16x16() (to Int16x16) + +// Int32x8 converts from Uint64x4 to Int32x8 +func (from Uint64x4) AsInt32x8() (to Int32x8) + +// Int64x4 converts from Uint64x4 to Int64x4 +func (from Uint64x4) AsInt64x4() (to Int64x4) + +// Uint8x32 converts from Uint64x4 to Uint8x32 +func (from Uint64x4) AsUint8x32() (to Uint8x32) + +// Uint16x16 converts from Uint64x4 to Uint16x16 +func (from Uint64x4) AsUint16x16() (to Uint16x16) + +// Uint32x8 converts from Uint64x4 to Uint32x8 +func (from Uint64x4) 
AsUint32x8() (to Uint32x8) + +// Float32x16 converts from Uint64x8 to Float32x16 +func (from Uint64x8) AsFloat32x16() (to Float32x16) + +// Float64x8 converts from Uint64x8 to Float64x8 +func (from Uint64x8) AsFloat64x8() (to Float64x8) + +// Int8x64 converts from Uint64x8 to Int8x64 +func (from Uint64x8) AsInt8x64() (to Int8x64) + +// Int16x32 converts from Uint64x8 to Int16x32 +func (from Uint64x8) AsInt16x32() (to Int16x32) + +// Int32x16 converts from Uint64x8 to Int32x16 +func (from Uint64x8) AsInt32x16() (to Int32x16) + +// Int64x8 converts from Uint64x8 to Int64x8 +func (from Uint64x8) AsInt64x8() (to Int64x8) + +// Uint8x64 converts from Uint64x8 to Uint8x64 +func (from Uint64x8) AsUint8x64() (to Uint8x64) + +// Uint16x32 converts from Uint64x8 to Uint16x32 +func (from Uint64x8) AsUint16x32() (to Uint16x32) + +// Uint32x16 converts from Uint64x8 to Uint32x16 +func (from Uint64x8) AsUint32x16() (to Uint32x16) + +// AsInt8x16 converts from Mask8x16 to Int8x16 +func (from Mask8x16) AsInt8x16() (to Int8x16) + +// asMask converts from Int8x16 to Mask8x16 +func (from Int8x16) asMask() (to Mask8x16) + +func (x Mask8x16) And(y Mask8x16) Mask8x16 + +func (x Mask8x16) Or(y Mask8x16) Mask8x16 + +// AsInt8x32 converts from Mask8x32 to Int8x32 +func (from Mask8x32) AsInt8x32() (to Int8x32) + +// asMask converts from Int8x32 to Mask8x32 +func (from Int8x32) asMask() (to Mask8x32) + +func (x Mask8x32) And(y Mask8x32) Mask8x32 + +func (x Mask8x32) Or(y Mask8x32) Mask8x32 + +// AsInt8x64 converts from Mask8x64 to Int8x64 +func (from Mask8x64) AsInt8x64() (to Int8x64) + +// asMask converts from Int8x64 to Mask8x64 +func (from Int8x64) asMask() (to Mask8x64) + +func (x Mask8x64) And(y Mask8x64) Mask8x64 + +func (x Mask8x64) Or(y Mask8x64) Mask8x64 + +// AsInt16x8 converts from Mask16x8 to Int16x8 +func (from Mask16x8) AsInt16x8() (to Int16x8) + +// asMask converts from Int16x8 to Mask16x8 +func (from Int16x8) asMask() (to Mask16x8) + +func (x Mask16x8) And(y Mask16x8) 
Mask16x8 + +func (x Mask16x8) Or(y Mask16x8) Mask16x8 + +// AsInt16x16 converts from Mask16x16 to Int16x16 +func (from Mask16x16) AsInt16x16() (to Int16x16) + +// asMask converts from Int16x16 to Mask16x16 +func (from Int16x16) asMask() (to Mask16x16) + +func (x Mask16x16) And(y Mask16x16) Mask16x16 + +func (x Mask16x16) Or(y Mask16x16) Mask16x16 + +// AsInt16x32 converts from Mask16x32 to Int16x32 +func (from Mask16x32) AsInt16x32() (to Int16x32) + +// asMask converts from Int16x32 to Mask16x32 +func (from Int16x32) asMask() (to Mask16x32) + +func (x Mask16x32) And(y Mask16x32) Mask16x32 + +func (x Mask16x32) Or(y Mask16x32) Mask16x32 + +// AsInt32x4 converts from Mask32x4 to Int32x4 +func (from Mask32x4) AsInt32x4() (to Int32x4) + +// asMask converts from Int32x4 to Mask32x4 +func (from Int32x4) asMask() (to Mask32x4) + +func (x Mask32x4) And(y Mask32x4) Mask32x4 + +func (x Mask32x4) Or(y Mask32x4) Mask32x4 + +// AsInt32x8 converts from Mask32x8 to Int32x8 +func (from Mask32x8) AsInt32x8() (to Int32x8) + +// asMask converts from Int32x8 to Mask32x8 +func (from Int32x8) asMask() (to Mask32x8) + +func (x Mask32x8) And(y Mask32x8) Mask32x8 + +func (x Mask32x8) Or(y Mask32x8) Mask32x8 + +// AsInt32x16 converts from Mask32x16 to Int32x16 +func (from Mask32x16) AsInt32x16() (to Int32x16) + +// asMask converts from Int32x16 to Mask32x16 +func (from Int32x16) asMask() (to Mask32x16) + +func (x Mask32x16) And(y Mask32x16) Mask32x16 + +func (x Mask32x16) Or(y Mask32x16) Mask32x16 + +// AsInt64x2 converts from Mask64x2 to Int64x2 +func (from Mask64x2) AsInt64x2() (to Int64x2) + +// asMask converts from Int64x2 to Mask64x2 +func (from Int64x2) asMask() (to Mask64x2) + +func (x Mask64x2) And(y Mask64x2) Mask64x2 + +func (x Mask64x2) Or(y Mask64x2) Mask64x2 + +// AsInt64x4 converts from Mask64x4 to Int64x4 +func (from Mask64x4) AsInt64x4() (to Int64x4) + +// asMask converts from Int64x4 to Mask64x4 +func (from Int64x4) asMask() (to Mask64x4) + +func (x Mask64x4) And(y 
Mask64x4) Mask64x4 + +func (x Mask64x4) Or(y Mask64x4) Mask64x4 + +// AsInt64x8 converts from Mask64x8 to Int64x8 +func (from Mask64x8) AsInt64x8() (to Int64x8) + +// asMask converts from Int64x8 to Mask64x8 +func (from Int64x8) asMask() (to Mask64x8) + +func (x Mask64x8) And(y Mask64x8) Mask64x8 + +func (x Mask64x8) Or(y Mask64x8) Mask64x8 |
