diff options
Diffstat (limited to 'src/simd/internal/simd_test/simd_test.go')
| -rw-r--r-- | src/simd/internal/simd_test/simd_test.go | 1248 |
1 file changed, 1248 insertions, 0 deletions
diff --git a/src/simd/internal/simd_test/simd_test.go b/src/simd/internal/simd_test/simd_test.go new file mode 100644 index 0000000000..f51e3dc15f --- /dev/null +++ b/src/simd/internal/simd_test/simd_test.go @@ -0,0 +1,1248 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build goexperiment.simd && amd64 + +package simd_test + +import ( + "reflect" + "simd" + "slices" + "testing" +) + +var sink any + +func TestType(t *testing.T) { + // Testing: + // - Defined as another struct's field is ok + // - Pointer is ok + // - Type defition is ok + // - Type alias is ok + // - Type conversion is ok + // - Conversion to interface is ok + type alias = simd.Int32x4 + type maskT simd.Mask32x4 + type myStruct struct { + x alias + y *simd.Int32x4 + z maskT + } + vals := [4]int32{1, 2, 3, 4} + v := myStruct{x: simd.LoadInt32x4(&vals)} + // masking elements 1 and 2. + want := []int32{2, 4, 0, 0} + y := simd.LoadInt32x4(&vals) + v.y = &y + sink = y + + if !simd.X86.AVX512GFNI() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + v.z = maskT(simd.Mask32x4FromBits(0b0011)) + *v.y = v.y.Add(v.x).Masked(simd.Mask32x4(v.z)) + + got := [4]int32{} + v.y.Store(&got) + for i := range 4 { + if want[i] != got[i] { + t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) + } + } +} + +func TestUncomparable(t *testing.T) { + // Test that simd vectors are not comparable + var x, y any = simd.LoadUint32x4(&[4]uint32{1, 2, 3, 4}), simd.LoadUint32x4(&[4]uint32{5, 6, 7, 8}) + shouldPanic := func(fn func()) { + defer func() { + if recover() == nil { + panic("did not panic") + } + }() + fn() + } + shouldPanic(func() { _ = x == y }) +} + +func TestFuncValue(t *testing.T) { + // Test that simd intrinsic can be used as a function value. 
+ xv := [4]int32{1, 2, 3, 4} + yv := [4]int32{5, 6, 7, 8} + want := []int32{6, 8, 10, 12} + x := simd.LoadInt32x4(&xv) + y := simd.LoadInt32x4(&yv) + fn := simd.Int32x4.Add + sink = fn + x = fn(x, y) + got := [4]int32{} + x.Store(&got) + for i := range 4 { + if want[i] != got[i] { + t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) + } + } +} + +func TestReflectMethod(t *testing.T) { + // Test that simd intrinsic can be accessed via reflection. + // NOTE: we don't yet support reflect method.Call. + xv := [4]int32{1, 2, 3, 4} + yv := [4]int32{5, 6, 7, 8} + want := []int32{6, 8, 10, 12} + x := simd.LoadInt32x4(&xv) + y := simd.LoadInt32x4(&yv) + m, ok := reflect.TypeOf(x).MethodByName("Add") + if !ok { + t.Fatal("Add method not found") + } + fn := m.Func.Interface().(func(x, y simd.Int32x4) simd.Int32x4) + x = fn(x, y) + got := [4]int32{} + x.Store(&got) + for i := range 4 { + if want[i] != got[i] { + t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) + } + } +} + +func TestVectorConversion(t *testing.T) { + if !simd.X86.AVX512GFNI() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + xv := [4]int32{1, 2, 3, 4} + x := simd.LoadInt32x4(&xv) + xPromoted := x.AsInt64x2() + xPromotedDemoted := xPromoted.AsInt32x4() + got := [4]int32{} + xPromotedDemoted.Store(&got) + for i := range 4 { + if xv[i] != got[i] { + t.Errorf("Result at %d incorrect: want %d, got %d", i, xv[i], got[i]) + } + } +} + +func TestMaskConversion(t *testing.T) { + if !simd.X86.AVX512GFNI() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + x := simd.LoadInt32x4Slice([]int32{5, 0, 7, 0}) + mask := simd.Int32x4{}.Sub(x).ToMask() + y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}).Add(x).Masked(mask) + want := [4]int32{6, 0, 10, 0} + got := make([]int32, 4) + y.StoreSlice(got) + for i := range 4 { + if want[i] != got[i] { + t.Errorf("Result at %d incorrect: want %d, got %d", i, want[i], got[i]) + 
} + } +} + +func TestPermute(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + x := []int64{1, 2, 3, 4, 5, 6, 7, 8} + indices := []uint64{7, 6, 5, 4, 3, 2, 1, 0} + want := []int64{8, 7, 6, 5, 4, 3, 2, 1} + got := make([]int64, 8) + simd.LoadInt64x8Slice(x).Permute(simd.LoadUint64x8Slice(indices)).StoreSlice(got) + for i := range 8 { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestPermuteOrZero(t *testing.T) { + x := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16} + indices := []int8{7, 6, 5, 4, 3, 2, 1, 0, -1, 8, -1, 9, -1, 10, -1, 11} + want := []uint8{8, 7, 6, 5, 4, 3, 2, 1, 0, 9, 0, 10, 0, 11, 0, 12} + got := make([]uint8, len(x)) + simd.LoadUint8x16Slice(x).PermuteOrZero(simd.LoadInt8x16Slice(indices)).StoreSlice(got) + for i := range 8 { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestConcatPermute(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + x := []int64{1, 2, 3, 4, 5, 6, 7, 8} + y := []int64{-1, -2, -3, -4, -5, -6, -7, -8} + indices := []uint64{7 + 8, 6, 5 + 8, 4, 3 + 8, 2, 1 + 8, 0} + want := []int64{-8, 7, -6, 5, -4, 3, -2, 1} + got := make([]int64, 8) + simd.LoadInt64x8Slice(x).ConcatPermute(simd.LoadInt64x8Slice(y), simd.LoadUint64x8Slice(indices)).StoreSlice(got) + for i := range 8 { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestCompress(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + v1234 := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) + v2400 := v1234.Compress(simd.Mask32x4FromBits(0b1010)) + got := make([]int32, 4) + v2400.StoreSlice(got) + want := []int32{2, 
4, 0, 0} + if !slices.Equal(got, want) { + t.Errorf("want and got differ, want=%v, got=%v", want, got) + } +} + +func TestExpand(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + v3400 := simd.LoadInt32x4Slice([]int32{3, 4, 0, 0}) + v2400 := v3400.Expand(simd.Mask32x4FromBits(0b1010)) + got := make([]int32, 4) + v2400.StoreSlice(got) + want := []int32{0, 3, 0, 4} + if !slices.Equal(got, want) { + t.Errorf("want and got differ, want=%v, got=%v", want, got) + } +} + +var testShiftAllVal uint64 = 3 + +func TestShiftAll(t *testing.T) { + got := make([]int32, 4) + simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(2).StoreSlice(got) + for _, v := range got { + if v != 0b1100 { + t.Errorf("expect 0b1100, got %b", v) + } + } + simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).ShiftAllLeft(testShiftAllVal).StoreSlice(got) + for _, v := range got { + if v != 0b11000 { + t.Errorf("expect 0b11000, got %b", v) + } + } +} + +func TestSlicesInt8(t *testing.T) { + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x32Slice(a) + b := make([]int8, 32, 32) + v.StoreSlice(b) + checkSlices(t, a, b) +} + +func TestSlicesInt8SetElem(t *testing.T) { + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x16Slice(a) + + v = v.SetElem(3, 13) + a[3] = 13 + + b := make([]int8, 16, 16) + v.StoreSlice(b) + checkSlices(t, a, b) +} + +func TestSlicesInt8GetElem(t *testing.T) { + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x16Slice(a) + e := v.GetElem(2) + if e != a[2] { + t.Errorf("GetElem(2) = %d != a[2] = %d", e, a[2]) + } + +} + +func TestSlicesInt8TooShortLoad(t *testing.T) { + defer func() 
{ + if r := recover(); r != nil { + t.Logf("Saw EXPECTED panic %v", r) + } else { + t.Errorf("Did not see expected panic") + } + }() + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31} // TOO SHORT, should panic + v := simd.LoadInt8x32Slice(a) + b := make([]int8, 32, 32) + v.StoreSlice(b) + checkSlices(t, a, b) +} + +func TestSlicesInt8TooShortStore(t *testing.T) { + defer func() { + if r := recover(); r != nil { + t.Logf("Saw EXPECTED panic %v", r) + } else { + t.Errorf("Did not see expected panic") + } + }() + a := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32} + v := simd.LoadInt8x32Slice(a) + b := make([]int8, 31) // TOO SHORT, should panic + v.StoreSlice(b) + checkSlices(t, a, b) +} + +func TestSlicesFloat64(t *testing.T) { + a := []float64{1, 2, 3, 4, 5, 6, 7, 8} // too long, should be fine + v := simd.LoadFloat64x4Slice(a) + b := make([]float64, 4, 4) + v.StoreSlice(b) + for i := range b { + if a[i] != b[i] { + t.Errorf("a and b differ at index %d, a=%f, b=%f", i, a[i], b[i]) + } + } +} + +// TODO: try to reduce this test to be smaller. 
+func TestMergeLocals(t *testing.T) { + testMergeLocalswrapper(t, simd.Int64x4.Add) +} + +//go:noinline +func forceSpill() {} + +func testMergeLocalswrapper(t *testing.T, op func(simd.Int64x4, simd.Int64x4) simd.Int64x4) { + t.Helper() + s0 := []int64{0, 1, 2, 3} + s1 := []int64{-1, 0, -1, 0} + want := []int64{-1, 1, 1, 3} + v := simd.LoadInt64x4Slice(s0) + m := simd.LoadInt64x4Slice(s1) + forceSpill() + got := make([]int64, 4) + gotv := op(v, m) + gotv.StoreSlice(got) + for i := range len(want) { + if !(got[i] == want[i]) { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], got[i]) + } + } +} + +func TestBitMaskFromBits(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + results := [2]int64{} + want := [2]int64{0, 6} + m := simd.Mask64x2FromBits(0b10) + simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) + for i := range 2 { + if results[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) + } + } +} + +var maskForTestBitMaskFromBitsLoad = uint8(0b10) + +func TestBitMaskFromBitsLoad(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + results := [2]int64{} + want := [2]int64{0, 6} + m := simd.Mask64x2FromBits(maskForTestBitMaskFromBitsLoad) + simd.LoadInt64x2Slice([]int64{1, 2}).Add(simd.LoadInt64x2Slice([]int64{3, 4})).Masked(m).Store(&results) + for i := range 2 { + if results[i] != want[i] { + t.Errorf("Result at %d incorrect: want %v, got %v", i, want[i], results[i]) + } + } +} + +func TestBitMaskToBits(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + if v := simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits(); v != 0b101 { + t.Errorf("Want 0b101, got %b", v) + } +} + +var maskForTestBitMaskFromBitsStore uint8 + 
+func TestBitMaskToBitsStore(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + maskForTestBitMaskFromBitsStore = simd.LoadInt16x8Slice([]int16{1, 0, 1, 0, 0, 0, 0, 0}).ToMask().ToBits() + if maskForTestBitMaskFromBitsStore != 0b101 { + t.Errorf("Want 0b101, got %b", maskForTestBitMaskFromBitsStore) + } +} + +func TestMergeFloat(t *testing.T) { + k := make([]int64, 4, 4) + s := make([]float64, 4, 4) + + a := simd.LoadFloat64x4Slice([]float64{1, 2, 3, 4}) + b := simd.LoadFloat64x4Slice([]float64{4, 2, 3, 1}) + g := a.Greater(b) + g.AsInt64x4().StoreSlice(k) + c := a.Merge(b, g) + + c.StoreSlice(s) + + checkSlices[int64](t, k, []int64{0, 0, 0, -1}) + checkSlices[float64](t, s, []float64{4, 2, 3, 4}) +} + +func TestMergeFloat512(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + + k := make([]int64, 8, 8) + s := make([]float64, 8, 8) + + a := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) + b := simd.LoadFloat64x8Slice([]float64{8, 7, 6, 5, 4, 2, 3, 1}) + g := a.Greater(b) + g.AsInt64x8().StoreSlice(k) + c := a.Merge(b, g) + d := a.Masked(g) + + checkSlices[int64](t, k, []int64{0, 0, 0, 0, -1, -1, -1, -1}) + + c.StoreSlice(s) + checkSlices[float64](t, s, []float64{8, 7, 6, 5, 5, 6, 7, 8}) + + d.StoreSlice(s) + checkSlices[float64](t, s, []float64{0, 0, 0, 0, 5, 6, 7, 8}) +} + +var ro uint8 = 2 + +func TestRotateAllVariable(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + got := make([]int32, 4) + simd.LoadInt32x4Slice([]int32{0b11, 0b11, 0b11, 0b11}).RotateAllLeft(ro).StoreSlice(got) + for _, v := range got { + if v != 0b1100 { + t.Errorf("Want 0b1100, got %b", v) + } + } +} + +func TestBroadcastUint32x4(t *testing.T) { + s := make([]uint32, 4, 4) + simd.BroadcastUint32x4(123456789).StoreSlice(s) + checkSlices(t, s, 
[]uint32{123456789, 123456789, 123456789, 123456789}) +} + +func TestBroadcastFloat32x8(t *testing.T) { + s := make([]float32, 8, 8) + simd.BroadcastFloat32x8(123456789).StoreSlice(s) + checkSlices(t, s, []float32{123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789, 123456789}) +} + +func TestBroadcastFloat64x2(t *testing.T) { + s := make([]float64, 2, 2) + simd.BroadcastFloat64x2(123456789).StoreSlice(s) + checkSlices(t, s, []float64{123456789, 123456789}) +} + +func TestBroadcastUint64x2(t *testing.T) { + s := make([]uint64, 2, 2) + simd.BroadcastUint64x2(123456789).StoreSlice(s) + checkSlices(t, s, []uint64{123456789, 123456789}) +} + +func TestBroadcastUint16x8(t *testing.T) { + s := make([]uint16, 8, 8) + simd.BroadcastUint16x8(12345).StoreSlice(s) + checkSlices(t, s, []uint16{12345, 12345, 12345, 12345}) +} + +func TestBroadcastInt8x32(t *testing.T) { + s := make([]int8, 32, 32) + simd.BroadcastInt8x32(-123).StoreSlice(s) + checkSlices(t, s, []int8{-123, -123, -123, -123, -123, -123, -123, -123, + -123, -123, -123, -123, -123, -123, -123, -123, + -123, -123, -123, -123, -123, -123, -123, -123, + -123, -123, -123, -123, -123, -123, -123, -123, + }) +} + +func TestMaskOpt512(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + + k := make([]int64, 8, 8) + s := make([]float64, 8, 8) + + a := simd.LoadFloat64x8Slice([]float64{2, 0, 2, 0, 2, 0, 2, 0}) + b := simd.LoadFloat64x8Slice([]float64{1, 1, 1, 1, 1, 1, 1, 1}) + c := simd.LoadFloat64x8Slice([]float64{1, 2, 3, 4, 5, 6, 7, 8}) + d := simd.LoadFloat64x8Slice([]float64{2, 4, 6, 8, 10, 12, 14, 16}) + g := a.Greater(b) + e := c.Add(d).Masked(g) + e.StoreSlice(s) + g.AsInt64x8().StoreSlice(k) + checkSlices[int64](t, k, []int64{-1, 0, -1, 0, -1, 0, -1, 0}) + checkSlices[float64](t, s, []float64{3, 0, 9, 0, 15, 0, 21, 0}) +} + +// flattenedTranspose tranposes x and y, regarded as a pair of 2x2 +// matrices, but then 
flattens the rows in order, i.e +// x: ABCD ==> a: A1B2 +// y: 1234 b: C3D4 +func flattenedTranspose(x, y simd.Int32x4) (a, b simd.Int32x4) { + return x.InterleaveLo(y), x.InterleaveHi(y) +} + +func TestFlattenedTranspose(t *testing.T) { + r := make([]int32, 4, 4) + s := make([]int32, 4, 4) + + x := simd.LoadInt32x4Slice([]int32{0xA, 0xB, 0xC, 0xD}) + y := simd.LoadInt32x4Slice([]int32{1, 2, 3, 4}) + a, b := flattenedTranspose(x, y) + + a.StoreSlice(r) + b.StoreSlice(s) + + checkSlices[int32](t, r, []int32{0xA, 1, 0xB, 2}) + checkSlices[int32](t, s, []int32{0xC, 3, 0xD, 4}) + +} + +func TestClearAVXUpperBits(t *testing.T) { + // Test that ClearAVXUpperBits is safe even if there are SIMD values + // alive (although usually one should not do this). + if !simd.X86.AVX2() { + t.Skip("Test requires X86.AVX2, not available on this hardware") + return + } + + r := make([]int64, 4) + s := make([]int64, 4) + + x := simd.LoadInt64x4Slice([]int64{10, 20, 30, 40}) + y := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) + + x.Add(y).StoreSlice(r) + simd.ClearAVXUpperBits() + x.Sub(y).StoreSlice(s) + + checkSlices[int64](t, r, []int64{11, 22, 33, 44}) + checkSlices[int64](t, s, []int64{9, 18, 27, 36}) +} + +func TestLeadingZeros(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + + src := []uint64{0b1111, 0} + want := []uint64{60, 64} + got := make([]uint64, 2) + simd.LoadUint64x2Slice(src).LeadingZeros().StoreSlice(got) + for i := range 2 { + if want[i] != got[i] { + t.Errorf("Result incorrect at %d: want %d, got %d", i, want[i], got[i]) + } + } +} + +func TestIsZero(t *testing.T) { + v1 := simd.LoadUint64x2Slice([]uint64{0, 1}) + v2 := simd.LoadUint64x2Slice([]uint64{0, 0}) + if v1.IsZero() { + t.Errorf("Result incorrect, want false, got true") + } + if !v2.IsZero() { + t.Errorf("Result incorrect, want true, got false") + } + if !v1.And(v2).IsZero() { + t.Errorf("Result incorrect, want true, got false") + } + 
if v1.AndNot(v2).IsZero() { + t.Errorf("Result incorrect, want false, got true") + } + if !v2.And(v1).IsZero() { + t.Errorf("Result incorrect, want true, got false") + } + if !v2.AndNot(v1).IsZero() { + t.Errorf("Result incorrect, want true, got false") + } +} + +func TestSelect4FromPairConst(t *testing.T) { + x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) + y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) + + llll := x.SelectFromPair(0, 1, 2, 3, y) + hhhh := x.SelectFromPair(4, 5, 6, 7, y) + llhh := x.SelectFromPair(0, 1, 6, 7, y) + hhll := x.SelectFromPair(6, 7, 0, 1, y) + + lllh := x.SelectFromPair(0, 1, 2, 7, y) + llhl := x.SelectFromPair(0, 1, 7, 2, y) + lhll := x.SelectFromPair(0, 7, 1, 2, y) + hlll := x.SelectFromPair(7, 0, 1, 2, y) + + hhhl := x.SelectFromPair(4, 5, 6, 0, y) + hhlh := x.SelectFromPair(4, 5, 0, 6, y) + hlhh := x.SelectFromPair(4, 0, 5, 6, y) + lhhh := x.SelectFromPair(0, 4, 5, 6, y) + + lhlh := x.SelectFromPair(0, 4, 1, 5, y) + hlhl := x.SelectFromPair(4, 0, 5, 1, y) + lhhl := x.SelectFromPair(0, 4, 5, 1, y) + hllh := x.SelectFromPair(4, 0, 1, 5, y) + + r := make([]int32, 4, 4) + + foo := func(v simd.Int32x4, a, b, c, d int32) { + v.StoreSlice(r) + checkSlices[int32](t, r, []int32{a, b, c, d}) + } + + foo(llll, 0, 1, 2, 3) + foo(hhhh, 4, 5, 6, 7) + foo(llhh, 0, 1, 6, 7) + foo(hhll, 6, 7, 0, 1) + + foo(lllh, 0, 1, 2, 7) + foo(llhl, 0, 1, 7, 2) + foo(lhll, 0, 7, 1, 2) + foo(hlll, 7, 0, 1, 2) + + foo(hhhl, 4, 5, 6, 0) + foo(hhlh, 4, 5, 0, 6) + foo(hlhh, 4, 0, 5, 6) + foo(lhhh, 0, 4, 5, 6) + + foo(lhlh, 0, 4, 1, 5) + foo(hlhl, 4, 0, 5, 1) + foo(lhhl, 0, 4, 5, 1) + foo(hllh, 4, 0, 1, 5) +} + +//go:noinline +func selectFromPairInt32x4(x simd.Int32x4, a, b, c, d uint8, y simd.Int32x4) simd.Int32x4 { + return x.SelectFromPair(a, b, c, d, y) +} + +func TestSelect4FromPairVar(t *testing.T) { + x := simd.LoadInt32x4Slice([]int32{0, 1, 2, 3}) + y := simd.LoadInt32x4Slice([]int32{4, 5, 6, 7}) + + llll := selectFromPairInt32x4(x, 0, 1, 2, 3, y) + hhhh := 
selectFromPairInt32x4(x, 4, 5, 6, 7, y) + llhh := selectFromPairInt32x4(x, 0, 1, 6, 7, y) + hhll := selectFromPairInt32x4(x, 6, 7, 0, 1, y) + + lllh := selectFromPairInt32x4(x, 0, 1, 2, 7, y) + llhl := selectFromPairInt32x4(x, 0, 1, 7, 2, y) + lhll := selectFromPairInt32x4(x, 0, 7, 1, 2, y) + hlll := selectFromPairInt32x4(x, 7, 0, 1, 2, y) + + hhhl := selectFromPairInt32x4(x, 4, 5, 6, 0, y) + hhlh := selectFromPairInt32x4(x, 4, 5, 0, 6, y) + hlhh := selectFromPairInt32x4(x, 4, 0, 5, 6, y) + lhhh := selectFromPairInt32x4(x, 0, 4, 5, 6, y) + + lhlh := selectFromPairInt32x4(x, 0, 4, 1, 5, y) + hlhl := selectFromPairInt32x4(x, 4, 0, 5, 1, y) + lhhl := selectFromPairInt32x4(x, 0, 4, 5, 1, y) + hllh := selectFromPairInt32x4(x, 4, 0, 1, 5, y) + + r := make([]int32, 4, 4) + + foo := func(v simd.Int32x4, a, b, c, d int32) { + v.StoreSlice(r) + checkSlices[int32](t, r, []int32{a, b, c, d}) + } + + foo(llll, 0, 1, 2, 3) + foo(hhhh, 4, 5, 6, 7) + foo(llhh, 0, 1, 6, 7) + foo(hhll, 6, 7, 0, 1) + + foo(lllh, 0, 1, 2, 7) + foo(llhl, 0, 1, 7, 2) + foo(lhll, 0, 7, 1, 2) + foo(hlll, 7, 0, 1, 2) + + foo(hhhl, 4, 5, 6, 0) + foo(hhlh, 4, 5, 0, 6) + foo(hlhh, 4, 0, 5, 6) + foo(lhhh, 0, 4, 5, 6) + + foo(lhlh, 0, 4, 1, 5) + foo(hlhl, 4, 0, 5, 1) + foo(lhhl, 0, 4, 5, 1) + foo(hllh, 4, 0, 1, 5) +} + +func TestSelect4FromPairConstGrouped(t *testing.T) { + x := simd.LoadFloat32x8Slice([]float32{0, 1, 2, 3, 10, 11, 12, 13}) + y := simd.LoadFloat32x8Slice([]float32{4, 5, 6, 7, 14, 15, 16, 17}) + + llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) + hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) + llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y) + hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y) + + lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y) + llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y) + lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y) + hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y) + + hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y) + hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y) + hlhh := 
x.SelectFromPairGrouped(4, 0, 5, 6, y) + lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y) + + lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y) + hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y) + lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y) + hllh := x.SelectFromPairGrouped(4, 0, 1, 5, y) + + r := make([]float32, 8, 8) + + foo := func(v simd.Float32x8, a, b, c, d float32) { + v.StoreSlice(r) + checkSlices[float32](t, r, []float32{a, b, c, d, 10 + a, 10 + b, 10 + c, 10 + d}) + } + + foo(llll, 0, 1, 2, 3) + foo(hhhh, 4, 5, 6, 7) + foo(llhh, 0, 1, 6, 7) + foo(hhll, 6, 7, 0, 1) + + foo(lllh, 0, 1, 2, 7) + foo(llhl, 0, 1, 7, 2) + foo(lhll, 0, 7, 1, 2) + foo(hlll, 7, 0, 1, 2) + + foo(hhhl, 4, 5, 6, 0) + foo(hhlh, 4, 5, 0, 6) + foo(hlhh, 4, 0, 5, 6) + foo(lhhh, 0, 4, 5, 6) + + foo(lhlh, 0, 4, 1, 5) + foo(hlhl, 4, 0, 5, 1) + foo(lhhl, 0, 4, 5, 1) + foo(hllh, 4, 0, 1, 5) +} + +func TestSelectFromPairConstGroupedUint32x16(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + x := simd.LoadUint32x16Slice([]uint32{0, 1, 2, 3, 10, 11, 12, 13, 20, 21, 22, 23, 30, 31, 32, 33}) + y := simd.LoadUint32x16Slice([]uint32{4, 5, 6, 7, 14, 15, 16, 17, 24, 25, 26, 27, 34, 35, 36, 37}) + + llll := x.SelectFromPairGrouped(0, 1, 2, 3, y) + hhhh := x.SelectFromPairGrouped(4, 5, 6, 7, y) + llhh := x.SelectFromPairGrouped(0, 1, 6, 7, y) + hhll := x.SelectFromPairGrouped(6, 7, 0, 1, y) + + lllh := x.SelectFromPairGrouped(0, 1, 2, 7, y) + llhl := x.SelectFromPairGrouped(0, 1, 7, 2, y) + lhll := x.SelectFromPairGrouped(0, 7, 1, 2, y) + hlll := x.SelectFromPairGrouped(7, 0, 1, 2, y) + + hhhl := x.SelectFromPairGrouped(4, 5, 6, 0, y) + hhlh := x.SelectFromPairGrouped(4, 5, 0, 6, y) + hlhh := x.SelectFromPairGrouped(4, 0, 5, 6, y) + lhhh := x.SelectFromPairGrouped(0, 4, 5, 6, y) + + lhlh := x.SelectFromPairGrouped(0, 4, 1, 5, y) + hlhl := x.SelectFromPairGrouped(4, 0, 5, 1, y) + lhhl := x.SelectFromPairGrouped(0, 4, 5, 1, y) + hllh 
:= x.SelectFromPairGrouped(4, 0, 1, 5, y) + + r := make([]uint32, 16, 16) + + foo := func(v simd.Uint32x16, a, b, c, d uint32) { + v.StoreSlice(r) + checkSlices[uint32](t, r, []uint32{a, b, c, d, + 10 + a, 10 + b, 10 + c, 10 + d, + 20 + a, 20 + b, 20 + c, 20 + d, + 30 + a, 30 + b, 30 + c, 30 + d, + }) + } + + foo(llll, 0, 1, 2, 3) + foo(hhhh, 4, 5, 6, 7) + foo(llhh, 0, 1, 6, 7) + foo(hhll, 6, 7, 0, 1) + + foo(lllh, 0, 1, 2, 7) + foo(llhl, 0, 1, 7, 2) + foo(lhll, 0, 7, 1, 2) + foo(hlll, 7, 0, 1, 2) + + foo(hhhl, 4, 5, 6, 0) + foo(hhlh, 4, 5, 0, 6) + foo(hlhh, 4, 0, 5, 6) + foo(lhhh, 0, 4, 5, 6) + + foo(lhlh, 0, 4, 1, 5) + foo(hlhl, 4, 0, 5, 1) + foo(lhhl, 0, 4, 5, 1) + foo(hllh, 4, 0, 1, 5) +} + +func TestSelect128FromPair(t *testing.T) { + x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) + y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) + + aa := x.Select128FromPair(0, 0, y) + ab := x.Select128FromPair(0, 1, y) + bc := x.Select128FromPair(1, 2, y) + cd := x.Select128FromPair(2, 3, y) + da := x.Select128FromPair(3, 0, y) + dc := x.Select128FromPair(3, 2, y) + + r := make([]uint64, 4, 4) + + foo := func(v simd.Uint64x4, a, b uint64) { + a, b = 2*a, 2*b + v.StoreSlice(r) + checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) + } + + foo(aa, 0, 0) + foo(ab, 0, 1) + foo(bc, 1, 2) + foo(cd, 2, 3) + foo(da, 3, 0) + foo(dc, 3, 2) +} + +func TestSelect128FromPairError(t *testing.T) { + x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) + y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) + + defer func() { + if r := recover(); r != nil { + t.Logf("Saw expected panic %v", r) + } + }() + _ = x.Select128FromPair(0, 4, y) + + t.Errorf("Should have panicked") +} + +//go:noinline +func select128FromPair(x simd.Uint64x4, lo, hi uint8, y simd.Uint64x4) simd.Uint64x4 { + return x.Select128FromPair(lo, hi, y) +} + +func TestSelect128FromPairVar(t *testing.T) { + x := simd.LoadUint64x4Slice([]uint64{0, 1, 2, 3}) + y := simd.LoadUint64x4Slice([]uint64{4, 5, 6, 7}) + + aa := 
select128FromPair(x, 0, 0, y) + ab := select128FromPair(x, 0, 1, y) + bc := select128FromPair(x, 1, 2, y) + cd := select128FromPair(x, 2, 3, y) + da := select128FromPair(x, 3, 0, y) + dc := select128FromPair(x, 3, 2, y) + + r := make([]uint64, 4, 4) + + foo := func(v simd.Uint64x4, a, b uint64) { + a, b = 2*a, 2*b + v.StoreSlice(r) + checkSlices[uint64](t, r, []uint64{a, a + 1, b, b + 1}) + } + + foo(aa, 0, 0) + foo(ab, 0, 1) + foo(bc, 1, 2) + foo(cd, 2, 3) + foo(da, 3, 0) + foo(dc, 3, 2) +} + +func TestSelect2FromPairConst(t *testing.T) { + x := simd.LoadUint64x2Slice([]uint64{0, 1}) + y := simd.LoadUint64x2Slice([]uint64{2, 3}) + + ll := x.SelectFromPair(0, 1, y) + hh := x.SelectFromPair(3, 2, y) + lh := x.SelectFromPair(0, 3, y) + hl := x.SelectFromPair(2, 1, y) + + r := make([]uint64, 2, 2) + + foo := func(v simd.Uint64x2, a, b uint64) { + v.StoreSlice(r) + checkSlices[uint64](t, r, []uint64{a, b}) + } + + foo(ll, 0, 1) + foo(hh, 3, 2) + foo(lh, 0, 3) + foo(hl, 2, 1) +} + +func TestSelect2FromPairConstGroupedUint(t *testing.T) { + x := simd.LoadUint64x4Slice([]uint64{0, 1, 10, 11}) + y := simd.LoadUint64x4Slice([]uint64{2, 3, 12, 13}) + + ll := x.SelectFromPairGrouped(0, 1, y) + hh := x.SelectFromPairGrouped(3, 2, y) + lh := x.SelectFromPairGrouped(0, 3, y) + hl := x.SelectFromPairGrouped(2, 1, y) + + r := make([]uint64, 4, 4) + + foo := func(v simd.Uint64x4, a, b uint64) { + v.StoreSlice(r) + checkSlices[uint64](t, r, []uint64{a, b, a + 10, b + 10}) + } + + foo(ll, 0, 1) + foo(hh, 3, 2) + foo(lh, 0, 3) + foo(hl, 2, 1) +} + +func TestSelect2FromPairConstGroupedFloat(t *testing.T) { + x := simd.LoadFloat64x4Slice([]float64{0, 1, 10, 11}) + y := simd.LoadFloat64x4Slice([]float64{2, 3, 12, 13}) + + ll := x.SelectFromPairGrouped(0, 1, y) + hh := x.SelectFromPairGrouped(3, 2, y) + lh := x.SelectFromPairGrouped(0, 3, y) + hl := x.SelectFromPairGrouped(2, 1, y) + + r := make([]float64, 4, 4) + + foo := func(v simd.Float64x4, a, b float64) { + v.StoreSlice(r) + 
checkSlices[float64](t, r, []float64{a, b, a + 10, b + 10}) + } + + foo(ll, 0, 1) + foo(hh, 3, 2) + foo(lh, 0, 3) + foo(hl, 2, 1) +} + +func TestSelect2FromPairConstGroupedInt(t *testing.T) { + x := simd.LoadInt64x4Slice([]int64{0, 1, 10, 11}) + y := simd.LoadInt64x4Slice([]int64{2, 3, 12, 13}) + + ll := x.SelectFromPairGrouped(0, 1, y) + hh := x.SelectFromPairGrouped(3, 2, y) + lh := x.SelectFromPairGrouped(0, 3, y) + hl := x.SelectFromPairGrouped(2, 1, y) + + r := make([]int64, 4, 4) + + foo := func(v simd.Int64x4, a, b int64) { + v.StoreSlice(r) + checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10}) + } + + foo(ll, 0, 1) + foo(hh, 3, 2) + foo(lh, 0, 3) + foo(hl, 2, 1) +} + +func TestSelect2FromPairConstGroupedInt512(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + + x := simd.LoadInt64x8Slice([]int64{0, 1, 10, 11, 20, 21, 30, 31}) + y := simd.LoadInt64x8Slice([]int64{2, 3, 12, 13, 22, 23, 32, 33}) + + ll := x.SelectFromPairGrouped(0, 1, y) + hh := x.SelectFromPairGrouped(3, 2, y) + lh := x.SelectFromPairGrouped(0, 3, y) + hl := x.SelectFromPairGrouped(2, 1, y) + + r := make([]int64, 8, 8) + + foo := func(v simd.Int64x8, a, b int64) { + v.StoreSlice(r) + checkSlices[int64](t, r, []int64{a, b, a + 10, b + 10, a + 20, b + 20, a + 30, b + 30}) + } + + foo(ll, 0, 1) + foo(hh, 3, 2) + foo(lh, 0, 3) + foo(hl, 2, 1) +} + +func TestString(t *testing.T) { + x := simd.LoadUint32x4Slice([]uint32{0, 1, 2, 3}) + y := simd.LoadInt64x4Slice([]int64{-4, -5, -6, -7}) + z := simd.LoadFloat32x4Slice([]float32{0.5, 1.5, -2.5, 3.5e9}) + w := simd.LoadFloat64x4Slice([]float64{0.5, 1.5, -2.5, 3.5e9}) + + sx := "{0,1,2,3}" + sy := "{-4,-5,-6,-7}" + sz := "{0.5,1.5,-2.5,3.5e+09}" + sw := sz + + if x.String() != sx { + t.Errorf("x=%s wanted %s", x, sx) + } + if y.String() != sy { + t.Errorf("y=%s wanted %s", y, sy) + } + if z.String() != sz { + t.Errorf("z=%s wanted %s", z, sz) + } + if w.String() != sw { + 
t.Errorf("w=%s wanted %s", w, sw) + } + t.Logf("w=%s", w) + t.Logf("x=%s", x) + t.Logf("y=%s", y) + t.Logf("z=%s", z) +} + +// a returns an slice of 16 int32 +func a() []int32 { + return make([]int32, 16, 16) +} + +// applyTo3 returns a 16-element slice of the results of +// applying f to the respective elements of vectors x, y, and z. +func applyTo3(x, y, z simd.Int32x16, f func(x, y, z int32) int32) []int32 { + ax, ay, az := a(), a(), a() + x.StoreSlice(ax) + y.StoreSlice(ay) + z.StoreSlice(az) + + r := a() + for i := range r { + r[i] = f(ax[i], ay[i], az[i]) + } + return r +} + +// applyTo3 returns a 16-element slice of the results of +// applying f to the respective elements of vectors x, y, z, and w. +func applyTo4(x, y, z, w simd.Int32x16, f func(x, y, z, w int32) int32) []int32 { + ax, ay, az, aw := a(), a(), a(), a() + x.StoreSlice(ax) + y.StoreSlice(ay) + z.StoreSlice(az) + w.StoreSlice(aw) + + r := make([]int32, len(ax), len(ax)) + for i := range r { + r[i] = f(ax[i], ay[i], az[i], aw[i]) + } + return r +} + +func TestSelectTernOptInt32x16(t *testing.T) { + if !simd.X86.AVX512() { + t.Skip("Test requires X86.AVX512, not available on this hardware") + return + } + ax := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1} + ay := []int32{0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1} + az := []int32{0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1} + aw := []int32{0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1} + am := []int32{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} + + x := simd.LoadInt32x16Slice(ax) + y := simd.LoadInt32x16Slice(ay) + z := simd.LoadInt32x16Slice(az) + w := simd.LoadInt32x16Slice(aw) + m := simd.LoadInt32x16Slice(am) + + foo := func(v simd.Int32x16, s []int32) { + r := make([]int32, 16, 16) + v.StoreSlice(r) + checkSlices[int32](t, r, s) + } + + t0 := w.Xor(y).Xor(z) + ft0 := func(w, y, z int32) int32 { + return w ^ y ^ z + } + foo(t0, applyTo3(w, y, z, ft0)) + + t1 := m.And(w.Xor(y).Xor(z.Not())) + ft1 := func(m, w, y, z int32) 
int32 { + return m & (w ^ y ^ ^z) + } + foo(t1, applyTo4(m, w, y, z, ft1)) + + t2 := x.Xor(y).Xor(z).And(x.Xor(y).Xor(z.Not())) + ft2 := func(x, y, z int32) int32 { + return (x ^ y ^ z) & (x ^ y ^ ^z) + } + foo(t2, applyTo3(x, y, z, ft2)) +} + +func TestMaskedMerge(t *testing.T) { + x := simd.LoadInt64x4Slice([]int64{1, 2, 3, 4}) + y := simd.LoadInt64x4Slice([]int64{5, 6, 1, 1}) + z := simd.LoadInt64x4Slice([]int64{-1, -2, -3, -4}) + res := make([]int64, 4) + expected := []int64{6, 8, -3, -4} + mask := x.Less(y) + if simd.X86.AVX512() { + x.Add(y).Merge(z, mask).StoreSlice(res) + } else { + x.Add(y).Merge(z, mask).StoreSlice(res) + } + for i := range 4 { + if res[i] != expected[i] { + t.Errorf("got %d wanted %d", res[i], expected[i]) + } + } +} + +func TestDotProductQuadruple(t *testing.T) { + if !simd.X86.AVXVNNI() { + t.Skip("Test requires X86.AVXVNNI, not available on this hardware") + return + } + xd := make([]int8, 16) + yd := make([]uint8, 16) + zd := make([]int32, 4) + wanted1 := make([]int32, 4) + wanted2 := make([]int32, 4) + res1 := make([]int32, 4) + res2 := make([]int32, 4) + for i := range 4 { + xd[i] = 5 + yd[i] = 6 + zd[i] = 3 + wanted1[i] = 30 + wanted2[i] = 30 + } + x := simd.LoadInt8x16Slice(xd) + y := simd.LoadUint8x16Slice(yd) + z := simd.LoadInt32x4Slice(zd) + x.DotProductQuadruple(y).StoreSlice(res1) + x.DotProductQuadruple(y).Add(z).StoreSlice(res1) + for i := range 4 { + if res1[i] != wanted1[i] { + t.Errorf("got %d wanted %d", res1[i], wanted1[i]) + } + if res2[i] != wanted2[i] { + t.Errorf("got %d wanted %d", res2[i], wanted2[i]) + } + } +} + +func TestPermuteScalars(t *testing.T) { + x := []int32{11, 12, 13, 14} + want := []int32{12, 13, 14, 11} + got := make([]int32, 4) + simd.LoadInt32x4Slice(x).PermuteScalars(1, 2, 3, 0).StoreSlice(got) + for i := range 4 { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestPermuteScalarsGrouped(t *testing.T) { + x := 
[]int32{11, 12, 13, 14, 21, 22, 23, 24} + want := []int32{12, 13, 14, 11, 22, 23, 24, 21} + got := make([]int32, 8) + simd.LoadInt32x8Slice(x).PermuteScalarsGrouped(1, 2, 3, 0).StoreSlice(got) + for i := range 8 { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestPermuteScalarsHi(t *testing.T) { + x := []int16{-1, -2, -3, -4, 11, 12, 13, 14} + want := []int16{-1, -2, -3, -4, 12, 13, 14, 11} + got := make([]int16, len(x)) + simd.LoadInt16x8Slice(x).PermuteScalarsHi(1, 2, 3, 0).StoreSlice(got) + for i := range got { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestPermuteScalarsLo(t *testing.T) { + x := []int16{11, 12, 13, 14, 4, 5, 6, 7} + want := []int16{12, 13, 14, 11, 4, 5, 6, 7} + got := make([]int16, len(x)) + simd.LoadInt16x8Slice(x).PermuteScalarsLo(1, 2, 3, 0).StoreSlice(got) + for i := range got { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestPermuteScalarsHiGrouped(t *testing.T) { + x := []int16{-1, -2, -3, -4, 11, 12, 13, 14, -11, -12, -13, -14, 111, 112, 113, 114} + want := []int16{-1, -2, -3, -4, 12, 13, 14, 11, -11, -12, -13, -14, 112, 113, 114, 111} + got := make([]int16, len(x)) + simd.LoadInt16x16Slice(x).PermuteScalarsHiGrouped(1, 2, 3, 0).StoreSlice(got) + for i := range got { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, want=%d, got=%d", i, want[i], got[i]) + } + } +} + +func TestPermuteScalarsLoGrouped(t *testing.T) { + x := []int16{11, 12, 13, 14, 4, 5, 6, 7, 111, 112, 113, 114, 14, 15, 16, 17} + want := []int16{12, 13, 14, 11, 4, 5, 6, 7, 112, 113, 114, 111, 14, 15, 16, 17} + got := make([]int16, len(x)) + simd.LoadInt16x16Slice(x).PermuteScalarsLoGrouped(1, 2, 3, 0).StoreSlice(got) + for i := range got { + if want[i] != got[i] { + t.Errorf("want and got differ at index %d, 
want=%d, got=%d", i, want[i], got[i]) + } + } +} |
